def rolling_tree_apply(tup, tree_dir='/home/will/SubCData/Trees'):
    """Build a FastTree phylogeny for one window and cache it as newick.

    The output file name is derived from ``kwargs``; when that file already
    exists the window is treated as done and nothing is recomputed.

    Parameters
    ----------
    tup : tuple
        ``(group_series, seq_series, kwargs)`` packed into a single argument.
        ``seq_series`` is filtered with ``dropna(thresh=5)``, inner-aligned
        on its index with the non-null entries of ``group_series``, and each
        remaining row is reduced with ``append_seq``.  (Assumes
        ``seq_series`` is a DataFrame, since ``apply`` is called with
        ``axis=1`` -- TODO confirm against the caller.)
        ``kwargs`` must provide the keys ``'sub'``, ``'Prot'``, ``'Start'``
        and ``'WinSize'`` used to name the file.
    tree_dir : str, optional
        Directory that holds the cached tree files.  The default keeps the
        location this function has always written to.

    Returns
    -------
    bool
        Always ``True``, whether the tree was freshly built or was already
        on disk.

    Raises
    ------
    KeyError
        If ``kwargs`` lacks one of the keys needed for the file name.
    """
    group_series, seq_series, kwargs = tup

    tree_name = 'Tree-%(sub)s-%(Prot)s-%(Start)i-%(WinSize)i.newick' % kwargs
    fname = os.path.join(tree_dir, tree_name)
    # The file on disk doubles as the "already done" marker for this window.
    if os.path.exists(fname):
        return True

    # Only the 'LTR' region is handled as nucleotide sequence.
    alpha = generic_dna if kwargs['Prot'] == 'LTR' else generic_protein

    seq_series = seq_series.dropna(thresh=5)
    # Inner join keeps only the rows that also have a non-null group value;
    # the aligned group values themselves are not needed to build the tree.
    vseq, _ = seq_series.align(group_series.dropna(), join='inner', axis=0)
    # Sorting gives FastTree its input in a deterministic order.
    nseqs = sorted(vseq.apply(append_seq, axis=1).to_dict().items())

    tree = TreeingTools.run_FastTree(nseqs, alphabet=alpha, uniq_seqs=True)
    with open(fname, 'w') as handle:
        tree.write(handle, schema='newick')
    return True
def test_fast_tree():
    """Yield every check that ``check_tree`` produces for a FastTree result.

    The tree is built by ``TreeingTools.run_FastTree`` from the sequences
    returned by ``tree_seqs()`` using the ``generic_dna`` alphabet.  This is
    a generator: each item from ``check_tree`` is yielded unchanged.
    """
    fast_tree = TreeingTools.run_FastTree(tree_seqs(), alphabet=generic_dna)
    for check in check_tree(fast_tree):
        yield check
# Keep only rows that are complete in the selected patient columns and in
# the score + sequence columns respectively.
wanted_pat = pat_data[cols.keys()].dropna()
wanted_scores = ltr_df_cp[score_cols + seq_cols].dropna()

# Concatenate each row's sequence columns into one string; the individual
# seq_cols are left in place next to the joined column.
wanted_scores['TFJoin'] = wanted_scores[seq_cols].apply(''.join, axis=1)

# Restrict both tables to their shared index, place them side by side and
# apply the ``cols`` renaming.
check_data = pd.concat(
    wanted_pat.align(wanted_scores, join='inner', axis=0),
    axis=1,
)
check_data = check_data.rename(columns=cols)

# Remaining gaps are filled from the per-column minimum of the score columns.
check_data = check_data.fillna(check_data[score_cols].min())

# Swap '-' and '/' for '_' in every column name.
ncols = {
    col: col.replace('-', '_').replace('/', '_')
    for col in check_data.columns
}
check_data = check_data.rename(columns=ncols)

# <codecell>

import TreeingTools

# One (index label, joined sequence) pair per row goes into FastTree.
tree = TreeingTools.run_FastTree(
    check_data['TFJoin'].to_dict().items(),
    alphabet=TreeingTools.generic_dna,
)

# <codecell>

import networkx as nx
from itertools import combinations
import csv

# NOTE(review): the target is named .nwk but the schema requested here is
# 'phylip' -- confirm this is the intended output format.
with open('ltr_tree.nwk', 'w') as handle:
    tree.write_to_stream(handle, schema='phylip', exclude_chars=True)

# <codecell>

# <codecell>