def calculate_region(arg):
    """Build (or reload) the tree for one sequence window and score it.

    ``arg`` is a single 7-tuple so the function can be handed to a
    ``map``-style call:

        (gname, sub, prot, start, win, nseqs, trop_dict)

    ``gname``, ``sub``, ``prot``, ``start`` and ``win`` identify the window
    and are used to name the cache files under ``quicktrees/``.  ``nseqs`` is
    passed to ``TreeingTools.phylip_tree_collapse_unique`` and ``trop_dict``
    to the two scoring helpers.

    Returns a 6-tuple ``(gname, sub, prot, win, start, benj_res)``.  Note
    that ``win`` comes *before* ``start`` in the result, the reverse of the
    input order.  ``benj_res`` is either the object returned by
    ``TreeingTools.check_distance_pvals`` with ``'AI'``, ``'AI-pval'`` and
    ``'AI-null'`` entries added, or a short message string describing why
    the window could not be processed.
    """
    gname, sub, prot, start, win, nseqs, trop_dict = arg

    def _result(payload):
        # Every exit path shares the same identifying prefix.
        return gname, sub, prot, win, start, payload

    treename = 'quicktrees/%s-%s-%s-%i-%i.tree' % (gname, sub, prot, start, win)
    matfname = 'quicktrees/%s-%s-%s-%i-%i.pkl' % (gname, sub, prot, start, win)

    if os.path.exists(treename):
        # Only the tree file is checked: the write path below stores the
        # matrix first and the tree last, so the tree acts as the marker
        # for a finished cache entry.
        # NOTE(review): the pickle is a local cache file; do not point this
        # at data from an untrusted source.
        with open(matfname) as handle:
            dmat = pickle.load(handle)
        with open(treename) as handle:
            tree = dendropy.Tree.get_from_stream(handle, 'newick')
    else:
        # 'LTR' windows are treated as nucleotide data, everything else as
        # protein.
        alphabet = generic_dna if prot == 'LTR' else generic_protein

        try:
            tree, dmat = TreeingTools.phylip_tree_collapse_unique(nseqs, alphabet=alphabet)
        except ValueError:
            return _result('Too few unique sequences to process')
        except Exception:
            # Deliberately broad so one bad window does not abort a batch,
            # but KeyboardInterrupt / SystemExit are allowed to propagate.
            return _result('uncaught exception in dist-mat')

        print('writing')
        # NOTE(review): text mode matches the original behaviour; binary
        # mode would be needed for a Python 3 port -- confirm before
        # changing, since existing cache files were written this way.
        with open(matfname, 'w') as handle:
            pickle.dump(dmat, handle)
        with open(treename, 'w') as handle:
            tree.write_to_stream(handle, 'newick')

    try:
        benj_res = TreeingTools.check_distance_pvals(dmat, trop_dict, nreps=50)
    except AssertionError:
        return _result('too few groups')
    except Exception:
        return _result('uncaught exception')

    try:
        out = TreeingTools.evaluate_association_index(tree, trop_dict)
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = out
    except Exception:
        # Keep the distance results even when the association index fails.
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = ('error', 'error', 'error')

    return _result(benj_res)
def rolling_tree_apply(tup):
    """Write a FastTree newick file for one window unless it already exists.

    ``tup`` is a 3-tuple ``(group_series, seq_series, kwargs)``:

    * ``group_series`` -- group labels; missing entries are dropped and the
      remainder is used to restrict ``seq_series`` to shared index values.
    * ``seq_series`` -- sequence data.  It is filtered with
      ``dropna(thresh=5)`` and reduced row-wise with ``append_seq``
      (presumably a DataFrame despite the name -- confirm against caller).
    * ``kwargs`` -- mapping with at least ``'sub'``, ``'Prot'``, ``'Start'``
      and ``'WinSize'``; these build the output file name, and ``'Prot'``
      also selects the alphabet.

    Always returns ``True``, both when the tree file already existed and
    after writing a new one.  Exceptions from tree building or file writing
    are not caught here.
    """
    group_series, seq_series, kwargs = tup

    fname = '/home/will/SubCData/Trees/Tree-%(sub)s-%(Prot)s-%(Start)i-%(WinSize)i.newick' % kwargs
    if os.path.exists(fname):
        # Already computed on an earlier run; nothing to do.
        return True

    # 'LTR' windows are treated as nucleotide data, everything else as
    # protein.
    alpha = generic_dna if kwargs['Prot'] == 'LTR' else generic_protein

    # Keep rows with at least 5 non-missing values, then keep only the rows
    # that also have a group label (inner join on the index).
    seq_series = seq_series.dropna(thresh=5)
    vseq, vgroup = seq_series.align(group_series.dropna(), join='inner', axis=0)

    # Sorted (index, sequence) pairs give the tree builder a stable order.
    nseq_ser = vseq.apply(append_seq, axis=1)
    nseqs = sorted(nseq_ser.to_dict().items())

    tree = TreeingTools.run_FastTree(nseqs, alphabet=alpha, uniq_seqs=True)
    with open(fname, 'w') as handle:
        tree.write(handle, schema='newick')

    return True
# <codecell>

# <codecell>

# Collect one (label, sequence) pair per row that has both an aligned gp120
# sequence and a tropism call.  The label is the row key joined to the
# tropism value with '-'.
gp120_rows = wanted_data[['gp120-seq-align', 'Tropism']].dropna()
tres = []
for key, row in gp120_rows.iterrows():
    oname = key + '-' + row['Tropism']
    tres.append((oname, ''.join(row['gp120-seq-align'])))

# <codecell>

# Build the tree and distance matrix for the labelled sequences.
tree, dmat = TreeingTools.phylip_tree_collapse_unique(
    tres,
    alphabet=generic_protein,
)

# <codecell>

# Save the tree in nexus format.
with open('gp120tree.nexus', 'w') as handle:
    tree.write_to_stream(handle, 'nexus')

# <codecell>

import networkx

# NOTE(review): 'gp120tree.dot' is not written anywhere in the visible
# cells; presumably it is produced from the nexus file by an external
# tool -- confirm.
with open('gp120tree.dot') as handle:
    new_tree = networkx.read_dot(handle)

# <codecell>

# Spring layout computed in 100 dimensions.
pos = networkx.spring_layout(new_tree, dim=100)