def calculate_region(arg):
    """Build (or load from cache) a tree for one window and run both tests.

    NOTE(review): a second function with this same name appears later in
    this source; if both live in one module the later definition wins.

    Parameters
    ----------
    arg : tuple
        ``(prot, start, win, nseqs, trop_dict)`` packed into a single
        argument. ``prot``, ``start`` and ``win`` are used to build the
        cache file name; ``nseqs`` is handed to
        ``TreeingTools.phylip_tree`` and ``trop_dict`` to the two
        statistical tests.

    Returns
    -------
    tuple
        ``(prot, win, start, bats_res, benj_res)`` -- note that ``win``
        comes *before* ``start`` here, unlike in ``arg``. Either result is
        ``None`` when the corresponding analysis raised.
    """
    prot, start, win, nseqs, trop_dict = arg
    fname = 'phyliptrees/%s-%i-%i.tree' % (prot, start, win)

    if os.path.exists(fname):
        # Cached run: the consensus tree lives in ``fname`` and the tree
        # list in ``fname + 'set'``.
        contree = dendropy.Tree.get_from_path(fname, 'nexus')
        treeset = dendropy.TreeList.get_from_path(fname + 'set', 'nexus')
    else:
        # 'LTR' is the only region treated as DNA; everything else is
        # treated as protein.
        alphabet = generic_dna if prot == 'LTR' else generic_protein
        contree = TreeingTools.phylip_tree(nseqs, alphabet=alphabet)
        treeset = dendropy.TreeList([contree])
        contree.write_to_path(fname, 'nexus')
        treeset.write_to_path(fname + 'set', 'nexus')

    # Both analyses are best-effort: a failure in one must not prevent the
    # other from being reported. ``except Exception`` (rather than a bare
    # ``except``) lets KeyboardInterrupt / SystemExit propagate so a long
    # batch run can still be interrupted.
    try:
        bats_res = TreeingTools.run_bats(treeset, trop_dict, nreps=1000)
    except Exception:
        bats_res = None

    try:
        dmat = TreeingTools.get_pairwise_distances(contree)
        benj_res = TreeingTools.check_distance_pvals(dmat, trop_dict, nreps=50)
    except Exception:
        benj_res = None

    return prot, win, start, bats_res, benj_res
def calculate_region(arg):
    """Compute (or load from cache) a tree and distance matrix, then test it.

    Parameters
    ----------
    arg : tuple
        ``(gname, sub, prot, start, win, nseqs, trop_dict)`` packed into a
        single argument. The first five fields name the cache files under
        ``quicktrees/``; ``nseqs`` is handed to
        ``TreeingTools.phylip_tree_collapse_unique`` and ``trop_dict`` to
        the statistical tests.

    Returns
    -------
    tuple
        ``(gname, sub, prot, win, start, benj_res)`` -- note that ``win``
        comes *before* ``start`` here, unlike in ``arg``. ``benj_res`` is
        either a short error string describing which stage failed, or the
        object returned by ``TreeingTools.check_distance_pvals`` with the
        keys ``'AI'``, ``'AI-pval'`` and ``'AI-null'`` added (set to
        ``'error'`` if the association-index step raised).
    """
    gname, sub, prot, start, win, nseqs, trop_dict = arg
    key = (gname, sub, prot, start, win)
    treename = 'quicktrees/%s-%s-%s-%i-%i.tree' % key
    matfname = 'quicktrees/%s-%s-%s-%i-%i.pkl' % key

    def result(benj_res):
        # Single place that fixes the order of the returned tuple.
        return gname, sub, prot, win, start, benj_res

    if os.path.exists(treename):
        # The tree file is written after the matrix file below, so its
        # presence is used as the "cache is complete" marker.
        # NOTE(review): pickle.load can execute arbitrary code; this is
        # only acceptable because the file is a cache written by this
        # function. Pickles must be opened in binary mode.
        with open(matfname, 'rb') as handle:
            dmat = pickle.load(handle)
        with open(treename) as handle:
            tree = dendropy.Tree.get_from_stream(handle, 'newick')
    else:
        # 'LTR' is the only region treated as DNA; everything else is
        # treated as protein.
        alphabet = generic_dna if prot == 'LTR' else generic_protein
        try:
            tree, dmat = TreeingTools.phylip_tree_collapse_unique(
                nseqs, alphabet=alphabet)
        except ValueError:
            return result('Too few unique sequences to process')
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate.
            return result('uncaught exception in dist-mat')

        print('writing')
        with open(matfname, 'wb') as handle:
            pickle.dump(dmat, handle)
        with open(treename, 'w') as handle:
            tree.write_to_stream(handle, 'newick')

    try:
        benj_res = TreeingTools.check_distance_pvals(dmat, trop_dict, nreps=50)
    except AssertionError:
        return result('too few groups')
    except Exception:
        return result('uncaught exception')

    # The association index is optional extra information: a failure here
    # is recorded in the result instead of discarding the p-values above.
    try:
        out = TreeingTools.evaluate_association_index(tree, trop_dict)
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = out
    except Exception:
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = (
            'error', 'error', 'error')

    return result(benj_res)
def rolling_tree_apply(tup):
    """Build a FastTree tree for one window and write it to disk as newick.

    Parameters
    ----------
    tup : tuple
        ``(group_series, seq_series, kwargs)`` packed into a single
        argument. ``kwargs`` must provide the keys ``'sub'``, ``'Prot'``,
        ``'Start'`` and ``'WinSize'``, which are used to build the output
        file name. ``group_series`` and ``seq_series`` are presumably
        pandas objects sharing an index (``dropna`` / ``align`` / ``apply``
        are called on them) -- TODO confirm against the caller.

    Returns
    -------
    bool
        Always ``True``: either the output file already existed (nothing
        is recomputed) or the tree was built and written. Failures
        propagate as exceptions.
    """
    group_series, seq_series, kwargs = tup
    fname = '/home/will/SubCData/Trees/Tree-%(sub)s-%(Prot)s-%(Start)i-%(WinSize)i.newick' % kwargs
    if os.path.exists(fname):
        # An existing output file is treated as "already done".
        return True

    # 'LTR' is the only region treated as DNA; everything else is treated
    # as protein.
    alpha = generic_dna if kwargs['Prot'] == 'LTR' else generic_protein

    seq_series = seq_series.dropna(thresh=5)
    # Only the aligned sequences are needed below; the inner join against
    # the non-null groups restricts them to entries that also have a group.
    vseq, _ = seq_series.align(group_series.dropna(), join='inner', axis=0)
    nseq_ser = vseq.apply(append_seq, axis=1)
    # Sorted so the input order handed to FastTree is deterministic.
    nseqs = sorted(nseq_ser.to_dict().items())

    tree = TreeingTools.run_FastTree(nseqs, alphabet=alpha, uniq_seqs=True)
    with open(fname, 'w') as handle:
        tree.write(handle, schema='newick')
    return True