def write_tree_rates(rate_dict, alignment_dict, cdna_dicts, out_filename, xfold_only, xfold_degeneracy):
    """Write one tab-separated summary row per ORF in rate_dict to out_filename.

    A header line is written first, followed by one row per accepted ORF.
    Columns:
        orf   -- the rate_dict key
        nal   -- number of leaves of the tree parsed from the ORF's tree string
        cor, pcor, ncor -- the (r, p, n) triple returned by
                 get_rate_correlation_windowed
        ns, ss -- sums of the two sequences returned alongside (r, p, n)
        msid  -- align_stats(aligned_prots, sequence_identity, stats.Mean)
        mfal  -- align_stats(aligned_prots, frac_aligned, min)
        adn   -- sum(ns) divided by len(dns)

    ORFs with mfal < 0.5 are reported on stdout and skipped.  ORFs for which
    get_rate_correlation_windowed raises stats.StatsError are skipped silently.

    Args:
        rate_dict: mapping orf -> (dns, dss, tree), where tree is a string
            accepted by phylip.parse_tree.
        alignment_dict: mapping orf -> (length, spec_orf_list, aligned_prots);
            it is indexed with every key of rate_dict.
        cdna_dicts: passed through to get_rate_correlation_windowed.
        out_filename: path of the output file; opened in 'w' mode.
        xfold_only: passed through to get_rate_correlation_windowed.
        xfold_degeneracy: passed through to get_rate_correlation_windowed.

    Returns:
        None.

    Raises:
        KeyError: if an ORF of rate_dict is missing from alignment_dict.
    """
    header = "orf\tnal\tcor\tpcor\tncor\tns\tss\tmsid\tmfal\tadn\n"
    row_format = "%s\t%d\t%1.3f\t%1.3f\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n"

    # The context manager guarantees the file is closed even if one of the
    # helpers raises part-way through the loop.
    with open(out_filename, 'w') as outfile:
        outfile.write(header)
        for orf in rate_dict:
            (_length, spec_orf_list, aligned_prots) = alignment_dict[orf]
            (dns, dss, tree) = rate_dict[orf]
            root = phylip.parse_tree(tree)
            species = root.leaves()
            msid = align_stats(aligned_prots, sequence_identity, stats.Mean)
            mfal = align_stats(aligned_prots, frac_aligned, min)
            if mfal < 0.5:
                print("# rejected orf (mfal,msid) %s (%1.2f,%1.2f)" % (orf, mfal, msid))
                continue
            num_genes = len(species)

            # Compute correlation between nonsyn-syn changes.  Only this call
            # is guarded: a StatsError means no row can be produced for the
            # ORF, so it is skipped on a best-effort basis.
            try:
                ((r, p, n), ns, ss) = get_rate_correlation_windowed(
                    dns, dss, aligned_prots, cdna_dicts, spec_orf_list,
                    xfold_only, xfold_degeneracy)
            except stats.StatsError:
                continue

            outfile.write(row_format % (
                orf, num_genes, r, p, n, sum(ns), sum(ss), msid, mfal,
                sum(ns) / float(len(dns))))
            # Flush after every row so partial results are on disk while the
            # remaining ORFs are processed.
            outfile.flush()
def write_tree_rates(rate_dict, alignment_dict, cdna_dicts, out_filename, xfold_only, xfold_degeneracy):
    """Write one tab-separated summary row per ORF in rate_dict to out_filename.

    A header line is written first, followed by one row per accepted ORF.
    Columns:
        orf   -- the rate_dict key
        nal   -- number of leaves of the tree parsed from the ORF's tree string
        cor, pcor, ncor -- the (r, p, n) triple returned by
                 get_rate_correlation_windowed
        ns, ss -- sums of the two sequences returned alongside (r, p, n)
        msid  -- align_stats(aligned_prots, sequence_identity, stats.Mean)
        mfal  -- align_stats(aligned_prots, frac_aligned, min)
        adn   -- sum(ns) divided by len(dns)

    ORFs with mfal < 0.5 are reported on stdout and skipped.  ORFs for which
    get_rate_correlation_windowed raises stats.StatsError are skipped silently.

    Args:
        rate_dict: mapping orf -> (dns, dss, tree), where tree is a string
            accepted by phylip.parse_tree.
        alignment_dict: mapping orf -> (length, spec_orf_list, aligned_prots);
            it is indexed with every key of rate_dict.
        cdna_dicts: passed through to get_rate_correlation_windowed.
        out_filename: path of the output file; opened in 'w' mode.
        xfold_only: passed through to get_rate_correlation_windowed.
        xfold_degeneracy: passed through to get_rate_correlation_windowed.

    Returns:
        None.

    Raises:
        KeyError: if an ORF of rate_dict is missing from alignment_dict.
    """
    header = "orf\tnal\tcor\tpcor\tncor\tns\tss\tmsid\tmfal\tadn\n"
    row_format = "%s\t%d\t%1.3f\t%1.3f\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n"

    # The context manager guarantees the file is closed even if one of the
    # helpers raises part-way through the loop.
    with open(out_filename, 'w') as outfile:
        outfile.write(header)
        for orf in rate_dict:
            (_length, spec_orf_list, aligned_prots) = alignment_dict[orf]
            (dns, dss, tree) = rate_dict[orf]
            root = phylip.parse_tree(tree)
            species = root.leaves()
            msid = align_stats(aligned_prots, sequence_identity, stats.Mean)
            mfal = align_stats(aligned_prots, frac_aligned, min)
            if mfal < 0.5:
                print("# rejected orf (mfal,msid) %s (%1.2f,%1.2f)" % (orf, mfal, msid))
                continue
            num_genes = len(species)

            # Compute correlation between nonsyn-syn changes.  Only this call
            # is guarded: a StatsError means no row can be produced for the
            # ORF, so it is skipped on a best-effort basis.
            try:
                ((r, p, n), ns, ss) = get_rate_correlation_windowed(
                    dns, dss, aligned_prots, cdna_dicts, spec_orf_list,
                    xfold_only, xfold_degeneracy)
            except stats.StatsError:
                continue

            outfile.write(row_format % (
                orf, num_genes, r, p, n, sum(ns), sum(ss), msid, mfal,
                sum(ns) / float(len(dns))))
            # Flush after every row so partial results are on disk while the
            # remaining ORFs are processed.
            outfile.flush()
def get_tree_rates(ortho_dict, alignment_dict, cdna_dicts, tree_string, begin_index, end_index):
    """Select the genes of ortho_dict whose alignment covers every tree species.

    The tree is parsed from tree_string with phylip.parse_tree and the names
    of its leaves form the required species set.  Each gene of ortho_dict is
    looked up in alignment_dict, whose values are
    (length, corr_keys, aligned_prots) tuples with corr_keys being a sequence
    of (species, orf) pairs.  Genes missing from alignment_dict are reported
    on stdout and skipped; genes whose alignment species include all tree
    species are appended to the local list `keys`.

    NOTE(review): as visible here the function builds `keys` but neither
    returns nor uses it, and cdna_dicts, begin_index, end_index, n_genes and
    rate_ancestor_cache are never read -- presumably the remainder of the
    function is missing; confirm against the original file.
    """
    tree = phylip.parse_tree(tree_string)
    tree_species = [leaf.name for leaf in tree.leaves()]
    n_genes = 0
    # Individual genes
    rate_ancestor_cache = {}

    # Assemble gene list
    keys = []
    required_species = set(tree_species)
    missing_message = "# Couldn't find alignments for"
    for gene in ortho_dict.keys():
        try:
            (length, corr_keys, aligned_prots) = alignment_dict[gene]
            alignment_species = [spec for (spec, orf) in corr_keys]
        except KeyError as ke:
            print("%s %s" % (missing_message, ke))
            continue
        # Must have concordance between tree species and alignment species:
        # every tree species has to appear in the alignment.
        if required_species.issubset(alignment_species):
            keys.append(gene)
def get_tree_rates(ortho_dict, alignment_dict, cdna_dicts, tree_string, begin_index, end_index):
    """Select the genes of ortho_dict whose alignment covers every tree species.

    The tree is parsed from tree_string with phylip.parse_tree and the names
    of its leaves form the required species set.  Each gene of ortho_dict is
    looked up in alignment_dict, whose values are
    (length, corr_keys, aligned_prots) tuples with corr_keys being a sequence
    of (species, orf) pairs.  Genes missing from alignment_dict are reported
    on stdout and skipped; genes whose alignment species include all tree
    species are appended to the local list `keys`.

    NOTE(review): as visible here the function builds `keys` but neither
    returns nor uses it, and cdna_dicts, begin_index, end_index, n_genes and
    rate_ancestor_cache are never read -- presumably the remainder of the
    function is missing; confirm against the original file.
    """
    tree = phylip.parse_tree(tree_string)
    tree_species = [leaf.name for leaf in tree.leaves()]
    n_genes = 0
    # Individual genes
    rate_ancestor_cache = {}

    # Assemble gene list
    keys = []
    required_species = set(tree_species)
    missing_message = "# Couldn't find alignments for"
    for gene in ortho_dict.keys():
        try:
            (length, corr_keys, aligned_prots) = alignment_dict[gene]
            alignment_species = [spec for (spec, orf) in corr_keys]
        except KeyError as ke:
            print("%s %s" % (missing_message, ke))
            continue
        # Must have concordance between tree species and alignment species:
        # every tree species has to appear in the alignment.
        if required_species.issubset(alignment_species):
            keys.append(gene)