def printTreeWrapper(rootNode, newickOutput=False, outputFile=None):
    if newickOutput:
        if outputFile is None:
            print(newick.dumps(rootNode))
        else:
            with open(outputFile, 'a') as out:
                out.write(newick.dumps(rootNode) + "\n")
    else:
        print("Begin printing tree.")
        printTree(rootNode)
        print("End printing tree.")
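# A minimal usage sketch for printTreeWrapper above, assuming `import newick` at
# module level and that printTree() (only needed when newickOutput is False) is
# defined elsewhere. The tree string is illustrative.
import newick

root = newick.loads("((A:0.1,B:0.2)C:0.3,D:0.4)E;")[0]
printTreeWrapper(root, newickOutput=True)                        # print as Newick
printTreeWrapper(root, newickOutput=True, outputFile="out.nwk")  # append to a file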
def test_no_lengths_equiv(self):
    for ts in (
        self.all_nodes_samples_example(),
        self.only_internal_samples_example(),
        self.mixed_node_samples_example(),
    ):
        for t in ts.trees():
            newick_nolengths = t.newick(include_branch_lengths=False)
            newick_nolengths = newick.loads(newick_nolengths)[0]
            newick_lengths = t.newick()
            newick_lengths = newick.loads(newick_lengths)[0]
            for node in newick_lengths.walk():
                node.length = None
            assert newick.dumps(newick_nolengths) == newick.dumps(newick_lengths)
def rescale_newick(trees_str):
    import math
    trees = newick.loads(trees_str)
    lmin = float_info.max
    lmax = -float_info.max
    for tree in trees:
        for n in tree.walk():
            if n.length > lmax:
                lmax = n.length
            if n.length < lmin and n.length != 0:
                lmin = n.length
    factor = 1 / lmin  # derived from the smallest non-zero length, but not used below
    for tree in trees:
        for n in tree.walk():
            n.length = n.length * 4411532
            if n.length < 0.1:
                n.length = 0
            elif n.length <= 1:
                pass
            else:
                n.length = math.sqrt(n.length)
    return newick.dumps(trees)
def main(): """Just tests.""" #tree=newick.loads("((Dmel_CG7377:5.71073e-07,Dsim_Dsim\GD12794:0.426781)n1:0.0026795,(Dmel_CG33268:0.022453,(Dsim_Dsim\GD14314:0.015169,Dsec_Dsec\GM25283:0.029888)n3:0.079816)n2:0.0026795)n0;") #tree=newick.loads("(((((((((((((((((((((((((A,B)N1,C)N2,D)N3,E)N4,F)N5,G)N6,H)N7,I)N8,J)N9,K)N10,L)N11,M)N12,N)N13,O)N14,P)N15,Q)N16,R)N17,S)N18,T)N19,U)N20,V)N21,W)N22,X)N23,Y)N24,Z)N25;") #Example tree species_tree = read_tree( "../proteomes_repeats_removed/OrthoFinder/Results_Jan20/Species_Tree/SpeciesTree_rooted_node_labels.txt" ) sp_nnodes = calculate_nnodes(species_tree) species_tree_nodes = get_all_nodes(species_tree, sp_nnodes) tree = read_tree( "../proteomes_repeats_removed/OrthoFinder/Results_Jan20/Resolved_Gene_Trees/OG0000028_tree.txt" ) print(tree.ascii_art()) #tree = read_tree("../proteomes_repeats_removed/OrthoFinder/Results_Jan20/Resolved_Gene_Trees/OG0012151_tree.txt") nnodes = calculate_nnodes(tree) nodes = get_all_nodes(tree, nnodes, [], []) backup = newick.dumps(tree) for node in nodes: tree = newick.loads(backup) if is_duplication(node): #print(node) # do the old checky sides = node.descendants for i in range(len(sides)): if is_species_like(sides[i], species_tree_nodes): #print(sides[i].ascii_art()) #print("blammo") if node_with_deletion(sides[i], sides[(i + 1) % 2]): print("it's good") break
def reroot_tree(tree, outgroup):
    """Reroot the tree so that the outgroup is the outgroup.

    Arguments:
        tree - a tree in python newick format
        outgroup - a list of taxa that ought to make up the entirety of the outgroup

    Returns:
        rooted_tree - an ete3 tree rooted on the requested outgroup

    Requires: ete3
    """
    newick_string = newick.dumps(tree)
    tree = ete3.Tree(newick_string, format=1)
    print(tree)
    try:
        if len(outgroup) == 1:
            tree.set_outgroup(outgroup[0])
        else:
            mrca = tree.get_common_ancestor(outgroup)
            tree.set_outgroup(mrca)
    except:
        # Fall back to rooting on the complement of the requested outgroup,
        # e.g. when the requested taxa cannot be used directly under the current rooting.
        taxa = []
        for leaf in tree:
            taxa.append(leaf.name)
        outgroup = list(set(taxa) - set(outgroup))
        #print(outgroup)
        if len(outgroup) == 1:
            tree.set_outgroup(outgroup[0])
        else:
            mrca = tree.get_common_ancestor(outgroup)
            tree.set_outgroup(mrca)
    return tree
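# A minimal usage sketch for reroot_tree above (requires the newick and ete3
# packages; the tree string and taxon names are made up for illustration).
import newick

gene_tree = newick.loads("((A:1,B:1)n1:1,(C:1,D:1)n2:1);")[0]
rooted = reroot_tree(gene_tree, ["C", "D"])  # root on the clade containing C and D
print(rooted.write(format=1))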
def postorder_create(node, prefix, hal):
    if node.name is None:
        raise RuntimeError("Requires a tree with all ancestors labeled.")
    sys.stderr.write("working on node %r\n" % (node.name))
    c2h = prefix + '-' + node.name + '.c2h'
    hal_fa = prefix + '-' + node.name + '.hal.fa'
    # get outgroup list (everything in c2h except children / anc)
    outgroups = []
    for species_line in check_output("grep -E '^s' %s | cut -f 2 | uniq" % c2h,
                                     shell=True).splitlines():
        # strip ' marks on either side
        species = species_line[1:-1]
        if species != node.name and species not in [n.name for n in node.descendants]:
            outgroups.append(species)
    # get local newick string
    subtree = deepcopy(node)
    for child in subtree.descendants:
        child.descendants = []
    newick = dumps(subtree)
    # actually perform the addition
    cmd = ['halAppendCactusSubtree', c2h, hal_fa, newick, hal]
    if len(outgroups) > 0:
        cmd.extend(['--outgroups', ",".join(outgroups)])
    sys.stderr.write('Running command %r\n' % cmd)
    check_call(cmd)
    # recurse
    for child in node.descendants:
        if len(child.descendants) == 0:
            # Leaf
            continue
        postorder_create(child, prefix, hal)
def test_dumps(*trees):
    for ex in [
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    ]:
        assert ex == dumps(loads(ex)[0])
def test_dumps(self, *trees):
    for ex in [
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    ]:
        self.assertEqual(ex, dumps(loads(ex)[0]))
def sanitise_tree(self, tree, tree_type):
    """
    Makes any changes to a user-provided tree required to make it suitable
    for passing to BEAST.

    In particular, this method checks that the supplied string or the
    contents of the supplied file:

        * seems to be a valid Newick tree
        * contains no duplicate taxa
        * has taxa which are a superset of the languages in the analysis
        * has no polytomies or unifurcations.
    """
    # Make sure tree can be parsed
    try:
        tree = newick.loads(tree)[0]
    except:
        raise ValueError("Could not parse %s tree. Is it valid Newick?" % tree_type)
    # Make sure starting tree contains no duplicate taxa
    tree_langs = tree.get_leaf_names()
    if not len(set(tree_langs)) == len(tree_langs):
        dupes = set([l for l in tree_langs if tree_langs.count(l) > 1])
        dupestring = ",".join(["%s (%d)" % (d, tree_langs.count(d)) for d in dupes])
        raise ValueError("%s tree contains duplicate taxa: %s" %
                         (tree_type.capitalize(), dupestring))
    tree_langs = set(tree_langs)
    # Make sure the languages in the tree are a superset of the languages in the analysis
    if not tree_langs.issuperset(self.languages):
        missing_langs = set(self.languages).difference(tree_langs)
        miss_string = ",".join(missing_langs)
        raise ValueError("Some languages in the data are not in the %s tree: %s" %
                         (tree_type, miss_string))
    # If the tree's language set is a proper superset, prune the tree to fit the analysis
    if not tree_langs == self.languages:
        tree.prune_by_names(self.languages, inverse=True)
        self.messages.append(
            "[INFO] %s tree includes languages not present in any data set and will be pruned."
            % tree_type.capitalize())
    # Get the tree looking nice
    tree.remove_redundant_nodes()
    if tree_type == "starting":
        tree.resolve_polytomies()
    # Remove lengths for a monophyly tree
    if tree_type == "monophyly":
        for n in tree.walk():
            n._length = None
    # Checks
    if tree_type == "starting":
        assert all([len(n.descendants) in (0, 2) for n in tree.walk()])
        assert len(tree.get_leaves()) == len(self.languages)
        assert all([l.name for l in tree.get_leaves()])
    # Done
    return newick.dumps(tree)
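# A self-contained sketch of the pruning/cleanup steps used above, on a made-up
# tree with the newick package (the taxon names in `keep` are illustrative).
import newick

tree = newick.loads("((A:1,B:1,D:1,X:1):1,C:2)root;")[0]
keep = ["A", "B", "C", "D"]

tree.prune_by_names(keep, inverse=True)  # drop leaves not in `keep` (here: X)
tree.remove_redundant_nodes()            # collapse any unifurcations left by pruning
tree.resolve_polytomies()                # make the tree strictly bifurcating

assert set(tree.get_leaf_names()) == set(keep)
print(newick.dumps(tree))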
def relabel_newick(trees_str):
    '''
    Relabel newick tree from guid to eartag
    '''
    trees = newick.loads(trees_str)
    eartags = []
    for tree in trees:
        for node in tree.walk():
            if node.name:
                node.name, eartags = get_eartag(node.name, eartags)
    return newick.dumps(trees)
def newick_tree_species(self):
    """
    Returns a Newick tree with the species present in the current clade.

    :return: Newick tree (string) with species for the current clade
    """
    species = {s.code: s.name for s in Species.query.all()}
    tree = newick.loads(self.newick_tree)[0]
    for code, name in species.items():
        node = tree.get_node(code)
        if node is not None:
            node.name = name
    return newick.dumps([tree])
def _convert_to_phyloxml(self, seq_id_to_seq_name: Dict[SequenceID, str] = None) -> str:
    if not self.nodes:
        return None
    newick_str = self._convert_to_newick(seq_id_to_seq_name)
    tree = Phylo.read(StringIO(newick_str), 'newick')
    Phylo.write(tree, 'drzewko.xml', 'phyloxml')
    tree_xml = Phylo.PhyloXMLIO.read("drzewko.xml")
    sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)
    nodes_to_process = [(None, sorted_nodes[0])]
    newick_tree = None
    while nodes_to_process:
        n = nodes_to_process.pop()
        node_parent_label = n[0]
        node = n[1]
        if seq_id_to_seq_name:
            label = seq_id_to_seq_name[node.sequences_ids[0]] \
                if len(node.sequences_ids) == 1 else f"Consensus {node.consensus_id}"
        else:
            label = node.sequences_ids[0].value \
                if len(node.sequences_ids) == 1 else f"Consensus {node.consensus_id}"
        if node.parent_node_id is None:
            length = "1"
        else:
            parent_minComp = sorted_nodes[node.parent_node_id].mincomp.root_value().value
            length = str((1 - parent_minComp) - (1 - node.mincomp.root_value().value))
        newick_node = Node(name=label, length=length)
        if newick_tree is None:
            newick_tree = newick_node
        else:
            parent_node = newick_tree.get_node(node_parent_label)
            parent_node.add_descendant(newick_node)
        for child in node.children_nodes_ids:
            nodes_to_process.append((label, sorted_nodes[child]))
    return dumps(newick_tree)
def main(): """Write all gene trees appropriate to a file.""" species_treefile, orthogroups_file, gene_tree_dir, outfile, extention = get_args() #species tree species_tree = tm.read_tree(species_treefile) sp_nnodes = tm.calculate_nnodes(species_tree) species_tree_nodes = tm.get_all_nodes(species_tree, sp_nnodes) #Gene trees #This is the old filterred list #candidates = mod.get_file_data("candidates") lines = mod.get_file_data(orthogroups_file) candidates = [] for line in lines[1:]: candidates.append(line.split("\t")[0]) out = open(outfile, "w") for family_name in candidates: sys.stderr.write(family_name + "\n") #print(tree_dir + "/" + family_name + extention) try: tree = tm.read_tree(gene_tree_dir + "/" + family_name + extention) except: sys.stderr.write(family_name + "not in candidates\n") continue nnodes = tm.calculate_nnodes(tree) sys.stderr.write("nodes = " + str(nnodes) + "\n") nodes = tm.get_all_nodes(tree, nnodes, [], []) backup = newick.dumps(tree) for node in nodes: sys.stderr.write("node = " + str(node) + "\n") tree = newick.loads(backup) if tm.is_duplication(node): sys.stderr.write("is duplication\n") # do the old checky sides = node.descendants for i in range(len(sides)): if tm.is_species_like(sides[i], species_tree_nodes): #print("blammo") if tm.node_with_deletion(sides[i], sides[(i + 1) % 2]): out.write(family_name + "\t" + node.name + "\n") sys.stderr.write("good node!\n\n") break
def __replace_ids(tree_string, conversion_table):
    """
    Replaces identifiers in a newick string with those defined in the conversion table

    :param tree_string: tree in newick format
    :param conversion_table: dict with name conversion
    :return: parsed tree, in newick format
    """
    tree = newick.loads(tree_string.strip(), strip_comments=True)[0]
    # Remove internal names; they need to be replaced later with proper reconciliation data.
    tree.remove_internal_names()
    for leaf in tree.get_leaves():
        if leaf.name in conversion_table.keys():
            leaf.name = conversion_table[leaf.name]
    return newick.dumps([tree])
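# A standalone sketch of the same leaf-renaming steps, using only the newick
# package; the identifiers in `conversion_table` are made up.
import newick

conversion_table = {"g1": "GeneA", "g2": "GeneB"}
tree = newick.loads("((g1:0.1,g2:0.2)n1:0.3,g3:0.4)root;")[0]
tree.remove_internal_names()
for leaf in tree.get_leaves():
    if leaf.name in conversion_table:
        leaf.name = conversion_table[leaf.name]
print(newick.dumps([tree]))  # e.g. ((GeneA:0.1,GeneB:0.2):0.3,g3:0.4);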
def rescale_newick(trees_str):
    trees = newick.loads(trees_str)
    lmin = float_info.max
    lmax = -float_info.max
    for tree in trees:
        for n in tree.walk():
            if n.length > lmax:
                lmax = n.length
            if n.length < lmin and not n.length == 0:
                lmin = n.length
    factor = 1 / lmin
    for tree in trees:
        for n in tree.walk():
            n.length = int(n.length * factor)
    return newick.dumps(trees)
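# A minimal usage sketch for rescale_newick above. It assumes the module-level
# imports the function relies on; the tree string is made up. Each branch length
# is divided by the smallest non-zero length and truncated to an integer.
import newick
from sys import float_info

print(rescale_newick("(A:0.00025,(B:0.0005,C:0.001):0.00075)root;"))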
def handle_starting_tree(self):
    """
    Makes any changes to the user-provided starting tree required to make it
    suitable for passing to BEAST.

    In particular, this method checks that the supplied string or the
    contents of the supplied file:

        * seems to be a valid Newick tree
        * contains no duplicate taxa
        * has taxa which are a superset of the languages in the analysis
        * has no polytomies or unifurcations.
    """
    if os.path.exists(self.starting_tree):
        with io.open(self.starting_tree, encoding="UTF-8") as fp:
            self.starting_tree = fp.read().strip()
    if self.starting_tree:
        # Make sure starting tree can be parsed
        try:
            tree = newick.loads(self.starting_tree)[0]
        except:
            raise ValueError("Could not parse starting tree. Is it valid Newick?")
        # Make sure starting tree contains no duplicate taxa
        tree_langs = [n.name for n in tree.walk() if n.is_leaf]
        if not len(set(tree_langs)) == len(tree_langs):
            dupes = [l for l in tree_langs if tree_langs.count(l) > 1]
            dupestring = ",".join(["%s (%d)" % (d, tree_langs.count(d)) for d in dupes])
            raise ValueError("Starting tree contains duplicate taxa: %s" % dupestring)
        tree_langs = set(tree_langs)
        # Make sure the languages in the tree are a superset of the languages in the analysis
        if not tree_langs.issuperset(self.languages):
            missing_langs = set(self.languages).difference(tree_langs)
            miss_string = ",".join(missing_langs)
            raise ValueError("Some languages in the data are not in the starting tree: %s" % miss_string)
        # If the tree's language set is a proper superset, prune the tree to fit the analysis
        if not tree_langs == self.languages:
            tree.prune_by_names(self.languages, inverse=True)
            self.messages.append("[INFO] Starting tree includes languages not present in any data set and will be pruned.")
        # Get the tree looking nice
        tree.remove_redundant_nodes()
        tree.resolve_polytomies()
        # Replace the starting_tree from the config with the new one
        self.starting_tree = newick.dumps(tree)
def modify(file_name: str, tree: PhyTree):
    finished = False
    while not finished:
        print("Tree has following groups:")
        print(tree.get_nodes())
        print("Add leaf:")
        node = input()
        if re.match(r"[{} ]", node):
            print("Leaf name can not include {, }, and space")
            continue
        print(f"Add leaf '{node}' to group:")
        group = input()
        if not re.match(r'[{}]', group):
            print("Target group has to be enclosed by {} brackets")
            continue
        current_structure = dumps(tree.get_newick())
        try:
            tree.add_to_group(node, group)
        except ValueError as e:
            print(e)
            tree.parse_string(current_structure)
        finished = ask_if_finished()
    tree.save(file_name)
def main(): """ Write appropriate trees to a file. For each tree: get the nodes of the tree look at the duplication nodes for each side: look at one side to see if it is species-like if it is, check if the other one is node with deletion if success, add to the file or whatever. """ treedir, species_tree, out_file = get_args() species_tree = tm.read_tree(species_tree) sp_tree_nodes = tm.get_all_nodes(species_tree, tm.calculate_nnodes(species_tree), [], []) treefiles = glob.glob(treedir + "/*rooted") good_trees = [] # for i in range(len(treefiles)): for i in [16]: tree = tm.read_tree(treefiles[i]) nodes = tm.get_all_nodes(tree, tm.calculate_nnodes(tree), [], []) backup = newick.dumps(tree) for node in tree.descendants: tree = newick.loads(backup) if tm.is_duplication(node): for j in range(2): if tm.is_species_like(node.descendants[j], sp_tree_nodes): print("species_like_side") if tm.node_with_deletion(node.descendants[j], node.descendants[(j + 1) % 2]): good_trees.append(treefiles[i]) out = open(out_file, "w") out.write("\n".join(good_trees)) out.close()
def relabel_newick(trees_str):
    '''
    Relabel newick tree from guid to eartag
    '''
    print(trees_str)
    trees = newick.loads(trees_str)
    eartags = []
    global guid_sample_name_map
    if not guid_sample_name_map:
        guid_sample_name_map = requests.get(
            'http://127.0.0.1:5007/api/all_guid_sample_names').json()
    sample_name_eartag_map = requests.get(
        'http://127.0.0.1:5007/api/all_sample_names_eartags').json()
    for tree in trees:
        for node in tree.walk():
            if node.name:
                node.name, eartags = get_eartag(node.name, eartags,
                                                sample_name_eartag_map,
                                                guid_sample_name_map)
    return newick.dumps(trees)
def tree_stripped(self):
    tree = newick.loads(self.data_newick)[0]
    tree.remove_lengths()
    return newick.dumps([tree])
def reconcile_trees(self):
    print("\n1.====================Getting into function reconcile_trees")
    # Fetch required data from the database
    sequences = Sequence.query.all()
    #print("\n1.1.=============================Sequences Joined: " + ', '.join(sequences))  # FAILS: bad print statement for a list of objects
    clades = Clade.query.all()
    #print("\n1.2. =========================Clades: ", *clades, sep='\n')  # print works
    seq_to_species = {s.name: s.species.code for s in sequences}
    #print("\n2.=========================seq_to_species: ", *seq_to_species, sep='::')
    seq_to_id = {s.name: s.id for s in sequences}
    clade_to_species = {c.name: json.loads(c.species) for c in clades}
    clade_to_id = {c.name: c.id for c in clades}

    new_associations = []
    phyloxml_data = {}

    for t in self.trees:
        # Load tree from Newick string and start reconciling
        tree = newick.loads(t.data_newick)[0]
        print("\n3.=========================tree loaded ok")
        for node in tree.walk():
            if len(node.descendants) != 2:
                #print("\n4.==========length of node descendant=" + str(len(node.descendants)))
                if not node.is_binary:
                    print("\n5.================Non-Binary-node: " + str(node.is_binary))
                    # Print warning in case there is a non-binary node.
                    # sdash: the original print statement below is commented out because a
                    # non-binary node has neither an id nor a label, so the process stopped
                    # at that statement for non-binary trees.
                    print("Non-Binary tree: " + t.data_newick)
                    # sdash: this print statement shows which tree is non-binary and is
                    # skipped; it does not stop the reconcile process.
                    # sdash May-03-2019, original:
                    #print("[%d, %s] Skipping node... Can only reconcile binary nodes ..." % (tree.id, tree.label))
                # Otherwise it is a leaf node and can be skipped
                continue
            branch_one_seq = [l.name.strip() for l in node.descendants[0].get_leaves()]
            # print("\n6.===============Branch-one-seq: " + ', '.join(branch_one_seq))
            branch_two_seq = [l.name.strip() for l in node.descendants[1].get_leaves()]
            # print("\n7.===============Branch-two-seq: " + ', '.join(branch_two_seq))
            branch_one_species = set([
                seq_to_species[s] for s in branch_one_seq
                if s in seq_to_species.keys()
            ])
            print("\n8.===============Branch-one-spp: " + ', '.join(branch_one_species))
            # Empty set (length 0) even though seq_to_species has 143271 entries, so the
            # problem is in how this set is built.
            # TO DO: possibly the sequence names in branch_one_seq do not match the keys
            # of seq_to_species, leaving the set empty. Check this possibility next. Tue June 25.
            branch_two_species = set([
                seq_to_species[s] for s in branch_two_seq
                if s in seq_to_species.keys()
            ])
            print("\n9.===============Branch-two-spp: " + ', '.join(branch_two_species))

            all_species = branch_one_species.union(branch_two_species)
            clade, _ = phylo.get_clade(all_species, clade_to_species)
            duplication = phylo.is_duplication(branch_one_species, branch_two_species,
                                               clade_to_species)
            duplication_consistency = None
            if duplication:
                duplication_consistency = phylo.duplication_consistency(
                    branch_one_species, branch_two_species)

            tags = [
                clade_to_id[clade] if clade is not None else 0,
                'D' if duplication else 'S',
                duplication_consistency if duplication else 0
            ]
            node.name = '_'.join([str(t) for t in tags])

            if clade is not None:
                for seq_one in branch_one_seq:
                    for seq_two in branch_two_seq:
                        new_associations.append({
                            'sequence_one_id': seq_to_id[seq_one],
                            'sequence_two_id': seq_to_id[seq_two],
                            'tree_id': t.id,
                            'clade_id': clade_to_id[clade],
                            'duplication': 1 if duplication else 0,
                            'duplication_consistency_score': duplication_consistency
                        })
                        new_associations.append({
                            'sequence_one_id': seq_to_id[seq_two],
                            'sequence_two_id': seq_to_id[seq_one],
                            'tree_id': t.id,
                            'clade_id': clade_to_id[clade],
                            'duplication': 1 if duplication else 0,
                            'duplication_consistency_score': duplication_consistency
                        })

            if len(new_associations) > 400:
                db.engine.execute(
                    SequenceSequenceCladeAssociation.__table__.insert(),
                    new_associations)
                new_associations = []

        # add newick tree to memory
        phyloxml_data[t.id] = newick.dumps([tree])

    db.engine.execute(SequenceSequenceCladeAssociation.__table__.insert(),
                      new_associations)

    # Update PhyloXML data file for all trees
    for t in self.trees:
        if t.id in phyloxml_data.keys():
            t.data_phyloxml = phyloxml_data[t.id]

    db.session.commit()
def test_all_removal():
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_names()
    tree.remove_lengths()
    topology_only = dumps(tree)
    assert topology_only == '((,(,)));'
def test_length_removal():
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_lengths()
    assert dumps(tree) == '((B,(C,D)E)F)A;'
def test_leaf_name_removal():
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_leaf_names()
    assert dumps(tree) == '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;'
def test_leaf_name_removal(self):
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_leaf_names()
    nameless = dumps(tree)
    self.assertEqual(nameless, '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;')
def test_internal_name_removal():
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_internal_names()
    assert dumps(tree) == '((B:0.2,(C:0.3,D:0.4):0.5):0.1);'
def test_length_removal(self):
    tree = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    tree.remove_lengths()
    nameless = dumps(tree)
    self.assertEqual(nameless, '((B,(C,D)E)F)A;')
def reconcile_trees(self):
    # Fetch required data from the database
    sequences = Sequence.query.all()
    clades = Clade.query.all()

    seq_to_species = {s.name: s.species.code for s in sequences}
    seq_to_id = {s.name: s.id for s in sequences}
    clade_to_species = {c.name: json.loads(c.species) for c in clades}
    clade_to_id = {c.name: c.id for c in clades}

    new_associations = []
    phyloxml_data = {}

    for t in self.trees:
        # Load tree from Newick string and start reconciling
        tree = newick.loads(t.data_newick)[0]
        for node in tree.walk():
            if len(node.descendants) != 2:
                if not node.is_binary:
                    # Print warning in case there is a non-binary node
                    print("[%d, %s] Skipping node... Can only reconcile binary nodes ..."
                          % (t.id, t.label))
                # Otherwise it is a leaf node and can be skipped
                continue
            branch_one_seq = [l.name.strip() for l in node.descendants[0].get_leaves()]
            branch_two_seq = [l.name.strip() for l in node.descendants[1].get_leaves()]
            branch_one_species = set([
                seq_to_species[s] for s in branch_one_seq
                if s in seq_to_species.keys()
            ])
            branch_two_species = set([
                seq_to_species[s] for s in branch_two_seq
                if s in seq_to_species.keys()
            ])

            all_species = branch_one_species.union(branch_two_species)
            clade, _ = phylo.get_clade(all_species, clade_to_species)
            duplication = phylo.is_duplication(branch_one_species, branch_two_species,
                                               clade_to_species)
            duplication_consistency = None
            if duplication:
                duplication_consistency = phylo.duplication_consistency(
                    branch_one_species, branch_two_species)

            tags = [
                clade_to_id[clade] if clade is not None else 0,
                'D' if duplication else 'S',
                duplication_consistency if duplication else 0
            ]
            node.name = '_'.join([str(t) for t in tags])

            if clade is not None:
                for seq_one in branch_one_seq:
                    for seq_two in branch_two_seq:
                        new_associations.append({
                            'sequence_one_id': seq_to_id[seq_one],
                            'sequence_two_id': seq_to_id[seq_two],
                            'tree_id': t.id,
                            'clade_id': clade_to_id[clade],
                            'duplication': 1 if duplication else 0,
                            'duplication_consistency_score': duplication_consistency
                        })
                        new_associations.append({
                            'sequence_one_id': seq_to_id[seq_two],
                            'sequence_two_id': seq_to_id[seq_one],
                            'tree_id': t.id,
                            'clade_id': clade_to_id[clade],
                            'duplication': 1 if duplication else 0,
                            'duplication_consistency_score': duplication_consistency
                        })

            if len(new_associations) > 400:
                db.engine.execute(
                    SequenceSequenceCladeAssociation.__table__.insert(),
                    new_associations)
                new_associations = []

        # add newick tree to memory
        phyloxml_data[t.id] = newick.dumps([tree])

    db.engine.execute(SequenceSequenceCladeAssociation.__table__.insert(),
                      new_associations)

    # Update PhyloXML data file for all trees
    for t in self.trees:
        if t.id in phyloxml_data.keys():
            t.data_phyloxml = phyloxml_data[t.id]

    db.session.commit()
def add_trees_general():
    form = AddGeneralTreesForm(request.form)
    if request.method == 'POST':
        new_method = TreeMethod()
        new_method.description = request.form.get('description')
        new_method.gene_family_method_id = request.form.get('gene_family_method_id')

        db.session.add(new_method)

        try:
            # Commit to DB remainder
            db.session.commit()
        except Exception as _:
            db.session.rollback()
            flash('Failed to add TreeMethod to the DB!', 'danger')
            return redirect(url_for('admin.index'))

        # Get original gene family names (used to link trees to families)
        gfs = GeneFamily.query.filter(
            GeneFamily.method_id == new_method.gene_family_method_id).all()
        name_to_id = {gf.name: gf.id for gf in gfs}

        tree_data = request.files[form.general_tree_archive.name].read()

        fd, temp_path = mkstemp()
        with open(temp_path, 'wb') as tree_data_writer:
            tree_data_writer.write(tree_data)

        new_trees = []
        with tarfile.open(temp_path, mode='r:gz') as tf:
            for name, entry in zip(tf.getnames(), tf):
                tree_string = str(
                    tf.extractfile(entry).read().decode('utf-8')).replace(
                        '\r', '').replace('\n', '')
                # get the gene family's original name from the filename
                current_tree_name = str(name.split('.')[0])

                gf_id = None
                if current_tree_name in name_to_id.keys():
                    gf_id = name_to_id[current_tree_name]
                else:
                    print('%s: Family %s not found in gene families generated using method %d !'
                          % (name, current_tree_name, new_method.gene_family_method_id))

                tree = newick.loads(tree_string)[0]

                new_trees.append({
                    "gf_id": gf_id,
                    "label": current_tree_name + "_tree",
                    "method_id": new_method.id,
                    "data_newick": tree_string,
                    "data_phyloxml": newick.dumps([tree])
                })

                # add 400 trees at a time; more can cause problems with some database engines
                if len(new_trees) > 400:
                    db.engine.execute(Tree.__table__.insert(), new_trees)
                    new_trees = []

        # add the last set of trees
        db.engine.execute(Tree.__table__.insert(), new_trees)

        flash('Added trees to DB.', 'success')
        return redirect(url_for('admin.index'))
    else:
        if not form.validate():
            flash('Unable to validate data, potentially missing fields', 'danger')
            return redirect(url_for('admin.index'))
        else:
            abort(405)
# Update distances from the new (merged) node, stored at index shape[0], to
# every remaining node.
for m in range(shape[0]):
    if m != i and m != j:
        newDist = 0.5 * (disMatrix[i, m] + disMatrix[j, m] - disMatrix[i, j])
        disMatrix[shape[0], m] = newDist
        disMatrix[m, shape[0]] = newDist
shape = (shape[0] + 1, shape[1] + 1)

# Invalidate the rows/columns of the two nodes that were just merged.
disMatrix[i, :] = np.nan  #float("inf")
disMatrix[j, :] = np.nan  #float("inf")
disMatrix[:, i] = np.nan  #float("inf")
disMatrix[:, j] = np.nan  #float("inf")

# Termination: three nodes remain. Compute their branch lengths from the
# pairwise distances and join them under an unresolved root.
keys = list(S.keys())
dim = disMatrix[keys[0], keys[2]]
dij = disMatrix[keys[0], keys[1]]
djm = disMatrix[keys[1], keys[2]]
gamma_v_i = 0.5 * (dij + dim - djm)
gamma_v_j = 0.5 * (dij + djm - dim)
gamma_v_m = 0.5 * (dim + djm - dij)
S[keys[0]].length = gamma_v_i
S[keys[1]].length = gamma_v_j
S[keys[2]].length = gamma_v_m
S["root"] = newick.Node.create(
    name="root", descendants=[S[keys[0]], S[keys[1]], S[keys[2]]])
rename_recursive(S["root"])
print(newick.dumps(S["root"]))
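# A self-contained sketch of the final join step above, using only the newick
# Node API; the three pairwise distances are made up, and S / disMatrix /
# rename_recursive are not reproduced here.
import newick

a, b, c = newick.Node("A"), newick.Node("B"), newick.Node("C")
dab, dac, dbc = 4.0, 6.0, 8.0
a.length = 0.5 * (dab + dac - dbc)  # 1.0
b.length = 0.5 * (dab + dbc - dac)  # 3.0
c.length = 0.5 * (dac + dbc - dab)  # 5.0

root = newick.Node.create(name="root", descendants=[a, b, c])
print(newick.dumps(root))  # e.g. (A:1.0,B:3.0,C:5.0)root;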
def tree_data(req, species_query, experiment_count=lambda s: s.count_experiments):
    node_data = {}
    ntrees = []
    colormap = collections.Counter()
    colormap2 = collections.Counter()
    count_leafs = species_query.count()
    species = species_query.order_by(
        Species.kingdom, Species.phylum_sortkey, Species.klass_sortkey,
        Species.order_sortkey, Species.family_sortkey, Species.genus_sortkey,
        Species.sortkey).options(
            joinedload(common.Language.valuesets).joinedload(common.ValueSet.values))
    coverage = {}
    nodes = []
    ngenus = 0
    for kingdom, items1 in itertools.groupby(species, lambda s: s.kingdom):
        node1 = newick.Node()
        for phylum, items2 in itertools.groupby(items1, lambda s: s.phylum):
            nid = '_'.join((phylum, ))
            nodes.append((nid, 'Phylum', 'classes'))
            if phylum not in coverage:
                coverage[phylum] = {}
            node2 = newick.Node(nid)
            for klass, items3 in itertools.groupby(items2, lambda s: s.klass):
                nid = '_'.join((phylum, klass))
                nodes.append((nid, 'Class', 'orders'))
                if klass not in coverage[phylum]:
                    coverage[phylum][klass] = {}
                node3 = newick.Node(nid)
                for order, items4 in itertools.groupby(items3, lambda s: s.order):
                    nid = '_'.join((phylum, klass, order))
                    nodes.append((nid, 'Order', 'families'))
                    if order not in coverage[phylum][klass]:
                        coverage[phylum][klass][order] = {}
                    node4 = newick.Node(nid)
                    for family, items5 in itertools.groupby(items4, lambda s: s.family):
                        nid = '_'.join((phylum, klass, order, family))
                        nodes.append((nid, 'Family', 'genera'))
                        if family not in coverage[phylum][klass][order]:
                            coverage[phylum][klass][order][family] = {}
                        node5 = newick.Node(nid)
                        for genus, items6 in itertools.groupby(items5, lambda s: s.genus):
                            ngenus += 1
                            nid = '_'.join((phylum, klass, order, family, genus))
                            nodes.append((nid, 'Genus', 'species'))
                            items6 = list(items6)
                            coverage[phylum][klass][order][family][genus] = len(items6)
                            colormap.update([s.family for s in items6])
                            colormap2.update([s.klass for s in items6])
                            node6 = newick.Node.create(
                                name=nid,
                                descendants=[
                                    newick.Node('%s{__id__%s}' % (s.name.replace(' ', '_'), s.id))
                                    for s in items6
                                ])
                            node_data.update({
                                s.id: species_node(s, req, experiment_count(s))
                                for s in items6
                            })
                            node5.add_descendant(node6)
                        node4.add_descendant(node5)
                    node3.add_descendant(node4)
                node2.add_descendant(node3)
            node1.add_descendant(node2)
        ntrees.append(node1)
    node_data.update({
        nid: coverage_data(req, nid, rank, subranks, coverage)
        for nid, rank, subranks in nodes
    })
    res = dict(
        count_leafs=count_leafs,
        newick=newick.dumps(ntrees),
        colormap={
            k[0]: (v, svg.data_url(svg.icon(v.replace('#', 'c'))))
            for k, v in zip(colormap.most_common(),
                            color.qualitative_colors(len(colormap)))
        },
        colormap2={
            k[0]: (v, svg.data_url(svg.icon(v.replace('#', 's'))))
            for k, v in zip(colormap2.most_common(),
                            color.qualitative_colors(len(colormap), set='tol'))
        },
        node_data=node_data)
    res['edgecolors'] = {k: v[0] for k, v in res['colormap2'].items()}
    return res
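# A minimal sketch of how the nested newick.Node tree above is assembled; the
# taxon names and the {__id__N} suffix are made up to mirror the labelling
# scheme used in tree_data.
import newick

leaf1 = newick.Node('Apis_mellifera{__id__1}')
leaf2 = newick.Node('Apis_cerana{__id__2}')
genus = newick.Node.create(name='Apidae_Apis', descendants=[leaf1, leaf2])
family = newick.Node('Apidae')
family.add_descendant(genus)
print(newick.dumps([family]))  # serialises the list of trees to a Newick string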