def __init__(self, parent):
    """
    CONSTRUCTOR

    Builds the dialogue's UI, hides the browser host, wires the widgets the
    base class needs, applies the user's `enable_browser` setting and finally
    shows the current selection.

    :param parent: Passed through to the base class constructor.
    """
    super().__init__(parent)
    self.ui = frm_webtree_designer.Ui_Dialog(self)
    self.setWindowTitle("Reports")

    # The browser host stays hidden until the browser is actually enabled
    self.ui.WIDGET_MAIN.setVisible(False)
    self.is_browser = False
    self.browser_ctrl = None
    self.html = ""

    # Hand the base class the widgets it manages
    self.bind_to_label(self.ui.LBL_BROWSER_WARNING)
    self.add_select_button(self.ui.FRA_TOOLBAR)

    # Apply the user's browser preference (`ASK` needs no action here)
    browse_mode = LegoGuiController.get_settings().enable_browser

    if browse_mode == BROWSE_MODE.INBUILT:
        self.enable_inbuilt_browser()
    elif browse_mode == BROWSE_MODE.SYSTEM:
        self.ui.BTN_BROWSE_HERE.setVisible(False)
    elif browse_mode != BROWSE_MODE.ASK:
        raise SwitchError(
            "LegoGuiController.get_settings().enable_browser", browse_mode)

    # Show the selection
    self.update_page()
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    the accid of `component_out`
    * `READABLE`: "Create " followed by `component_out`

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.component_out.get_accid()

    if naming == modes.READABLE:
        return "Create {}".format(self.component_out)

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    this object's accid
    * `READABLE`: "NRFG" when `is_clean` is set, otherwise "Unprocessed NRFG"

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.READABLE:
        if self.is_clean:
            return "NRFG"
        return "Unprocessed NRFG"

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    this object's accid
    * `READABLE`: "Set of N genes", where N is the size of `contents`

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.READABLE:
        num_genes = len(self.contents)
        return "Set of {} genes".format(num_genes)

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    this object's accid
    * `READABLE`: the `event`, followed by the size of `pertinent_inner`

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.READABLE:
        num_genes = len(self.pertinent_inner)
        return "{} ({} genes)".format(self.event, num_genes)

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    this object's accid
    * `READABLE`: a summary giving the number of pregraphs in the subset
                  and the number of nodes in `graph`

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.READABLE:
        num_trees = len(self.__subset.pregraphs)
        num_nodes = len(self.graph.nodes)
        return "Supertree of {} trees ({} nodes)".format(num_trees, num_nodes)

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def __str__(self):
    """
    Names this object according to the `fusion_namer` option.

    * `ACCID`:    this object's accid
    * `READABLE`: a summary giving the `component` and the number of nodes
                  in `graph`

    Raises `SwitchError` for any other option value.
    """
    naming = groot.data.config.options().fusion_namer
    modes = groot.constants.EFusionNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.READABLE:
        num_nodes = len(self.graph.nodes)
        return "Pregraph for {} ({} nodes)".format(self.component, num_nodes)

    raise SwitchError("groot.data.config.options().fusion_namer", naming)
def to_string(gene, start, end) -> str:
    """
    Formats a domain (a range within a gene) according to the `domain_namer`
    option.

    * `START_END`:        `gene[start:end]`
    * `START_END_LENGTH`: `gene[start:end(length)]`
    * `START_LENGTH`:     `gene[start(length)]`

    The length is inclusive of both ends, i.e. `end - start + 1`.

    NOTE(review): the last two formats were previously the other way around
    (`START_LENGTH` printed the end and `START_END_LENGTH` did not). They have
    been matched to the option names - confirm nothing parses the old output.

    :param gene:  Gene (or gene name) the domain lies in.
    :param start: Start of the domain.
    :param end:   End of the domain.
    :return:      The formatted name.
    :raises SwitchError: If the option holds an unrecognised value.
    """
    naming = groot.data.config.options().domain_namer
    modes = groot.constants.EDomainNames

    if naming == modes.START_END:
        return "{}[{}:{}]".format(gene, start, end)

    if naming == modes.START_END_LENGTH:
        return "{}[{}:{}({})]".format(gene, start, end, end - start + 1)

    if naming == modes.START_LENGTH:
        return "{}[{}({})]".format(gene, start, end - start + 1)

    # Name the expression that was actually switched on
    raise SwitchError("groot.data.config.options().domain_namer", naming)
def __str__(self) -> str:
    """
    OVERRIDE

    Names this component according to the `component_namer` option.

    * `ACCID`: this component's accid
    * `FIRST`: the major gene whose string form sorts first. A component
               with no major genes falls back to its accid, since `__str__`
               must always return a string.

    :raises SwitchError: If the option holds an unrecognised value.
    """
    naming = groot.data.options().component_namer
    modes = groot.constants.EComponentNames

    if naming == modes.ACCID:
        return self.get_accid()

    if naming == modes.FIRST:
        # `min` picks the same element as `sorted(...)[0]` without sorting
        # the whole collection, and `default` covers the empty case.
        first = min(self.major_genes, key=str, default=None)

        if first is None:
            return self.get_accid()

        return str(first)

    raise SwitchError("groot.data.options().component_namer", naming)
def reposition_tree(tree: MGraph) -> bool:
    """
    Re-lays out a tree using the `position` of the genes on its nodes.

    The first node found whose gene is flagged as an outgroup is made the
    outgroup of the tree, after which the search stops.

    :param tree: Tree to re-lay out.
    :return:     `True` if an outgroup was applied, `False` if no gene in the
                 tree requested one.
    :raises SwitchError: If a gene has an unrecognised position.
    """
    for node in tree:
        gene = node.data

        # Only nodes carrying a gene have a position
        if not isinstance(gene, Gene):
            continue

        if gene.position == EPosition.OUTGROUP:
            node.make_outgroup()
            return True

        if gene.position != EPosition.NONE:
            raise SwitchError("node.data.position", gene.position)

    return False
def tree_neighbor_joining(model: str, alignment: str) -> str:
    """
    Uses PAUP to generate the tree using neighbour-joining.

    There are some major issues with Paup.
    Please see the troubleshooting section of Groot's readme.

    The alignment is written to `in_file.fasta` and the PAUP script to
    `in_file.paup`, both in the current directory; the tree is read back from
    `out_file.nwk`.

    :param model:     Format, a string `n` or `p` denoting the site type.
    :param alignment: Alignment in FASTA format.
    :return:          The tree in Newick format.
    :raises SwitchError: If `model` is neither `n` nor `p`.
    :raises ValueError:  If PAUP reports that it has expired, or produces an
                         empty tree file.
    """
    # TODO: Use an alternative that doesn't have the PAUP time-out problem.
    file_helper.write_all_text("in_file.fasta", alignment)

    if model == "n":
        site_type = "nucleotide"
    elif model == "p":
        site_type = "protein"
    else:
        raise SwitchError("model", model)

    # The data type is filled in from the model. Previously the script
    # hard-coded `dataType=protein` and contained no placeholder, so
    # nucleotide alignments were silently processed as protein.
    script = """
    toNEXUS format=FASTA fromFile=in_file.fasta toFile=in_file.nexus dataType={} replace=yes;
    execute in_file.nexus;
    NJ;
    SaveTrees file=out_file.nwk format=Newick root=Yes brLens=Yes replace=yes;
    quit;""".format(site_type)

    file_helper.write_all_text("in_file.paup", script)

    txt = groot.run_subprocess(["paup", "-n", "in_file.paup"],
                               collect=True,
                               no_err=True)

    # The return code seems to have no bearing on Paup's actual output, so
    # ignore it and look for the specific text.
    if "This version of PAUP has expired." in txt:
        raise ValueError(
            "'This version of PAUP has expired'. Please update your software or use a different method and try again."
        )

    r = file_helper.read_all_text("out_file.nwk",
                                  details="the expected output from paup")

    if not r:
        raise ValueError("Paup produced an empty file.")

    return r
def position(self, item: TSide) -> bool:
    """
    Returns `True` if `item` appears on the right of this edge, or `False` if
    it appears on the left.

    Supports: Gene, domain or component. A `bool` is also accepted and is
    returned unchanged.

    Note that only the component of the _gene_ is considered, not the
    individual domains.

    :param item: Gene, domain, component or bool to locate.
    :return:     `False` for the left side, `True` for the right side.
    :raises KeyError:    If `item` is on neither side, or if it is a component
                         containing the genes of both sides.
    :raises SwitchError: If `item` is of an unsupported type.
    """
    if isinstance(item, Domain):
        if item.gene is self.left.gene:
            return False

        if item.gene is self.right.gene:
            return True

        raise KeyError(
            "I cannot find the domain '{}' within this edge.".format(item))
    elif isinstance(item, Gene):
        if item is self.left.gene:
            return False

        if item is self.right.gene:
            return True

        # This message previously said "domain", copied from the branch above
        raise KeyError(
            "I cannot find the gene '{}' within this edge. This edge's genes are '{}' and '{}'."
            .format(item, self.left.gene, self.right.gene))
    elif isinstance(item, Component):
        if self.left.gene in item.major_genes:
            # A component covering both sides cannot be placed on either
            if self.right.gene in item.major_genes:
                raise KeyError(
                    "I can find the component '{}' within this edge, but both sides of the edge have this same component. This edge's genes are '{}' and '{}'."
                    .format(item, self.left.gene, self.right.gene))

            return False

        if self.right.gene in item.major_genes:
            return True

        raise KeyError(
            "I cannot find the component '{}' within this edge. This edge's genes are '{}' and '{}'."
            .format(item, self.left.gene, self.right.gene))
    elif isinstance(item, bool):
        return item
    else:
        raise SwitchError("position.item", item, instance=True)
def __make_outgroup_parents_roots(nrfg: MGraph) -> None:
    """
    Finally, nodes explicitly flagged as roots or outgroups should be made so.

    We don't "reclade" the nodes here (i.e. (A,B,C) becomes A->B and A->C and
    not A,(B,C) as earlier), because the intermediate clades should already be
    present.

    :param nrfg: Graph to fix up, modified in place.
    :raises SwitchError: If a gene has a position other than none or outgroup.
    """

    def not_fusion_like(x):
        return not lego_graph.is_fusion_like(x)

    LOG("Fixing outgroups...")

    for node in nrfg:
        # Only genes with an explicit position are of interest
        if not isinstance(node.data, Gene):
            continue

        if node.data.position == EPosition.NONE:
            continue

        if node.data.position != EPosition.OUTGROUP:
            raise SwitchError("node.data.position", node.data.position)

        # We call "make root" and not "make outgroup" because the network
        # should already have the right topology, we just need to adjust the
        # directions
        LOG("Make outgroup: {}".format(node))
        LOG("--i.e. make root: {}".format(node.relation))
        node.relation.make_root(node_filter=not_fusion_like,
                                ignore_cycles=True)
def tree_maximum_likelihood(model: str, alignment: str) -> str:
    """
    Uses Raxml to generate the tree using maximum likelihood.

    The model used is GTRCAT for model `n`, and PROTGAMMAWAG for model `p`.
    The alignment is written to `in_file.fasta`, converted to `in_file.phy`,
    and the result is read back from `RAxML_bestTree.t`.

    :param model:     A string `n` or `p` denoting the site type.
    :param alignment: Alignment in FASTA format.
    :return:          Contents of the best-tree file written by Raxml.
    :raises SwitchError: If `model` is neither `n` nor `p`.
    """
    file_helper.write_all_text("in_file.fasta", alignment)
    bio_helper.convert_file("in_file.fasta", "in_file.phy", "fasta", "phylip")

    if model == "n":
        method = "GTRCAT"
    elif model == "p":
        method = "PROTGAMMAWAG"
    else:
        raise SwitchError("model", model)

    command = [
        "raxml",
        "-T", "4",
        "-m", method,
        "-p", "1",
        "-s", "in_file.phy",
        "-#", "20",
        "-n", "t",
    ]
    groot.run_subprocess(command)

    return file_helper.read_all_text("RAxML_bestTree.t",
                                     "the expected output from raxml")
def by_url(self, link: str, validate=False) -> bool:
    """
    Handles a link of the form `key:value` (or just `key`).

    Recognised keys are `action`, which looks up a workflow handler by `value`
    and executes it, and `file_save`, `file_load` and `file_sample`, which run
    the Groot command of the same name with `value` as its argument.

    :param link:     Link to handle.
    :param validate: When set nothing is executed; the result states whether
                     the link could be handled.
    :return:         When validating, `True` if the link is recognised and
                     `False` if not. Otherwise nothing is returned.
    :raises KeyError:    Not validating, and the `action` is unknown.
    :raises SwitchError: Not validating, and the key is unknown.
    """
    key, separator, value = link.partition(":")

    if not separator:
        # No colon: the whole link is the key
        value = None

    if key == "action":
        try:
            visualiser = gui_workflow.handlers().find_by_key(value)
        except KeyError:
            if validate:
                return False

            raise

        if validate:
            return True

        visualiser.execute(self.window, EIntent.DIRECT, None)
    elif key in ("file_save", "file_load", "file_sample"):
        if validate:
            return True

        # These keys are also the names of the commands to run
        self.run(getattr(groot, key), value)
    elif validate:
        return False
    else:
        raise SwitchError("link", link)
def to_extension(self):
    """
    Obtains the file extension for this format.

    :return: The extension including its leading dot, or an empty string for
             `ETE_GUI`.
    :raises SwitchError: If this format has no entry in the table.
    """
    extensions = (
        (EFormat.NEWICK, ".nwk"),
        (EFormat.ASCII, ".txt"),
        (EFormat.ETE_ASCII, ".txt"),
        (EFormat.ETE_GUI, ""),
        (EFormat.CSV, ".csv"),
        (EFormat.TSV, ".tsv"),
        (EFormat.VISJS, ".html"),
        (EFormat.CYJS, ".html"),
        (EFormat.SVG, ".html"),
        (EFormat.COMPACT, ".edg"),
    )

    for candidate, extension in extensions:
        if self == candidate:
            return extension

    raise SwitchError("self", self)
def import_file(self):
    """
    Prompts the user for a file and imports it.

    The import command is chosen by the filter the user selected in the
    dialogue:

    * "Valid files":  `groot.import_file`
    * "FASTA files":  `groot.import_genes`
    * "BLAST output": `groot.import_similarities`

    Nothing happens if the dialogue returns no file name.

    :raises SwitchError: If the selected filter has no command assigned.
    """
    filters = ("Valid files (*.fasta *.fa *.faa *.blast *.tsv *.composites *.txt *.comp)",
               "FASTA files (*.fasta *.fa *.faa)",
               "BLAST output (*.blast *.tsv)")

    file_name, selected_filter = QFileDialog.getOpenFileName(
        self.window,
        "Select file",
        None,
        ";;".join(filters),
        options=QFileDialog.DontUseNativeDialog)

    if not file_name:
        return

    filter_index = filters.index(selected_filter)

    # Indices follow the order of `filters`. Index 0 used to be tested twice,
    # which made `import_genes` unreachable, sent the FASTA filter to
    # `import_similarities` and left the BLAST filter raising an error.
    if filter_index == 0:
        self.run(groot.import_file, file_name)
    elif filter_index == 1:
        self.run(groot.import_genes, file_name)
    elif filter_index == 2:
        self.run(groot.import_similarities, file_name)
    else:
        raise SwitchError("filter_index", filter_index)
def create(format_str: Optional[str], graph: INamedGraph, model: Model,
           format: EFormat) -> str:
    """
    Converts a graph or set of graphs to its string representation.

    :param format_str: String describing how the nodes are formatted.
                       See `specify_graph_help` for details.
    :param graph:      Graph to output
    :param model:      Source model
    :param format:     Output format
    :return:           The string representing the graph(s). This is empty
                       for `ETE_GUI`, which shows the tree instead.
    :raises SwitchError: If `format` is not a recognised format.
    """

    def node_style(node: MNode) -> NodeStyle:
        # Fusions are red stars, sequences are boxes with the default
        # background, and everything else is a white ellipse.
        if lego_graph.is_fusion_like(node):
            background, shape = "#FF0000", EShape.STAR
        elif lego_graph.is_sequence_node(node):
            background, shape = None, EShape.BOX
        else:
            background, shape = "#FFFFFF", EShape.ELLIPSE

        return NodeStyle.default(node=node,
                                 format_str=format_str,
                                 background=background,
                                 shape=shape)

    if format == EFormat.VISJS:
        pieces = [exporting.export_vis_js(graph.graph,
                                          fnode=node_style,
                                          title=graph.name)]
    elif format == EFormat.COMPACT:
        pieces = [exporting.export_compact(graph.graph, fnode=node_style)]
    elif format == EFormat.CYJS:
        pieces = [exporting.export_cytoscape_js(graph.graph,
                                                fnode=node_style,
                                                title=graph.name)]
    elif format == EFormat.ASCII:
        pieces = [exporting.export_ascii(graph.graph, fnode=node_style)]
    elif format == EFormat.ETE_ASCII:
        pieces = [__ete_tree_to_ascii(graph.graph, model, fnode=node_style)]
    elif format == EFormat.NEWICK:
        pieces = [exporting.export_newick(graph.graph, fnode=node_style)]
    elif format == EFormat.ETE_GUI:
        # Shown on screen, so there is no text to return
        __ete_show_tree(graph.graph, model, fnode=node_style)
        pieces = []
    elif format == EFormat.CSV:
        pieces = [exporting.export_edgelist(graph.graph, fnode=node_style)]
    elif format == EFormat.TSV:
        pieces = [exporting.export_edgelist(graph.graph,
                                            fnode=node_style,
                                            delimiter="\t")]
    elif format == EFormat.SVG:
        pieces = [exporting.export_svg(graph.graph,
                                       fnode=node_style,
                                       title=graph.name,
                                       html=True)]
    else:
        raise SwitchError("format", format)

    return "\n".join(pieces)
def create_test(types: str = "1", no_blast: bool = False, size: int = 2, run: bool = True) -> groot.EChanges:
    """
    Creates a GROOT unit test in the sample data folder.

    * GROOT should be installed in developer mode, otherwise there may be no write access to the sample data folder.
    * Requires the `faketree` library.

    One test is created for each character of `types`. For each, a random
    model is generated with `faketree`, and the trees, the sequences, the
    BLAST output (unless disabled) and a `groot.ini` describing the test are
    written into a new test directory, which becomes the working directory.

    :param run:      Run test after creating it.
    :param no_blast: Perform no BLAST
    :param size:     Clade size
    :param types:    Type(s) of test(s) to create, one character per test.
                     Recognised characters are `0`, `1`, `4`, `5` and `7`.
    :return:         Always `groot.EChanges.INFORMATION`. The paths of the
                     created directories are printed and collected locally,
                     but they are not returned.
    :raises ValueError:  If `types` is empty.
    :raises SwitchError: If `types` contains an unrecognised character.
    """
    # noinspection PyPackageRequirements
    import faketree as FAKE
    print("START")
    r = []
    args_random_tree = {
        "suffix": "1",
        "delimiter": "_",
        "size": size,
        "outgroup": True
    }
    # args_fn = "-d 0.2"
    # An empty string unpacks to no extra arguments in the `mutate_fn` calls
    mutate_args = ""

    if not types:
        raise ValueError("Missing :param:`types`.")

    for index, name in enumerate(types):
        tdir = TestDirectory(None)

        print("Test {} of {}".format(index + 1, len(types)))

        try:
            FAKE.new_tree()
            # The SeqGen mutator has a weird problem where, given a root `(X,O)R` in which `R`
            # is set as a result of an earlier tree, `O` will be more similar to the leaves of
            # that earlier tree than to the leaves in X. For this reason we use a simple random
            # model and not SeqGen.
            mutate_fn = FAKE.make_random

            if name == "0":
                # 0 no fusions
                outgroups = FAKE.create_random_tree(["A"], **args_random_tree)
                a, = (x.parent for x in outgroups)
                mutate_fn([a], *mutate_args)
            elif name == "1":
                # 1 fusion point; 3 genes; 2 origins
                #
                #
                # Should be an acyclic 2-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /
                # B
                #

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C"], **args_random_tree)
                a, b, c = (x.parent for x in outgroups)
                __remove_outgroups(outgroups, 2)

                mutate_fn([a, b, c], *mutate_args)

                # Fusion point
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.make_composite_node([c])
            elif name == "4":
                # 2 fusion points; 4 genes; 2 origins
                # (Possibly the most difficult scenario because the result is cyclic)
                #
                # Should be a cyclic 2-rooted graph:
                #
                #
                # A--------
                #  \       \
                #   -->C    -->D
                #  /       /
                # B--------
                #

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C", "D"], **args_random_tree)
                a, b, c, d = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d], *mutate_args)
                __remove_outgroups(outgroups, 2, 3)

                # Fusion points
                fa1 = FAKE.get_random_node(a, avoid=outgroups)
                fb1 = FAKE.get_random_node(b, avoid=outgroups)
                fa2 = FAKE.get_random_node(a, avoid=outgroups)
                fb2 = FAKE.get_random_node(b, avoid=outgroups)
                FAKE.create_branch([fa1, fb1], c)
                FAKE.create_branch([fa2, fb2], d)
                FAKE.make_composite_node([c, d])
            elif name == "5":
                # 2 fusion points; 5 genes; 3 origins
                #
                #
                # Should be an acyclic 3-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /    \
                # B      -->E
                #       /
                #      D

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C", "D", "E"], **args_random_tree)
                a, b, c, d, e = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d, e], *mutate_args)
                __remove_outgroups(outgroups, 2, 4)

                # Fusion points
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                fc = FAKE.get_random_node(c, avoid=outgroups)
                fd = FAKE.get_random_node(d, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.create_branch([fc, fd], e)
                FAKE.make_composite_node([c, e])
            elif name == "7":
                # 3 fusion points; 7 genes; 4 origins
                #
                # Should be an acyclic 4-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /    \
                # B      \
                #         -->G
                # D      /
                #  \    /
                #   -->F
                #  /
                # E
                #

                # Trees
                outgroups = FAKE.create_random_tree(
                    ["A", "B", "C", "D", "E", "F", "G"], **args_random_tree)
                a, b, c, d, e, f, g = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d, e, f, g], *mutate_args)
                __remove_outgroups(outgroups, 2, 5, 6)

                # Fusion points
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                fc = FAKE.get_random_node(c, avoid=outgroups)
                fd = FAKE.get_random_node(d, avoid=outgroups)
                fe = FAKE.get_random_node(e, avoid=outgroups)
                ff = FAKE.get_random_node(f, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.create_branch([fd, fe], f)
                FAKE.create_branch([fc, ff], g)
                FAKE.make_composite_node([c, f, g])
            else:
                raise SwitchError("name", name)

            FAKE.generate()

            # Everything below is written relative to the test's folder
            file_helper.create_directory(tdir.t_folder)
            os.chdir(tdir.t_folder)

            FAKE.print_trees(format=mgraph.EGraphFormat.ASCII, file="tree.txt")
            FAKE.print_trees(format=mgraph.EGraphFormat.TSV, file="tree.tsv", name=True, mutator=False, sequence=False, length=False)
            FAKE.print_fasta(which=FAKE.ESubset.ALL, file="all.fasta.hidden")
            FAKE.print_fasta(which=FAKE.ESubset.LEAVES, file="leaves.fasta")

            if not no_blast:
                blast = []
                # BLAST the leaf sequences against themselves, collecting the output
                # noinspection SpellCheckingInspection
                intermake.subprocess_helper.run_subprocess(
                    [
                        "blastp", "-subject", "leaves.fasta", "-query", "leaves.fasta",
                        "-outfmt", "6"
                    ],
                    collect_stdout=blast.append)

                file_helper.write_all_text("leaves.blast", blast)

            guid = uuid.uuid4()
            # Only outgroups whose parent is a root are recorded in the ini file
            outgroups_str = ",".join(x.data.name for x in outgroups
                                     if x.parent.is_root)
            file_helper.write_all_text("groot.ini", [
                "[groot_wizard]", "tolerance=50",
                "outgroups={}".format(outgroups_str), "", "[groot_test]",
                "name={}".format(name), "size={}".format(size),
                "guid={}".format(guid)
            ])

            path_ = os.path.abspath(".")
            print("FINAL PATH: " + path_)
            r.append(path_)
        except FAKE.RandomChoiceError as ex:
            # Gives up on the first failure; any remaining types are not created
            print("FAILURE {}".format(ex))
            return groot.EChanges.INFORMATION

        # NOTE(review): the nesting of this statement could not be recovered
        # with certainty; it is assumed to run once per created test - confirm.
        if run:
            run_test(tdir.t_name)

    return groot.EChanges.INFORMATION
def __create_supertree(algorithm: supertree_algorithms.Algorithm,
                       subset: Subset) -> MGraph:
    """
    Generates a supertree from a set of trees.

    :param algorithm: Algorithm to use. See `algorithm_help`.
    :param subset:    Subset of genes from which we generate the consensus from
    :return:          The consensus graph (this may be a reference to one of
                      the input `graphs`)
    :raises SwitchError: If the algorithm returns neither a graph nor a string.
    :raises ValueError:  If the supertree and the subset do not hold the same
                         elements.
    """

    def describe(x):
        return "{}:{}".format(type(x).__name__, x)

    # We allow two kinds of algorithm
    # - Python algorithms, which take a `Subset` instance
    # - External algorithms, which take a newick-formatted string
    inspector = FunctionInspector(algorithm.function)

    if inspector.args[0].annotation == Subset:
        algorithm_input = subset
    else:
        # One newick string for each possible tree in the subset
        newick_lines = __graphs_to_newick(subset.pregraphs)

        # Nothing to combine: the first pregraph is already the answer
        if __is_redundant(subset.pregraphs, newick_lines):
            return subset.pregraphs[0].graph

        algorithm_input = "\n".join(newick_lines) + "\n"

    # Run the algorithm!
    output = external_runner.run_in_temporary(algorithm, algorithm_input)

    # We allow two types of result
    # - `MGraph` objects
    # - `str` objects, which denote a newick-formatted string
    if isinstance(output, MGraph):
        result = output
    elif isinstance(output, str):
        # We don't reclade the newick, it's pointless at this stage and we
        # remove redundancies during the NRFG_CLEAN stage anyway
        result = lego_graph.import_newick(output, subset.model, reclade=False)
    else:
        raise SwitchError("create_supertree::output", output, instance=True)

    # Assert the result
    # - All elements of the subset are in the supertree
    for element in subset.contents:
        if isinstance(element, Gene):
            present = element in result.nodes.data
        elif isinstance(element, Point):
            present = element.formation in result.nodes.data
        else:
            present = False

        if present:
            continue

        raise ValueError(
            _MSG1.format(
                element,
                string_helper.format_array(result.nodes.data,
                                           format=describe,
                                           sort=True),
                type(element).__name__))

    # - All (non-clade) elements of the supertree are in the subset
    for node in result.nodes:
        if lego_graph.is_clade(node):
            continue

        expected = (
            (lego_graph.is_formation(node)
             and any(x.formation is node.data
                     for x in subset.contents
                     if isinstance(x, Point)))
            or (lego_graph.is_sequence_node(node)
                and node.data in subset.contents))

        if expected:
            continue

        raise ValueError(
            _MSG2.format(
                node.data,
                string_helper.format_array(subset.contents,
                                           format=describe,
                                           sort=True),
                type(node.data).__name__))

    return result
def __enumerate_2genes(calc_seq: Set[object], comparison: QuartetComparison,
                       html: List[str], n: int, ini_data: TIniData) -> None:
    """
    Writes a breakdown of the quartet comparison for every combination of `n`
    items of `calc_seq`.

    Each combination gets a table row giving the percentage of the quartets
    containing it that were hits, misses, missing in the left or missing in
    the right. Combinations whose quartets are all hits are skipped. The same
    figures are stored in `ini_data` under `n_quartets_<n>`.

    Nothing at all is written if there are more than 100 combinations.

    :param calc_seq:   Items to draw the combinations from.
    :param comparison: Quartet comparison to break down.
    :param html:       HTML output, appended to in place.
    :param n:          Number of items in each combination.
    :param ini_data:   INI output, a new section is added in place.
    :raises SwitchError: If a quartet is in none of the comparison's sets.
    """
    if array_helper.get_num_combinations(calc_seq, n) > 100:
        return

    html.append('<table border=1 style="border-collapse: collapse;">')
    html.append(
        "<tr><td colspan=5><b>BREAKDOWN FOR COMBINATIONS OF {}</b></td></tr>".
        format(n))
    html.append(
        "<tr><td>total</td><td>hit</td><td>miss</td><td>missing in left</td><td>missing in right</td></tr>"
    )

    ini_sect: TIniSection = {}
    ini_data["n_quartets_{}".format(n)] = ini_sect

    for comb in sorted(itertools.combinations(calc_seq, n),
                       key=cast(Callable, str)):
        n_tot = []
        n_hit = []
        n_mis = []
        n_mil = []
        n_mir = []

        for quartet in comparison.all:
            assert isinstance(quartet, AbstractQuartet)

            # Only quartets holding every item of the combination count
            quartet_key = quartet.get_unsorted_key()

            if all(x in quartet_key for x in comb):
                n_tot.append(quartet)

                if quartet in comparison.match:
                    n_hit.append(quartet)
                elif quartet in comparison.mismatch:
                    n_mis.append(quartet)
                elif quartet in comparison.missing_in_left:
                    n_mil.append(quartet)
                elif quartet in comparison.missing_in_right:
                    n_mir.append(quartet)
                else:
                    raise SwitchError("quartet(in)", quartet)

        # Nothing to report when everything was a hit
        if not n_mis and not n_mil and not n_mir:
            continue

        html.append("<tr>")
        i = []

        # COMBINATION NAME
        name = string_helper.format_array(comb)
        html.append("<td>{}</td>".format(name))

        # HIT, MISS, MISSING IN LEFT, MISSING IN RIGHT
        # Each cell is blank when its own list is empty. The "missing in
        # right" cell used to test the "missing in left" list instead.
        for quartets in (n_hit, n_mis, n_mil, n_mir):
            if quartets:
                txt = string_helper.percent(len(quartets), len(n_tot))
            else:
                txt = ""

            html.append("<td>{}</td>".format(txt))
            i.append(txt)

        html.append("</tr>")
        ini_sect[name] = "; ".join(str(x) for x in i)

        # Write out full quartets (if < 10)
        # NOTE(review): this also requires fewer hits than misses, which the
        # comment above does not mention - confirm that is intended.
        i = []

        if len(n_hit) < len(n_mis) < 10:
            for quartet in n_mis:
                html.append("<tr>")
                html.append("<td></td>")
                html.append("<td colspan=4>{}</td>".format(quartet))
                html.append("</tr>")
                i.append(quartet)

        ini_sect[name + "_list"] = "; ".join(str(x) for x in i)

    html.append("</table><br/>")
def create_trees(algorithm: tree_algorithms.Algorithm,
                 components: Optional[List[Component]] = None) -> EChanges:
    """
    Creates a tree from the component.

    Requisites: `create_alignments`

    Components with three genes or fewer are given a fixed topology rather
    than being passed to the algorithm.

    :param algorithm:  Algorithm to use. See `algorithm_help`.
    :param components: Component, or `None` for all.
    :returns:          `EChanges.COMP_DATA`. The tree itself is set as the
                       component's `tree` field.
    :raises SwitchError: If the model's site type is not recognised, or if a
                         component has no genes.
    """
    # Get the current model
    model = global_view.current_model()

    # Get the site type
    if model.site_type == ESiteType.DNA:
        site_type = "n"
    elif model.site_type == ESiteType.PROTEIN:
        site_type = "p"
    else:
        raise SwitchError("site_type", model.site_type)

    # Get the components
    components = cli_view_utils.get_component_list(components)

    # Assert that we are in a position to create the trees
    model.get_status(constants.STAGES.TREES_8).assert_create()
    assert all(
        x.alignment is not None for x in components
    ), "Cannot generate the tree because the alignment has not yet been specified."
    assert all(
        x.tree is None for x in components
    ), "Cannot generate the tree because the tree has already been generated."

    # Newick templates for the edge cases of a tree of three or less
    small_trees = {
        1: "({});",
        2: "({},{});",
        3: "(({},{}),{});",
    }

    # Iterate the components
    for component in pr.pr_iterate(components, "Generating trees"):
        num_genes = len(component.minor_genes)

        if num_genes <= 3:
            template = small_trees.get(num_genes)

            if template is None:
                raise SwitchError("num_genes", num_genes)

            newick = template.format(*(x.legacy_accession
                                       for x in component.minor_genes))
        else:
            # Run the algorithm normally
            newick = external_runner.run_in_temporary(algorithm, site_type,
                                                      component.alignment)

        # Set the tree on the component
        set_tree(component, newick)

    # Show the completion message
    after = sum(x.tree is not None for x in model.components)
    pr.printx(
        "<verbose>{} trees generated. {} of {} components have a tree.</verbose>"
        .format(len(components), after, len(model.components)))

    return EChanges.COMP_DATA