def file_save(
        file_name: isOptional[isFilename[EFileMode.WRITE, constants.EXT_MODEL]] = None
) -> EChanges:
    """
    Saves the model

    :param file_name: Filename. File to save to.
                      Either specify a complete path, or the name of the file in the `sessions` folder.
                      If not specified the current filename is used.
    :return: `EChanges.FILE_NAME`.
    :raises ValueError: If no filename is given and the model has no filename of its own.
    """
    model = global_view.current_model()

    if file_name:
        file_name = __fix_path(file_name)
    else:
        file_name = model.file_name

    if not file_name:
        raise ValueError(
            "Cannot save because a filename has not been specified.")

    config.remember_file(file_name)

    # Raise the recursion limit for the save (presumably because serialising
    # the model recurses deeply), but never lower a limit that something else
    # has already set higher.
    sys.setrecursionlimit(max(sys.getrecursionlimit(), 10000))

    with pr.pr_action("Saving file to «{}»".format(file_name)):
        # The name is assigned before saving so the saved object carries it.
        model.file_name = file_name
        io_helper.save_binary(file_name, model)

    pr.printx("<verbose>Saved model to <file>{}</file></verbose>", file_name)
    return EChanges.FILE_NAME
def create_fusions() -> EChanges:
    """
    Finds the fusion points in the model.

    Every event yielded by `__find_fusion_events` has its `points` list reset
    and is then passed, together with each component of the model, to
    `__find_fusion_points`. The events are stored as the model's
    `FusionCollection`.

    Requisites: `create_trees`

    :return: `EChanges.MODEL_DATA`.
    """
    model = global_view.current_model()
    model.get_status(constants.STAGES.FUSIONS_9).assert_create()

    events: List[Fusion] = []

    for fusion in __find_fusion_events(model):
        __LOG("Processing fusion event: {}", fusion)
        fusion.points = []

        for comp in model.components:
            __find_fusion_points(fusion, comp)

        events.append(fusion)

    model.fusions = FusionCollection(events)

    total = len(model.fusions)
    noun = "fusions" if total != 1 else "fusion"
    pr.printx("<verbose>{} {} detected</verbose>".format(total, noun))

    return EChanges.MODEL_DATA
def file_load(file_name: isFilename[EFileMode.READ]) -> EChanges:
    """
    Loads the model from a file

    :param file_name: File to load.
                      If this names an existing file it is used directly (made absolute),
                      otherwise the name is resolved through `__fix_path`.
    :return: `EChanges.MODEL_OBJECT`.
    :raises ValueError: If the file cannot be loaded as a `Model`.
    """
    if path.isfile(file_name):
        resolved = path.abspath(file_name)
    else:
        resolved = __fix_path(file_name)

    try:
        model: Model = io_helper.load_binary(resolved, type_=Model)
    except Exception as ex:
        message = (
            "Failed to load the model «{}». Either this is not a Groot model or this model was saved using a different version of Groot."
            .format(resolved))
        raise ValueError(message) from ex

    # Record where the model came from, then make it the active model.
    model.file_name = resolved
    global_view.set_model(model)
    config.remember_file(resolved)

    pr.printx("<verbose>Loaded model: {}</verbose>".format(resolved))
    return EChanges.MODEL_OBJECT
def drop_trees(components: Optional[List[Component]] = None) -> EChanges:
    """
    Removes component tree(s).

    :param components: Component(s), or `None` for all.
    :return: `EChanges.COMP_DATA`.
    :raises ValueError: If the model of any selected component reports a
                        fusion status; in that case no tree is modified.
    """
    components = cli_view_utils.get_component_list(components)

    # Validate every component before touching any of them, so that a refusal
    # cannot leave only some of the requested trees dropped.
    for component in components:
        if component.model.get_status(constants.STAGES.FUSIONS_9):
            raise ValueError(
                "Refusing to drop the tree because fusions have already been recorded. Did you mean to drop the fusions first?"
            )

    count = 0

    for component in components:
        if component.tree is not None:
            component.tree = None
            component.tree_unrooted = None
            component.tree_newick = None
            count += 1

    pr.printx(
        "<verbose>{} trees removed across {} components.</verbose>".format(
            count, len(components)))

    return EChanges.COMP_DATA
def create_alignments(algorithm: alignment_algorithms.Algorithm,
                      component: Optional[List[Component]] = None) -> EChanges:
    """
    Aligns the component.
    If no component is specified, aligns all components.

    Requisites: `create_minor` and FASTA data.

    :param algorithm: Algorithm to use. See `algorithm_help`.
    :param component: Component to align, or `None` for all.
    :return: `EChanges.COMP_DATA`.
    :raises ValueError: If any gene in the model has no site data.
    """
    model = global_view.current_model()

    if any(not gene.site_array for gene in model.genes):
        raise ValueError(
            "Refusing to make alignments because there is no site data. Did you mean to load the site data (FASTA) first?"
        )

    targets = cli_view_utils.get_component_list(component)

    def count_aligned() -> int:
        # Number of components in the model that currently hold an alignment.
        return sum(comp.alignment is not None for comp in model.components)

    before = count_aligned()

    for target in pr.pr_iterate(targets, "Aligning"):
        unaligned = target.get_unaligned_legacy_fasta()
        target.alignment = external_runner.run_in_temporary(
            algorithm, target.model, unaligned)

    after = count_aligned()

    pr.printx(
        "<verbose>{} components aligned. {} of {} components have an alignment ({}).</verbose>"
        .format(len(targets), after, len(model.components),
                string_helper.as_delta(after - before)))

    return EChanges.COMP_DATA
def file_new() -> EChanges:
    """
    Starts a new model

    Asks `global_view` for a new model and reports that it was instantiated.

    :return: `EChanges.MODEL_OBJECT`.
    """
    global_view.new_model()

    notice = "<verbose>New model instantiated.</verbose>"
    pr.printx(notice)

    return EChanges.MODEL_OBJECT
def drop_alignment(component: Optional[List[Component]] = None) -> EChanges:
    """
    Removes the alignment data from the component.
    If no component is specified, drops all alignments.

    :param component: Component to drop the alignment for, or `None` for all.
    :return: `EChanges.COMP_DATA`.
    """
    to_do = cli_view_utils.get_component_list(component)
    count = 0

    for component_ in to_do:
        # Only count components that actually had an alignment, so the
        # "removed" figure is meaningful (this mirrors `drop_trees`).
        if component_.alignment is not None:
            component_.alignment = None
            count += 1

    pr.printx("<verbose>{} alignments removed across {} components.</verbose>".
              format(count, len(to_do)))

    return EChanges.COMP_DATA
def import_directory(directory: str,
                     query: bool = False,
                     filter: EImportFilter = (EImportFilter.DATA | EImportFilter.SCRIPT),
                     reset: bool = True) -> EChanges:
    """
    Imports all importable files from a specified directory

    Data files are offered to `import_file` first, then scripts, so that any
    script runs after the data has been imported.

    :param query:     Query the directory (don't import anything).
    :param reset:     Whether to clear data from the model first.
    :param directory: Directory to import
    :param filter:    Filter on import
    :return: `EChanges.NONE` when querying, `EChanges.MODEL_OBJECT` when the
             model was reset, otherwise `EChanges.MODEL_ENTITIES`.
    """
    if reset:
        if not query:
            workflow.s010_file.file_new()
        else:
            pr.printx("Importing will start a new model.")

    contents = file_helper.list_dir(directory)

    # NOTE(review): `import_file` takes the file name as its first argument and
    # has no `model` parameter; passing the model positionally collided with
    # the `skip_bad_extensions` keyword and raised `TypeError`.
    if filter.DATA:
        for file_name in contents:
            import_file(file_name,
                        skip_bad_extensions=True,
                        filter=EImportFilter.DATA,
                        query=query)

    if filter.SCRIPT:
        for file_name in contents:
            import_file(file_name,
                        skip_bad_extensions=True,
                        filter=EImportFilter.SCRIPT,
                        query=query)

    if query:
        return EChanges.NONE

    if reset:
        return EChanges.MODEL_OBJECT

    return EChanges.MODEL_ENTITIES
def print_status() -> EChanges:
    """
    Prints the status of the model.

    One table row is emitted per entry of `STAGES`: the stage index and name,
    followed by the stage's status. Stages that are not complete and are
    flagged `is_hot` also get a hint naming the `create_` command to run.

    :return: `EChanges.INFORMATION`.
    """
    model = global_view.current_model()

    with pr.pr_section(model.name):
        rows = ["<table>"]

        for stage in STAGES:
            status = model.get_status(stage)
            label = "{}. {}:".format(stage.index, stage.name).ljust(20)

            rows.append("<tr>")
            rows.append("<td>{}</td>".format(label))

            if status.is_complete:
                rows.append("<td><positive>{}</positive></td>".format(status))
                rows.append("</tr>")
                continue

            if status.is_hot:
                hint = " - Consider running <command>create_{}</command>".format(
                    stage.name.lower())
            else:
                hint = ""

            if status.is_partial:
                cell = "<td><neutral>{}</neutral>{}"
            else:
                cell = "<td><negative>{}</negative>{}"

            rows.append(cell.format(status, hint))
            rows.append("</td>")
            rows.append("</tr>")

        rows.append("</table>")
        pr.printx("".join(rows))

    return EChanges.INFORMATION
def drop_fusions() -> EChanges:
    """
    Removes all fusion points from the model.

    :return: `EChanges.COMP_DATA`.
    """
    model = global_view.current_model()
    previous = len(model.fusions)
    model.get_status(constants.STAGES.FUSIONS_9).assert_drop()

    # Tally the points before the collection is cleared; previously the
    # counter was never updated, so the message always reported zero points.
    # NOTE(review): assumes the fusion collection is iterable over its events
    # and that each event exposes `points` (as set by `create_fusions`).
    removed_count = sum(len(fusion.points or ()) for fusion in model.fusions)

    model.fusions.clear()

    # Reset trees
    for component in model.components:
        s080_tree.set_tree(component, component.tree)

    pr.printx(
        "<verbose>Removed {} fusion events and {} fusion points from the model.</verbose>"
        .format(previous, removed_count))

    return EChanges.COMP_DATA
def create_pregraphs():
    """
    Creates the pregraphs.

    Each subset of the model is passed to `__subset_to_possible_graphs` and
    then to `__assert_recreatable`.

    Requisites: `create_subsets`

    :return: `EChanges.MODEL_DATA`, or `None` when the stage already reports
             complete and the model has no subsets.
    """
    model = global_view.current_model()

    # Special case - if no subsets just stop now
    nothing_to_do = (model.get_status(STAGES.PREGRAPHS_13).is_complete
                     and len(model.subsets) == 0)

    if nothing_to_do:
        pr.printx("<verbose>No subsets - nothing to do.</verbose>")
        return

    model.get_status(STAGES.PREGRAPHS_13).assert_create()

    for current in model.subsets:
        __subset_to_possible_graphs(current)
        __assert_recreatable(current)

    return EChanges.MODEL_DATA
def drop_major(components: Optional[List[Component]] = None) -> EChanges:
    """
    Drops components from the model.
    The components are removed from :ivar:`model.components`.

    :param components: Components to drop. If `None` (or empty) then all components are dropped.
    :return: `EChanges.COMPONENTS`.
    """
    model = global_view.current_model()
    model.get_status(STAGES.MAJOR_4).assert_drop()

    count_before = len(model.components)

    if components:
        for doomed in components:
            model.components.remove(doomed)
    else:
        model.components.clear()

    dropped = count_before - len(model.components)
    pr.printx("<verbose>{} components dropped</verbose>".format(dropped))

    return EChanges.COMPONENTS
def import_genes(file_name: str) -> EChanges:
    """
    Imports a FASTA file into your model.

    Whether the model already has data (`model._has_data()`) is passed to
    `_make_gene` as `obtain_only`; records for which `_make_gene` returns
    nothing are skipped.

    :param file_name: File to import
    :return: `EChanges.MODEL_ENTITIES`.
    """
    model = global_view.current_model()
    model.get_status(STAGES.SEQUENCES_2).assert_import()

    model.user_comments.append("IMPORT_FASTA \"{}\"".format(file_name))

    with LOG("IMPORT FASTA FROM '{}'".format(file_name)):
        obtain_only = model._has_data()
        num_updates = 0

        # Consecutive skipped records; a log line is written each time the
        # run reaches the threshold, and the threshold then doubles.
        misses = 0
        miss_threshold = 10000

        for name, sequence_data in bio_helper.parse_fasta(file=file_name):
            gene = _make_gene(model, str(name), obtain_only,
                              len(sequence_data), True)

            if not gene:
                misses += 1

                if misses == miss_threshold:
                    LOG("THIS FASTA IS BORING...")
                    miss_threshold *= 2
                    misses = 0

                continue

            LOG("FASTA UPDATES {} WITH ARRAY OF LENGTH {}".format(
                gene, len(sequence_data)))
            num_updates += 1
            gene.site_array = str(sequence_data)
            misses = 0

    pr.printx("<verbose>Imported Fasta from <file>{}</file>.</verbose>",
              file_name)

    return EChanges.MODEL_ENTITIES
def file_sample(name: Optional[str] = None,
                query: bool = False,
                load: bool = False) -> EChanges:
    """
    Lists the available samples, or loads the specified sample.

    :param name:  Name of sample.
    :param query: When set the sample is viewed but not loaded.
    :param load:  When set data is imported but any scripts (if present) are not run.
    :return: The result of `wizard.import_directory` when a sample is named,
             otherwise `EChanges.NONE`.
    :raises ValueError: If `name` is not a directory in the sample data folder.
    """
    if name:
        file_name = path.join(sample_data.get_sample_data_folder(), name)

        if not path.isdir(file_name):
            raise ValueError(
                "'{}' is not a valid sample directory.".format(name))

        if not query:
            pr.printx("<verbose>Loading sample dataset «{}».</verbose>".format(
                file_name))
        else:
            print("Sample data: «{}».".format(file_name))

        return wizard.import_directory(
            file_name,
            filter=(wizard.EImportFilter.DATA | wizard.EImportFilter.SCRIPT)
            if not load else wizard.EImportFilter.DATA,
            query=query)

    # A `for ... else` without `break` always runs its `else`, so the
    # "no samples" hint used to be printed even after listing samples.
    # Materialise the samples and branch on emptiness instead.
    samples = list(sample_data.get_samples())

    if samples:
        for sample_dir in samples:
            print(file_helper.get_filename(sample_dir))
    else:
        print(
            "No samples available. Please download and add sample data to `{}`."
            .format(sample_data.get_sample_data_folder()))

    return EChanges.NONE
def import_gene_names(file: _T, header: bool = False):
    """
    Loads in the displayed gene names from a file.

    Each row is split once on the first tab or, if it has no tab, on the
    first comma. Rows with neither separator, or with an empty first column,
    are ignored. Accessions not found in the model produce a `UserWarning`.

    :param header: Ignore first row?
    :param file:   Path to a CSV or TSV file with two columns: accessions, display name.
    """
    model = global_view.current_model()
    renamed = 0

    with open(file) as handle:
        if header:
            next(handle)

        for line in handle:
            # Tab takes precedence over comma.
            for separator in ("\t", ","):
                if separator in line:
                    accession, name = line.split(separator, 1)
                    break
            else:
                continue

            if not accession:
                continue

            accession = accession.strip()
            name = name.strip()
            gene = model.genes.get(accession)

            if gene is None:
                warnings.warn("No such gene: {}".format(accession),
                              UserWarning)
                continue

            gene.display_name = name
            renamed += 1

    pr.printx("<verbose>{} genes renamed</verbose>".format(renamed))
def create_domains(algorithm: domain_algorithms.Algorithm):
    """
    Creates the domains.
    Existing domains are always replaced.

    The algorithm is called once per gene and every domain it yields is added
    to `model.user_domains`.

    :param algorithm: Mode of domain generation. See `algorithm_help`.
    :return: `EChanges.DOMAINS`.
    :raises ValueError: If the model has no genes.
    """
    model = global_view.current_model()

    if not model.genes:
        raise ValueError(
            "Cannot generate domains because there are no sequences.")

    user_domains = model.user_domains
    user_domains.clear()

    # Lazily flatten "genes -> domains" so each domain is added as soon as the
    # algorithm produces it.
    generated = (domain for gene in model.genes for domain in algorithm(gene))

    for domain in generated:
        user_domains.add(domain)

    summary = "<verbose>Domains created, there are now {} domains.</verbose>".format(
        len(model.user_domains))
    pr.printx(summary)

    return EChanges.DOMAINS
def set_genes(accessions: List[str], sites: Optional[List[str]]) -> EChanges:
    """
    Adds a new sequence to the model

    :param sites:       Sequence sites.
                        Optional.
                        If specified, the same number of `sites` as `accessions` must be provided.
    :param accessions:  Sequence accession(s)
    :return: `EChanges.MODEL_ENTITIES`.
    :raises ValueError: If `sites` is given but its length differs from that
                        of `accessions`; no gene is added in that case.
    """
    model = global_view.current_model()
    model.get_status(STAGES.SEQUENCES_2).assert_set()

    # Check the documented contract up front rather than failing with an
    # `IndexError` part-way through, after some genes were already added.
    if sites and len(sites) != len(accessions):
        raise ValueError(
            "Expected {} site sequences (one per accession) but {} were provided."
            .format(len(accessions), len(sites)))

    for i, accession in enumerate(accessions):
        sequence = __add_new_gene(model, accession)

        if sites:
            site = sites[i]
            sequence.site_array = site
            sequence.length = len(site)

        # The gene may have no site data when `sites` was not supplied, so
        # do not call `len` on it unconditionally.
        site_array = sequence.site_array
        num_sites = len(site_array) if site_array is not None else 0

        pr.printx("<verbose>Added: {} (n={})</verbose>".format(
            sequence, num_sites))

    return EChanges.MODEL_ENTITIES
def __end_line(self, _: None):
    """
    Emits the closing `</section>` tag.

    Passed as the `on_exit` callback of the `ManagedWith` that
    `__start_line` returns.

    :param _: Ignored.
    """
    closing_tag = '</section>'
    pr.printx(closing_tag)
def import_file(file_name: isFilename[EFileMode.READ],
                skip_bad_extensions: bool = False,
                filter: EImportFilter = EImportFilter.DATA,
                query: bool = False) -> EChanges:
    """
    Imports a file.
    _How_ the file is imported is determined by its extension.

        `.blast`                    --> `import_similarities`
        `.fasta`, `.fa`, `.faa`     --> `import_genes`
        `.imk`                      --> `commands.execute_cli_text` (runs the script)
        `constants.EXT_MODEL`       --> `file_load`

    :param file_name:               Name of file to import.
    :param skip_bad_extensions:     When set, if the file has an extension we don't recognise, no error is raised.
    :param filter:                  Specifies what kind of files we are allowed to import.
    :param query:                   When set the kind of the file is printed and the file is not imported.
    :return:                        The importer's result, `EChanges.MODEL_OBJECT` after running a script,
                                    `EChanges.INFORMATION` when querying, or `EChanges.NONE` when an
                                    unhandled file is skipped.
    :raises ValueError:             If the file is not handled and `skip_bad_extensions` is not set.
    """
    ext = file_helper.get_extension(file_name).lower()

    def describe(template: str) -> EChanges:
        # Query mode: report the kind of file instead of importing it.
        pr.printx(template.format(pr.escape(file_name)))
        return EChanges.INFORMATION

    if filter.DATA:
        if ext == ".blast":
            if query:
                return describe("BLAST: <file>{}</file>.")
            return workflow.s030_similarity.import_similarities(file_name)

        if ext in (".fasta", ".fa", ".faa"):
            if query:
                return describe("FASTA: <file>{}</file>.")
            return workflow.s020_sequences.import_genes(file_name)

    if filter.SCRIPT and ext == ".imk":
        if query:
            return describe("Script: <file>{}</file>.")

        pr.printx("<verbose>Run script <file>{}</file>.</verbose>".format(
            pr.escape(file_name)))
        commands.execute_cli_text(file_name)
        return EChanges.MODEL_OBJECT

    if filter.MODEL and ext == constants.EXT_MODEL:
        if query:
            return describe("Model: <file>{}</file>.")
        return workflow.s010_file.file_load(file_name)

    if skip_bad_extensions:
        return EChanges.NONE

    raise ValueError(
        "Cannot import the file '{}' because I don't recognise the extension '{}'."
        .format(file_name, ext))
def __start_line(self, title: object):
    """
    Opens an output section for a wizard step.

    :param title: Title of the section; converted with `str` and prefixed
                  with "WIZARD: ".
    :return: A `ManagedWith` whose `on_exit` callback is `__end_line`, which
             emits the matching `</section>` tag.
    """
    title = "WIZARD: " + str(title)

    # The format string previously had no placeholder, so the computed title
    # was silently discarded and every section was opened with an empty name.
    pr.printx('<section name="{}">'.format(pr.escape(title)))

    return ManagedWith(on_exit=self.__end_line)
def __import_blast_format_6(e_value_tol, file, file_title, length_tol, model,
                            obtain_only):
    """
    Imports similarity edges from tabular BLAST output (12 or 14 columns).

    :param e_value_tol: Rows with an e-value above this are skipped. `None` disables the check.
    :param file:        Iterable of text lines.
    :param file_title:  Name of the source, used in messages only.
    :param length_tol:  Rows whose query length is below this are skipped. `None` disables the check.
    :param model:       Model to add the genes and edges to.
    :param obtain_only: Passed through to `_make_gene`.
    :raises ValueError: If a data row does not have 12 (or 14) columns.
    """
    LOG("IMPORT {} BLAST FROM '{}'", "MERGE" if obtain_only else "NEW",
        file_title)

    for line_index, raw_line in enumerate(file):
        text = raw_line.strip()

        # Blank lines and `#` / `;` comment lines carry no data.
        if not text or text.startswith(("#", ";")):
            continue

        # BLASTN     query acc. | subject acc. |                                 | % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score
        # MEGABLAST  query id   | subject ids  | query acc.ver | subject acc.ver | % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score

        # Split by tabs or spaces
        if "\t" in text:
            fields = text.split("\t")
        else:
            fields = [token for token in text.split(" ") if token]

        # The 14-column layout has two extra columns at positions 2 and 3.
        if len(fields) == 14:
            fields = fields[:2] + fields[4:]

        if len(fields) != 12:
            raise ValueError(
                "BLAST file '{}' should contain 12 values, but line #{} contains {}: {}"
                .format(file_title, line_index + 1, len(fields), repr(text)))

        query_accession = fields[0]
        query_start = int(fields[6])
        query_end = int(fields[7])
        query_length = query_end - query_start

        subject_accession = fields[1]
        subject_start = int(fields[8])
        subject_end = int(fields[9])
        subject_length = subject_end - subject_start

        e_value = float(fields[10])

        LOG("BLAST SAYS {} {}:{} ({}) --> {} {}:{} ({})".format(
            query_accession, query_start, query_end, query_length,
            subject_accession, subject_start, subject_end, subject_length))

        if e_value_tol is not None and e_value > e_value_tol:
            LOG("REJECTED E VALUE")
            continue

        if length_tol is not None and query_length < length_tol:
            LOG("REJECTED LENGTH")
            continue

        assert query_length > 0 and subject_length > 0

        query_gene = _make_gene(model, query_accession, obtain_only, 0, True)
        subject_gene = _make_gene(model, subject_accession, obtain_only, 0,
                                  True)

        # Both genes must be available, and self-hits are ignored.
        if not (query_gene and subject_gene):
            continue

        if query_gene is subject_gene:
            continue

        query = Domain(query_gene, query_start, query_end)
        subject = Domain(subject_gene, subject_start, subject_end)
        LOG("BLAST UPDATES AN EDGE THAT JOINS {} AND {}".format(
            query, subject))
        __make_edge(model, query, subject)

    pr.printx(
        "<verbose>Imported Blast from «{}».</verbose>".format(file_title))
def create_major( tol: int = 0, debug: bool = False ) -> EChanges:
    """
    Detects model components.

    First step of finding the components.

    We classify each component as a set of "major" genes.

    Components are defined as sets of genes that share a similarity path between them, where each edge between element 𝓧 and 𝓨 in that path:
        * Is sourced from no less than 𝓧's length, less the tolerance
        * Is targeted to no less than 𝓨's length, less the tolerance
        * The difference between 𝓧 and 𝓨's length is no more than the tolerance

    We'll grab the minor domains that this component extends into in the next step.

    Requisites: Sequence similarity (BLAST data) must have been loaded

    :param debug:       Enables extra checks: a `ValueError` is raised if an accepted edge joins genes
                        whose accessions start with different characters, or if a sequence has no
                        accepted edge at all.
    :param tol:         Tolerance value
    :returns:           `EChanges.COMPONENTS`. The components are written to :ivar:`model.components`.
    :raises ValueError: If any sequence in the model has no edges.
    """
    model = global_view.current_model()
    model.get_status( STAGES.MAJOR_4 ).assert_create()

    # Any existing components are discarded before detection starts
    model.components.clear()

    # Find connected components
    components = ComponentFinder()

    # Basic assertions
    LOG_MAJOR( "There are {} sequences.", len( model.genes ) )
    missing_edges = []

    for sequence in model.genes:
        edges = model.edges.find_gene( sequence )

        if not edges:
            missing_edges.append( sequence )

    if missing_edges:
        raise ValueError( "Refusing to detect components because some sequences have no edges: «{}»".format( string_helper.format_array( missing_edges ) ) )

    # Iterate sequences
    for sequence_alpha in model.genes:
        assert isinstance( sequence_alpha, Gene )

        alpha_edges = model.edges.find_gene( sequence_alpha )
        any_accept = False

        LOG_MAJOR( "Sequence {} contains {} edges.", sequence_alpha, len( alpha_edges ) )

        for edge in alpha_edges:
            assert isinstance( edge, Edge )

            # How far each side of the edge falls short of covering its whole
            # gene, and how much the two genes differ in length
            source_difference = abs( edge.left.length - edge.left.gene.length )
            destination_difference = abs( edge.right.length - edge.right.gene.length )
            total_difference = abs( edge.left.gene.length - edge.right.gene.length )

            LOG_MAJOR_V( "{}", edge )
            LOG_MAJOR_V( "-- Source difference ({})", source_difference )
            LOG_MAJOR_V( "-- Destination difference ({})", destination_difference )
            LOG_MAJOR_V( "-- Total difference ({})", total_difference )

            # Any one difference above the tolerance rejects the edge
            if source_difference > tol:
                LOG_MAJOR_V( "-- ==> REJECTED (SOURCE)" )
                continue
            elif destination_difference > tol:
                LOG_MAJOR_V( "-- ==> REJECTED (DEST)" )
                continue
            elif total_difference > tol:
                LOG_MAJOR_V( "-- ==> REJECTED (TOTAL)" )
                continue
            else:
                LOG_MAJOR_V( "-- ==> ACCEPTED" )

            # Debug-only check comparing the first character of both accessions
            # NOTE(review): presumably test data encodes the expected component in that character - confirm
            if debug and edge.left.gene.accession[0] != edge.right.gene.accession[0]:
                raise ValueError( "Debug assertion failed. This edge not rejected: {}".format( edge ) )

            any_accept = True
            beta = edge.opposite( sequence_alpha ).gene
            LOG_MAJOR( "-- {:<40} LINKS {:<5} AND {:<5}", edge, sequence_alpha, beta )
            components.join( sequence_alpha, beta )

        if debug and not any_accept:
            raise ValueError( "Debug assertion failed. This sequence has no good edges: {}".format( sequence_alpha ) )

    # Create the components!
    sequences_in_components = set()

    for index, sequence_list in enumerate( components.tabulate() ):
        model.components.add( Component( model, index, sequence_list ) )
        LOG_MAJOR( "COMPONENT MAJOR: {}", sequence_list )
        sequences_in_components.update( sequence_list )

    # Create components for orphans
    # (genes that were never joined to anything get a component of their own)
    for sequence in model.genes:
        if sequence not in sequences_in_components:
            LOG_MAJOR( "ORPHAN: {}", sequence )
            model.components.add( Component( model, len( model.components ), (sequence,) ) )

    # An assertion
    # (warn once, at the first single-gene component found)
    for component in model.components:
        assert isinstance( component, Component )

        if len( component.major_genes ) == 1:
            warnings.warn( "There are components with just one sequence in them. Maybe you meant to use a tolerance higher than {}?".format( tol ), UserWarning )
            break

    pr.printx( "<verbose>{} components detected.</verbose>".format( len( model.components ) ) )

    return EChanges.COMPONENTS
def create_wizard(new: Optional[bool] = None,
                  name: Optional[str] = None,
                  imports: Optional[List[str]] = None,
                  outgroups: Optional[List[str]] = None,
                  tolerance: Optional[int] = None,
                  alignment: Optional[str] = None,
                  supertree: Optional[str] = None,
                  tree: Optional[str] = None,
                  view: Optional[bool] = None,
                  save: Optional[bool] = None,
                  pause: str = None) -> None:
    """
    Sets up a workflow that you can activate in one go.

    If you don't fill out the parameters then whatever UI you are using will prompt you for them.

    If you have a set of default parameters that you'd like to preserve, take a look at the `alias` command.

    This method is represented in the GUI by the wizard window.

    :param new:                 Create a new model?
                                :values:`true→yes, false→no, none→ask`
    :param name:                Name the model?
                                You can specify a complete path or just a name.
                                If no name (empty) is specified, then the model is not saved.
                                :values:`empty→no name, none→ask`
    :param outgroups:           Outgroup accessions?
                                :values:`none→ask`
    :param imports:             Import files into the model?
                                :values:`none→ask`
    :param tolerance:           Component identification tolerance?
                                :values:`none→ask`
    :param alignment:           Alignment method?
                                :values:`empty→default, none→ask`
    :param supertree:           Supertree method?
                                :values:`empty→default,none→ask`
    :param tree:                Tree generation method?
                                :values:`empty→default, none→ask`
    :param view:                View the final NRFG in Vis.js?
                                :values:`true→yes, false→no, none→ask`
    :param save:                Save file to disk? (requires `name`)
                                :values:`true→yes, false→no, none→ask`
    :param pause:               Pause after stage default value.
                                One character per stage to pause after.
                                :values:`none→ask`
    :raises ValueError:         If `pause` contains a character that does not name a stage.
    """
    if new is None:
        x = pr.pr_question(
            "Are you starting a new model, or do you want to continue with your current data?",
            ["new", "continue"])
        new = (x == "new")

    if name is None:
        name = pr.pr_question(
            "Name your model.\n"
            "You can specify a complete path or just a name.\n"
            "If you don't enter a name, your won't have the option to save your file using the wizard, though you can still do so manually."
        )

        if not name:
            warn("Your file will not be saved by the wizard.", UserWarning)

    if imports is None:
        imports = []

        while True:
            # The "blank line" hint is only shown once something was entered.
            ex = "\nEnter a blank line when you don't want to add any more files." if imports else ""
            file_name = pr.pr_question(
                "Enter file paths to import BLAST or FASTA files, one per line."
                + ex)

            if file_name:
                imports.append(file_name)
            else:
                break

    if outgroups is None:
        outgroups = []

        while True:
            ex = "\nEnter a blank line when you don't want to add any more outgroups."
            outgroup = pr.pr_question(
                "Enter outgroup accessions, one per line." + ex)

            if outgroup:
                outgroups.append(outgroup)
            else:
                break

    # Keep asking until the answer parses as an integer. Only conversion
    # failures are retried; a bare `except` here also swallowed
    # `KeyboardInterrupt`, making the prompt impossible to abort.
    while tolerance is None:
        tolerance_str = pr.pr_question(
            "What tolerance do you want to use for the component identification?"
        )

        try:
            tolerance = int(tolerance_str)
        except (TypeError, ValueError):
            pr.printx("Something went wrong. Let's try that question again.")

    if alignment is None:
        alignment = pr.pr_question(
            "Which function do you want to use for the sequence alignment? Enter a blank line for the default.",
            list(workflow.s070_alignment.alignment_algorithms.keys) + [""])

    if tree is None:
        tree = pr.pr_question(
            "Which function do you want to use for the tree generation? Enter a blank line for the default.",
            list(workflow.s080_tree.tree_algorithms.keys) + [""])

    if supertree is None:
        supertree = pr.pr_question(
            "Which function do you want to use for the supertree generation? Enter a blank line for the default.",
            list(workflow.s140_supertrees.supertree_algorithms.keys) + [""])

    # Single-character codes accepted in `pause`, and the stage each names.
    pause_keys = {
        "i": STAGES.SEQ_AND_SIM_ps,
        "m": STAGES.MAJOR_4,
        "d": STAGES.MINOR_5,
        "a": STAGES.ALIGNMENTS_7,
        "t": STAGES.TREES_8,
        "f": STAGES.FUSIONS_9,
        "S": STAGES.SPLITS_10,
        "C": STAGES.CONSENSUS_11,
        "e": STAGES.SUBSETS_12,
        "p": STAGES.PREGRAPHS_13,
        "s": STAGES.SUPERTREES_14,
        "u": STAGES.FUSE_15,
        "n": STAGES.CLEAN_16,
        "c": STAGES.CHECKED_17
    }

    if pause is None:
        for k, v in pause_keys.items():
            pr.printx("<key>{}</key> = <value>{}</value>".format(
                pr.escape(k), pr.escape(v)))

        pause = pr.pr_question("Enter pauses (as above):")

    pauses = set()

    for c in pause:
        if c in pause_keys:
            pauses.add(pause_keys[c])
        else:
            raise ValueError("Unknown pause command: {} in {}".format(
                repr(c), repr(pause)))

    if view is None:
        view = pr.pr_question(
            "Do you wish the wizard to show you the final NRFG in Vis.js?")

    if save is None:
        if not name:
            save = False
        else:
            save = pr.pr_question(
                "Save your model after each stage completes?")

    # Pass the resolved set of stages; previously the raw `pause` string was
    # passed and the set built above was never used.
    walkthrough = Wizard(new=new,
                         name=name,
                         imports=imports,
                         pauses=pauses,
                         tolerance=tolerance,
                         alignment=alignment,
                         tree=tree,
                         view=view,
                         save=save,
                         outgroups=outgroups,
                         supertree=supertree)

    walkthrough.make_active()
    pr.pr_verbose(
        "The wizard has been created paused.\nYou can use the {} and {} commands to manage your wizard."
        .format(continue_wizard, drop_wizard))
def create_trees(algorithm: tree_algorithms.Algorithm,
                 components: Optional[List[Component]] = None) -> EChanges:
    """
    Creates a tree from the component.

    Requisites: `create_alignments`

    :param algorithm:   Algorithm to use. See `algorithm_help`.
    :param components:  Component, or `None` for all.
    :returns:           `EChanges.COMP_DATA`. The tree is set on each component via `set_tree`.
    :raises SwitchError: If the model's site type is neither DNA nor protein,
                         or if a component has no minor genes.
    """
    # Get the current model
    model = global_view.current_model()

    # Get the site type
    if model.site_type == ESiteType.DNA:
        site_type = "n"
    elif model.site_type == ESiteType.PROTEIN:
        site_type = "p"
    else:
        raise SwitchError("site_type", model.site_type)

    # Get the components
    components = cli_view_utils.get_component_list(components)

    # Assert that we are in a position to create the trees
    model.get_status(constants.STAGES.TREES_8).assert_create()
    assert all(
        x.alignment is not None for x in components
    ), "Cannot generate the tree because the alignment has not yet been specified."
    assert all(
        x.tree is None for x in components
    ), "Cannot generate the tree because the tree has already been generated."

    # Iterate the components
    for component in pr.pr_iterate(components, "Generating trees"):
        # Handle the edge cases for a tree of three or less: the tree is
        # written out directly instead of running the algorithm.
        num_genes = len(component.minor_genes)

        if num_genes <= 3:
            if num_genes == 1:
                newick = "({});"
            elif num_genes == 2:
                newick = "({},{});"
            elif num_genes == 3:
                newick = "(({},{}),{});"
            else:
                raise SwitchError("num_genes", num_genes)

            newick = newick.format(*(x.legacy_accession
                                     for x in component.minor_genes))
        else:
            # Run the algorithm normally
            newick = external_runner.run_in_temporary(algorithm, site_type,
                                                      component.alignment)

        # Set the tree on the component
        set_tree(component, newick)

    # Show the completion message
    after = sum(x.tree is not None for x in model.components)
    pr.printx(
        "<verbose>{} trees generated. {} of {} components have a tree.</verbose>"
        .format(len(components), after, len(model.components)))

    return EChanges.COMP_DATA