Example #1
0
    def on_BTN_SAVE_TO_FILE_clicked(self) -> None:
        """
        Signal handler: asks for a destination via the "save" browse dialogue
        (filtered to `HTML (*.html)`) and writes `self.html` to it.

        Nothing is written when the dialogue yields a falsy file name.
        """
        destination: str = qt_gui_helper.browse_save(self, "HTML (*.html)")

        if not destination:
            return

        file_helper.write_all_text(destination, self.html)
Example #2
0
def blastp(fasta: str) -> str:
    """
    Uses protein blast to create the similarity matrix.

    The input is written to `fasta.fasta` (relative to the current working
    directory), `blastp` is run with that file as both query and subject, and
    the contents of the `blast.blast` output file are returned.

    :param fasta:   Text to write to the input file.
    :return:        Text read back from `blast.blast`.
    """
    input_file = "fasta.fasta"
    output_file = "blast.blast"

    file_helper.write_all_text(input_file, fasta)

    command = [
        "blastp",
        "-query", input_file,
        "-subject", input_file,
        "-outfmt", "6",
        "-out", output_file,
    ]
    subprocess_helper.run_subprocess(command)

    return file_helper.read_all_text(output_file)
Example #3
0
def align_muscle(model: Model, fasta: str) -> str:
    """
    Uses MUSCLE to align.

    The input is written to `in_file.fasta` (relative to the current working
    directory), `muscle` is run on it, and the contents of `out_file.fasta`
    are returned.

    :param model:   Not used by the alignment itself; only handed to `ignore`.
    :param fasta:   Text to write to the input file.
    :return:        Text read back from `out_file.fasta`.
    """
    ignore(model)

    source_file = "in_file.fasta"
    aligned_file = "out_file.fasta"

    file_helper.write_all_text(source_file, fasta)

    command = ["muscle", "-in", source_file, "-out", aligned_file]
    subprocess_helper.run_subprocess(command)

    return file_helper.read_all_text(aligned_file)
Example #4
0
 def __update_browser(self):
     """
     Pushes `self.html` into whichever display control is in use.

     * Without a browser (`self.is_browser` is falsy) the HTML is set directly
       on the text control, the title is extracted from the `<title>` tag, and
       the warning label is shown only if the HTML contains `<script`.
     * With a browser the HTML is written to `groot_temp.html` in the
       temporary folder and loaded from there.
     """
     if not self.is_browser:
         page_title = string_helper.regex_extract("<title>(.*?)</title>", self.html)
         self.__on_title_changed(page_title)
         self.ui.TXT_BROWSER.setHtml(self.html)
         has_script = "<script" in self.html
         self.ui.LBL_BROWSER_WARNING.setVisible(has_script)
         return

     # nb. setHtml doesn't work with visjs, so we always need to use a temporary file
     temp_folder = Controller.ACTIVE.app.local_data.local_folder(im_constants.FOLDER_TEMPORARY)
     temp_file = path.join(temp_folder, "groot_temp.html")
     file_helper.write_all_text(temp_file, self.html)

     self.browser_ctrl.load(QUrl.fromLocalFile(temp_file))
     loaded_url = self.browser_ctrl.url().toString()
     self.ui.LBL_TITLE.setToolTip(loaded_url)
Example #5
0
def tree_neighbor_joining(model: str, alignment: str) -> str:
    """
    Uses PAUP to generate the tree using neighbour-joining.
    
    There are some major issues with Paup. Please see the troubleshooting section of Groot's readme.
    
    The alignment is written to `in_file.fasta`, a PAUP script is written to
    `in_file.paup`, and the resulting tree is read back from `out_file.nwk`
    (all relative to the current working directory).
    
    :param model:       Format, a string `n` or `p` denoting the site type.
    :param alignment:   Alignment in FASTA format.
    :return:            The tree in Newick format.
    :raises SwitchError: `model` is neither `n` nor `p`.
    :raises ValueError:  PAUP reports that it has expired, or the output file is empty.
    """
    # TODO: Use an alternative that doesn't have the PAUP time-out problem.

    # Validate the model first so that a bad argument fails before any file is touched.
    if model == "n":
        site_type = "nucleotide"
    elif model == "p":
        site_type = "protein"
    else:
        raise SwitchError("model", model)

    file_helper.write_all_text("in_file.fasta", alignment)

    # The `{}` placeholder receives the site type. Previously `dataType` was
    # hard-coded to `protein`, so `site_type` was computed but never applied.
    script = """
    toNEXUS format=FASTA fromFile=in_file.fasta toFile=in_file.nexus dataType={} replace=yes;
    execute in_file.nexus;
    NJ;
    SaveTrees file=out_file.nwk format=Newick root=Yes brLens=Yes replace=yes;
    quit;"""

    script = script.format(site_type)
    file_helper.write_all_text("in_file.paup", script)

    txt = groot.run_subprocess(["paup", "-n", "in_file.paup"],
                               collect=True,
                               no_err=True)

    # The return code seems to have no bearing on Paup's actual output, so ignore it and look for the specific text.
    if "This version of PAUP has expired." in txt:
        raise ValueError(
            "'This version of PAUP has expired'. Please update your software or use a different method and try again."
        )

    r = file_helper.read_all_text("out_file.nwk",
                                  details="the expected output from paup")

    if not r:
        raise ValueError("Paup produced an empty file.")

    return r
Example #6
0
def supertree_clann(inputs: str) -> str:
    """
    Uses CLANN to generate a supertree.

    The input trees are written to `in_file.nwk`, `clann` is driven through a
    script supplied on its standard input, and the output is read back from
    `out_file.nwk` (both relative to the current working directory).
    
    :param inputs:      Input trees in Newick format.
    :return:            The consensus supertree in Newick format: the text of
                        the output file up to, but excluding, the first `;`.
    """
    file_helper.write_all_text("in_file.nwk", inputs)

    clann_commands = """
    execute in_file.nwk;
    hs savetrees=out_file.nwk;
    quit
    """
    subprocess_helper.run_subprocess(["clann"], stdin=clann_commands)

    output_text = file_helper.read_all_text("out_file.nwk")

    # Keep only what precedes the first semicolon (the whole text if there is none).
    first_tree, _, _ = output_text.partition(";")
    return first_tree
Example #7
0
def tree_maximum_likelihood(model: str, alignment: str) -> str:
    """
    Uses Raxml to generate the tree using maximum likelihood.
    The model used is GTRCAT for RNA sequences, and PROTGAMMAWAG for protein sequences.

    The alignment is written to `in_file.fasta` and converted to
    `in_file.phy` before the model is checked; the result is read from
    `RAxML_bestTree.t`.

    :param model:       `n` selects GTRCAT, `p` selects PROTGAMMAWAG.
    :param alignment:   Text written to `in_file.fasta`.
    :return:            Contents of `RAxML_bestTree.t`.
    :raises SwitchError: `model` is neither `n` nor `p`.
    """
    file_helper.write_all_text("in_file.fasta", alignment)
    bio_helper.convert_file("in_file.fasta", "in_file.phy", "fasta", "phylip")

    if model == "n":
        substitution_model = "GTRCAT"
    elif model == "p":
        substitution_model = "PROTGAMMAWAG"
    else:
        raise SwitchError("model", model)

    command = [
        "raxml",
        "-T", "4",
        "-m", substitution_model,
        "-p", "1",
        "-s", "in_file.phy",
        "-#", "20",
        "-n", "t",
    ]
    groot.run_subprocess(command)

    return file_helper.read_all_text("RAxML_bestTree.t",
                                     "the expected output from raxml")
Example #8
0
def create_test(types: str = "1",
                no_blast: bool = False,
                size: int = 2,
                run: bool = True) -> groot.EChanges:
    """
    Creates a GROOT unit test in the sample data folder.
    
    * GROOT should be installed in developer mode, otherwise there may be no write access to the sample data folder.
    * Requires the `faketree` library.
    
    Side effects: for every test created the process' working directory is
    changed (`os.chdir`) to that test's folder and is not restored afterwards.
    
    :param run:         Run test after creating it.
    :param no_blast:    Perform no BLAST. When `False`, `blastp` is run on
                        `leaves.fasta` and its output is written to `leaves.blast`.
    :param size:        Clade size (passed as `size` to `faketree.create_random_tree`).
    :param types:       Type(s) of test(s) to create. The string is iterated
                        character by character; recognised characters are
                        `0`, `1`, `4`, `5` and `7`.
    :return:            Always `groot.EChanges.INFORMATION`. The created
                        directories are collected in a local list and printed,
                        but are not returned.
    :raises ValueError:  `types` is empty.
    :raises SwitchError: `types` contains an unrecognised character.
    """
    # noinspection PyPackageRequirements
    import faketree as FAKE
    print("START")
    # Absolute paths of the created test folders (collected but never returned).
    r = []
    # Keyword arguments shared by every `create_random_tree` call below.
    args_random_tree = {
        "suffix": "1",
        "delimiter": "_",
        "size": size,
        "outgroup": True
    }
    # args_fn = "-d 0.2"
    # Unpacked with `*` into `mutate_fn`; as an empty string it contributes no arguments.
    mutate_args = ""

    if not types:
        raise ValueError("Missing :param:`types`.")

    # One test is generated per character of `types`.
    for index, name in enumerate(types):
        tdir = TestDirectory(None)

        print("Test {} of {}".format(index + 1, len(types)))

        try:
            FAKE.new_tree()
            # The SeqGen mutator has a weird problem where, given a root `(X,O)R` in which `R`
            # is set as a result of an earlier tree, `O` will be more similar to the leaves of
            # that earlier tree than to the leaves in X. For this reason we use a simple random
            # model and not SeqGen.
            mutate_fn = FAKE.make_random

            if name == "0":
                # 0 no fusions
                outgroups = FAKE.create_random_tree(["A"], **args_random_tree)
                # Single-element unpacking: fails unless exactly one outgroup came back.
                a, = (x.parent for x in outgroups)
                mutate_fn([a], *mutate_args)
            elif name == "1":
                # 1 fusion point; 3 genes; 2 origins
                #
                # # Should be an acyclic 2-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /
                # B
                #

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C"],
                                                    **args_random_tree)
                a, b, c = (x.parent for x in outgroups)
                # NOTE(review): here the outgroups are removed *before* mutation, whereas the
                # other cases mutate first - confirm whether the ordering matters.
                # Presumably the numeric arguments are indices into `outgroups` (here: C) - verify.
                __remove_outgroups(outgroups, 2)

                mutate_fn([a, b, c], *mutate_args)

                # Fusion point
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.make_composite_node([c])
            elif name == "4":
                # 2 fusion points; 4 genes; 2 origins
                # (Possibly the most difficult scenario because the result is cyclic)
                #
                # Should be a cyclic 2-rooted graph:
                #
                #
                # A--------
                #  \       \
                #   -->C    -->D
                #  /       /
                # B--------
                #

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C", "D"],
                                                    **args_random_tree)
                a, b, c, d = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d], *mutate_args)
                __remove_outgroups(outgroups, 2, 3)

                # Fusion points
                fa1 = FAKE.get_random_node(a, avoid=outgroups)
                fb1 = FAKE.get_random_node(b, avoid=outgroups)
                fa2 = FAKE.get_random_node(a, avoid=outgroups)
                fb2 = FAKE.get_random_node(b, avoid=outgroups)
                FAKE.create_branch([fa1, fb1], c)
                FAKE.create_branch([fa2, fb2], d)
                FAKE.make_composite_node([c, d])

            elif name == "5":
                # 2 fusion points; 5 genes; 3 origins
                #
                # # Should be an acyclic 3-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /    \
                # B      -->E
                #       /
                #      D

                # Trees
                outgroups = FAKE.create_random_tree(["A", "B", "C", "D", "E"],
                                                    **args_random_tree)
                a, b, c, d, e = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d, e], *mutate_args)
                __remove_outgroups(outgroups, 2, 4)

                # Fusion points
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                fc = FAKE.get_random_node(c, avoid=outgroups)
                fd = FAKE.get_random_node(d, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.create_branch([fc, fd], e)
                FAKE.make_composite_node([c, e])
            elif name == "7":
                # 3 fusion points; 7 genes; 4 origins
                #
                # Should be an acyclic 4-rooted tree:
                #
                # A
                #  \
                #   -->C
                #  /    \
                # B      \
                #         -->G
                # D      /
                #  \    /
                #   -->F
                #  /
                # E
                #

                # Trees
                outgroups = FAKE.create_random_tree(
                    ["A", "B", "C", "D", "E", "F", "G"], **args_random_tree)
                a, b, c, d, e, f, g = (x.parent for x in outgroups)
                mutate_fn([a, b, c, d, e, f, g], *mutate_args)
                __remove_outgroups(outgroups, 2, 5, 6)

                # Fusion points
                fa = FAKE.get_random_node(a, avoid=outgroups)
                fb = FAKE.get_random_node(b, avoid=outgroups)
                fc = FAKE.get_random_node(c, avoid=outgroups)
                fd = FAKE.get_random_node(d, avoid=outgroups)
                fe = FAKE.get_random_node(e, avoid=outgroups)
                ff = FAKE.get_random_node(f, avoid=outgroups)
                FAKE.create_branch([fa, fb], c)
                FAKE.create_branch([fd, fe], f)
                FAKE.create_branch([fc, ff], g)
                FAKE.make_composite_node([c, f, g])
            else:
                raise SwitchError("name", name)

            FAKE.generate()

            # All files below use relative names, so they land in the test folder
            # only because the working directory is switched to it here.
            file_helper.create_directory(tdir.t_folder)
            os.chdir(tdir.t_folder)

            FAKE.print_trees(format=mgraph.EGraphFormat.ASCII, file="tree.txt")
            FAKE.print_trees(format=mgraph.EGraphFormat.TSV,
                             file="tree.tsv",
                             name=True,
                             mutator=False,
                             sequence=False,
                             length=False)
            FAKE.print_fasta(which=FAKE.ESubset.ALL, file="all.fasta.hidden")
            FAKE.print_fasta(which=FAKE.ESubset.LEAVES, file="leaves.fasta")

            if not no_blast:
                # Collects whatever `run_subprocess` hands to the `collect_stdout` callback.
                blast = []
                # noinspection SpellCheckingInspection
                intermake.subprocess_helper.run_subprocess(
                    [
                        "blastp", "-subject", "leaves.fasta", "-query",
                        "leaves.fasta", "-outfmt", "6"
                    ],
                    collect_stdout=blast.append)

                # NOTE(review): a list is passed here - presumably `write_all_text` accepts
                # a list of lines as well as a string; confirm against `file_helper`.
                file_helper.write_all_text("leaves.blast", blast)

            guid = uuid.uuid4()
            # Only outgroups whose parent is still a root are recorded in the INI file.
            outgroups_str = ",".join(x.data.name for x in outgroups
                                     if x.parent.is_root)

            # The `groot_wizard` and `groot_test` sections are the ones `run_test` looks for.
            file_helper.write_all_text("groot.ini", [
                "[groot_wizard]", "tolerance=50",
                "outgroups={}".format(outgroups_str), "", "[groot_test]",
                "name={}".format(name), "size={}".format(size),
                "guid={}".format(guid)
            ])

            path_ = os.path.abspath(".")
            print("FINAL PATH: " + path_)
            r.append(path_)

        except FAKE.RandomChoiceError as ex:
            # A failed random choice aborts the whole run, including any remaining `types`.
            print("FAILURE {}".format(ex))
            return groot.EChanges.INFORMATION

        if run:
            run_test(tdir.t_name)

    return groot.EChanges.INFORMATION
Example #9
0
def run_test(name: str) -> groot.EChanges:
    """
    Runs a test case and saves the results to the global results folder.
    
    :param name:       A name or path to the test case.
                       If no full path is provided the "samples" folder will be assumed.
                       The test case folder must contain:
                        
                            * The data (BLAST, FASTA)
                            * An edge-list file describing the expected results, at the path
                              given by `TestDirectory.t_tree` (it is read tab-delimited)
                            * A `groot.ini` file describing the parameters to use. It must have a
                              `groot_test` section and a `groot_wizard` section, the latter
                              providing `tolerance` and `outgroups`.
                             
    :return:           `groot.EChanges.MODEL_OBJECT`. The results themselves are saved to the
                       global results folder.
    :raises ValueError: The folder is not a valid test case (missing folder, file, INI section or
                        INI setting), or the wizard did not run to completion.
    """

    # Load sample file
    tdir = TestDirectory(name)

    # Define outputs
    file_helper.create_directory(tdir.r_folder, overwrite=True)

    # Check the requisite files exist
    if not os.path.isdir(tdir.t_folder):
        raise ValueError(
            "This is not a test case (it is not even a folder, «{}»).".format(
                tdir.t_folder))

    if not os.path.isfile(tdir.t_tree):
        raise ValueError(
            "This is not a test case (it is missing the edge list file, «{}»)."
            .format(tdir.t_tree))

    if not os.path.isfile(tdir.t_ini):
        raise ValueError(
            "This is not a test case (it is missing the INI file, «{}»).".
            format(tdir.t_ini))

    # Read the test specs
    specs = io_helper.load_ini(tdir.t_ini)

    if "groot_test" not in specs:
        raise ValueError(
            "This is not a test case (it is missing the `groot_test` section from the INI file, «{}»)."
            .format(tdir.t_ini))

    if "groot_wizard" not in specs:
        raise ValueError(
            "This is not a test case (it is missing the «wizard» section from the INI «{}»)."
            .format(tdir.t_ini))

    wizard_params = specs["groot_wizard"]

    try:
        wiz_tol = int(wizard_params["tolerance"])
        wiz_og = wizard_params["outgroups"].split(",")
    except KeyError as ex:
        # Chain the original error so the traceback still shows which lookup failed.
        raise ValueError(
            "This is not a test case (it is missing the «{}» setting from the «wizard» section of the INI «{}»)."
            .format(ex, tdir.t_ini)) from ex

    # Copy the test files to the output folder
    for source_file in file_helper.list_dir(tdir.t_folder):
        shutil.copy(
            source_file,
            file_helper.format_path(source_file,
                                    tdir.r_folder + "/input_{N}{E}"))

    # Create settings
    walkthrough = groot.Wizard(
        new=True,
        name=tdir.r_model,
        imports=groot.sample_data.get_sample_contents(tdir.t_folder),
        pauses=set(),
        tolerance=wiz_tol,
        outgroups=wiz_og,
        alignment="",
        tree="maximum_likelihood",  # "neighbor_joining",
        view=False,
        save=False,
        supertree="clann")

    try:
        # Execute the wizard (no pauses are set so this only requires 1 `step`)
        walkthrough.make_active()
        walkthrough.step()

        if not walkthrough.is_completed:
            raise ValueError("Expected wizard to complete but it did not.")

        # Add the original graph to the Groot `Model` in case we debug
        test_tree_file_data = groot.UserGraph(mgraph.importing.import_edgelist(
            file_helper.read_all_text(tdir.t_tree), delimiter="\t"),
                                              name="original_graph")
        groot.rectify_nodes(test_tree_file_data.graph, groot.current_model())
        groot.current_model().user_graphs.append(
            groot.FixedUserGraph(test_tree_file_data.graph, "original_graph"))
    finally:
        # Save the final model regardless of whether the test succeeded
        groot.file_save(tdir.r_model)

    # Perform the comparison
    model = groot.current_model()
    differences = groot.compare_graphs(model.fusion_graph_clean,
                                       test_tree_file_data)
    q = differences.raw_data["quartets"]["match_quartets"]
    # Format rather than concatenate: `"..." + q` raises TypeError when `q` is not a string.
    print("match_quartets: {}".format(q))

    # Write the results---

    # ---Summary
    io_helper.save_ini(tdir.r_summary, differences.raw_data)

    # ---Alignments
    groot.print_alignments(file=tdir.r_alignments)

    # ---Differences
    file_helper.write_all_text(tdir.r_comparison,
                               differences.html,
                               newline=True)
    differences.name = "test_differences"
    groot.current_model().user_reports.append(differences)

    # ---Model
    groot.file_save(tdir.r_model)

    # Done
    intermake.pr.printx(
        "<verbose>The test has completed, see «{}».</verbose>".format(
            tdir.r_comparison))
    return groot.EChanges.MODEL_OBJECT