Esempio n. 1
0
 def execute_rscript(self, script):
     """
     Run ``script`` through ``Rscript --vanilla -`` and collect its results.

     The script text is sent to the R process on standard input. Each line
     of the process's standard output that starts with
     ``Rcalculator.RESULT_FLAG_LEADER`` is parsed as ``<key>=<value>``.

     Parameters
     ----------
     script : str
         R source passed to the process on standard input.

     Returns
     -------
     results : dict
         Maps each reported key to its value converted to ``float``, or to
         the string "NA" when the value is not a valid float.

     Notes
     -----
     If R exits with a non-zero status, the script and R's standard error
     (each line prefixed with "# ") are printed and the interpreter exits
     with R's return code.
     """
     cmd = ["Rscript", "--vanilla", "-"]
     p = subprocess.Popen(
         cmd,
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE)
     stdout, stderr = processio.communicate(p, script)
     if p.returncode != 0:
         print(script)
         for row in stderr.split("\n"):
             print("# {}".format(row))
         sys.exit(p.returncode)
     leader = Rcalculator.RESULT_FLAG_LEADER
     results = {}
     for line in stdout.split("\n"):
         if not line.startswith(leader):
             continue
         parts = line[len(leader):].split("=")
         # A result line must contain exactly one "=" separating key/value.
         assert len(parts) == 2
         key = parts[0].strip()
         try:
             value = float(parts[1].strip())
         except ValueError:
             # Non-numeric output (e.g. R's own "NA") is recorded as "NA".
             value = "NA"
         results[key] = value
     return results
Esempio n. 2
0
def prune_taxa_from_trees(trees, taxa, paup_path='paup'):
    """
    Drops Taxon objects given in container ``taxa`` from TreeList ``trees``.

    The trees are written to a temporary NEXUS file, PAUP* is asked to
    delete/prune the given taxa and save the result to a second temporary
    file, and that file is read back in.

    Parameters
    ----------
    trees : TreeList
        Source trees; their ``taxon_namespace`` is used both to number the
        taxa for PAUP* and as the namespace of the returned trees.
    taxa : iterable
        Taxon objects to prune; each must be a member of
        ``trees.taxon_namespace``.
    paup_path : str
        Command used to invoke PAUP*.

    Returns
    -------
    TreeList
        The pruned trees as read from PAUP*'s output file.
    """
    paup_template = """\
    set warnreset=no;
    exe %s;
    gett file=%s storebrlens=yes;
    delete %s / prune;
    savetrees file=%s format=nexus brlens=user taxablk=yes maxdecimals=20;
    """
    # Taxa are passed to PAUP* as 1-based positions in the namespace.
    tax_idxs = " ".join(
        str(trees.taxon_namespace.index(t) + 1) for t in taxa)
    # Context managers ensure both temporary files are closed (and hence
    # removed) even if PAUP* or the tree parsing raises.
    with tempfile.NamedTemporaryFile("w", delete=True) as tf:
        trees.write_to_stream(tf, schema='nexus')
        tf.flush()
        with tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
            output_tree_filepath = output_tree_file_handle.name
            paup_commands = paup_template % (
                tf.name,
                tf.name,
                tax_idxs,
                output_tree_filepath)
            paup_run = subprocess.Popen(['%s -n' % paup_path],
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
            processio.communicate(paup_run, paup_commands)
            # Read the result before the output file is closed and removed.
            return dendropy.TreeList.get_from_path(
                output_tree_filepath,
                "nexus",
                taxon_namespace=trees.taxon_namespace)
Esempio n. 3
0
 def generate(
         self,
         trees,
         dataset=None,
         taxon_namespace=None,
         input_sequences=None,
         **kwargs):
     """
     Run the external sequence generator on ``trees`` and read its output.

     Parameters
     ----------
     trees : tree collection
         Written to the temporary input file in Newick format; its
         ``taxon_namespace`` is the fallback namespace.
     dataset : DataSet or None
         Dataset to read the results into. When None, a new
         ``dendropy.DataSet`` is built from ``kwargs``.
     taxon_namespace : TaxonNamespace or None
         Namespace attached to a newly created dataset; defaults to
         ``trees.taxon_namespace``.
     input_sequences : character matrix or None
         If given, written in PHYLIP format ahead of the trees, followed
         by a line holding the number of trees.
     **kwargs
         Forwarded to ``dendropy.DataSet`` when a new dataset is created.

     Returns
     -------
     DataSet
         The dataset after reading the program's NEXUS output.

     Raises
     ------
     RuntimeError
         If the program writes to standard error or exits non-zero.
     """
     command = self._compose_arguments()
     with self.get_tempfile() as input_file:
         if input_sequences is not None:
             input_sequences.write_to_stream(input_file, schema="phylip")
             input_file.write("{}\n".format(len(trees)))
         trees.write_to_stream(
             input_file,
             "newick",
             suppress_rooting=True,
             suppress_internal_node_labels=True)
         input_file.flush()
         # The input file path is the final command-line argument.
         command.append(input_file.name)
         proc = subprocess.Popen(
             command,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         out_text, err_text = processio.communicate(proc)
         if err_text or proc.returncode != 0:
             raise RuntimeError("Seq-gen error: %s" % err_text)
         if taxon_namespace is None:
             taxon_namespace = trees.taxon_namespace
         if dataset is None:
             dataset = dendropy.DataSet(**kwargs)
             if taxon_namespace is not None:
                 dataset.attach_taxon_namespace(taxon_namespace)
         dataset.read(data=out_text, schema="nexus")
         return dataset
Esempio n. 4
0
 def execute_rscript(self, script):
     """
     Execute R code with ``Rscript --vanilla -`` and parse flagged output.

     ``script`` is written to the R process's standard input. Only output
     lines beginning with ``Rcalculator.RESULT_FLAG_LEADER`` are used; the
     remainder of such a line must have the form ``<key>=<value>``.

     Parameters
     ----------
     script : str
         R source passed to the process on standard input.

     Returns
     -------
     results : dict
         Key -> ``float`` value, or key -> "NA" when the value text cannot
         be converted to a float.

     Notes
     -----
     On a non-zero R exit status the script and R's standard error (lines
     prefixed with "# ") are printed, then ``sys.exit`` is called with the
     R return code.
     """
     p = subprocess.Popen(
             ["Rscript", "--vanilla", "-"],
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             )
     stdout, stderr = processio.communicate(p, script)
     if p.returncode != 0:
         print(script)
         for row in stderr.split("\n"):
             print("# {}".format(row))
         sys.exit(p.returncode)
     flag = Rcalculator.RESULT_FLAG_LEADER
     results = {}
     for line in stdout.split("\n"):
         if not line.startswith(flag):
             continue
         parts = line[len(flag):].split("=")
         # Exactly one "=" is expected between the key and the value.
         assert len(parts) == 2
         key = parts[0].strip()
         try:
             value = float(parts[1].strip())
         except ValueError:
             # Anything that is not a number is stored as the string "NA".
             value = "NA"
         results[key] = value
     return results
Esempio n. 5
0
def estimate_ultrametric_tree(char_matrix,
                              topology_tree=None,
                              paup_path=PAUP_PATH):
    """
    Estimate a tree with PAUP* after applying clock-enforcing commands.

    Parameters
    ----------
    char_matrix : character matrix
        Data written out in NEXUS format for PAUP*; its
        ``taxon_namespace`` is used for the tree that is read back.
    topology_tree : Tree or None
        If None, the work is delegated to ``estimate_tree`` with the
        "nj" criterion and the post-estimation commands below. Otherwise
        this tree is loaded into PAUP* before those commands are run.
    paup_path : str
        Command used to invoke PAUP* (only used when ``topology_tree`` is
        given).

    Returns
    -------
    Tree
        The tree returned by ``estimate_tree``, or the tree saved by PAUP*.
    """
    post_est_commands = """\
    set crit=likelihood;
    root rootmethod=midpoint;
    lset userbr=no nst = 1 basefreq = eq rates = eq clock =yes;
    lscore;
    """
    if topology_tree is None:
        return estimate_tree(
            char_matrix,
            tree_est_criterion="nj",
            num_states=2,
            unequal_base_freqs=False,
            gamma_rates=False,
            prop_invar=False,
            extra_post_est_commands=post_est_commands)
    else:
        paup_block = """\
        set warnreset=no;
        exe '%(data_file)s';
        gettrees file= '%(intree_file)s' warntree=no;
        %(post_est_commands)s;
        savetrees file=%(outtree_file)s format=nexus root=yes brlens=yes taxablk=yes maxdecimals=20;
        """
        # All three temporary files are managed by one ``with`` so that they
        # are closed (and removed) even if PAUP* or the parsing step raises.
        with tempfile.NamedTemporaryFile("w", delete=True) as cf, \
                tempfile.NamedTemporaryFile("w", delete=True) as input_tree_file_handle, \
                tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
            char_matrix.write_to_stream(cf, schema='nexus')
            cf.flush()
            topology_tree.write_to_stream(input_tree_file_handle, schema="nexus")
            input_tree_file_handle.flush()
            output_tree_filepath = output_tree_file_handle.name
            paup_args = {
                "data_file": cf.name,
                "intree_file": input_tree_file_handle.name,
                "post_est_commands": post_est_commands,
                "outtree_file": output_tree_filepath,
            }
            paup_run = subprocess.Popen(['%s -n' % paup_path],
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
            processio.communicate(paup_run, paup_block % paup_args)
            # Read the saved tree while the output file still exists.
            return dendropy.Tree.get_from_path(
                output_tree_filepath,
                "nexus",
                taxon_namespace=char_matrix.taxon_namespace)
Esempio n. 6
0
    def compare_one_to_many(
        self,
        ref_tree,
        comparison_trees,
        command_args=None,
        newick_output_kwargs=None,
    ):
        """
        Compare ``ref_tree`` to each tree in ``comparison_trees``.

        Parameters
        ----------
        ref_tree : |Tree|
            A |Tree| object to be compared to every tree in ``comparison_trees``.
        comparison_trees : iterable of |Tree|
            An (ordered) iterable of trees to which ``ref_tree`` should be
            compared. It is traversed exactly once, so a generator is
            acceptable.
        command_args : list or None
            An iterable of (string) arguments to be passed to the program.
        newick_output_kwargs : dict or None
            A collection of keyword arguments to pass to the tree string
            composition routines (that will generate the tree strings to be
            used as input to rspr).

        Returns
        -------
        scores : list[numeric]
            A list of the SPR distances from ``ref_tree`` to
            ``comparison_trees``, in order of the trees given.
        """
        if newick_output_kwargs is None:
            newick_output_kwargs = {}
        tf = textprocessing.StringIO()
        ref_tree.write(file=tf, schema="newick", **newick_output_kwargs)
        # Count the trees while writing them instead of calling ``len()``
        # afterwards: ``len()`` fails for iterables such as generators.
        num_comparison_trees = 0
        for t in comparison_trees:
            t.write(file=tf, schema="newick", **newick_output_kwargs)
            num_comparison_trees += 1
        command = ["rspr"]  # TODO: command path as instance attribute
        command.extend(["-pairwise", "0", "1"])
        if command_args is not None:
            command.extend(command_args)
        p = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = processio.communicate(p, commands=tf.getvalue())
        result_fields = stdout.strip("\n").split(",")
        # The first field belongs to the reference tree itself; the
        # remaining fields are one distance per comparison tree.
        assert len(result_fields) == 1 + num_comparison_trees, \
            "Expecting length {} + 1 for results, but received {}: {}".format(
                num_comparison_trees, len(result_fields), result_fields)
        return [int(v) for v in result_fields[1:]]
Esempio n. 7
0
def estimate_ultrametric_tree(
        char_matrix,
        topology_tree=None,
        paup_path=PAUP_PATH):
    """
    Estimate a tree with PAUP*, running clock-enforcing commands afterwards.

    Parameters
    ----------
    char_matrix : character matrix
        Written to a temporary NEXUS file for PAUP*; its
        ``taxon_namespace`` is applied to the tree that is read back.
    topology_tree : Tree or None
        When None, ``estimate_tree`` is called with the "nj" criterion and
        the post-estimation commands defined here. Otherwise the tree is
        written to a temporary file and loaded into PAUP* before those
        commands are executed.
    paup_path : str
        Command used to invoke PAUP* (only used when ``topology_tree`` is
        given).

    Returns
    -------
    Tree
        The result of ``estimate_tree``, or the tree saved by PAUP*.
    """
    post_est_commands = """\
    set crit=likelihood;
    root rootmethod=midpoint;
    lset userbr=no nst = 1 basefreq = eq rates = eq clock =yes;
    lscore;
    """
    if topology_tree is None:
        return estimate_tree(char_matrix,
                tree_est_criterion="nj",
                num_states=2,
                unequal_base_freqs=False,
                gamma_rates=False,
                prop_invar=False,
                extra_post_est_commands=post_est_commands)
    else:
        paup_block = """\
        set warnreset=no;
        exe '%(data_file)s';
        gettrees file= '%(intree_file)s' warntree=no;
        %(post_est_commands)s;
        savetrees file=%(outtree_file)s format=nexus root=yes brlens=yes taxablk=yes maxdecimals=20;
        """
        # A single ``with`` owns every temporary file, so none of them is
        # leaked if writing, running PAUP*, or parsing the result fails.
        with tempfile.NamedTemporaryFile("w", delete=True) as cf, \
                tempfile.NamedTemporaryFile("w", delete=True) as input_tree_file_handle, \
                tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
            char_matrix.write_to_stream(cf, schema='nexus')
            cf.flush()
            input_tree_filepath = input_tree_file_handle.name
            topology_tree.write_to_stream(input_tree_file_handle, schema="nexus")
            input_tree_file_handle.flush()
            output_tree_filepath = output_tree_file_handle.name
            paup_args = {}
            paup_args["data_file"] = cf.name
            paup_args["intree_file"] = input_tree_filepath
            paup_args["post_est_commands"] = post_est_commands
            paup_args["outtree_file"] = output_tree_filepath
            paup_block = paup_block % paup_args
            paup_run = subprocess.Popen(['%s -n' % paup_path],
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
            processio.communicate(paup_run, paup_block)
            # Parse the output before leaving the ``with`` removes the file.
            return dendropy.Tree.get_from_path(
                output_tree_filepath,
                "nexus",
                taxon_namespace=char_matrix.taxon_namespace)
Esempio n. 8
0
def muscle_align(char_matrix, muscle_args=None, muscle_path='muscle'):
    """
    Align ``char_matrix`` with the external ``muscle`` program.

    The matrix is sent to the program as FASTA on standard input and the
    program's standard output is parsed as FASTA.

    Parameters
    ----------
    char_matrix : character matrix
        Sequences to align.
    muscle_args : list or None
        Extra command-line arguments appended after the executable.
    muscle_path : str
        Executable to run.

    Returns
    -------
    A new matrix of the same class as ``char_matrix``, sharing its
    ``taxon_namespace``.

    Raises
    ------
    Exception
        Carrying the program's standard error if it exits non-zero.
    """
    command = [muscle_path]
    if muscle_args:
        command = command + muscle_args
    pipe = subprocess.PIPE
    proc = subprocess.Popen(command, stdin=pipe, stdout=pipe, stderr=pipe)
    fasta_input = char_matrix.as_string("fasta")
    aligned_text, error_text = processio.communicate(proc, fasta_input)
    if proc.returncode:
        raise Exception(error_text)
    matrix_type = char_matrix.__class__
    return matrix_type.get_from_string(
        aligned_text,
        "fasta",
        taxon_namespace=char_matrix.taxon_namespace)
Esempio n. 9
0
    def estimate_niche_evolution_rate(self, trees):
        """
        Run BayesTraits on each post-processed tree and print the results.

        For every tree, the tree is written to ``self.tree_file_name``
        (NEXUS) and a tab-separated trait file to ``self.data_file_name``,
        then BayesTraits is run with a fixed list of commands. The result
        printed for each tree is a dict pairing the fields of the
        third-to-last output line with those of the second-to-last line.

        Parameters
        ----------
        trees : iterable of trees
            Passed through ``self.tree_postprocessor.process_trees`` first.

        Returns
        -------
        None
        """
        trees = self.tree_postprocessor.process_trees(trees)
        for tree in trees:

            # For each taxon, the 1-based positions of the "1" characters in
            # its habitat code, stored as strings.
            taxa = tree.poll_taxa()
            taxon_state_set_map = {}
            for taxon in taxa:
                taxon_state_set_map[taxon] = {
                    str(idx + 1)
                    for idx, flag in enumerate(taxon.habitat_code)
                    if flag == "1"
                }

            tree.taxon_namespace = dendropy.TaxonNamespace(taxa)
            for nd in tree:
                nd.label = None  # BayesTraits gets confused with internal taxon labels, especially those with periods etc.
            tree.write_to_path(self.tree_file_name,
                               "nexus",
                               translate_tree_taxa=True)

            name_to_symbol_map = postprocess.NameToSymbolMap()
            # ``with`` closes the data file even if a write fails.
            with open(self.data_file_name, "w") as dataf:
                for taxon in taxa:
                    states = sorted([
                        name_to_symbol_map[s] for s in taxon_state_set_map[taxon]
                    ])
                    row = [taxon.label, "".join(states)]
                    dataf.write("{}\n".format("\t".join(row)))

            bt_commands = [
                "1",  # multstate
                "1",  # ml; 2 == mcmc
                "restrictall q{}{}".format(
                    name_to_symbol_map.SYMBOLS[0],
                    name_to_symbol_map.SYMBOLS[1]),
                "run",
            ]
            p = subprocess.Popen(
                ["BayesTraits", self.tree_file_name, self.data_file_name],
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
            )
            # The commands are handed over as a list, as in the original.
            stdout, stderr = processio.communicate(p, bt_commands)
            stdout = stdout.split("\n")
            result = dict(zip(stdout[-3].split("\t"), stdout[-2].split("\t")))
            # Presumably the rows end with a trailing tab, which yields an
            # empty-string key -- TODO confirm against BayesTraits output.
            del result['']
            print(result)
Esempio n. 10
0
def muscle_align(char_matrix, muscle_args=None, muscle_path='muscle'):
    """
    Pipe ``char_matrix`` through ``muscle`` and return the aligned matrix.

    Parameters
    ----------
    char_matrix : character matrix
        Serialized to FASTA and written to the program's standard input.
    muscle_args : list or None
        Additional arguments placed after ``muscle_path`` on the command
        line.
    muscle_path : str
        Executable to invoke.

    Returns
    -------
    A matrix built with ``char_matrix.__class__.get_from_string`` from the
    program's FASTA output, using ``char_matrix.taxon_namespace``.

    Raises
    ------
    Exception
        With the program's standard error as its argument when the return
        code is non-zero.
    """
    invocation = [muscle_path]
    if muscle_args:
        invocation = invocation + muscle_args
    aligner = subprocess.Popen(
        invocation,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out_text, err_text = processio.communicate(
        aligner, char_matrix.as_string("fasta"))
    if aligner.returncode:
        raise Exception(err_text)
    aligned = char_matrix.__class__.get_from_string(
        out_text,
        "fasta",
        taxon_namespace=char_matrix.taxon_namespace,
    )
    return aligned
Esempio n. 11
0
 def execute_analysis(self, config_path, tree_path,
                      is_use_decimal_value_type):
     """
     Run the speciation-completion-rate estimation script and load results.

     Parameters
     ----------
     config_path : str
         Passed to the script after ``-c``.
     tree_path : str
         Passed to the script after ``-t``.
     is_use_decimal_value_type : bool
         When true, ``--underflow-protect`` is added to the command line.

     Returns
     -------
     Whatever ``self._load_results`` returns for the script's standard
     output.
     """
     script_path = os.path.join(
         _pathmap.BIN_DIR,
         "delineate-estimate-speciation-completion-rate.py")
     command = [script_path, "-c", config_path, "-t", tree_path, "-I", "-i"]
     if is_use_decimal_value_type:
         command.append("--underflow-protect")
     proc = subprocess.Popen(command, stdout=subprocess.PIPE)
     # Only stdout is piped; the second element is not used.
     captured_stdout, _ = processio.communicate(proc)
     return self._load_results(captured_stdout)
Esempio n. 12
0
    def estimate_niche_evolution_rate(self, trees):
        """
        Feed each post-processed tree to BayesTraits and print its results.

        Per tree: write the tree (NEXUS) to ``self.tree_file_name``, write a
        tab-separated ``label<TAB>states`` file to ``self.data_file_name``,
        run BayesTraits with a fixed command list, and print a dict built
        from the third-to-last (keys) and second-to-last (values) lines of
        its output.

        Parameters
        ----------
        trees : iterable of trees
            First transformed by ``self.tree_postprocessor.process_trees``.

        Returns
        -------
        None
        """
        trees = self.tree_postprocessor.process_trees(trees)
        for tree in trees:

            taxa = tree.poll_taxa()
            # Map each taxon to the set of 1-based indexes (as strings) at
            # which its habitat code holds "1".
            taxon_state_set_map = {}
            for taxon in taxa:
                taxon_state_set_map[taxon] = {
                    str(idx+1)
                    for idx, flag in enumerate(taxon.habitat_code)
                    if flag == "1"
                }

            tree.taxon_namespace = dendropy.TaxonNamespace(taxa)
            for nd in tree:
                nd.label = None # BayesTraits gets confused with internal taxon labels, especially those with periods etc.
            tree.write_to_path(
                    self.tree_file_name,
                    "nexus",
                    translate_tree_taxa=True)

            name_to_symbol_map = postprocess.NameToSymbolMap()
            # The context manager guarantees the file is closed on error.
            with open(self.data_file_name, "w") as dataf:
                for taxon in taxa:
                    states = sorted([name_to_symbol_map[s] for s in taxon_state_set_map[taxon]])
                    dataf.write("{}\n".format("\t".join([taxon.label, "".join(states)])))

            bt_commands = []
            bt_commands.append("1") # multstate
            bt_commands.append("1") # ml; 2 == mcmc
            bt_commands.append("restrictall q{}{}".format(
                name_to_symbol_map.SYMBOLS[0],
                name_to_symbol_map.SYMBOLS[1]))
            bt_commands.append("run")
            p = subprocess.Popen(
                    ["BayesTraits", self.tree_file_name, self.data_file_name],
                    stdout=subprocess.PIPE,
                    stdin=subprocess.PIPE,
                    )
            # The command list is passed unjoined, as in the original.
            stdout, stderr = processio.communicate(p, bt_commands)
            stdout = stdout.split("\n")
            result = dict(zip(stdout[-3].split("\t"), stdout[-2].split("\t")))
            # Presumably produced by a trailing tab on the rows -- TODO
            # confirm against real BayesTraits output.
            del result['']
            print(result)
Esempio n. 13
0
 def _run_vcs(self, cmd):
     """
     Run a version-control command inside ``self.repo_path``.

     Parameters
     ----------
     cmd : str or list
         Arguments for the VCS executable; ``self.vcs_app_path`` is
         prepended. A string is run through the shell. A list is executed
         directly, because with ``shell=True`` on POSIX only the first list
         element would be run as the command.

     Returns
     -------
     (retcode, stdout, stderr) : tuple
         Exit status and captured output of the process. If the process
         cannot be started (``OSError``), ``(-999, "", <error message>)``
         is returned instead.
     """
     use_shell = textprocessing.is_str_type(cmd)
     if use_shell:
         cmd = self.vcs_app_path + " " + cmd
     else:
         # Build a new list rather than inserting into the caller's list.
         cmd = [self.vcs_app_path] + list(cmd)
     try:
         p = subprocess.Popen(cmd,
             shell=use_shell,
             cwd=os.path.abspath(self.repo_path),
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(p)
         retcode = p.returncode
     except OSError as e:
         return -999, "", str(e)
     return retcode, stdout, stderr
Esempio n. 14
0
 def _run_vcs(self, cmd):
     """
     Execute the VCS application with ``cmd`` in the repository directory.

     Parameters
     ----------
     cmd : str or list
         Command arguments. A string is appended to ``self.vcs_app_path``
         and run via the shell; a list gets ``self.vcs_app_path`` as its
         first element and is run without a shell (with ``shell=True`` on
         POSIX the extra list items would be handed to the shell itself,
         not to the VCS program).

     Returns
     -------
     (retcode, stdout, stderr) : tuple
         Process exit status and captured streams, or
         ``(-999, "", <error message>)`` when launching the process raises
         ``OSError``.
     """
     is_string_command = textprocessing.is_str_type(cmd)
     if is_string_command:
         cmd = self.vcs_app_path + " " + cmd
     else:
         # Copy instead of ``cmd.insert(0, ...)`` so the caller's list is
         # left untouched.
         cmd = [self.vcs_app_path] + list(cmd)
     try:
         p = subprocess.Popen(cmd,
             shell=is_string_command,
             cwd=os.path.abspath(self.repo_path),
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(p)
         retcode = p.returncode
     except OSError as e:
         return -999, "", str(e)
     return retcode, stdout, stderr
Esempio n. 15
0
 def estimate_trait_transition_rates_using_geiger(self, tree,
                                                  profile_results,
                                                  trait_names):
     """
     Estimate one transition rate per trait by running geiger in R.

     An R script is written to ``self.commands_file_name`` that loads the
     tree from ``self.tree_file_name`` and the traits from
     ``self.traits_data_file_name``, calls ``fitDiscrete`` for each trait
     column, and prints one value per trait. The values are stored in
     ``profile_results`` under "trait.<name>.est.transition.rate".

     Parameters
     ----------
     tree : tree
         Forwarded to ``self.create_geiger_traits_data``.
     profile_results : dict
         Updated in place with one entry per trait name.
     trait_names : sequence of str
         Names of the traits; trait ``i`` is read from column ``i + 1`` of
         the traits file.

     Raises
     ------
     Exception
         Carrying the R return code if R fails and
         ``self.fail_on_estimation_error`` is true. Otherwise every rate
         is recorded as "NA".
     """
     self.create_geiger_traits_data(tree=tree,
                                    num_trait_types=len(trait_names))
     rcmds = [
         "library(parallel, quietly=T)",
         "library(ape, quietly=T)",
         "library(geiger, quietly=T)",
         "tree1 <- read.nexus('{}')".format(self.tree_file_name),
         "traits <- read.csv('{}', header=F, row.names=1)".format(
             self.traits_data_file_name),
     ]
     for trait_idx, _ in enumerate(trait_names):
         trait_var = "trait{}".format(trait_idx)
         rcmds.append("{} <- round(traits[,{}])".format(
             trait_var, trait_idx + 1))
         rcmds.append("names({}) <- row.names(traits)".format(trait_var))
         rcmds.append("m = fitDiscrete(tree1, {})".format(trait_var))
         rcmds.append(r"cat(c(m$opt$q12), sep='\n')")
     # ``with`` guarantees the script file is flushed and closed before R
     # reads it, even if the write raises.
     with open(self.commands_file_name, "w") as rfile:
         rfile.write("\n".join(rcmds) + "\n")
     shell_cmd = [
         "R", "--vanilla", "--no-save", "--slave", "--silent", "-f",
         self.commands_file_name
     ]
     p = subprocess.Popen(
         shell_cmd,
         stdout=subprocess.PIPE,
     )
     stdout, stderr = processio.communicate(p)
     if p.returncode != 0:
         if self.fail_on_estimation_error:
             raise Exception(p.returncode)
         rows = ["NA"] * len(trait_names)
     else:
         stripped = [row.strip() for row in stdout.split("\n")]
         rows = [float(row) for row in stripped if row]
         # One printed value is expected for every trait.
         assert len(rows) == len(trait_names), rows
     for field_name, rate in zip(trait_names, rows):
         profile_results["trait.{}.est.transition.rate".format(
             field_name)] = rate
Esempio n. 16
0
 def estimate_dec_rates_lagrange(self, tree, profile_results, **kwargs):
     """
     Run ``lagrange_cpp`` on ``tree`` and record the two extracted rates.

     The tree is written (Newick) to ``self.newick_tree_file_name``, the
     geography data to ``self.geography_data_file_name``, and a config file
     naming both to ``self.commands_file_name``. The program's output is
     matched against ``ArchipelagoProfiler.LAGRANGE_CPP_EXTRACT_PATTERN``.

     Parameters
     ----------
     tree : tree
         Tree to analyze.
     profile_results : dict
         Updated in place with "lagrange.dec.dispersal.rate" and
         "lagrange.dec.extinction.rate" (floats, or "NA" on failure when
         ``self.fail_on_estimation_error`` is false).
     **kwargs
         Accepted but not used by this method.

     Raises
     ------
     OSError
         If the program cannot be started.
     Exception
         If the program exits non-zero or its output cannot be parsed and
         ``self.fail_on_estimation_error`` is true.
     """
     tree.write_to_path(
         self.newick_tree_file_name,
         "newick",
         suppress_rooting=True,
     )
     self.create_lagrangecpp_geography_file(
         tree=tree, output_path=self.geography_data_file_name)
     # ``with`` closes the config file even if a write fails.
     with open(self.commands_file_name, "w") as configf:
         configf.write("treefile = {}\n".format(self.newick_tree_file_name))
         configf.write("datafile = {}\n".format(self.geography_data_file_name))
     shell_cmd = ["lagrange_cpp", self.commands_file_name]
     try:
         p = subprocess.Popen(
             shell_cmd,
             stdout=subprocess.PIPE,
         )
     except OSError:
         raise OSError("Failed to execute command: {}".format(
             " ".join(shell_cmd)))
     stdout, stderr = processio.communicate(p)
     if p.returncode != 0:
         if self.fail_on_estimation_error:
             raise Exception(p.returncode)
         dispersal_rate = extinction_rate = "NA"
     else:
         match = ArchipelagoProfiler.LAGRANGE_CPP_EXTRACT_PATTERN.match(
             stdout)
         if match:
             results = match.groups(1)
             dispersal_rate = float(results[0])
             extinction_rate = float(results[1])
         elif self.fail_on_estimation_error:
             raise Exception(
                 "Failed to extract results from Lagrange estimation")
         else:
             dispersal_rate = extinction_rate = "NA"
     profile_results["lagrange.dec.dispersal.rate"] = dispersal_rate
     profile_results["lagrange.dec.extinction.rate"] = extinction_rate
Esempio n. 17
0
 def estimate_trait_transition_rates_using_bayestraits(
         self, tree, profile_results, trait_names):
     """
     Estimate a transition rate for every trait by running BayesTraits.

     For each trait the trait data are written to
     ``self.traits_data_file_name``, BayesTraits is run on
     ``self.tree_file_name`` with all rates restricted to the rate named
     after the first two symbols, and that rate is read from the row
     following the "Tree No\\tLh\\tq..." header line of the output.

     Parameters
     ----------
     tree : tree
         Forwarded to ``self.create_bayestraits_traits_data``.
     profile_results : dict
         Updated in place under "trait.<name>.est.transition.rate".
     trait_names : iterable of str
         Trait names, in trait-index order.

     Raises
     ------
     Exception
         If no header line is found and
         ``self.fail_on_estimation_error`` is true; otherwise the rate is
         recorded as "NA".
     """
     header_prefix = "Tree No\tLh\tq"
     for trait_idx, trait_name in enumerate(trait_names):
         symbols = self.create_bayestraits_traits_data(
             tree, trait_idx, output_path=self.traits_data_file_name)
         master_rate = "q{}{}".format(symbols[0], symbols[1])
         bt_script = "\n".join([
             "1",  # multstate
             "1",  # ml; 2 == mcmc
             "restrictall {}".format(master_rate),
             "run",
         ])
         proc = subprocess.Popen(
             ["BayesTraits", self.tree_file_name, self.traits_data_file_name],
             stdout=subprocess.PIPE,
             stdin=subprocess.PIPE,
         )
         out_text, _ = processio.communicate(proc, bt_script)
         lines = out_text.split("\n")
         # Index of the first header line, or None when there is none.
         header_idx = next(
             (i for i, text in enumerate(lines)
              if text.startswith(header_prefix)),
             None)
         if header_idx is not None:
             fields = dict(
                 zip(lines[header_idx].split("\t"),
                     lines[header_idx + 1].split("\t")))
             rate = float(fields[master_rate])
         elif self.fail_on_estimation_error:
             raise Exception(
                 "Failed to extract results from BayesTraits estimation"
             )
         else:
             rate = "NA"
         result_key = "trait.{}.est.transition.rate".format(trait_name)
         profile_results[result_key] = rate
Esempio n. 18
0
 def estimate_pure_dispersal_weight(
     self,
     tree,
     profile_results,
 ):
     """
     Estimate the area transition rate by running BayesTraits.

     The geography data are written to ``self.geography_data_file_name``
     and BayesTraits is run on ``self.tree_file_name`` with all rates
     restricted to ``q01``. The value of ``q01`` is read from the row after
     the "Tree No\\tLh\\tq..." header line of the output.

     Parameters
     ----------
     tree : tree
         Forwarded to ``self.create_bayestraits_geography_file``.
     profile_results : dict
         Updated in place under "area.est.transition.rate".

     Raises
     ------
     Exception
         If no header line is found and
         ``self.fail_on_estimation_error`` is true; otherwise "NA" is
         recorded.
     """
     self.create_bayestraits_geography_file(
         tree, output_path=self.geography_data_file_name)
     command_lines = [
         "1",  # multstate
         "1",  # ml; 2 == mcmc
         "restrictall q01",
         "run",
     ]
     bt_script = "\n".join(command_lines)
     bayestraits = subprocess.Popen(
         ["BayesTraits", self.tree_file_name, self.geography_data_file_name],
         stdout=subprocess.PIPE,
         stdin=subprocess.PIPE,
     )
     out_text, _ = processio.communicate(bayestraits, bt_script)
     output_lines = out_text.split("\n")
     for position, line in enumerate(output_lines):
         if line.startswith("Tree No\tLh\tq"):
             # Header names are paired with the values on the next line.
             labels = line.split("\t")
             values = output_lines[position + 1].split("\t")
             rate = float(dict(zip(labels, values))['q01'])
             break
     else:
         # No header line anywhere in the output.
         if self.fail_on_estimation_error:
             raise Exception(
                 "Failed to extract results from BayesTraits estimation")
         rate = "NA"
     profile_results["area.est.transition.rate"] = rate
Esempio n. 19
0
 def generate(self, trees, dataset=None, taxon_namespace=None, **kwargs):
     """
     Run the external sequence generator on ``trees`` and read its output.

     Parameters
     ----------
     trees : tree collection
         Written in Newick format to a temporary file that is passed to
         the program as its last argument.
     dataset : DataSet or None
         Dataset to read the results into; a new ``dendropy.DataSet`` is
         created from ``kwargs`` when None.
     taxon_namespace : TaxonNamespace or None
         Namespace attached to a newly created dataset; defaults to
         ``trees.taxon_namespace``.
     **kwargs
         Forwarded to ``dendropy.DataSet`` when a new dataset is created.

     Returns
     -------
     DataSet
         The dataset after reading the program's NEXUS output.

     Raises
     ------
     RuntimeError
         If the program writes to standard error or exits non-zero.
     """
     args=self._compose_arguments()
     tree_inputf = self.get_tempfile()
     try:
         trees.write_to_path(tree_inputf.name,
                 "newick",
                 suppress_rooting=True,
                 suppress_internal_node_labels=True)
         tree_inputf.flush()
         args.append(tree_inputf.name)
         #_LOG.debug("seq-gen args: = %s" % " ".join(args))
         run = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(run)
     finally:
         # Release the temporary file as soon as the program has finished
         # with it, including when an error occurs. Assumes the object
         # returned by get_tempfile() is file-like (it already provides
         # ``name`` and ``flush``) -- TODO confirm it has ``close``.
         tree_inputf.close()
     if stderr or run.returncode != 0:
         raise RuntimeError("Seq-gen error: %s" % stderr)
     if taxon_namespace is None:
         taxon_namespace = trees.taxon_namespace
     if dataset is None:
         dataset = dendropy.DataSet(**kwargs)
         if taxon_namespace is not None:
             dataset.attach_taxon_namespace(taxon_namespace)
     dataset.read(data=stdout, schema="nexus")
     return dataset
Esempio n. 20
0
 def generate(self, trees, dataset=None, taxon_namespace=None, **kwargs):
     """
     Generate data for ``trees`` with the external program and load it.

     Parameters
     ----------
     trees : tree collection
         Written (Newick) to a temporary file whose path becomes the final
         command-line argument.
     dataset : DataSet or None
         Destination dataset. If None, ``dendropy.DataSet(**kwargs)`` is
         created and, when a namespace is available, attached to it.
     taxon_namespace : TaxonNamespace or None
         Namespace for a newly created dataset; falls back to
         ``trees.taxon_namespace``.
     **kwargs
         Constructor arguments for a newly created dataset.

     Returns
     -------
     DataSet
         ``dataset`` after the program's output was read as NEXUS.

     Raises
     ------
     RuntimeError
         When anything is written to standard error or the return code is
         non-zero.
     """
     args=self._compose_arguments()
     tree_inputf = self.get_tempfile()
     try:
         trees.write_to_path(tree_inputf.name,
                 "newick",
                 suppress_rooting=True,
                 suppress_internal_node_labels=True)
         tree_inputf.flush()
         args.append(tree_inputf.name)
         #_LOG.debug("seq-gen args: = %s" % " ".join(args))
         run = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(run)
     finally:
         # The original never closed the temporary file. Assumes
         # get_tempfile() returns a file-like object with ``close`` (it is
         # already used via ``name`` and ``flush``) -- TODO confirm.
         tree_inputf.close()
     if stderr or run.returncode != 0:
         raise RuntimeError("Seq-gen error: %s" % stderr)
     if taxon_namespace is None:
         taxon_namespace = trees.taxon_namespace
     if dataset is None:
         dataset = dendropy.DataSet(**kwargs)
         if taxon_namespace is not None:
             dataset.attach_taxon_namespace(taxon_namespace)
     dataset.read(data=stdout, schema="nexus")
     return dataset
Esempio n. 21
0
    def call(
            paup_commands,
            suppress_standard_preamble=False,
            ignore_error_returncode=False,
            ignore_nonempty_stderr=False,
            strip_extraneous_prompts_from_stdout=True,
            strip_extraneous_prompts_from_stderr=True,
            cwd=None,
            env=None,
            paup_path=PAUP_PATH
            ):
        """
        Run a batch of PAUP* commands and return the process results.

        The commands are joined into one block (each terminated by ";" and
        a newline, with "quit" appended) and written to the standard input
        of ``paup_path -n -u``.

        Parameters
        ----------
        paup_commands : string or iterable of strings
            A single PAUP command, or a list or other iterable of PAUP
            commands.
        suppress_standard_preamble : bool
            If |True|, the standard preamble is not placed in front of the
            command sequence.
        ignore_error_returncode : bool
            If |True|, a non-0 return code from the PAUP process does not
            raise an exception.
        ignore_nonempty_stderr : bool
            If |True|, output on standard error (other than 'paup>'
            prompts) does not raise an exception.
        strip_extraneous_prompts_from_stdout : bool
            If |True|, every occurrence of 'paup>' is removed from the
            returned standard output.
        strip_extraneous_prompts_from_stderr : bool
            If |True|, every occurrence of 'paup>' is removed from the
            returned standard error.
        cwd : string
            Working directory for the PAUP* process.
        env : dictionary
            Environment variables for the PAUP* process.
        paup_path : string
            Path to the PAUP* executable.

        Returns
        -------
        returncode : exit value of PAUP process.
        stdout : string
            Contents of the PAUP process standard output.
        stderr : string
            Contents of the PAUP process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the return code is non-0 or standard error is non-empty and
            the corresponding ``ignore_*`` flag is not set. The exception
            carries the unstripped output.
        """
        prompt = "paup>"
        if textprocessing.is_str_type(paup_commands):
            batch = [paup_commands]
        else:
            batch = list(paup_commands)
        if not suppress_standard_preamble:
            batch = [STANDARD_PREAMBLE] + batch
        batch.append("quit")
        paup_block = "".join(command + ";\n" for command in batch)
        invocation_command = [paup_path, "-n", "-u"]
        proc = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        raw_stdout, raw_stderr = processio.communicate(proc, paup_block)
        # weird dev/paup error ... lots of prompts spring up
        if strip_extraneous_prompts_from_stdout:
            stdout = raw_stdout.replace(prompt, "")
        else:
            stdout = raw_stdout
        # The emptiness check always ignores prompts, whether or not the
        # caller asked for them to be stripped from the returned text.
        chk_stderr = raw_stderr.replace(prompt, "")
        if strip_extraneous_prompts_from_stderr:
            stderr = chk_stderr
        else:
            stderr = raw_stderr
        returncode_failure = proc.returncode != 0 and not ignore_error_returncode
        stderr_failure = chk_stderr != "" and not ignore_nonempty_stderr
        if returncode_failure or stderr_failure:
            raise error.ExternalServiceError(
                    service_name="PAUP*",
                    invocation_command=invocation_command,
                    service_input=paup_block,
                    returncode = proc.returncode,
                    stdout=raw_stdout,
                    stderr=raw_stderr)
        return proc.returncode, stdout, stderr
Esempio n. 22
0
    def estimate_tree(self, char_matrix, raxml_args=None):
        """
        Estimate a tree for ``char_matrix`` by running RAxML.

        The matrix is written in PHYLIP format into the working directory,
        RAxML is run there with the GTRCAT model and a random integer
        passed via ``-p``, and the best-tree file is read back as Newick.

        Parameters
        ----------
        char_matrix : character matrix
            Sequences to analyze; its ``taxon_namespace`` is used for the
            returned tree.
        raxml_args : list or None
            Extra command-line arguments appended to the RAxML invocation.

        Returns
        -------
        Tree
            The best tree, with taxon labels mapped back through
            ``self.taxon_label_map``.

        Notes
        -----
        The interpreter exits (``sys.exit``) if RAxML returns a non-zero
        status or the best-tree file does not exist.
        """

        # set up taxa
        taxa = char_matrix.taxon_namespace

        # create working directory
        self._create_working_dir()

        # remap taxon labels
        self.taxon_label_map = {}
        self._remap_taxon_labels(taxa)

        # clean working directory of previous runs
        self._preclean_working_dir()

        # write input sequences; ``with`` closes the file even on error
        raxml_seqs_filepath = os.path.join(self.working_dir_path,
                                           self.input_seq_fname)
        with open(raxml_seqs_filepath, "w") as raxml_seqs_filepath_out:
            char_matrix.write_to_stream(raxml_seqs_filepath_out, "phylip")
        self.files_to_clean.append(raxml_seqs_filepath)
        self.files_to_clean.append(raxml_seqs_filepath + ".reduced")

        # run RAxML
        if raxml_args is None:
            raxml_args = []
        cmd = [
            self.raxml_path, '-m', 'GTRCAT', '-s', raxml_seqs_filepath, '-n',
            self.name, '-p',
            str(random.randint(0, sys.maxsize))
        ] + raxml_args
        if self.verbosity >= 2:
            # Let RAxML write straight to the parent's stdout/stderr.
            stdout_pipe = None
            stderr_pipe = None
        else:
            stdout_pipe = subprocess.PIPE
            stderr_pipe = subprocess.PIPE
        p = subprocess.Popen(cmd,
                             stdout=stdout_pipe,
                             stderr=stderr_pipe,
                             cwd=self.working_dir_path)
        stdout, stderr = processio.communicate(p)
        if p.returncode != 0:
            sys.stderr.write("[RAxML run failed]:\n\n%s\n\n" % (" ".join(cmd)))
            # Streams that were not piped are presumably returned as None
            # (as with Popen.communicate); writing None would raise a
            # TypeError and mask the real exit status.
            if stdout is not None:
                sys.stdout.write(stdout)
            if stderr is not None:
                sys.stderr.write(stderr)
            sys.exit(p.returncode)

        # read result
        raxml_best_tree_fpath = os.path.join(self.working_dir_path,
                                             self.best_tree_fname)
        if not os.path.exists(raxml_best_tree_fpath):
            self._send_error(
                "RAxML result not found: {}".format(raxml_best_tree_fpath))
            sys.exit(1)
        best_tree = dendropy.Tree.get_from_path(raxml_best_tree_fpath,
                                                "newick",
                                                taxon_namespace=taxa)

        # remap labels
        for taxon in best_tree.taxon_namespace:
            taxon.label = self.taxon_label_map[taxon.label]

        # clean-up
        self._postclean_working_dir()

        # return result
        return best_tree
Esempio n. 23
0
def estimate_tree(char_matrix,
                    tree_est_criterion="likelihood",
                    num_states=6,
                    unequal_base_freqs=True,
                    gamma_rates=True,
                    prop_invar=True,
                    extra_pre_est_commands=None,
                    extra_post_est_commands=None,
                    paup_path='paup'):
    """
    Given a dataset, ``char_matrix``, estimates a tree using the given criterion.

    The data are written to a temporary NEXUS file, a PAUP* command block is
    assembled and piped to ``paup_path -n``, and the tree that PAUP* saves is
    read back in.

    Parameters
    ----------
    char_matrix : character matrix
        Data to analyze; must provide ``write_to_stream`` and
        ``taxon_namespace``.
    tree_est_criterion : str
        ``"nj"`` or ``"upgma"`` are issued directly as the PAUP* estimation
        command; any other value is set as ``crit`` and followed by
        ``hsearch``. Values starting with ``"like"`` additionally emit an
        ``lset`` command built from the model arguments below.
    num_states : int
        Value substituted for ``nst`` in the ``lset`` command.
    unequal_base_freqs : bool
        If true, ``basefreq=estimate``; otherwise ``basefreq=equal``.
    gamma_rates : bool
        If true, ``rates=gamma``; otherwise ``rates=equal``.
    prop_invar : bool
        If true, ``pinvar=estimate``; otherwise ``pinvar=0``.
    extra_pre_est_commands : str or iterable of str
        Extra PAUP* command(s) inserted before the estimation command;
        multiple commands are joined with ``";\\n"``.
    extra_post_est_commands : str or iterable of str
        Extra PAUP* command(s) inserted after the estimation command.
    paup_path : str
        Command used to launch PAUP*.

    Returns
    -------
    The tree read from the NEXUS file written by PAUP*, bound to
    ``char_matrix.taxon_namespace``.
    """
    paup_args = {
        'nst': num_states,
        'basefreq': 'estimate' if unequal_base_freqs else 'equal',
        'rates': 'gamma' if gamma_rates else 'equal',
        'pinvar': 'estimate' if prop_invar else '0',
    }
    if extra_pre_est_commands:
        if textprocessing.is_str_type(extra_pre_est_commands):
            extra_pre_est_commands = [extra_pre_est_commands]
        paup_args["pre_est_commands"] = ";\n".join(extra_pre_est_commands)
    else:
        paup_args["pre_est_commands"] = ""
    if extra_post_est_commands:
        if textprocessing.is_str_type(extra_post_est_commands):
            extra_post_est_commands = [extra_post_est_commands]
        paup_args["post_est_commands"] = ";\n".join(extra_post_est_commands)
    else:
        paup_args["post_est_commands"] = ""

    uses_distance_method = tree_est_criterion in ["nj", "upgma"]
    paup_template = """\
    set warnreset=no;
    exe %(datafile)s;
    """
    if tree_est_criterion.startswith("like"):
        paup_template += """\
    lset tratio=estimate rmatrix=estimate nst=%(nst)s basefreq=%(basefreq)s rates=%(rates)s shape=estimate pinvar=%(pinvar)s userbrlens=yes;
    """
    if not uses_distance_method:
        paup_template += """\
        set crit=%s;
        """ % tree_est_criterion
    paup_template += """\
    %(pre_est_commands)s;
    """

    if uses_distance_method:
        paup_template += tree_est_criterion + ";"
    else:
        paup_template += "hsearch;"

    paup_template += """\
    %(post_est_commands)s;
    savetrees file=%(est_tree_file)s format=nexus root=yes brlens=yes taxablk=yes maxdecimals=20;
    """

    # Both temporary files are managed by ``with`` so that they are closed
    # (and therefore deleted) even if PAUP* or the tree parser fails.
    with tempfile.NamedTemporaryFile("w", delete=True) as cf:
        char_matrix.write_to_stream(cf, schema='nexus')
        cf.flush()  # PAUP* reads the file by name, so the data must be on disk
        paup_args['datafile'] = cf.name
        with tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
            output_tree_filepath = output_tree_file_handle.name
            paup_args['est_tree_file'] = output_tree_filepath
            paup_run = subprocess.Popen(['%s -n' % paup_path],
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
            processio.communicate(paup_run, paup_template % paup_args)
            t = dendropy.Tree.get_from_path(
                    output_tree_filepath,
                    "nexus",
                    taxon_namespace=char_matrix.taxon_namespace)
    return t
Esempio n. 24
0
def run_phyml(
        phyml_path, char_matrix, data_type=None, parsimony_starting_tree=False,
        bootstrap=None, subst_model=None, amino_acid_rates=None,
        state_freqs=None, ts_tv_ratio=None, prop_invar=None, gamma_cats=None,
        gamma_shape=None, gamma_cat_median=False, free_rates=False,
        integrated_branch_length=False, codon_position=None, search_move=None,
        starting_tree=None, optimization=None, random_starting_tree=False,
        num_random_starting_trees=None, random_seed=None,
        site_likelihoods=False, trace_search=False, run_id=None,
        alias_subpattern=False):
    """
    Wrapper for running PhyML via its command-line interface.

    A parameter value set to None will in most cases result in the PhyML
    default value. Check the output to verify that your analysis was set up
    properly. Consult the PhyML documentation for details on parameters and
    default values.

    Parameters
    ----------
    phyml_path : str
        Path to PhyML executable.
    char_matrix : |CharacterMatrix|
        Matrix with data to be analyzed.
    data_type : str
        PhyML data type: "nt" (default) for nucleotide, "aa" for
        amino-acid sequences, or "generic".
    parsimony_starting_tree : bool
        If True, a minimum parsimony starting tree is used. This
        option is taken into account when `starting_tree` is False
        and when tree topology modifications are to be done.
    bootstrap : int
        * > 0 : the number of bootstrap replicates to generate.
        *   0 : neither approximate likelihood ratio test nor bootstrap values
                will be computed.
        *  -1 : approximate likelihood ratio test returning aLRT statistics.
        *  -2 : approximate likelihood ratio test returning Chi2-based.
                parametric branch supports.
        *  -4 : SH-like branch supports alone.
        *  -5 : (default) approximate Bayes branch supports.
    subst_model : str
        Substitution model name.
        * Nucleotide-based models : "HKY85" (default), "JC69", "K80", "F81",
          "F84", "TN93", "GTR", or a custom GTR-family model, e.g. "00000".
        * Amino-acid based models : "LG" (default), "WAG" ,"JTT", "MtREV",
          "Dayhoff", "DCMut", "RtREV", "CpREV", "VT", "AB", "Blosum62",
          "MtMam", "MtArt", "HIVw", "HIVb", "custom".
    amino_acid_rates : str
        amino acid substitution rate matrix in PAML format. It is compulsory
        to use this option when analyzing amino acid sequences with the
        "custom" substitution model.
    state_freqs : str or list of floats
        * "e" : the character frequencies will be determined as follows :
            - Nucleotide sequences: (Empirical) the equilibrium base
              frequencies are estimated by counting the occurrence of the
              different bases in the alignment.
            - Amino-acid sequences: (Empirical) the equilibrium amino-acid
              frequencies are estimated by counting the occurrence of the
              different amino-acids in the alignment.
        * "m" : the character frequencies are determined as follows :
            - Nucleotide sequences: (ML) the equilibrium base frequencies are
              estimated using maximum likelihood.
            - Amino-acid sequences: (Model) the equilibrium amino-acid
              frequencies are estimated using the frequencies defined by
              the substitution model.
        * "fA,fC,fG,fT" : only valid for nucleotide-based models. fA, fC, fG
            and fT are floating numbers that correspond to the frequencies of
            A, C, G and T respectively (WARNING: do not use any blank space
            between your values of nucleotide frequencies, only commas!)
    ts_tv_ratio : float or str
        transition/transversion ratio. DNA sequences only. Can be a fixed
        positive value (ex: 4.0) or "e" to get the maximum likelihood
        estimate.
    prop_invar : float or str
        proportion of invariable sites. Can be a fixed value in the [0,1]
        range or "e" to get the maximum likelihood estimate.
    gamma_cats : int
        number of relative substitution rate categories. Must be a positive
        integer. Default value 4.
    gamma_shape : float or str
        distribution of the gamma distribution shape parameter. Can be a
        fixed positive value or "e" to get the maximum likelihood estimate.
    gamma_cat_median : bool
        If True, use median instead of mean as the middle of each substitution
        rate class in the discrete gamma distribution.
    free_rates : bool
        If True, the FreeRate model of substitution rate variation across
        sites will be used.
    integrated_branch_length : bool
        If True, the integrated length (IL) model will be used. The IL model
        can be considered as an approximation to the covarion model.
    codon_position : {1, 2, 3}
        When analyzing an alignment of coding sequences, use this option to
        consider only the first, second or the third coding position.
    search_move : {"NNI", "SPR", "BEST"}
        Tree topology search operation option. Can be either "NNI" (default,
        fast) or "SPR" (a bit slower than NNI) or "BEST" (best of NNI and SPR
        search).
    starting_tree : |Tree|
        User-provided starting tree.
    optimization : {"tlr", "tl", "lr", "l", "r", "n"}
        Specify which parameters to optimize. Tree topology (t),
        branch lengths (l), rate parameters (r) and no parameter (n).
    random_starting_tree : bool
        If True, sets the initial tree to random. It is only valid if SPR
        searches are to be performed.
    num_random_starting_trees : int
        Number of initial random trees to be used. It is only valid if SPR
        searches are to be performed.
    random_seed : int
        Seed used to initiate the random number generator.
    site_likelihoods : bool
        If True, return likelihood for each site.
    trace_search : bool
        If True, return each phylogeny explored during the tree search.
    run_id : str
        Append an ID-string to the PhyML output.
    alias_subpattern : bool
        If True, site aliasing is generalized at the subtree level.
        Sometimes lead to faster calculations. See Kosakovsky Pond SL,
        Muse SV, Systematic Biology (2004) for an example.

    Returns
    -------
    result : :class:`~dendropy.interop.phyml.PhymlResult`

    Raises
    ------
    RuntimeError
        If PhyML writes anything to its standard error or exits with a
        non-zero return code.
    """
    # The temporary file's name is used as the stem for every input and
    # output file of this run; all of them are removed in the ``finally``.
    char_matrix_f = tempfile.NamedTemporaryFile()

    # Compose arguments
    args = []
    args.append(phyml_path)
    args.append("-i%s" % char_matrix_f.name)
    if data_type:
        args.append("-d%s" % str(data_type))
    args.append("-q")
    args.append("-n1")
    if parsimony_starting_tree:
        args.append("-p")
    # ``0`` is a meaningful value here (no support values at all), so only
    # ``None`` means "leave PhyML's default in place".
    if bootstrap is not None:
        args.append("-b%s" % str(bootstrap))
    if subst_model:
        args.append("-m%s" % str(subst_model))
    if amino_acid_rates:
        args.extend(["--aa_rate_file", char_matrix_f.name + "_aa_rate"])
    if state_freqs:
        if isinstance(state_freqs, str):
            args.append("-f%s" % state_freqs)
        else:
            args.append("-f%s" % (",".join([str(s) for s in state_freqs])))
    if ts_tv_ratio:
        args.append("-t%s" % str(ts_tv_ratio))
    if prop_invar:
        args.append("-v%s" % str(prop_invar))
    if gamma_cats:
        args.append("-c%s" % str(gamma_cats))
    if gamma_shape:
        args.append("-a%s" % str(gamma_shape))
    if gamma_cat_median:
        args.append("--use_median")
    if free_rates:
        args.append("--freerates")
    if integrated_branch_length:
        args.append("--il")
    if codon_position:
        args.extend(["--codpos", str(codon_position)])
    if search_move:
        args.append("-s%s" % str(search_move))
    if starting_tree:
        args.append("-u%s" % str(char_matrix_f.name + "_starting_tree"))
    if optimization:
        args.append("-o%s" % str(optimization))
    if random_starting_tree:
        args.append("--rand_start")
    if num_random_starting_trees:
        args.extend(["--n_rand_starts", str(num_random_starting_trees)])
    if site_likelihoods:
        args.append("--print_site_lnl")
    if random_seed:
        # Long option and its value are separate arguments, like the other
        # long options above; a fused "--r_seed<N>" token is not an option
        # PhyML recognizes.
        args.extend(["--r_seed", str(random_seed)])
    if trace_search:
        args.append("--print_trace")
    if run_id:
        args.extend(["--run_id", run_id])
    args.append("--quiet")
    args.append("--no_memory_check")
    if alias_subpattern:
        args.append("--alias_subpatt")
    command_line = " ".join(args)
    try:
        # Write data to files
        char_matrix.write_to_path(
            char_matrix_f.name, "phylip", spaces_to_underscores=True)
        if starting_tree:
            starting_tree.write_to_path(
                char_matrix_f.name + "_starting_tree",
                "newick", preserve_spaces=False)
        if amino_acid_rates:
            with open(char_matrix_f.name + "_aa_rate", "w") as aa_rate_f:
                aa_rate_f.write(str(amino_acid_rates))
        # Call PhyML
        proc = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = processio.communicate(proc)
        # Check output
        if stderr or proc.returncode != 0:
            if stderr:
                raise RuntimeError(
                    "PhyML error: %s\n%s" % (command_line, stderr))
            else:
                raise RuntimeError("PhyML error: %s" % stdout)
        else:
            # Collect output; each entry maps a file suffix to whatever
            # ``_read_phyml_file`` returns as the first item of its pair.
            output_files = {}

            result = PhymlResult()
            result.command_line = command_line
            result.stdout_text = stdout

            output_files["_phyml_tree"], result.best_tree = (
                _read_phyml_file(char_matrix_f.name, "_phyml_tree", "tree"))

            output_files["_phyml_stats"], result.stats_text = (
                _read_phyml_file(char_matrix_f.name, "_phyml_stats", "text"))

            if bootstrap and bootstrap > 0:
                output_files["_phyml_boot_trees"], result.boot_trees = (
                    _read_phyml_file(
                        char_matrix_f.name, "_phyml_boot_trees", "treelist"))

                output_files["_phyml_boot_stats"], result.boot_stats_text = (
                    _read_phyml_file(
                        char_matrix_f.name, "_phyml_boot_stats", "text"))

            if random_starting_tree:
                output_files["_phyml_rand_trees"], result.rand_trees = (
                    _read_phyml_file(
                        char_matrix_f.name, "_phyml_rand_trees", "treelist"))

            if site_likelihoods:
                output_files["_phyml_lk"], result.site_likelihoods_text = (
                    _read_phyml_file(char_matrix_f.name, "_phyml_lk", "text"))

            if trace_search:
                output_files["_phyml_trace"], result.search_trace_trees = (
                    _read_phyml_file(
                        char_matrix_f.name, "_phyml_trace", "treelist"))

            result.output_files = output_files

    finally:
        # Clean up: closing removes the temporary file itself, the glob
        # catches every sibling file sharing its name as a prefix.
        char_matrix_f.close()
        for phyml_file in glob.glob(char_matrix_f.name + "*"):
            os.remove(phyml_file)

    return result
Esempio n. 25
0
    def call(r_commands,
            ignore_error_returncode=False,
            cwd=None,
            env=None,
            rscript_path=RSCRIPT_EXECUTABLE,
            ):
        """
        Executes a sequence of commands in R and returns the results.

        Note that newlines ('\n') and other special characters will be
        converted before being passed to the R interpreter, so need to
        be escaped or entered as raw string expressions.

        That is, instead of, e.g.:

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\n')",
            ])

        use this:

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\\n')",
            ])

        or:

            returncode, stdout, stderr = RService.call([
                r"cat('hello, world\n')",
            ])

        Parameters
        ----------
        r_commands : iterable of strings
            A list or some other iterable of strings of R commands. A single
            string is also accepted and is passed through as-is.
        ignore_error_returncode : bool
            If `True`, then a non-0 return code from the R process will not
            result in an exception being raised.
        cwd : string
            Set the working directory of the R process to this directory.
        env : dictionary
            Environmental variables to set for the R process.
        rscript_path : string
            Path to the Rscript executable.

        Returns
        -------
        returncode : exit value of the R process
        stdout : string
            Contents of the R process standard output.
        stderr : string
            Contents of the R process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the R process exits with a non-0 return code and
            ``ignore_error_returncode`` is not set.
        """
        if not isinstance(r_commands, str):
            r_commands = "\n".join(r_commands)
        r_commands += "\n"
        # Honor the caller-supplied executable; it defaults to
        # RSCRIPT_EXECUTABLE, so existing callers are unaffected.
        invocation_command = [rscript_path, rsubprocess_pipe_path]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        stdout, stderr = processio.communicate(p, r_commands)
        if (p.returncode != 0 and not ignore_error_returncode):
            raise error.ExternalServiceError(
                    service_name="Rscript",
                    invocation_command=invocation_command,
                    service_input=r_commands,
                    returncode = p.returncode,
                    stdout=stdout,
                    stderr=stderr)
        return p.returncode, stdout, stderr
Esempio n. 26
0
    def call(r_commands,
            ignore_error_returncode=False,
            cwd=None,
            env=None,
            rscript_path=RSCRIPT_EXECUTABLE,
            ):
        """
        Executes a sequence of commands in R and returns the results. All the
        noise is sunk into the stderr return variable, and just the output
        comes out cleanly in the stdout return variable.

        Parameters
        ----------
        r_commands : iterable of strings
            A list or some other iterable of strings of R commands. A single
            string is also accepted and is passed through as-is.
        ignore_error_returncode : bool
            If |True|, then a non-0 return code from the R process will not
            result in an exception being raised.
        cwd : string
            Set the working directory of the R process to this directory.
        env : dictionary
            Environmental variables to set for the R process.
        rscript_path : string
            Path to the Rscript executable.

        Returns
        -------
        returncode : exit value of the R process
        stdout : string
            Contents of the R process standard output.
        stderr : string
            Contents of the R process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the R process exits with a non-0 return code and
            ``ignore_error_returncode`` is not set.

        Examples
        --------

        Build up a script (``s``) to calculate a range of values, print them
        to the standard output, and then post-process this to extract the
        values::

            import itertools
            from dendropy.interop import rstats

            bb = [0.01, 0.05, 0.10, 0.50, 1.0]
            cc = [0.01, 0.05, 0.10, 0.50, 1.0]
            ee = [0.0, 0.1, 0.2]

            # store commands of script as a list
            # to be passed to the ``call()``
            s = []

            # set options, load required libraries, etc.
            s.append("options(digits=22)")
            s.append("library(PBD)")

            # build up list of commands in script
            params = []
            for b, c, e in itertools.product(bb, cc, ee):
                s.append("print(pbd_durspec_mean(pars=c({},{},{})))".format(b, c, e))

            # execute script
            returncode, stdout, stderr  = rstats.call(s)

            # peek at the results
            print(stdout)

            # [1] 69.31472
            # [1] 9.853723
            # [1] 4.981369
            # [1] 0.9950331
            # ...

            # post-process the stdout to extract values
            results = [float(x.split(" ")[1]) for x in stdout.split("\n") if x]

        Notes
        -----

        Note that newlines ('\n') and other special characters will be
        converted before being passed to the R interpreter, so need to
        be escaped or entered as raw string expressions.

        That is, instead of, e.g.::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\n')",
            ])

        use this::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\\n')",
            ])

        or::

            returncode, stdout, stderr = RService.call([
                r"cat('hello, world\n')",
            ])

        """
        if not textprocessing.is_str_type(r_commands):
            r_commands = "\n".join(r_commands)
        r_commands += "\n"
        # Honor the caller-supplied executable; it defaults to
        # RSCRIPT_EXECUTABLE, so existing callers are unaffected.
        invocation_command = [rscript_path, rsubprocess_pipe_path]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        stdout, stderr = processio.communicate(p, r_commands)
        if (p.returncode != 0 and not ignore_error_returncode):
            raise error.ExternalServiceError(
                    service_name="Rscript",
                    invocation_command=invocation_command,
                    service_input=r_commands,
                    returncode = p.returncode,
                    stdout=stdout,
                    stderr=stderr)
        return p.returncode, stdout, stderr
Esempio n. 27
0
def pscore_trees(
        trees,
        char_matrix,
        pset_option_list=None,
        pscore_option_list=None,
        paup_path=PAUP_PATH):
    """
    Scores ``trees`` under parsimony against ``char_matrix`` using PAUP*.

    The data and trees are written to temporary NEXUS files, PAUP* is asked
    to write the scores to a temporary score file, and the second
    whitespace-separated column of each score row is returned as an int.

    Parameters
    ----------
    trees : tree collection
        Trees to score; must provide ``write_to_stream`` and ``len()``.
    char_matrix : character matrix
        Data against which to score; must provide ``write_to_stream``.
    pset_option_list : iterable of str
        Options appended, space-separated, to a PAUP* ``pset`` command. If
        `None`, no ``pset`` command is issued.
    pscore_option_list : iterable of str
        Extra options appended, space-separated, to the ``pscore`` command.
    paup_path : str
        Command used to launch PAUP*.

    Returns
    -------
    scores : list of int
        One score per tree, in the order of the rows in the score file.
    """
    if pset_option_list is not None:
        # Terminate the command explicitly; otherwise it would run straight
        # into the ``pscore`` command that follows it in the block.
        pset = "pset " + " ".join(pset_option_list) + ";"
    else:
        pset = ""

    paup_block = """\
    set warnreset=no;
    exe '{data_file}';
    gettrees file= '{intree_file}' warntree=no;
    {post_est_commands};
    """

    # All three temporary files are context-managed so that they are closed
    # (and therefore deleted) even if PAUP* or the score parsing fails.
    with tempfile.NamedTemporaryFile("w+", delete=True) as scorefile, \
            tempfile.NamedTemporaryFile("w", delete=True) as cf, \
            tempfile.NamedTemporaryFile("w", delete=True) as input_tree_file_handle:
        pscore_command = "pscore / scorefile={}".format(scorefile.name)
        if pscore_option_list is not None:
            # The separating space keeps the first option from being glued
            # onto the score file path.
            pscore_command = pscore_command + " " + " ".join(pscore_option_list)

        post_est_commands = """\
    set crit=parsimony;
    {pset}
    {pscore_command}
    """.format(pset=pset, pscore_command=pscore_command)

        # PAUP* reads both inputs by name, so flush them to disk first.
        char_matrix.write_to_stream(cf, schema='nexus')
        cf.flush()
        trees.write_to_stream(input_tree_file_handle, schema="nexus")
        input_tree_file_handle.flush()

        paup_args = {}
        paup_args["data_file"] = cf.name
        paup_args["intree_file"] = input_tree_file_handle.name
        paup_args["post_est_commands"] = post_est_commands
        paup_block = paup_block.format(**paup_args)
        paup_run = subprocess.Popen(['%s -n' % paup_path],
                                    shell=True,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        stdout, stderr = processio.communicate(paup_run, paup_block)
        # NOTE(review): stderr is not piped in the Popen call above, so this
        # branch presumably never fires -- confirm against
        # ``processio.communicate`` before relying on it.
        if stderr:
            sys.stderr.write("\n*** ERROR FROM PAUP ***")
            sys.stderr.write(stderr)
            sys.exit(1)
        with open(scorefile.name, "r") as score_src:
            scores_str = score_src.read()

    # First row is skipped (treated as a header); blank rows are ignored.
    score_rows = [r for r in scores_str.split("\n")[1:] if r != ""]
    assert len(score_rows) == len(trees)
    scores = [int(s.split()[1]) for s in score_rows]
    assert len(scores) == len(trees)
    return scores
Esempio n. 28
0
    def map_bipartitions(self, target_tree_fpath, bootstrap_trees_fpaths):
        """
        Runs RAxML with ``-f b`` on a target tree and a set of bootstrap
        trees, and returns the tree RAxML writes to
        ``self.bipartitions_fname``.

        Parameters
        ----------
        target_tree_fpath : str
            Path to the file holding the target tree; only the first tree
            read from it is used.
        bootstrap_trees_fpaths : iterable of str
            Paths to the files holding the bootstrap trees.

        Returns
        -------
        mapped_tree : |Tree|
            Tree read from the RAxML result file, with taxon labels mapped
            back through ``self.taxon_label_map``.

        Notes
        -----
        Terminates the process via ``sys.exit`` when an input file may not
        be overwritten (status 0), when RAxML exits with a non-zero return
        code (that code), or when the result file is missing (status 1).
        """
        # set up taxa
        taxa = dendropy.TaxonNamespace()

        # read target tree
        target_tree_fpath = self._expand_path(target_tree_fpath)
        target_tree = self._get_trees(target_tree_fpath,
                                      taxon_namespace=taxa)[0]

        # read bootstrap trees
        boot_trees = dendropy.TreeList()
        for fpath in bootstrap_trees_fpaths:
            fpath = self._expand_path(fpath)
            self._get_trees(tree_filepath=fpath,
                            tree_list=boot_trees,
                            taxon_namespace=taxa)

        # create working directory
        self._create_working_dir()

        # remap taxon labels; the map lives on the instance and is
        # presumably filled in by ``_remap_taxon_labels``
        self.taxon_label_map = {}
        self._remap_taxon_labels(taxa)

        # write input target tree
        raxml_target_tree_filepath = os.path.join(
            self.working_dir_path, "{}.target_tree".format(self.name))
        if not self._check_overwrite(raxml_target_tree_filepath):
            sys.exit(0)
        target_tree.write_to_path(raxml_target_tree_filepath, "newick")
        self.files_to_clean.append(raxml_target_tree_filepath)

        # write input bootstrap trees
        raxml_bootstrap_trees_filepath = os.path.join(
            self.working_dir_path, "{}.boot_trees".format(self.name))
        if not self._check_overwrite(raxml_bootstrap_trees_filepath):
            sys.exit(0)
        boot_trees.write_to_path(raxml_bootstrap_trees_filepath, "newick")
        self.files_to_clean.append(raxml_bootstrap_trees_filepath)

        # write input (dummy) sequences
        raxml_seqs_filepath = os.path.join(self.working_dir_path,
                                           "{}.seqs".format(self.name))
        if not self._check_overwrite(raxml_seqs_filepath):
            sys.exit(0)
        # ``with`` closes the file even if writing the sequences fails
        with open(raxml_seqs_filepath, "w") as raxml_seqs_filepath_out:
            self._write_dummy_seqs(taxa, raxml_seqs_filepath_out)
        self.files_to_clean.append(raxml_seqs_filepath)

        # clean working directory of previous runs
        self._preclean_working_dir()

        # run RAxML; input files are passed by basename because the process
        # runs with the working directory as its cwd
        cmd = [
            self.raxml_path, '-f', 'b', '-t',
            os.path.basename(raxml_target_tree_filepath), '-z',
            os.path.basename(raxml_bootstrap_trees_filepath), '-s',
            os.path.basename(raxml_seqs_filepath), '-m', 'GTRCAT', '-n',
            self.name
        ]
        if self.verbosity >= 2:
            # let RAxML write straight to the inherited streams
            stdout_pipe = None
            stderr_pipe = None
        else:
            stdout_pipe = subprocess.PIPE
            stderr_pipe = subprocess.PIPE
        p = subprocess.Popen(cmd,
                             stdout=stdout_pipe,
                             stderr=stderr_pipe,
                             cwd=self.working_dir_path)
        stdout, stderr = processio.communicate(p)
        if p.returncode != 0:
            self._send_error("RAxML run failed")
            # output was only captured when it was not already shown live
            if self.verbosity < 2:
                sys.stdout.write(stdout)
                sys.stderr.write(stderr)
            sys.exit(p.returncode)

        # read result
        raxml_mapped_tree_fpath = os.path.join(self.working_dir_path,
                                               self.bipartitions_fname)
        if not os.path.exists(raxml_mapped_tree_fpath):
            self._send_error(
                "RAxML result not found: {}".format(raxml_mapped_tree_fpath))
            sys.exit(1)
        mapped_tree = dendropy.Tree.get_from_path(raxml_mapped_tree_fpath,
                                                  "newick")

        # remap labels through the instance-level map; a function-local
        # dictionary would be empty here and every lookup would fail
        for taxon in mapped_tree.taxon_namespace:
            taxon.label = self.taxon_label_map[taxon.label]

        # clean-up
        self.files_to_clean.append(raxml_mapped_tree_fpath)
        self.files_to_clean.append(self.info_fname)
        self._postclean_working_dir()

        # return result
        return mapped_tree
Esempio n. 29
0
    def estimate_dec(self,
            newick_tree_filepath,
            geography_filepath,
            max_range_size,
            **kwargs
            ):
        """
        Writes an R script from ``R_TEMPLATE``, runs it with ``R -f``, and
        parses the results file it produces.

        Parameters
        ----------
        newick_tree_filepath : str
            Value substituted for ``tree_filepath`` in the R template.
        geography_filepath : str
            Value substituted for ``geography_filepath`` in the R template.
        max_range_size : int
            Value substituted for ``max_range_size`` in the R template.
        **kwargs
            Optional parameter settings for each of the parameters "b", "e",
            "d", "j", "ysv", "y", "s" and "v". ``fixed_<name>=value`` marks
            the parameter as fixed and uses ``value`` for its min, max, init
            and est settings. Otherwise any of ``min_<name>``,
            ``max_<name>``, ``init_<name>`` and ``est_<name>`` that are
            given are applied individually.

        Returns
        -------
        results_table : collections.OrderedDict or None
            Maps the first column of each parsed results row to the float in
            its sixth column. `None` if R exits with a non-zero return code
            and ``self.fail_on_estimation_error`` is false.

        Raises
        ------
        Exception
            If R exits with a non-zero return code and
            ``self.fail_on_estimation_error`` is true.
        IndexError
            If a non-blank results row has fewer than six columns.
        """
        param_settings = []
        for param_name in ("b", "e", "d", "j", "ysv", "y", "s", "v"):
            fixed_key = "fixed_" + param_name
            if fixed_key in kwargs:
                param_settings.append(PARAM_SETTING_TEMPLATE.format(
                        param_name=param_name, param_aspect="type", value='"fixed"'))
                for param_aspect in ("min", "max", "init", "est"):
                    param_settings.append(PARAM_SETTING_TEMPLATE.format(
                        param_name=param_name, param_aspect=param_aspect, value=kwargs[fixed_key]))
            else:
                for param_aspect in ("min_", "max_", "init_", "est_"):
                    if param_aspect + param_name in kwargs:
                        # strip the trailing underscore to get the aspect name
                        param_settings.append(PARAM_SETTING_TEMPLATE.format(
                            param_name=param_name, param_aspect=param_aspect[:-1], value=kwargs[param_aspect+param_name]))
        param_settings = "\n".join(param_settings)
        rcmds = R_TEMPLATE.format(
            patch_code=self.patch_code,
            param_settings=param_settings,
            tree_filepath=newick_tree_filepath,
            geography_filepath=geography_filepath,
            max_range_size=max_range_size,
            results_filepath=self.results_file_name,
            )
        # The script must be fully written and closed before R reads it.
        with open(self.commands_file_name, "w") as rfile:
            rfile.write(rcmds + "\n")
        shell_cmd = ["R",
                "--vanilla",
                "--no-save",
                "--slave",
                "--silent",
                "-f",
                self.commands_file_name]
        # In debug mode the R output goes to the inherited streams instead
        # of being captured.
        p = subprocess.Popen(
                shell_cmd,
                stdout=subprocess.PIPE if not self.debug_mode else None,
                stderr=subprocess.PIPE if not self.debug_mode else None,
                )
        stdout, stderr = processio.communicate(p)
        if p.returncode != 0:
            if self.fail_on_estimation_error:
                raise Exception("Non-zero return code: {}\n{}\n{}".format(
                        p.returncode,
                        stdout,
                        stderr,
                        ))
            else:
                return None
        with open(self.results_file_name, "r") as results_src:
            results_rows = results_src.read().split("\n")
        results_table = collections.OrderedDict()
        # Skip the first row and look at no more than the next twenty.
        for row in results_rows[1:21]:
            cols = row.split()
            if not cols:
                # blank row (e.g. from a trailing newline): nothing to parse
                continue
            if cols[0] in ("desc", "note"):
                break
            try:
                results_table[cols[0]] = float(cols[5])
            except IndexError:
                # re-raise with the offending row as the message
                raise IndexError(cols)
        return results_table
Esempio n. 30
0
def estimate_model(char_matrix,
                    tree_model=None,
                    num_states=6,
                    unequal_base_freqs=True,
                    gamma_rates=True,
                    prop_invar=True,
                    tree_est_criterion="likelihood",
                    tree_user_brlens=True,
                    paup_path='paup'):
    """
    Given a dataset, ``char_matrix``, uses a client-supplied tree or estimates
    a tree, and estimates a character substitution model for the data by
    piping a command script to PAUP*.

    Parameters
    ----------
    char_matrix : character matrix
        Data to analyze; written to a temporary NEXUS file for PAUP*.
    tree_model : tree or None
        If given, this tree is written to a temporary NEXUS file and loaded
        with ``gettrees``; it must share ``char_matrix.taxon_namespace``.
        If |None|, a tree is estimated according to ``tree_est_criterion``.
    num_states : int
        Value passed as ``nst`` to ``lset``.
    unequal_base_freqs : bool
        ``basefreq=estimate`` if true, ``basefreq=equal`` otherwise.
    gamma_rates : bool
        ``rates=gamma`` if true, ``rates=equal`` otherwise.
    prop_invar : bool
        ``pinvar=estimate`` if true, ``pinvar=0`` otherwise.
    tree_est_criterion : string
        "nj" and "upgma" are issued verbatim as the tree-building command;
        any other value is used as the criterion for an ``hsearch``.
    tree_user_brlens : bool
        Sets ``userbrlens`` to ``yes`` or ``no``.
    paup_path : string
        PAUP* executable. NOTE(review): this is interpolated into a shell
        command line (``shell=True``), so it must come from a trusted source.

    Returns
    -------
    tuple
        ``(tree, results)``: the tree read back from the file saved by PAUP*,
        and a dictionary with the values parsed from the PAUP* output
        (``likelihood``, ``log_likelihood``, ``rAC`` ... ``rGT``, ``kappa``,
        ``prop_invar``, ``alpha``, ``pA`` ... ``pT``). Values that do not
        appear in the output are |None|; values that cannot be converted to
        ``float`` are left as the matched string.
    """
    paup_args = {
        'nst': num_states,
        'basefreq': 'estimate' if unequal_base_freqs else 'equal',
        'rates': 'gamma' if gamma_rates else 'equal',
        'pinvar': 'estimate' if prop_invar else '0',
        'userbrlens': 'yes' if tree_user_brlens else 'no',
    }
    paup_template = """\
    set warnreset=no;
    exe %(datafile)s;
    set crit=like;
    lset tratio=estimate rmatrix=estimate nst=%(nst)s basefreq=%(basefreq)s rates=%(rates)s shape=estimate pinvar=%(pinvar)s userbrlens=%(userbrlens)s;
    %(tree)s;
    lscore 1 / userbrlens=%(userbrlens)s;
    savetrees file=%(est_tree_file)s format=nexus root=yes brlens=yes taxablk=yes maxdecimals=20;
"""
    # Raw strings: the patterns contain regex escapes (``\s``, ``\d``) that
    # are invalid *string* escapes and trigger warnings in non-raw literals.
    patterns = {
        'likelihood' : re.compile(r'-ln L\s+([\d\.]+)'),
        'rAC' : re.compile(r'  AC\s+([\d\.]+)'),
        'rAG' : re.compile(r'  AG\s+([\d\.]+)'),
        'rAT' : re.compile(r'  AT\s+([\d\.]+)'),
        'rCG' : re.compile(r'  CG\s+([\d\.]+)'),
        'rCT' : re.compile(r'  CT\s+([\d\.]+)'),
        'rGT' : re.compile(r'  GT\s+([\d\.]+)'),
        'kappa': re.compile(r'  kappa\s+([\d\.]+)'),
        'prop_invar' : re.compile(r'P_inv\s+([\d\.]+)'),
        'alpha' : re.compile(r'Shape\s+([\S]+)'),
        'pA' : re.compile(r'  A\s+([\d\.]+)'),
        'pC' : re.compile(r'  C\s+([\d\.]+)'),
        'pG' : re.compile(r'  G\s+([\d\.]+)'),
        'pT' : re.compile(r'  T\s+([\d\.]+)'),
    }

    # Temporary files are deleted when closed, so they must stay open until
    # PAUP* has run and its output tree has been read back.
    open_handles = []
    try:
        if tree_model is not None:
            assert tree_model.taxon_namespace is char_matrix.taxon_namespace
            tf = tempfile.NamedTemporaryFile("w", delete=True)
            open_handles.append(tf)
            tree_model.write_to_stream(tf, 'nexus')
            tf.flush()
            paup_args['tree'] = "gettrees file=%s storebrlens=yes;" % tf.name
        elif tree_est_criterion in ["nj", "upgma"]:
            paup_args['tree'] = tree_est_criterion
        else:
            paup_args['tree'] = "set crit=%s; hsearch; set crit=like;" % tree_est_criterion

        cf = tempfile.NamedTemporaryFile("w", delete=True)
        open_handles.append(cf)
        char_matrix.write_to_stream(cf, schema='nexus')
        cf.flush()
        paup_args['datafile'] = cf.name

        output_tree_file_handle = tempfile.NamedTemporaryFile("w+", delete=True)
        open_handles.append(output_tree_file_handle)
        output_tree_filepath = output_tree_file_handle.name
        paup_args['est_tree_file'] = output_tree_filepath

        paup_run = subprocess.Popen(['%s -n' % paup_path],
                                    shell=True,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        stdout, stderr = processio.communicate(paup_run, paup_template % paup_args)

        # Last match wins if a value is reported more than once.
        results = dict((value_name, None) for value_name in patterns)
        for line in stdout.split('\n'):
            for value_name, pattern in patterns.items():
                m = pattern.match(line)
                if m:
                    results[value_name] = m.group(1)

        # Iterate over a snapshot of the keys: "log_likelihood" is added
        # below, and adding a key while iterating over the live dictionary
        # raises RuntimeError on Python 3.
        for value_name in list(results):
            raw_value = results[value_name]
            if raw_value is None:
                # Not reported by PAUP*; leave as None instead of crashing.
                continue
            if value_name == 'likelihood':
                # PAUP* reports "-ln L"; negate to get the log-likelihood.
                results[value_name] = -1 * float(raw_value)
            else:
                try:
                    results[value_name] = float(raw_value)
                except ValueError:
                    # Non-numeric report: keep the matched text.
                    pass
        results["log_likelihood"] = results["likelihood"]

        t = dendropy.Tree.get_from_path(output_tree_filepath, "nexus", taxon_namespace=char_matrix.taxon_namespace)
    finally:
        for handle in open_handles:
            handle.close()
    return t, results
Esempio n. 31
0
    def estimate_tree(self,
            char_matrix,
            raxml_args=None):
        """
        Estimates a tree for ``char_matrix`` by running RAxML (``-m GTRCAT``)
        in the working directory and reading back the resulting best tree.

        Parameters
        ----------
        char_matrix : character matrix
            Data to analyze; written in PHYLIP format to the working
            directory as the RAxML input.
        raxml_args : list of strings or None
            Extra command-line arguments appended to the RAxML invocation.

        Returns
        -------
        tree
            The tree read from the RAxML best-tree file, using the taxon
            namespace of ``char_matrix``, with taxon labels mapped back
            through ``self.taxon_label_map``.

        Notes
        -----
        Calls ``sys.exit`` with the RAxML return code if the run fails, and
        ``sys.exit(1)`` if the expected result file is not found.
        """
        # set up taxa
        taxa = char_matrix.taxon_namespace

        # create working directory
        self._create_working_dir()

        # remap taxon labels
        self.taxon_label_map = {}
        self._remap_taxon_labels(taxa)

        # clean working directory of previous runs
        self._preclean_working_dir()

        # write input sequences; the context manager closes the file even
        # if writing the matrix fails
        raxml_seqs_filepath = os.path.join(self.working_dir_path, self.input_seq_fname)
        with open(raxml_seqs_filepath, "w") as raxml_seqs_filepath_out:
            char_matrix.write_to_stream(raxml_seqs_filepath_out, "phylip")
            raxml_seqs_filepath_out.flush()
        self.files_to_clean.append(raxml_seqs_filepath)
        self.files_to_clean.append(raxml_seqs_filepath + ".reduced")

        # run RAxML
        if raxml_args is None:
            raxml_args = []
        cmd = [self.raxml_path,
                '-m',
                'GTRCAT',
                '-s', raxml_seqs_filepath,
                '-n', self.name,
                '-p', str(random.randint(0, sys.maxsize))] + raxml_args
        if self.verbosity >= 2:
            # let RAxML write directly to the console
            stdout_pipe = None
            stderr_pipe = None
        else:
            stdout_pipe = subprocess.PIPE
            stderr_pipe = subprocess.PIPE
        p = subprocess.Popen(cmd,
            stdout=stdout_pipe,
            stderr=stderr_pipe,
            cwd=self.working_dir_path)
        stdout, stderr = processio.communicate(p)
        if p.returncode != 0:
            sys.stderr.write("[RAxML run failed]:\n\n%s\n\n" % (" ".join(cmd)))
            # When the streams were not piped (verbosity >= 2) there is
            # nothing captured to replay; writing None would raise TypeError
            # and mask the real failure.
            if stdout is not None:
                sys.stdout.write(stdout)
            if stderr is not None:
                sys.stderr.write(stderr)
            sys.exit(p.returncode)

        # read result
        raxml_best_tree_fpath = os.path.join(self.working_dir_path, self.best_tree_fname)
        if not os.path.exists(raxml_best_tree_fpath):
            self._send_error("RAxML result not found: {}".format(raxml_best_tree_fpath))
            sys.exit(1)
        best_tree = dendropy.Tree.get_from_path(raxml_best_tree_fpath,
                "newick",
                taxon_namespace=taxa)

        # remap labels
        for taxon in best_tree.taxon_namespace:
            taxon.label = self.taxon_label_map[taxon.label]

        # clean-up
        self._postclean_working_dir()

        # return result
        return best_tree
Esempio n. 32
0
    def call(r_commands,
            ignore_error_returncode=False,
            cwd=None,
            env=None,
            rscript_path=RSCRIPT_EXECUTABLE,
            ):
        r"""
        Executes a sequence of commands in R and returns the results. All the
        noise is sunk into the stderr return variable, and just the output
        comes out cleanly in the stdout return variable.

        Parameters
        ----------
        r_commands : iterable of strings
            A list or some other iterable of strings of R commands. A single
            string is passed through as-is.
        ignore_error_returncode : bool
            If |True|, then a non-0 return code from the R process will not
            result in an exception being raised.
        cwd : string
            Set the working directory of the R process to this directory.
        env : dictionary
            Environmental variables to set for the R process.
        rscript_path : string
            Path to the Rscript executable.

        Returns
        -------
        returncode : exit value of the R process
        stdout : string
            Contents of the R process standard output.
        stderr : string
            Contents of the R process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the R process exits with a non-0 return code and
            ``ignore_error_returncode`` is not |True|.

        Examples
        --------

        Build up a script (``s``) to calculate a range of values, print them
        to the standard output, and then post-process this to extract the
        values::

            import itertools
            from dendropy.interop import rstats

            bb = [0.01, 0.05, 0.10, 0.50, 1.0]
            cc = [0.01, 0.05, 0.10, 0.50, 1.0]
            ee = [0.0, 0.1, 0.2]

            # store commands of script as a list
            # to be passed to the ``call()``
            s = []

            # set options, load required libraries, etc.
            s.append("options(digits=22)")
            s.append("library(PBD)")

            # build up list of commands in script
            params = []
            for b, c, e in itertools.product(bb, cc, ee):
                s.append("print(pbd_durspec_mean(pars=c({},{},{})))".format(b, c, e))

            # execute script
            returncode, stdout, stderr  = rstats.call(s)

            # peek at the results
            print(stdout)

            # [1] 69.31472
            # [1] 9.853723
            # [1] 4.981369
            # [1] 0.9950331
            # ...

            # post-process the stdout to extract values
            results = [float(x.split(" ")[1]) for x in stdout.split("\n") if x]

        Notes
        -----

        Note that newlines ('\n') and other special characters will be
        converted before being passed to the R interpreter, so need to
        be escaped or entered as raw string expressions.

        That is, instead of, e.g.::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\n')",
            ])

        use this::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\\n')",
            ])

        or::

            returncode, stdout, stderr = RService.call([
                r"cat('hello, world\n')",
            ])

        """
        if not textprocessing.is_str_type(r_commands):
            r_commands = "\n".join(r_commands)
        r_commands += "\n"
        # Honor the caller-supplied executable; it defaults to
        # RSCRIPT_EXECUTABLE, so existing callers are unaffected.
        invocation_command = [rscript_path, rsubprocess_pipe_path]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        stdout, stderr = processio.communicate(p, r_commands)
        if (p.returncode != 0 and not ignore_error_returncode):
            raise error.ExternalServiceError(
                    service_name="Rscript",
                    invocation_command=invocation_command,
                    service_input=r_commands,
                    returncode = p.returncode,
                    stdout=stdout,
                    stderr=stderr)
        return p.returncode, stdout, stderr
Esempio n. 33
0
    def map_bipartitions(self, target_tree_fpath, bootstrap_trees_fpaths):
        """
        Runs RAxML (``-f b``) in the working directory on a target tree and
        a set of bootstrap trees, and returns the tree RAxML writes to the
        bipartitions file.

        Parameters
        ----------
        target_tree_fpath : string
            Path to the file with the target tree; the first tree read from
            it is used.
        bootstrap_trees_fpaths : iterable of strings
            Paths to the files with the bootstrap trees; all trees from all
            files are pooled.

        Returns
        -------
        tree
            The tree read from the RAxML bipartitions file, with taxon
            labels mapped back through ``self.taxon_label_map``.

        Notes
        -----
        Calls ``sys.exit(0)`` if ``self._check_overwrite`` rejects one of the
        input files to be written, ``sys.exit`` with the RAxML return code if
        the run fails, and ``sys.exit(1)`` if the result file is not found.
        """
        # set up taxa
        taxa = dendropy.TaxonNamespace()

        # read target tree
        target_tree_fpath = self._expand_path(target_tree_fpath)
        target_tree = self._get_trees(target_tree_fpath, taxon_namespace=taxa)[0]

        # read bootstrap trees
        boot_trees = dendropy.TreeList()
        for fpath in bootstrap_trees_fpaths:
            fpath = self._expand_path(fpath)
            self._get_trees(tree_filepath=fpath, tree_list=boot_trees, taxon_namespace=taxa)

        # create working directory
        self._create_working_dir()

        # remap taxon labels
        self.taxon_label_map = {}
        self._remap_taxon_labels(taxa)

        # write input target tree
        raxml_target_tree_filepath = os.path.join(self.working_dir_path, "{}.target_tree".format(self.name))
        if not self._check_overwrite(raxml_target_tree_filepath):
            sys.exit(0)
        target_tree.write_to_path(raxml_target_tree_filepath, "newick")
        self.files_to_clean.append(raxml_target_tree_filepath)

        # write input bootstrap trees
        raxml_bootstrap_trees_filepath = os.path.join(self.working_dir_path, "{}.boot_trees".format(self.name))
        if not self._check_overwrite(raxml_bootstrap_trees_filepath):
            sys.exit(0)
        boot_trees.write_to_path(raxml_bootstrap_trees_filepath, "newick")
        self.files_to_clean.append(raxml_bootstrap_trees_filepath)

        # write input (dummy) sequences; the context manager closes the file
        # even if writing fails
        raxml_seqs_filepath = os.path.join(self.working_dir_path, "{}.seqs".format(self.name))
        if not self._check_overwrite(raxml_seqs_filepath):
            sys.exit(0)
        with open(raxml_seqs_filepath, "w") as raxml_seqs_filepath_out:
            self._write_dummy_seqs(taxa, raxml_seqs_filepath_out)
            raxml_seqs_filepath_out.flush()
        self.files_to_clean.append(raxml_seqs_filepath)

        # clean working directory of previous runs
        self._preclean_working_dir()

        # run RAxML; input files are given by base name because the process
        # runs with the working directory as its cwd
        cmd = [self.raxml_path, '-f', 'b',
                '-t', os.path.basename(raxml_target_tree_filepath),
                '-z', os.path.basename(raxml_bootstrap_trees_filepath),
                '-s', os.path.basename(raxml_seqs_filepath),
                '-m', 'GTRCAT',
                '-n', self.name]
        if self.verbosity >= 2:
            # let RAxML write directly to the console
            stdout_pipe = None
            stderr_pipe = None
        else:
            stdout_pipe = subprocess.PIPE
            stderr_pipe = subprocess.PIPE
        p = subprocess.Popen(cmd,
            stdout=stdout_pipe,
            stderr=stderr_pipe,
            cwd=self.working_dir_path)
        stdout, stderr = processio.communicate(p)
        if p.returncode != 0:
            self._send_error("RAxML run failed")
            # output was only captured (and so is only replayable) when the
            # streams were piped
            if self.verbosity < 2:
                sys.stdout.write(stdout)
                sys.stderr.write(stderr)
            sys.exit(p.returncode)

        # read result
        raxml_mapped_tree_fpath = os.path.join(self.working_dir_path, self.bipartitions_fname)
        if not os.path.exists(raxml_mapped_tree_fpath):
            self._send_error("RAxML result not found: {}".format(raxml_mapped_tree_fpath))
            sys.exit(1)
        mapped_tree = dendropy.Tree.get_from_path(raxml_mapped_tree_fpath, "newick")

        # remap labels using the map set up on the instance before the run;
        # a separate, never-populated local dictionary was previously used
        # here, which failed with KeyError on the first taxon
        for taxon in mapped_tree.taxon_namespace:
            taxon.label = self.taxon_label_map[taxon.label]

        # clean-up
        self.files_to_clean.append(raxml_mapped_tree_fpath)
        self.files_to_clean.append(self.info_fname)
        self._postclean_working_dir()

        # return result
        return mapped_tree