def run_papara(tree: str, ref_msa: dict, study_fasta: str, out_dir: str,
               threads=1, print_cmds=False):
    '''Run PaPaRa to place study sequences into reference multiple-sequence
    alignment (MSA). Will return dictionary of the output MSA (sequence ids
    as keys). Expects path to tree and study FASTA as strings. Expects
    reference MSA as a dictionary output by read_fasta. This MSA will be
    converted to phylip format before running PaPaRa.

    Arguments:
        tree: path to the reference tree file (passed to papara via -t).
        ref_msa: reference MSA as a dictionary; written to
            "ref_seqs.phylip" inside out_dir.
        study_fasta: path to the FASTA of study sequences (passed via -q).
        out_dir: directory in which PaPaRa is run; it must already exist
            because this function changes into it.
        threads: value passed to papara's -j option.
        print_cmds: forwarded to system_call_check as print_command,
            print_stdout and print_stderr.

    The process working directory is temporarily changed to out_dir and is
    always restored before this function returns or raises.'''

    # Get absolute paths to input files. This has to happen before the
    # working directory changes, otherwise relative paths would no longer
    # resolve to the intended files.
    tree = path.abspath(tree)
    study_fasta = path.abspath(study_fasta)

    # Change working directory to out directory (but keep track of original).
    # This is necessary because PaPaRa outputs into the current working
    # directory.
    orig_wd = getcwd()
    chdir(out_dir)

    try:
        # Convert ref sequences from MSA FASTA to phylip.
        write_phylip(ref_msa, "ref_seqs.phylip")

        # Make call to papara to place sequences (outputs phylip format).
        # NOTE(review): the command is assembled as a single string, so
        # paths containing whitespace may not survive however
        # system_call_check tokenizes it - confirm against that helper.
        system_call_check("papara -t " + tree + " -s ref_seqs.phylip " +
                          "-q " + study_fasta + " -j " + str(threads) +
                          " -n out",
                          print_command=print_cmds,
                          print_stdout=print_cmds,
                          print_stderr=print_cmds)
    finally:
        # Change back to original working directory, even if writing the
        # phylip file or the PaPaRa call failed, so the caller is never left
        # stranded in out_dir.
        chdir(orig_wd)

    # Read in papara phylip output and return.
    return read_phylip(path.join(out_dir, "papara_alignment.out"),
                       check_input=True)
def test_read_write_phylip(self):
    '''Check that sequences survive a write/read round trip through a
    Phylip file unchanged.'''

    expected_seqs = {"seq1": "GNATNGAC",
                     "seq2": "GTCGTGGC",
                     "seq3": "GNCTGAGA"}

    # Dump the sequences into a file inside a scratch directory, then parse
    # that same file back into a dictionary.
    with TemporaryDirectory() as scratch_dir:
        phylip_path = path.join(scratch_dir, "test.phylip")
        write_phylip(expected_seqs, phylip_path)
        observed_seqs = read_phylip(phylip_path)

    # What was read back must match what was written.
    self.assertEqual(expected_seqs, observed_seqs)