def test_split_ref_study_papara(self):
    '''Check that split_ref_study_papara writes the expected reference and
    study FASTA files from a PaPaRa alignment.'''

    # Alignment (as parsed from the PaPaRa phylip file) that will be split.
    papara_alignment = read_phylip(exp_papara_phylip)

    # Expected sequences, kept in [reference, study] order for comparison.
    expected_ref = read_fasta(exp_ref_fasta)
    expected_study = read_fasta(exp_study_fasta)
    expected = [expected_ref, expected_study]

    with TemporaryDirectory() as tmp_dir:
        ref_path = path.join(tmp_dir, "ref_test.fna")
        study_path = path.join(tmp_dir, "study_test.fna")

        # The reference sequence names are taken from the expected
        # reference FASTA.
        split_ref_study_papara(papara_out=papara_alignment,
                               ref_seqnames=set(expected_ref.keys()),
                               ref_fasta=ref_path,
                               study_fasta=study_path)

        # Parse what was written, again in [reference, study] order.
        observed = [read_fasta(ref_path), read_fasta(study_path)]

    self.assertEqual(expected, observed)
def run_papara(tree: str, ref_msa: dict, study_fasta: str, out_dir: str,
               threads=1, print_cmds=False):
    '''Run PaPaRa to place study sequences into reference multiple-sequence
    alignment (MSA). Will return dictionary of the output MSA (sequence ids
    as keys). Expects path to tree and study FASTA as strings. Expects
    reference MSA as a dictionary output by read_fasta. This MSA will be
    converted to phylip format before running PaPaRa.

    The working directory is temporarily changed to out_dir while the
    reference phylip file is written and PaPaRa is run. The original working
    directory is always restored afterwards, including when writing the
    phylip file or the PaPaRa call raises. Any such exception is propagated
    to the caller unchanged.

    threads is passed to PaPaRa's -j option. When print_cmds is True the
    command, stdout and stderr printing flags of system_call_check are all
    enabled.'''

    # Get absolute paths to input files, since the working directory is
    # about to change and relative paths would no longer resolve.
    tree = path.abspath(tree)
    study_fasta = path.abspath(study_fasta)

    # Change working directory to out directory (but keep track of original).
    # This is necessary because PaPaRa outputs into the current working
    # directory.
    orig_wd = getcwd()
    chdir(out_dir)

    try:
        # Convert ref sequences from MSA FASTA to phylip.
        write_phylip(ref_msa, "ref_seqs.phylip")

        # Make call to papara to place sequences (outputs phylip format).
        system_call_check("papara -t " + tree + " -s ref_seqs.phylip " +
                          "-q " + study_fasta + " -j " + str(threads) +
                          " -n out",
                          print_command=print_cmds,
                          print_stdout=print_cmds,
                          print_stderr=print_cmds)
    finally:
        # Always change back to the original working directory so that a
        # failure above does not leave the process stranded in out_dir.
        chdir(orig_wd)

    # Read in papara phylip output and return.
    return read_phylip(path.join(out_dir, "papara_alignment.out"),
                       check_input=True)
def test_run_papara(self):
    '''Check that run_papara returns the expected alignment for the test
    tree, reference MSA and study sequences.'''

    # Expected alignment, parsed from the stored PaPaRa phylip file.
    expected_alignment = read_phylip(exp_papara_phylip)

    # Reference MSA passed to run_papara as a dictionary.
    reference_msa = read_fasta(test_msa)

    # PaPaRa writes its files into the output directory, so use a
    # throwaway one.
    with TemporaryDirectory() as scratch_dir:
        observed_alignment = run_papara(tree=test_tree,
                                        ref_msa=reference_msa,
                                        out_dir=scratch_dir,
                                        study_fasta=test_study_seqs)

    self.assertEqual(expected_alignment, observed_alignment)
def place_seqs_pipeline(study_fasta, ref_msa, tree, out_tree, threads,
                        papara_output, out_dir, chunk_size, print_cmds):
    '''Run the complete sequence placement workflow: obtain a PaPaRa
    alignment (either by reading papara_output or by running PaPaRa), split
    it into reference and study FASTA files in out_dir, run EPA-NG on those
    files, and convert the resulting .jplace file to a newick tree written
    to out_tree.'''

    # Load the reference sequences FASTA into a dictionary.
    ref_seqs = read_fasta(ref_msa)

    if not papara_output:
        # No precomputed alignment was given, so run PaPaRa to place the
        # study sequences.
        papara_out = run_papara(tree=tree,
                                ref_msa=ref_seqs,
                                study_fasta=study_fasta,
                                out_dir=out_dir,
                                threads=threads,
                                print_cmds=print_cmds)
    else:
        # Reuse the PaPaRa phylip output that was already produced.
        papara_out = read_phylip(papara_output, check_input=True)

    # FASTA files that the split step below will create.
    study_msa_fastafile = path.join(out_dir, "study_seqs_papara.fasta")
    ref_msa_fastafile = path.join(out_dir, "ref_seqs_papara.fasta")

    # Separate the PaPaRa alignment into study and reference sequences,
    # using the ids in the reference FASTA to tell them apart.
    reference_ids = set(ref_seqs.keys())
    split_ref_study_papara(papara_out=papara_out,
                           ref_seqnames=reference_ids,
                           study_fasta=study_msa_fastafile,
                           ref_fasta=ref_msa_fastafile)

    # Run EPA-NG, which outputs a .jplace file into its own subdirectory.
    epa_out_dir = path.join(out_dir, "epa_out")
    run_epa_ng(tree=tree,
               ref_msa_fastafile=ref_msa_fastafile,
               study_msa_fastafile=study_msa_fastafile,
               chunk_size=chunk_size,
               threads=threads,
               out_dir=epa_out_dir,
               print_cmds=print_cmds)

    # Convert the .jplace placements into the final newick tree.
    gappa_jplace_to_newick(
        jplace_file=path.join(epa_out_dir, "epa_result.jplace"),
        outfile=out_tree,
        print_cmds=print_cmds)
def test_read_write_phylip(self):
    '''Round-trip test: sequences written with write_phylip should be
    returned unchanged by read_phylip.'''

    original_seqs = {
        "seq1": "GNATNGAC",
        "seq2": "GTCGTGGC",
        "seq3": "GNCTGAGA",
    }

    # Write the sequences to a temporary phylip file, then parse that file.
    with TemporaryDirectory() as scratch_dir:
        phylip_path = path.join(scratch_dir, "test.phylip")
        write_phylip(original_seqs, phylip_path)
        roundtrip_seqs = read_phylip(phylip_path)

    self.assertEqual(original_seqs, roundtrip_seqs)