def test_mafft_inputs(sb_resources, hf):
    """Run mafft with default options on the FASTA input case and verify the result hash."""
    # "d f" is the resource key the original labelled as the FASTA case.
    unaligned = sb_resources.get_one("d f")
    alignment = Alb.generate_msa(unaligned, 'mafft')

    expected_hash = 'f94e0fd591dad83bd94201f0af038904'
    assert hf.buddy2hash(alignment) == expected_hash
def test_mafft_outputs(sb_resources, hf):
    """Run mafft with '--clustalout' (the CLUSTAL output case) and verify the result hash."""
    unaligned = sb_resources.get_one("d f")
    alignment = Alb.generate_msa(unaligned, 'mafft', '--clustalout')

    expected_hash = 'd6046c77e2bdb5683188e5de653affe5'
    assert hf.buddy2hash(alignment) == expected_hash
def test_clustalw_multi_param(sb_resources, hf):
    """Pass two space-separated options to clustalw in one params string and verify the hash."""
    unaligned = sb_resources.get_one("d f")
    # clustalw_bin is a module-level name holding the clustalw tool identifier.
    clustalw_options = '-output=phylip -noweights'
    alignment = Alb.generate_msa(unaligned, clustalw_bin, clustalw_options)

    expected_hash = 'ae9126eb8c482a82d4060d175803c478'
    assert hf.buddy2hash(alignment) == expected_hash
def test_pagan(sb_resources, hf):
    """Exercise pagan through generate_msa across several output formats.

    Covers, in order: the default (FASTA) run, three explicit '-f' formats
    (each accepting either of two known hashes), a multi-parameter run, and
    a few edge-case invocations on a reduced record set that are only
    required to complete without raising.
    """
    # (params or None for "no params argument", accepted hashes)
    format_cases = [
        # FASTA
        (None, ['da1c6bb365e2da8cb4e7fad32d7dafdb',
                '1219647676b359a5ad0be6d9dda81c73']),
        # NEXUS
        ('-f nexus', ['f93607e234441a2577fa7d8a387ef7ec',
                      '42bfddd38fa4ed75a99841abf2112e54']),
        # PHYLIPI
        ('-f phylipi', ['09dd492fde598670d7cfee61d4e2eab8',
                        '438e1551b3f1c8526fc8a44eaf2a3dc1']),
        # PHYLIPS
        ('-f phylips', ['249c88cb64d41c47388514c65bf8fff1',
                        '6366e50da5a6b33d2d281d6ea13df0b7']),
    ]
    for pagan_params, accepted_hashes in format_cases:
        unaligned = sb_resources.get_one("d f")
        if pagan_params is None:
            # Default run: call without a params argument at all.
            alignment = Alb.generate_msa(unaligned, 'pagan')
        else:
            alignment = Alb.generate_msa(unaligned, 'pagan', pagan_params)
        assert hf.buddy2hash(alignment) in accepted_hashes

    # Multi-param
    unaligned = sb_resources.get_one("d f")
    alignment = Alb.generate_msa(unaligned, 'pagan', '-f nexus --translate')
    assert hf.buddy2hash(alignment) == 'dd140ec4eb895ce75d574498a58aa28a'

    # A few edge cases: no assertions, the calls just have to run.
    for edge_params in ("-f foo", "-f nexus", "-f phylipi"):
        subset = Sb.pull_recs(sb_resources.get_one("d f"), "α[2345]")
        Alb.generate_msa(subset, "pagan", edge_params, quiet=True)
def test_muscle_outputs(sb_resources, hf):
    """Run muscle with the '-clw' option and verify the result hash.

    NOTE(review): the original labelled this case "FASTA", which presumably
    refers to the input; '-clw' presumably selects a non-FASTA output -- confirm.
    """
    unaligned = sb_resources.get_one("d f")
    alignment = Alb.generate_msa(unaligned, 'muscle', '-clw')

    expected_hash = '91542667cef761ccaf39d8cb4e877944'
    assert hf.buddy2hash(alignment) == expected_hash
def test_muscle_multi_param(sb_resources, hf):
    """Pass two space-separated options to muscle in one params string and verify the hash."""
    unaligned = sb_resources.get_one("d f")
    muscle_options = '-clw -diags'
    alignment = Alb.generate_msa(unaligned, 'muscle', muscle_options)

    expected_hash = '91542667cef761ccaf39d8cb4e877944'
    assert hf.buddy2hash(alignment) == expected_hash
def test_prank_inputs(sb_resources):
    """Run prank ('-once') on the records matching 'α1' and check the output format is fasta."""
    # Restrict the input to the records selected by the 'α1' pattern before aligning.
    all_records = sb_resources.get_one("d f")
    subset = Sb.pull_recs(all_records, 'α1')

    alignment = Alb.generate_msa(subset, 'prank', '-once')
    assert alignment.out_format == 'fasta'
def test_prank_outputs1(sb_resources):
    """Run prank with '-f=nexus -once' and check the reported output format is nexus."""
    all_records = sb_resources.get_one("d f")
    subset = Sb.pull_recs(all_records, 'α1')

    alignment = Alb.generate_msa(subset, 'prank', '-f=nexus -once')
    assert alignment.out_format == 'nexus'
def test_generate_alignments_genbank(sb_resources, hf):
    """Align the "p g" resource with mafft and verify the result hash.

    NOTE(review): a second function with this exact name is defined later in
    this file with a different expected hash. The later definition rebinds the
    name, so this one is presumably never collected by pytest -- confirm which
    hash is current and remove or rename the stale copy.
    """
    unaligned = sb_resources.get_one("p g")
    alignment = Alb.generate_msa(unaligned, "mafft")

    expected_hash = "a4ab6b2a2ddda38a4d04abc18c54d18b"
    assert hf.buddy2hash(alignment) == expected_hash
def test_generate_alignments_edges2(tool, params, sb_resources):
    """Run an edge-case tool/params combination on a reduced record set.

    There is no assertion: the call only has to complete without raising.
    `tool` and `params` are presumably supplied by a parametrize decorator
    that is not visible here -- confirm.
    """
    # Keep only the records matching the "α[2345]" pattern.
    subset = Sb.pull_recs(sb_resources.get_one("d f"), "α[2345]")
    Alb.generate_msa(subset, tool, params, quiet=True)
def test_mafft_multi_param(sb_resources, hf):
    """Pass two space-separated options to mafft in one params string and verify the hash."""
    unaligned = sb_resources.get_one("d f")
    mafft_options = '--clustalout --noscore'
    alignment = Alb.generate_msa(unaligned, 'mafft', mafft_options)

    expected_hash = 'd6046c77e2bdb5683188e5de653affe5'
    assert hf.buddy2hash(alignment) == expected_hash
def test_muscle_inputs(sb_resources, hf):
    """Run muscle with default options on the FASTA input case and verify the result hash."""
    unaligned = sb_resources.get_one("d f")
    alignment = Alb.generate_msa(unaligned, 'muscle')

    expected_hash = '5ec18f3e0c9f5cf96944a1abb130232f'
    assert hf.buddy2hash(alignment) == expected_hash
def test_prank_outputs3(sb_resources):
    """Run prank with '-f=phylips -once' and check the reported output format is 'phylipsr'."""
    all_records = sb_resources.get_one("d f")
    subset = Sb.pull_recs(all_records, 'α1')

    # params is passed by keyword here, unlike the sibling prank tests.
    alignment = Alb.generate_msa(subset, 'prank', params='-f=phylips -once')
    assert alignment.out_format == 'phylipsr'
def test_generate_alignments_genbank(sb_resources, hf):
    """Align the "p g" resource with mafft and verify the result hash.

    NOTE(review): an earlier function in this file has the same name but a
    different expected hash. This definition rebinds the name and shadows the
    earlier one -- confirm which hash is current and drop the duplicate.
    """
    unaligned = sb_resources.get_one("p g")
    alignment = Alb.generate_msa(unaligned, "mafft")

    expected_hash = "ff3d1e474b1b1b76fdda02ebcb225cff"
    assert hf.buddy2hash(alignment) == expected_hash
os.makedirs(in_args.outdir) os.makedirs("%s/alignments" % in_args.outdir) os.makedirs("%s/mcmcmc" % in_args.outdir) os.makedirs("%s/sim_scores" % in_args.outdir) os.makedirs("%s/psi_pred" % in_args.outdir) if in_args.psi_pred and os.path.isdir(in_args.psi_pred): files = os.listdir(in_args.psi_pred) for f in files: shutil.copyfile("%s/%s" % (in_args.psi_pred, f), "%s/psi_pred/%s" % (in_args.outdir, f)) print("\nExecuting PSI-Pred") MyFuncs.run_multicore_function(sequences.records, _psi_pred) print("\nGenerating initial all-by-all") #alignbuddy, scores_data = create_all_by_all_scores(sequences) alignbuddy = Alb.generate_msa(Sb.make_copy(sequences), tool="mafft", params="--globalpair --thread -1", quiet=True) alignbuddy.write("%s/alignments/group_0.aln" % in_args.outdir) scores_data = pd.read_csv("temp_group0.csv", index_col=False) scores_data.to_csv("%s/sim_scores/group_0.csv" % in_args.outdir, index=False) group_0 = pd.concat([scores_data.seq1, scores_data.seq2]) group_0 = group_0.value_counts() group_0 = Cluster([i for i in group_0.index], scores_data) print("\nScoring base cluster") group_0.score() #taxa_count = [x.split("-")[0] for x in master_cluster.seq_ids] #taxa_count = pd.Series(taxa_count) #taxa_count = taxa_count.value_counts() print("Creating clusters")