Example #1
0
    def testDiagnoseMulti(self):
        """Exercise summary_stats_from_parse on lists of several input files.

        Four inputs are checked with careful_parse=False: two differing
        caenophidia files, smallrna.fasta given twice, small.fasta given
        twice, and small.fasta paired with smallunaligned.fasta.  After each
        of the last three, the test re-checks that combining the caenophidia
        file with the RNA file raises.
        """
        multi_dir = data_source_path('testmulti/caenophidia')
        fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
        fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
        s = summary_stats_from_parse([fp, fp2], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "PROTEIN")
        self.assertEqual(s[1], [(114, 189), (109, 202)])
        self.assertEqual(
            s[2], 116
        )  # two taxa names were changed and 5 were deleted, so the union is 116
        self.assertEqual(s[3], False)

        fp3 = data_source_path('smallrna.fasta')

        def assert_mixed_input_rejected():
            # The file diagnosed as PROTEIN together with the file diagnosed
            # as RNA is expected to raise.
            self.assertRaises(Exception,
                              summary_stats_from_parse, [fp, fp3],
                              ["DNA", "RNA", "PROTEIN"],
                              careful_parse=False)
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling and emits the same record.
            _LOG.warning(
                "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
            )

        # The same RNA file twice.
        s = summary_stats_from_parse([fp3, fp3], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
        assert_mixed_input_rejected()

        # The same DNA file twice.
        fp4 = data_source_path('small.fasta')
        s = summary_stats_from_parse([fp4, fp4], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
        assert_mixed_input_rejected()

        # The DNA file paired with smallunaligned.fasta: only the last entry
        # of the summary is expected to differ from the previous case.
        fp5 = data_source_path('smallunaligned.fasta')
        s = summary_stats_from_parse([fp4, fp5], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], False)
        assert_mixed_input_rejected()
Example #2
0
    def testDiagnoseMulti(self):
        """Check summary_stats_from_parse when given more than one input file.

        Runs with careful_parse=False on: two differing caenophidia files,
        smallrna.fasta twice, small.fasta twice, and small.fasta together with
        smallunaligned.fasta.  After each of the last three cases it verifies
        again that the caenophidia file combined with the RNA file raises.
        """
        multi_dir = data_source_path("testmulti/caenophidia")
        fp = os.path.join(multi_dir, "caenophidia_mos.fasta")
        fp2 = os.path.join(multi_dir, "caenophidia_mos2.fasta")
        s = summary_stats_from_parse([fp, fp2], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], "PROTEIN")
        self.assertEqual(s[1], [(114, 189), (109, 202)])
        self.assertEqual(s[2], 116)  # two taxa names were changed and 5 were deleted, so the union is 116
        self.assertEqual(s[3], False)

        fp3 = data_source_path("smallrna.fasta")

        def check_mixed_types_raise():
            # Combining the PROTEIN-diagnosed file with the RNA-diagnosed file
            # is expected to raise.
            self.assertRaises(
                Exception, summary_stats_from_parse, [fp, fp3], ["DNA", "RNA", "PROTEIN"], careful_parse=False
            )
            # warning() replaces the deprecated Logger.warn alias.
            _LOG.warning(
                "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
            )

        # RNA file listed twice.
        s = summary_stats_from_parse([fp3, fp3], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
        check_mixed_types_raise()

        # DNA file listed twice.
        fp4 = data_source_path("small.fasta")
        s = summary_stats_from_parse([fp4, fp4], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
        check_mixed_types_raise()

        # DNA file plus smallunaligned.fasta: s[3] is expected to flip to False.
        fp5 = data_source_path("smallunaligned.fasta")
        s = summary_stats_from_parse([fp4, fp5], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], False)
        check_mixed_types_raise()
Example #3
0
 def testDiagnoseProt(self):
     """Check that caenophidia_mos.fasta is diagnosed as PROTEIN.

     The same summary is expected with careful_parse off and on, and the
     call is expected to raise when "PROTEIN" is not among the candidate
     datatypes.
     """
     fp = data_source_path("caenophidia_mos.fasta")
     # Function-call form works on both Python 2 and Python 3; the bare
     # `print fp` statement is a syntax error on Python 3.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
         self.assertEqual(s[0], "PROTEIN")
         self.assertEqual(s[1], [(114, 189)])
         self.assertEqual(s[2], 114)
         self.assertEqual(s[3], False)
     # Without PROTEIN as a candidate the parse must fail in both modes.
     for careful in (False, True):
         self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=careful)
Example #4
0
 def testDiagnoseRNA(self):
     """Check that smallrna.fasta is diagnosed as RNA.

     The same summary is expected with careful_parse off and on.  Rejection
     when "RNA" is not a candidate datatype is only asserted for
     careful_parse=False; the logged warning explains why.
     """
     fp = data_source_path("smallrna.fasta")
     # Function-call form works on both Python 2 and Python 3; the bare
     # `print fp` statement is a syntax error on Python 3.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
         self.assertEqual(s[0], "RNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "PROTEIN"], careful_parse=False)
     # warning() replaces the deprecated Logger.warn alias.
     _LOG.warning("WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode")
Example #5
0
 def testDiagnoseDNA(self):
     """Check that small.fasta is diagnosed as DNA.

     The same summary is expected with careful_parse off and on, and the
     call is expected to raise in both modes when "RNA" is the only
     candidate datatype.
     """
     fp = data_source_path("small.fasta")
     # Function-call form works on both Python 2 and Python 3; the bare
     # `print fp` statement is a syntax error on Python 3.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
         self.assertEqual(s[0], "DNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     # Restricting the candidates to RNA must fail in both modes.
     for careful in (False, True):
         self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=careful)
Example #6
0
def populate_auto_options(user_config, md, force=False):
    """Parse the input file(s) and, in --auto mode, apply derived defaults.

    The input is always parsed through ``summary_stats_from_parse``, so a
    read error surfaces even when no options end up being changed.  When
    ``force`` is true or ``user_config.commandline.auto`` is set, the
    ``auto`` flag is cleared and the 'sate', 'commandline' and 'fasttree'
    sections of ``user_config`` are updated from
    ``get_auto_defaults_from_summary_stats``.

    Args:
        user_config: configuration object exposing a ``commandline`` section
            (``input``, ``multilocus``, ``datatype``, ``untrusted``, ``auto``)
            and ``get(section_name)``.
        md: forwarded unchanged to ``summary_stats_from_parse``.
        force: apply the automatic defaults even if ``--auto`` was not set.

    Raises:
        SystemExit: if no input was specified.
        Exception: whatever reading/parsing the input raised, re-raised
            after an error message has been sent through ``MESSENGER``.
    """
    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")
    # Function-scope imports kept as in the original (presumably to avoid an
    # import cycle with the pasta package -- confirm before hoisting).
    from pasta.usersettingclasses import get_list_of_seq_filepaths_from_dir
    from pasta.alignment import summary_stats_from_parse
    try:
        if user_config.commandline.multilocus:
            # In multilocus mode the input is handed to the directory lister.
            fn_list = get_list_of_seq_filepaths_from_dir(
                user_config.commandline.input)
        else:
            fn_list = [user_config.commandline.input]
        datatype_list = [user_config.commandline.datatype.upper()]
        careful_parse = user_config.commandline.untrusted
        summary_stats = summary_stats_from_parse(fn_list,
                                                 datatype_list,
                                                 md,
                                                 careful_parse=careful_parse)
    except Exception:
        # Only genuine errors are reported; KeyboardInterrupt and SystemExit
        # propagate untouched instead of being mislabelled as read failures.
        if user_config.commandline.auto:
            MESSENGER.send_error(
                "Error reading input while setting options for the --auto mode\n"
            )
        else:
            MESSENGER.send_error("Error reading input\n")
        raise
    if not (force or user_config.commandline.auto):
        return
    # Clear the flag so the automatic defaults are applied only once.
    user_config.commandline.auto = False
    auto_opts = get_auto_defaults_from_summary_stats(
        summary_stats[0], summary_stats[1], summary_stats[2])
    user_config.get('sate').set_values_from_dict(auto_opts['sate'])
    user_config.get('commandline').set_values_from_dict(
        auto_opts['commandline'])
    user_config.get('fasttree').set_values_from_dict(auto_opts['fasttree'])
Example #7
0
def populate_auto_options(user_config, md, force=False):
    """Read the input and optionally fill in automatically chosen options.

    The input file(s) are parsed with ``summary_stats_from_parse`` on every
    call.  If ``force`` is true or ``user_config.commandline.auto`` is set,
    ``auto`` is reset to False and the 'sate', 'commandline' and 'fasttree'
    sections of ``user_config`` receive the values computed by
    ``get_auto_defaults_from_summary_stats`` from the first three entries of
    the parse summary.

    Args:
        user_config: configuration object with a ``commandline`` section
            (``input``, ``multilocus``, ``datatype``, ``untrusted``, ``auto``)
            and a ``get(section_name)`` accessor.
        md: passed through unchanged to ``summary_stats_from_parse``.
        force: apply the automatic defaults regardless of the ``auto`` flag.

    Raises:
        SystemExit: when ``user_config.commandline.input`` is None.
        Exception: any error raised while listing or parsing the input,
            re-raised after ``MESSENGER.send_error`` has reported it.
    """
    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")
    # Local imports preserved from the original (presumably to sidestep a
    # circular import -- confirm before moving them to module level).
    from pasta.usersettingclasses import get_list_of_seq_filepaths_from_dir
    from pasta.alignment import summary_stats_from_parse
    try:
        if user_config.commandline.multilocus:
            fn_list = get_list_of_seq_filepaths_from_dir(user_config.commandline.input)
        else:
            fn_list = [user_config.commandline.input]
        datatype_list = [user_config.commandline.datatype.upper()]
        careful_parse = user_config.commandline.untrusted
        summary_stats = summary_stats_from_parse(fn_list, datatype_list, md, careful_parse=careful_parse)
    except Exception:
        # A bare ``except:`` would also intercept KeyboardInterrupt/SystemExit
        # and report them as input errors; restrict the handler to real errors.
        if user_config.commandline.auto:
            MESSENGER.send_error("Error reading input while setting options for the --auto mode\n")
        else:
            MESSENGER.send_error("Error reading input\n")
        raise
    if not (force or user_config.commandline.auto):
        return
    # Reset the flag so the defaults below are not re-applied later.
    user_config.commandline.auto = False
    auto_opts = get_auto_defaults_from_summary_stats(summary_stats[0], summary_stats[1], summary_stats[2])
    for section in ('sate', 'commandline', 'fasttree'):
        user_config.get(section).set_values_from_dict(auto_opts[section])
Example #8
0
 def testDiagnoseProt(self):
     """caenophidia_mos.fasta must be diagnosed as PROTEIN in both parse modes.

     Also verifies that parsing raises, in both modes, when "PROTEIN" is
     left out of the candidate datatypes.
     """
     fasta_path = data_source_path('caenophidia_mos.fasta')
     print(fasta_path)
     parse_modes = (False, True)
     # First pass: the summary must be the same for both parse modes.
     for careful in parse_modes:
         stats = summary_stats_from_parse(
             [fasta_path], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
         self.assertEqual(stats[0], "PROTEIN")
         self.assertEqual(stats[1], [(114, 189)])
         self.assertEqual(stats[2], 114)
         self.assertEqual(stats[3], False)
     # Second pass: nucleotide-only candidates must be rejected.
     for careful in parse_modes:
         self.assertRaises(
             Exception,
             summary_stats_from_parse,
             [fasta_path],
             ["DNA", "RNA"],
             careful_parse=careful)
Example #9
0
 def testDiagnoseRNA(self):
     """Check that smallrna.fasta is diagnosed as RNA.

     The same summary is expected with careful_parse off and on.  Rejection
     when "RNA" is not a candidate datatype is only asserted for
     careful_parse=False; the logged warning explains why.
     """
     fp = data_source_path('smallrna.fasta')
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "RNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     self.assertRaises(Exception,
                       summary_stats_from_parse, [fp], ["DNA", "PROTEIN"],
                       careful_parse=False)
     # warning() replaces the deprecated Logger.warn alias.
     _LOG.warning(
         "WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode"
     )
Example #10
0
 def testDiagnoseDNA(self):
     """small.fasta must be diagnosed as DNA in both parse modes.

     Also verifies that parsing raises, in both modes, when "RNA" is the
     only candidate datatype offered.
     """
     fasta_path = data_source_path('small.fasta')
     print(fasta_path)
     parse_modes = (False, True)
     # First pass: the summary must be the same for both parse modes.
     for careful in parse_modes:
         stats = summary_stats_from_parse(
             [fasta_path], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
         self.assertEqual(stats[0], "DNA")
         self.assertEqual(stats[1], [(32, 1650)])
         self.assertEqual(stats[2], 32)
         self.assertEqual(stats[3], True)
     # Second pass: an RNA-only candidate list must be rejected.
     for careful in parse_modes:
         self.assertRaises(
             Exception,
             summary_stats_from_parse,
             [fasta_path],
             ["RNA"],
             careful_parse=careful)