def testDiagnoseMulti(self):
    """Check summary_stats_from_parse on multi-file inputs.

    Exercises a two-file protein data set, a duplicated RNA file, a
    duplicated DNA file, a DNA pair that includes 'smallunaligned.fasta',
    and (after each nucleotide case) a protein + RNA pair that is
    expected to raise.
    """

    def check(paths, datatype, per_file, n_taxa, flag):
        # Indices of the returned summary, as asserted here:
        #   [0] detected datatype name
        #   [1] one 2-tuple per input file (presumably
        #       (sequence count, sequence length) -- TODO confirm)
        #   [2] taxon count over all files
        #   [3] boolean (presumably "inputs are aligned" -- TODO confirm)
        # A fresh datatype list is built per call, as in the original.
        s = summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], datatype)
        self.assertEqual(s[1], per_file)
        self.assertEqual(s[2], n_taxa)
        self.assertEqual(s[3], flag)

    def check_mixed_rejected():
        # Mixing the protein file with the RNA file must raise.
        self.assertRaises(Exception, summary_stats_from_parse, [fp, fp3],
                          ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        # Logger.warn is a deprecated alias; use warning().
        _LOG.warning(
            "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
        )

    multi_dir = data_source_path('testmulti/caenophidia')
    fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
    fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
    # two taxa names were changed and 5 were deleted, so the union is 116
    check([fp, fp2], "PROTEIN", [(114, 189), (109, 202)], 116, False)

    fp3 = data_source_path('smallrna.fasta')
    check([fp3, fp3], "RNA", [(32, 1650), (32, 1650)], 32, True)
    check_mixed_rejected()

    # Resolved once; the original recomputed both paths before each DNA case.
    fp4 = data_source_path('small.fasta')
    fp5 = data_source_path('smallunaligned.fasta')
    check([fp4, fp4], "DNA", [(32, 1650), (32, 1650)], 32, True)
    check_mixed_rejected()

    check([fp4, fp5], "DNA", [(32, 1650), (32, 1650)], 32, False)
    check_mixed_rejected()
def testDiagnoseMulti(self):
    """Check summary_stats_from_parse on multi-file inputs.

    Exercises a two-file protein data set, a duplicated RNA file, a
    duplicated DNA file, a DNA pair that includes "smallunaligned.fasta",
    and (after each nucleotide case) a protein + RNA pair that is
    expected to raise.
    """

    def expect_summary(paths, datatype, per_file, n_taxa, flag):
        # Indices of the returned summary, as asserted here:
        #   [0] detected datatype name
        #   [1] one 2-tuple per input file (presumably
        #       (sequence count, sequence length) -- TODO confirm)
        #   [2] taxon count over all files
        #   [3] boolean (presumably "inputs are aligned" -- TODO confirm)
        # A fresh datatype list is built per call, as in the original.
        s = summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"], careful_parse=False)
        self.assertEqual(s[0], datatype)
        self.assertEqual(s[1], per_file)
        self.assertEqual(s[2], n_taxa)
        self.assertEqual(s[3], flag)

    def expect_mixed_rejected():
        # Mixing the protein file with the RNA file must raise.
        self.assertRaises(
            Exception, summary_stats_from_parse, [fp, fp3], ["DNA", "RNA", "PROTEIN"], careful_parse=False
        )
        # Logger.warn is a deprecated alias; use warning().
        _LOG.warning(
            "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
        )

    multi_dir = data_source_path("testmulti/caenophidia")
    fp = os.path.join(multi_dir, "caenophidia_mos.fasta")
    fp2 = os.path.join(multi_dir, "caenophidia_mos2.fasta")
    # two taxa names were changed and 5 were deleted, so the union is 116
    expect_summary([fp, fp2], "PROTEIN", [(114, 189), (109, 202)], 116, False)

    fp3 = data_source_path("smallrna.fasta")
    expect_summary([fp3, fp3], "RNA", [(32, 1650), (32, 1650)], 32, True)
    expect_mixed_rejected()

    # Resolved once; the original recomputed both paths before each DNA case.
    fp4 = data_source_path("small.fasta")
    fp5 = data_source_path("smallunaligned.fasta")
    expect_summary([fp4, fp4], "DNA", [(32, 1650), (32, 1650)], 32, True)
    expect_mixed_rejected()

    expect_summary([fp4, fp5], "DNA", [(32, 1650), (32, 1650)], 32, False)
    expect_mixed_rejected()
def testDiagnoseProt(self):
    """Check that a single protein FASTA file is diagnosed as PROTEIN.

    Runs the parse with careful_parse off and then on, and verifies that
    restricting the candidate datatypes to DNA/RNA raises in both modes.
    """
    fp = data_source_path("caenophidia_mos.fasta")
    # Function-call form: same output under Python 2, valid under Python 3.
    print(fp)
    for careful in (False, True):
        s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(s[0], "PROTEIN")
        self.assertEqual(s[1], [(114, 189)])
        self.assertEqual(s[2], 114)
        self.assertEqual(s[3], False)
    # Protein data must be rejected when PROTEIN is not an allowed datatype.
    for careful in (False, True):
        self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=careful)
def testDiagnoseRNA(self):
    """Check that a single RNA FASTA file is diagnosed as RNA.

    Runs the parse with careful_parse off and then on, and verifies that
    excluding RNA from the candidate datatypes raises when careful_parse
    is off. The careful_parse=True rejection is not asserted; the logged
    warning records why.
    """
    fp = data_source_path("smallrna.fasta")
    # Function-call form: same output under Python 2, valid under Python 3.
    print(fp)
    for careful in (False, True):
        s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "PROTEIN"], careful_parse=False)
    # Logger.warn is a deprecated alias; use warning().
    _LOG.warning("WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode")
def testDiagnoseDNA(self):
    """Check that a single DNA FASTA file is diagnosed as DNA.

    Runs the parse with careful_parse off and then on, and verifies that
    restricting the candidate datatypes to RNA raises in both modes.
    """
    fp = data_source_path("small.fasta")
    # Function-call form: same output under Python 2, valid under Python 3.
    print(fp)
    for careful in (False, True):
        s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
    # DNA data must be rejected when only RNA is allowed.
    for careful in (False, True):
        self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=careful)
def populate_auto_options(user_config, md, force=False):
    """Parse the input file(s) and, when requested, apply --auto defaults.

    The input is always parsed, so unreadable input is reported even when
    auto mode is off. When ``force`` is true or ``commandline.auto`` is
    set, the auto flag is cleared and the 'sate', 'commandline' and
    'fasttree' groups of ``user_config`` are updated from the defaults
    derived from the parse summary.

    Args:
        user_config: configuration object exposing a ``commandline`` group
            (``input``, ``multilocus``, ``datatype``, ``untrusted``,
            ``auto``) and a ``get(name)`` accessor for option groups.
        md: passed through unchanged to ``summary_stats_from_parse``.
        force: apply the auto defaults even if ``commandline.auto`` is
            not set.

    Raises:
        SystemExit: if no input was specified.
        Exception: whatever the input parsing raised, re-raised after an
            error message has been sent through MESSENGER.
    """
    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")
    # Imported at call time, as in the original.
    from pasta.usersettingclasses import get_list_of_seq_filepaths_from_dir
    from pasta.alignment import summary_stats_from_parse
    try:
        if user_config.commandline.multilocus:
            # In multilocus mode the input names a directory of sequence files.
            fn_list = get_list_of_seq_filepaths_from_dir(
                user_config.commandline.input)
        else:
            fn_list = [user_config.commandline.input]
        datatype_list = [user_config.commandline.datatype.upper()]
        careful_parse = user_config.commandline.untrusted
        summary_stats = summary_stats_from_parse(fn_list,
                                                 datatype_list,
                                                 md,
                                                 careful_parse=careful_parse)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not misreported as input errors. The error is still re-raised.
        if user_config.commandline.auto:
            MESSENGER.send_error(
                "Error reading input while setting options for the --auto mode\n"
            )
        else:
            MESSENGER.send_error("Error reading input\n")
        raise
    if force or user_config.commandline.auto:
        # Clear the flag before the 'commandline' defaults are applied.
        user_config.commandline.auto = False
        auto_opts = get_auto_defaults_from_summary_stats(
            summary_stats[0], summary_stats[1], summary_stats[2])
        user_config.get('sate').set_values_from_dict(auto_opts['sate'])
        user_config.get('commandline').set_values_from_dict(
            auto_opts['commandline'])
        user_config.get('fasttree').set_values_from_dict(auto_opts['fasttree'])
def populate_auto_options(user_config, md, force=False):
    """Parse the input file(s) and, when requested, apply --auto defaults.

    The input is always parsed, so unreadable input is reported even when
    auto mode is off. When ``force`` is true or ``commandline.auto`` is
    set, the auto flag is cleared and the 'sate', 'commandline' and
    'fasttree' groups of ``user_config`` are updated from the defaults
    derived from the parse summary.

    Args:
        user_config: configuration object exposing a ``commandline`` group
            (``input``, ``multilocus``, ``datatype``, ``untrusted``,
            ``auto``) and a ``get(name)`` accessor for option groups.
        md: passed through unchanged to ``summary_stats_from_parse``.
        force: apply the auto defaults even if ``commandline.auto`` is
            not set.

    Raises:
        SystemExit: if no input was specified.
        Exception: whatever the input parsing raised, re-raised after an
            error message has been sent through MESSENGER.
    """
    if user_config.commandline.input is None:
        sys.exit("ERROR: Input file(s) not specified.")
    # Imported at call time, as in the original.
    from pasta.usersettingclasses import get_list_of_seq_filepaths_from_dir
    from pasta.alignment import summary_stats_from_parse
    try:
        if user_config.commandline.multilocus:
            # In multilocus mode the input names a directory of sequence files.
            fn_list = get_list_of_seq_filepaths_from_dir(user_config.commandline.input)
        else:
            fn_list = [user_config.commandline.input]
        datatype_list = [user_config.commandline.datatype.upper()]
        careful_parse = user_config.commandline.untrusted
        summary_stats = summary_stats_from_parse(fn_list, datatype_list, md, careful_parse=careful_parse)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not misreported as input errors. The error is still re-raised.
        if user_config.commandline.auto:
            MESSENGER.send_error("Error reading input while setting options for the --auto mode\n")
        else:
            MESSENGER.send_error("Error reading input\n")
        raise
    if force or user_config.commandline.auto:
        # Clear the flag before the 'commandline' defaults are applied.
        user_config.commandline.auto = False
        auto_opts = get_auto_defaults_from_summary_stats(summary_stats[0], summary_stats[1], summary_stats[2])
        user_config.get('sate').set_values_from_dict(auto_opts['sate'])
        user_config.get('commandline').set_values_from_dict(auto_opts['commandline'])
        user_config.get('fasttree').set_values_from_dict(auto_opts['fasttree'])
def testDiagnoseProt(self):
    """Check that a single protein FASTA file is diagnosed as PROTEIN.

    Runs the parse with careful_parse off and then on, and verifies that
    restricting the candidate datatypes to DNA/RNA raises in both modes.
    """
    prot_path = data_source_path('caenophidia_mos.fasta')
    print(prot_path)
    parse_modes = (False, True)
    for careful in parse_modes:
        stats = summary_stats_from_parse([prot_path], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(stats[0], "PROTEIN")
        self.assertEqual(stats[1], [(114, 189)])
        self.assertEqual(stats[2], 114)
        self.assertEqual(stats[3], False)
    # Protein data must be rejected when PROTEIN is not an allowed datatype.
    for careful in parse_modes:
        self.assertRaises(Exception, summary_stats_from_parse, [prot_path], ["DNA", "RNA"], careful_parse=careful)
def testDiagnoseRNA(self):
    """Check that a single RNA FASTA file is diagnosed as RNA.

    Runs the parse with careful_parse off and then on, and verifies that
    excluding RNA from the candidate datatypes raises when careful_parse
    is off. The careful_parse=True rejection is not asserted; the logged
    warning records why.
    """
    fp = data_source_path('smallrna.fasta')
    print(fp)
    for careful in (False, True):
        s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "PROTEIN"], careful_parse=False)
    # Logger.warn is a deprecated alias; use warning().
    _LOG.warning(
        "WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode"
    )
def testDiagnoseDNA(self):
    """Check that a single DNA FASTA file is diagnosed as DNA.

    Runs the parse with careful_parse off and then on, and verifies that
    restricting the candidate datatypes to RNA raises in both modes.
    """
    dna_path = data_source_path('small.fasta')
    print(dna_path)
    parse_modes = (False, True)
    for careful in parse_modes:
        stats = summary_stats_from_parse([dna_path], ["DNA", "RNA", "PROTEIN"], careful_parse=careful)
        self.assertEqual(stats[0], "DNA")
        self.assertEqual(stats[1], [(32, 1650)])
        self.assertEqual(stats[2], 32)
        self.assertEqual(stats[3], True)
    # DNA data must be rejected when only RNA is allowed.
    for careful in parse_modes:
        self.assertRaises(Exception, summary_stats_from_parse, [dna_path], ["RNA"], careful_parse=careful)