def test_species_with_three_words_name(self):
    """a db name whose species part spans three words should parse correctly"""
    mustela = EnsemblDbName("mustela_putorius_furo_core_70_1")
    # all three species words belong to the prefix, none leak into the type
    self.assertEqual(mustela.prefix, "mustela_putorius_furo")
    self.assertEqual(mustela.type, "core")
    self.assertEqual(mustela.build, "1")
    self.assertEqual(mustela.species, "Mustela putorius furo")

    # a second three-word name, checked for the species only
    canis = EnsemblDbName("canis_lupus_familiaris_core_102_31")
    self.assertEqual(canis.species, "Canis lupus familiaris")
def test_ensemblgenomes_names(self):
    """db names that follow the ensemblgenomes scheme should parse correctly"""
    # species db: the name ends with two release numbers and then a build
    core_db = EnsemblDbName("aedes_aegypti_core_5_58_1e")
    self.assertEqual(core_db.prefix, "aedes_aegypti")
    self.assertEqual(core_db.type, "core")
    self.assertEqual(core_db.release, "5")
    self.assertEqual(core_db.general_release, "58")
    self.assertEqual(core_db.build, "1e")

    # compara db: the name ends with the two release numbers
    compara_db = EnsemblDbName("ensembl_compara_metazoa_6_59")
    self.assertEqual(compara_db.release, "6")
    self.assertEqual(compara_db.general_release, "59")
    self.assertEqual(compara_db.type, "compara")
def reduce_dirnames(dirnames, species_dbs, verbose=False, debug=False):
    """returns the EnsemblDbName's selected by species_dbs, ordered by _sort_dbs

    Arguments:
        - dirnames: iterable of strings; the last whitespace separated field
          of each one is treated as a candidate db name. Blank entries,
          entries ending in ".gz" and candidates that do not start with a
          letter are ignored.
        - species_dbs: mapping of species name to the db types wanted for
          that species. An empty value accepts every type. Compara db's are
          only kept when the key "compara" is present.
        - verbose: print the names skipped because their type is not wanted
        - debug: as for verbose, plus print dirnames and any candidate that
          could not be turned into an EnsemblDbName

    The ordering is entirely delegated to _sort_dbs (the previous docstring
    describes it as core db's first, compara and variation last).
    """
    if debug:
        pprint(dirnames)

    selected = []
    for entry in dirnames:
        entry = entry.strip()
        if not entry or entry.endswith(".gz"):
            continue

        # only the final field of the listing is the candidate name
        candidate = entry.split()[-1]
        if not candidate[0].isalpha():
            continue

        try:
            db_name = EnsemblDbName(candidate)
        except (TypeError, RuntimeError):
            # a non-species
            if debug:
                print(candidate)
            continue

        if db_name.species in species_dbs:
            wanted_types = species_dbs[db_name.species]
            # membership is tested before truthiness, an empty container
            # therefore never causes a skip
            if db_name.type not in wanted_types and wanted_types:
                if debug or verbose:
                    print("Skipping", db_name)
                continue
            selected.append(db_name)
            continue

        if db_name.type == "compara" and "compara" in species_dbs:
            selected.append(db_name)

    return _sort_dbs(selected)
def test_name_without_build(self):
    """should correctly handle a db name whose last field is a bare number"""
    pongo = EnsemblDbName("pongo_pygmaeus_core_49_1")
    self.assertEqual(pongo.prefix, "pongo_pygmaeus")
    self.assertEqual(pongo.type, "core")
    # the trailing "1" of the name is what gets reported as the build
    self.assertEqual(pongo.build, "1")
def test_cmp_name(self):
    """two names built from the same db name string should compare equal"""
    raw_name = "homo_sapiens_core_46_36h"
    # two separate instances, so the check cannot pass through identity
    first = EnsemblDbName(raw_name)
    second = EnsemblDbName(raw_name)
    self.assertEqual(first, second)
def test_species_with_three_words_name(self):
    """a db name whose species part spans three words should parse correctly"""
    mustela = EnsemblDbName("mustela_putorius_furo_core_70_1")
    # all three species words belong to the prefix, none leak into the type
    self.assertEqual(mustela.prefix, "mustela_putorius_furo")
    self.assertEqual(mustela.type, "core")
    self.assertEqual(mustela.build, "1")