def find_mammals(cursor, trivial_name_list):
    """Return the entries of trivial_name_list that have a 'mammal' ancestor.

    For every trivial name the taxonomy is climbed one parent at a time,
    beginning at the parent of the species' own node. A name is kept when
    the scientific name of some ancestor contains 'mammal'. The climb is
    abandoned as soon as an ancestor name contains 'vertebrat', or when
    the next parent id is missing or not greater than 1.

    :param cursor: database cursor handed to the lookup helpers
    :param trivial_name_list: iterable of trivial species names
    :return: list of the trivial names recognised as mammals, in input order
    """
    found = []
    for trivial_name in trivial_name_list:
        switch_to_db(cursor, get_compara_name(cursor))
        species_tax_id = trivial2taxid(cursor, trivial_name)
        # the species node itself is skipped; the climb starts at its parent
        node = taxid2parentid(cursor, species_tax_id)
        while node:
            qry = "select name_txt from names where tax_id= %d " % int(node)
            qry += " and name_class = 'scientific name'"
            rows = search_db(cursor, qry)
            sci_name = rows[0][0].lower() if rows and rows[0][0] else ""
            if 'mammal' in sci_name:
                found.append(trivial_name)
                break
            if 'vertebrat' in sci_name:
                # a mammal would have been recognised before reaching this level
                break
            above = taxid2parentid(cursor, node)
            node = above if above and above > 1 else None
    return found
def get_common_name(cursor, species):
    """Look up the 'genbank common name' stored for a species.

    The tax id is resolved via species2taxid after switching to the compara
    database; the name is then read from the `names` table after switching
    to the ncbi taxonomy database.

    :param cursor: database cursor handed to the lookup helpers
    :param species: species name as accepted by species2taxid
    :return: the first name_txt value found, or "" when the query returns
             nothing or its first row contains 'ERROR'
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    switch_to_db(cursor, get_ncbi_tax_name(cursor))

    template = ("select name_txt from names where "
                "tax_id = %d and "
                "name_class = 'genbank common name'")
    qry = template % tax_id

    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' not in rows[0]:
        return rows[0][0]
    # the query failed: run it once more verbosely, then give up
    search_db(cursor, qry, verbose=True)
    return ""
def fill_db_names_table(cursor, ensembl_db_name):
    """Store one db_names row per species: genome_db_id, species and db name.

    For each species the genome_db_id is read from the genome_db table of
    the compara database, printed together with the species and database
    name, and written to the `db_names` table through store_or_update,
    keyed on genome_db_id.

    :param cursor: database cursor handed to the helpers
    :param ensembl_db_name: mapping species name -> ensembl database name
    :return: None
    """
    compara_db_name = get_compara_name(cursor)

    # Species that must be looked up in genome_db under a different name.
    # heterocephalus_glaber: the male annotation is used because it is the newer one.
    # cyanoderma_ruficeps: looked up under the genus name stachyris.
    renamed = {
        'cricetulus_griseus': 'cricetulus_griseus_crigri',
        'heterocephalus_glaber': 'heterocephalus_glaber_male',
        'cyanoderma_ruficeps': 'stachyris_ruficeps',
    }

    for species_name, db_name in ensembl_db_name.items():
        lookup_name = renamed.get(species_name, species_name)
        qry = f"select genome_db_id from {compara_db_name}.genome_db where name='{lookup_name}'"
        genome_db_id = hard_landing_search(cursor, qry)[0][0]
        print(genome_db_id, species_name, db_name)
        row = {
            "genome_db_id": genome_db_id,
            "species_name": species_name,
            "db_name": db_name,
        }
        store_or_update(cursor, "db_names", fixed_fields=row, update_fields={},
                        primary_key='genome_db_id')
def get_trivial_names(cursor, all_species, common_name):
    """Build the trivial-name and tax-id lookups for all_species.

    The trivial name of a species is, in order of preference: the entry
    returned by set_trivial_names(); the common name with "common "
    removed; the species name itself. The latter two are lower-cased, with
    blanks and apostrophes replaced by underscores.

    Prints a message and exits with status 1 if the compara database name
    cannot be determined.

    :param cursor: database cursor handed to the helpers
    :param all_species: iterable of species names
    :param common_name: mapping species name -> common name
    :return: tuple (trivial_names, tax_id); trivial_names is the mapping
             from set_trivial_names() extended with every species in
             all_species, tax_id maps species -> ncbi_species2taxid result
    """
    missing = "not found"

    def as_identifier(text):
        # lower-case; blanks and apostrophes become underscores
        return text.lower().replace(" ", "_").replace("'", "_")

    compara_db = get_compara_name(cursor)
    if not compara_db:
        print("compara db not found")
        exit(1)
    switch_to_db(cursor, compara_db)

    tax_id = {species: ncbi_species2taxid(cursor, species) for species in all_species}

    trivial_names = set_trivial_names()
    for species in all_species:
        known = trivial_names.get(species, missing)
        common = common_name.get(species, missing)
        if known != missing:
            chosen = known
        elif common != missing:
            chosen = as_identifier(common.replace("common ", ""))
        else:
            chosen = as_identifier(species)
        trivial_names[species] = chosen

    return trivial_names, tax_id
def feed_name_shorthands(cursor, all_species):
    """Store the three-letter shorthand of each species in the compara db.

    The table `species_name_shorthands` (columns id, species, shorthand) is
    created first when check_table_exists reports it missing. Each species
    with a known shorthand is then written through store_or_update; for the
    others a message is printed.

    :param cursor: database cursor handed to the helpers
    :param all_species: iterable of species names (lower case, underscores)
    :return: False when the compara database name is not found or when one
             of the table-creation statements returns a non-empty result
             (presumably an error report from search_db -- TODO confirm);
             None otherwise.
             NOTE(review): the success value is None while
             feed_trivial_names returns True -- callers should not test
             the truthiness of this return value.
    """
    short = {
        'ailuropoda_melanoleuca': 'AME',
        'anas_platyrhynchos': 'APL',
        'anolis_carolinensis': 'ACA',
        'astyanax_mexicanus': 'AMX',
        'bos_taurus': 'BTA',
        'callithrix_jacchus': 'CJA',
        'canis_familiaris': 'CAF',
        'cavia_porcellus': 'CPO',
        'choloepus_hoffmanni': 'CHO',
        'danio_rerio': 'DAR',
        'dasypus_novemcinctus': 'DNO',
        'dipodomys_ordii': 'DOR',
        'echinops_telfairi': 'ETE',
        'equus_caballus': 'ECA',
        'erinaceus_europaeus': 'EEU',
        'felis_catus': 'FCA',
        'ficedula_albicollis': 'FAL',
        'gadus_morhua': 'GMO',
        'gallus_gallus': 'GAL',
        'gasterosteus_aculeatus': 'GAC',
        'gorilla_gorilla': 'GGO',
        'homo_sapiens': '',
        'ictidomys_tridecemlineatus': 'STO',
        'latimeria_chalumnae': 'LAC',
        'lepisosteus_oculatus': 'LOC',
        'loxodonta_africana': 'LAF',
        'macaca_mulatta': 'MMU',
        'macropus_eugenii': 'MEU',
        'meleagris_gallopavo': 'MGA',
        'microcebus_murinus': 'MIC',
        'monodelphis_domestica': 'MOD',
        'mus_musculus': 'MUS',
        'mustela_putorius_furo': 'MPU',
        'myotis_lucifugus': 'MLU',
        'nomascus_leucogenys': 'NLE',
        'ochotona_princeps': 'OPR',
        'oreochromis_niloticus': 'ONI',
        'ornithorhynchus_anatinus': 'OAN',
        'oryctolagus_cuniculus': 'OCU',
        'oryzias_latipes': 'ORL',
        'ovis_aries': 'OAR',
        'otolemur_garnettii': 'OGA',
        'pan_troglodytes': 'PTR',
        'papio_anubis': 'PAN',
        'poecilia_formosa': 'PFO',
        'pelodiscus_sinensis': 'PSI',
        'petromyzon_marinus': 'PMA',
        'pongo_abelii': 'PPY',
        'procavia_capensis': 'PCA',
        'pteropus_vampyrus': 'PVA',
        'rattus_norvegicus': 'RNO',
        'sarcophilus_harrisii': 'SHA',
        'sorex_araneus': 'SAR',
        'sus_scrofa': 'SSC',
        'taeniopygia_guttata': 'TGU',
        'takifugu_rubripes': 'TRU',
        'tarsius_syrichta': 'TSY',
        'tetraodon_nigroviridis': 'TNI',
        'tupaia_belangeri': 'TBE',
        'tursiops_truncatus': 'TTR',
        'vicugna_pacos': 'VPA',
        'xenopus_tropicalis': 'XET',
        'xiphophorus_maculatus': 'XMA',
    }

    db_name = get_compara_name(cursor)
    if not db_name:
        # without a database name the "use" statement below is meaningless
        print("compara db not found")
        return False
    qry = "use %s " % db_name
    search_db(cursor, qry)

    table = 'species_name_shorthands'
    # if the table does not exist, make it
    if not check_table_exists(cursor, db_name, table):
        ddl_statements = (
            "CREATE TABLE " + table + " (id INT(10) PRIMARY KEY AUTO_INCREMENT)",
            "ALTER TABLE %s ADD %s VARCHAR(100)" % (table, 'species'),
            "ALTER TABLE %s ADD %s VARCHAR(10)" % (table, 'shorthand'),
        )
        for qry in ddl_statements:
            # any returned rows are treated as failure of the statement
            if search_db(cursor, qry):
                return False

    for species in all_species:
        if species in short:
            fixed_fields = {'species': species}
            update_fields = {'shorthand': short[species]}
            store_or_update(cursor, table, fixed_fields, update_fields)
        else:
            print("short for ", species, " not found ")
            # remember the species with an empty shorthand, so a repeated
            # occurrence in all_species is stored with shorthand ""
            short[species] = ""
def feed_trivial_names(cursor, all_species):
    """Store a 'trivial' name for each species in the ncbi taxonomy `names` table.

    Tax ids are resolved with species2taxid while the compara database is
    selected. The ncbi taxonomy database is then selected, and for every
    species with a known trivial name a row (tax_id, name_class='trivial',
    name_txt=<trivial name>) is written through store_or_update. For the
    other species a message is printed.

    Prints a message and exits with status 1 when either database name
    cannot be determined.

    :param cursor: database cursor handed to the helpers
    :param all_species: iterable of species names (lower case, underscores)
    :return: True
    """
    trivial = {
        'ailuropoda_melanoleuca': 'panda',
        'anas_platyrhynchos': 'duck',
        'anolis_carolinensis': 'anole_lizard',
        'astyanax_mexicanus': 'blind_cavefish',
        'bos_taurus': 'cow',
        'callithrix_jacchus': 'marmoset',
        'canis_familiaris': 'dog',
        'cavia_porcellus': 'guinea_pig',
        'choloepus_hoffmanni': 'sloth',
        'danio_rerio': 'zebrafish',
        'dasypus_novemcinctus': 'armadillo',
        'dipodomys_ordii': 'kangaroo_rat',
        'echinops_telfairi': 'madagascar_hedgehog',
        'equus_caballus': 'horse',
        'erinaceus_europaeus': 'european_hedgehog',
        'felis_catus': 'cat',
        'ficedula_albicollis': 'flycatcher',
        'gadus_morhua': 'cod',
        'gallus_gallus': 'chicken',
        'gasterosteus_aculeatus': 'stickleback',
        'gorilla_gorilla': 'gorilla',
        'homo_sapiens': 'human',
        'ictidomys_tridecemlineatus': 'squirrel',
        'latimeria_chalumnae': 'coelacanth',
        'lepisosteus_oculatus': 'spotted_gar',
        'loxodonta_africana': 'elephant',
        'macaca_mulatta': 'macaque',
        'macropus_eugenii': 'wallaby',
        'meleagris_gallopavo': 'turkey',
        'microcebus_murinus': 'lemur',
        'monodelphis_domestica': 'opossum',
        'mus_musculus': 'mouse',
        'mustela_putorius_furo': 'ferret',
        'myotis_lucifugus': 'bat',
        'nomascus_leucogenys': 'gibbon',
        'ochotona_princeps': 'pika',
        'oreochromis_niloticus': 'tilapia',
        'ornithorhynchus_anatinus': 'platypus',
        'oryctolagus_cuniculus': 'rabbit',
        'oryzias_latipes': 'medaka',
        'otolemur_garnettii': 'galago_lemur',
        'ovis_aries': 'sheep',
        'pan_troglodytes': 'chimpanzee',
        'papio_anubis': 'baboon',
        'pelodiscus_sinensis': 'turtle',
        'petromyzon_marinus': 'lamprey',
        'poecilia_formosa': 'amazon_molly',
        'pongo_abelii': 'orangutan',
        'procavia_capensis': 'hyrax',
        'pteropus_vampyrus': 'flying_fox',
        'rattus_norvegicus': 'rat',
        'sarcophilus_harrisii': 'tasmanian_devil',
        'sorex_araneus': 'european_shrew',
        'sus_scrofa': 'pig',
        'taeniopygia_guttata': 'zebra_finch',
        'takifugu_rubripes': 'fugu',
        'tarsius_syrichta': 'tarsier',
        'tetraodon_nigroviridis': 'pufferfish',
        'tupaia_belangeri': 'tree_shrew',
        'tursiops_truncatus': 'dolphin',
        'vicugna_pacos': 'alpaca',
        'xenopus_tropicalis': 'xenopus',
        'xiphophorus_maculatus': 'platyfish',
    }

    db_name = get_compara_name(cursor)
    if not db_name:
        print("compara db not found")
        exit(1)
    qry = "use %s " % db_name
    search_db(cursor, qry)

    # tax ids are looked up while the compara database is selected
    tax_id = {species: species2taxid(cursor, species) for species in all_species}

    # switch to ncbi taxonomy database
    db_name = get_ncbi_tax_name(cursor)
    if not db_name:
        print("ncbi taxonomy db not found")
        exit(1)
    qry = "use %s " % db_name
    search_db(cursor, qry)

    for species in all_species:
        if species in trivial:
            fixed_fields = {'tax_id': tax_id[species], 'name_class': 'trivial'}
            update_fields = {'name_txt': trivial[species]}
            store_or_update(cursor, 'names', fixed_fields, update_fields)
        else:
            print("trivial for ", species, " not found ")
            # remember the species with an empty trivial name, so a repeated
            # occurrence in all_species is stored with name_txt ""
            trivial[species] = ""

    return True