예제 #1
0
def find_mammals(cursor, trivial_name_list):
    """Return the trivial names whose taxonomic ancestry contains a mammal node.

    For each name the tax id is resolved with trivial2taxid() and the lineage
    is climbed one parent at a time with taxid2parentid().  At every step the
    'scientific name' of the current node is read from the ``names`` table:

    * a name containing 'mammal' marks the species as a mammal;
    * a name containing 'vertebrat' ends the climb with a negative answer;
    * the climb also ends when the parent id is falsy or not greater than 1.

    Note that the climb starts at the *parent* of the species, so the
    species' own node is never inspected.

    Args:
        cursor: database cursor handed to all the helper calls.
        trivial_name_list: iterable of trivial species names.

    Returns:
        List of the names from *trivial_name_list* classified as mammals,
        in input order.
    """
    mammals = []
    for trivial_name in trivial_name_list:
        # The compara db is re-selected for every name, exactly as before;
        # the helpers below may change the active db -- not visible from here.
        switch_to_db(cursor, get_compara_name(cursor))
        species_tax_id = trivial2taxid(cursor, trivial_name)
        ancestor_id = taxid2parentid(cursor, species_tax_id)

        found_mammal = False
        while ancestor_id:
            qry = ("select name_txt from names where tax_id= %d " % int(ancestor_id)
                   + " and name_class = 'scientific name'")
            rows = search_db(cursor, qry)
            if rows and rows[0][0]:
                sci_name = rows[0][0].lower()
                if 'mammal' in sci_name:
                    found_mammal = True
                    break
                if 'vertebrat' in sci_name:
                    # had this been a mammal, we would have found it by now
                    break

            next_id = taxid2parentid(cursor, ancestor_id)
            # ids <= 1 (or a missing parent) terminate the climb
            ancestor_id = next_id if next_id and next_id > 1 else None

        if found_mammal:
            mammals.append(trivial_name)

    return mammals
예제 #2
0
def get_common_name(cursor, species):
    """Return the 'genbank common name' stored for *species*, or "" if none.

    The tax id is resolved with species2taxid() while the compara database is
    selected; the name is then read from the ``names`` table after switching
    to the NCBI taxonomy database.

    Args:
        cursor: database cursor handed to all the helper calls.
        species: species name understood by species2taxid().

    Returns:
        The first ``name_txt`` value found, or "" when the tax id could not
        be resolved, the query returned nothing, or the first row contains
        the 'ERROR' marker.
    """
    switch_to_db(cursor, get_compara_name(cursor))
    tax_id = species2taxid(cursor, species)
    # Switch before any early return so the active database on exit is the
    # same regardless of the outcome.
    switch_to_db(cursor, get_ncbi_tax_name(cursor))
    if not tax_id:
        # Nothing to look up; formatting an empty/None id with %d would raise.
        return ""

    # int() mirrors how find_mammals builds the same kind of query and lets a
    # numeric id that arrives as a string be formatted with %d.
    qry = ("select name_txt from names where "
           "tax_id = %d and name_class = 'genbank common name'" % int(tax_id))
    rows = search_db(cursor, qry)
    if not rows:
        return ""
    if 'ERROR' in rows[0]:
        # Re-run verbosely so the failure is reported, then give up.
        search_db(cursor, qry, verbose=True)
        return ""
    return rows[0][0]
예제 #3
0
def fill_db_names_table(cursor, ensembl_db_name):
    """Store genome_db_id / species name / db name triples in ``db_names``.

    For every (species_name, db_name) pair in *ensembl_db_name* the
    genome_db_id is read from the compara ``genome_db`` table and the triple
    is handed to store_or_update() for the ``db_names`` table, using
    genome_db_id as the primary key.  Each triple is also printed.

    Args:
        cursor: database cursor handed to all the helper calls.
        ensembl_db_name: dict mapping species name to Ensembl database name.

    Returns:
        None.
    """
    compara_db = get_compara_name(cursor)
    # Species that are registered in genome_db under a different name than
    # the one used as key in ensembl_db_name.
    # H. glaber exists as both male and female; the male annotation is the
    # newer one, so that is the one used here.
    compara_aliases = {
        'cricetulus_griseus': 'cricetulus_griseus_crigri',
        'heterocephalus_glaber': 'heterocephalus_glaber_male',
        # taxonomic synonym
        'cyanoderma_ruficeps': 'stachyris_ruficeps',
    }
    for species_name, db_name in ensembl_db_name.items():
        lookup_name = compara_aliases.get(species_name, species_name)
        qry = f"select genome_db_id from {compara_db}.genome_db where name='{lookup_name}'"
        # presumably hard_landing_search aborts when nothing is found -- TODO confirm
        genome_db_id = hard_landing_search(cursor, qry)[0][0]
        print(genome_db_id, species_name, db_name)
        row = {"genome_db_id": genome_db_id, "species_name": species_name, "db_name": db_name}
        store_or_update(cursor, "db_names", fixed_fields=row, update_fields={}, primary_key='genome_db_id')
예제 #4
0
def get_trivial_names(cursor, all_species, common_name):
    """Build trivial-name and tax-id lookups for *all_species*.

    Tax ids are resolved with ncbi_species2taxid() while the compara database
    is selected.  Trivial names start from set_trivial_names(); a species
    without an entry there gets one derived from its common name (with the
    "common " prefix removed) or, failing that, from the species name itself.
    Derived names are lower-cased with spaces and apostrophes replaced by
    underscores.

    Args:
        cursor: database cursor handed to all the helper calls.
        all_species: iterable of species names.
        common_name: mapping of species name to common name.

    Returns:
        Tuple ``(trivial_names, tax_id)`` of two dicts keyed by species name.

    Exits the process with status 1 when no compara database name is found.
    """
    compara_db = get_compara_name(cursor)
    if not compara_db:
        print("compara db not found")
        exit(1)
    switch_to_db(cursor, compara_db)
    tax_id = {species: ncbi_species2taxid(cursor, species) for species in all_species}

    trivial_names = set_trivial_names()
    for species in all_species:
        # "not found" doubles as the missing-value sentinel for both lookups
        if trivial_names.get(species, "not found") != "not found":
            continue
        common = common_name.get(species, "not found")
        base = species if common == "not found" else common.replace("common ", "")
        trivial_names[species] = base.lower().replace(" ", "_").replace("'", "_")

    return trivial_names, tax_id
예제 #5
0
def feed_name_shorthands(cursor, all_species):
    """Store the three-letter shorthand of each species in the compara db.

    The ``species_name_shorthands`` table is created (columns ``id``,
    ``species``, ``shorthand``) when check_table_exists() reports it missing.
    For every species in *all_species* with a known shorthand, the pair is
    handed to store_or_update(); species without one are reported on stdout.

    Args:
        cursor: database cursor handed to all the helper calls.
        all_species: iterable of species names (lower case, underscore
            separated, as in the table below).

    Returns:
        False if any of the table-creation statements made search_db()
        return a truthy result (treated as failure here), True otherwise.
    """
    short = {
        'ailuropoda_melanoleuca': 'AME',
        'anas_platyrhynchos': 'APL',
        'anolis_carolinensis': 'ACA',
        'astyanax_mexicanus': 'AMX',
        'bos_taurus': 'BTA',
        'callithrix_jacchus': 'CJA',
        'canis_familiaris': 'CAF',
        'cavia_porcellus': 'CPO',
        'choloepus_hoffmanni': 'CHO',
        'danio_rerio': 'DAR',
        'dasypus_novemcinctus': 'DNO',
        'dipodomys_ordii': 'DOR',
        'echinops_telfairi': 'ETE',
        'equus_caballus': 'ECA',
        'erinaceus_europaeus': 'EEU',
        'felis_catus': 'FCA',
        'ficedula_albicollis': 'FAL',
        'gadus_morhua': 'GMO',
        'gallus_gallus': 'GAL',
        'gasterosteus_aculeatus': 'GAC',
        'gorilla_gorilla': 'GGO',
        'homo_sapiens': '',  # human deliberately has an empty shorthand
        'ictidomys_tridecemlineatus': 'STO',
        'latimeria_chalumnae': 'LAC',
        'lepisosteus_oculatus': 'LOC',
        'loxodonta_africana': 'LAF',
        'macaca_mulatta': 'MMU',
        'macropus_eugenii': 'MEU',
        'meleagris_gallopavo': 'MGA',
        'microcebus_murinus': 'MIC',
        'monodelphis_domestica': 'MOD',
        'mus_musculus': 'MUS',
        'mustela_putorius_furo': 'MPU',
        'myotis_lucifugus': 'MLU',
        'nomascus_leucogenys': 'NLE',
        'ochotona_princeps': 'OPR',
        'oreochromis_niloticus': 'ONI',
        'ornithorhynchus_anatinus': 'OAN',
        'oryctolagus_cuniculus': 'OCU',
        'oryzias_latipes': 'ORL',
        'ovis_aries': 'OAR',
        'otolemur_garnettii': 'OGA',
        'pan_troglodytes': 'PTR',
        'papio_anubis': 'PAN',
        'poecilia_formosa': 'PFO',
        'pelodiscus_sinensis': 'PSI',
        'petromyzon_marinus': 'PMA',
        'pongo_abelii': 'PPY',
        'procavia_capensis': 'PCA',
        'pteropus_vampyrus': 'PVA',
        'rattus_norvegicus': 'RNO',
        'sarcophilus_harrisii': 'SHA',
        'sorex_araneus': 'SAR',
        'sus_scrofa': 'SSC',
        'taeniopygia_guttata': 'TGU',
        'takifugu_rubripes': 'TRU',
        'tarsius_syrichta': 'TSY',
        'tetraodon_nigroviridis': 'TNI',
        'tupaia_belangeri': 'TBE',
        'tursiops_truncatus': 'TTR',
        'vicugna_pacos': 'VPA',
        'xenopus_tropicalis': 'XET',
        'xiphophorus_maculatus': 'XMA',
    }

    db_name = get_compara_name(cursor)
    search_db(cursor, "use %s " % db_name)

    table = 'species_name_shorthands'
    # if the table does not exist, make it
    if not check_table_exists(cursor, db_name, table):
        ddl_statements = [
            "CREATE TABLE " + table + "  (id INT(10) PRIMARY KEY AUTO_INCREMENT)",
            "ALTER TABLE %s  ADD %s VARCHAR(100)" % (table, 'species'),
            "ALTER TABLE %s  ADD %s VARCHAR(10)" % (table, 'shorthand'),
        ]
        for qry in ddl_statements:
            # a truthy result from a DDL statement is treated as failure
            if search_db(cursor, qry):
                return False

    for species in all_species:
        if species in short:
            fixed_fields = {'species': species}
            update_fields = {'shorthand': short[species]}
            store_or_update(cursor, table, fixed_fields, update_fields)
        else:
            print("short for ", species, " not found ")

    # Report success explicitly so callers can tell it apart from the
    # False returned on table-creation failure.
    return True
예제 #6
0
def feed_trivial_names(cursor, all_species):
    """Store a 'trivial' (everyday) name for each species in the taxonomy db.

    Tax ids are resolved with species2taxid() while the compara database is
    selected.  After switching to the NCBI taxonomy database, each species
    with a known trivial name is handed to store_or_update() for the
    ``names`` table, with ``name_class`` set to 'trivial'.  Species without a
    known trivial name are reported on stdout.

    Args:
        cursor: database cursor handed to all the helper calls.
        all_species: iterable of species names (lower case, underscore
            separated, as in the table below).

    Returns:
        True.

    Exits the process with status 1 when either the compara or the NCBI
    taxonomy database name cannot be found.
    """
    trivial = {
        'ailuropoda_melanoleuca': 'panda',
        'anas_platyrhynchos': 'duck',
        'anolis_carolinensis': 'anole_lizard',
        'astyanax_mexicanus': 'blind_cavefish',
        'bos_taurus': 'cow',
        'callithrix_jacchus': 'marmoset',
        'canis_familiaris': 'dog',
        'cavia_porcellus': 'guinea_pig',
        'choloepus_hoffmanni': 'sloth',
        'danio_rerio': 'zebrafish',
        'dasypus_novemcinctus': 'armadillo',
        'dipodomys_ordii': 'kangaroo_rat',
        'echinops_telfairi': 'madagascar_hedgehog',
        'equus_caballus': 'horse',
        'erinaceus_europaeus': 'european_hedgehog',
        'felis_catus': 'cat',
        'ficedula_albicollis': 'flycatcher',
        'gadus_morhua': 'cod',
        'gallus_gallus': 'chicken',
        'gasterosteus_aculeatus': 'stickleback',
        'gorilla_gorilla': 'gorilla',
        'homo_sapiens': 'human',
        'ictidomys_tridecemlineatus': 'squirrel',
        'latimeria_chalumnae': 'coelacanth',
        'lepisosteus_oculatus': 'spotted_gar',
        'loxodonta_africana': 'elephant',
        'macaca_mulatta': 'macaque',
        'macropus_eugenii': 'wallaby',
        'meleagris_gallopavo': 'turkey',
        'microcebus_murinus': 'lemur',
        'monodelphis_domestica': 'opossum',
        'mus_musculus': 'mouse',
        'mustela_putorius_furo': 'ferret',
        'myotis_lucifugus': 'bat',
        'nomascus_leucogenys': 'gibbon',
        'ochotona_princeps': 'pika',
        'oreochromis_niloticus': 'tilapia',
        'ornithorhynchus_anatinus': 'platypus',
        'oryctolagus_cuniculus': 'rabbit',
        'oryzias_latipes': 'medaka',
        'otolemur_garnettii': 'galago_lemur',
        'ovis_aries': 'sheep',
        'pan_troglodytes': 'chimpanzee',
        'papio_anubis': 'baboon',
        'pelodiscus_sinensis': 'turtle',
        'petromyzon_marinus': 'lamprey',
        'poecilia_formosa': 'amazon_molly',
        'pongo_abelii': 'orangutan',
        'procavia_capensis': 'hyrax',
        'pteropus_vampyrus': 'flying_fox',
        'rattus_norvegicus': 'rat',
        'sarcophilus_harrisii': 'tasmanian_devil',
        'sorex_araneus': 'european_shrew',
        'sus_scrofa': 'pig',
        'taeniopygia_guttata': 'zebra_finch',
        'takifugu_rubripes': 'fugu',
        'tarsius_syrichta': 'tarsier',
        'tetraodon_nigroviridis': 'pufferfish',
        'tupaia_belangeri': 'tree_shrew',
        'tursiops_truncatus': 'dolphin',
        'vicugna_pacos': 'alpaca',
        'xenopus_tropicalis': 'xenopus',
        'xiphophorus_maculatus': 'platyfish',
    }

    db_name = get_compara_name(cursor)
    if not db_name:
        print("compara db not found")
        exit(1)
    search_db(cursor, "use %s " % db_name)
    # tax ids must be resolved while the compara db is still selected
    tax_id = {}
    for species in all_species:
        tax_id[species] = species2taxid(cursor, species)

    # switch to ncbi taxonomy database
    db_name = get_ncbi_tax_name(cursor)
    if not db_name:
        print("ncbi taxonomy db not found")
        exit(1)
    search_db(cursor, "use %s " % db_name)

    for species in all_species:
        if species in trivial:
            fixed_fields = {'tax_id': tax_id[species], 'name_class': 'trivial'}
            update_fields = {'name_txt': trivial[species]}
            store_or_update(cursor, 'names', fixed_fields, update_fields)
        else:
            print("trivial for ", species, " not found ")

    return True