def fetch_and_assign_taxids(seq, table):
    """Fetch UniProt metadata for ``seq`` and write it into a MySQL table.

    For every entry returned by ``uniprot.fetch_uniprot_metadata(seq)``, the
    rows of ``table`` whose ``Accession_Number`` equals the entry's key get
    ``OrganismID`` set to the entry's ``TaxID`` (with everything except
    digits and dots stripped) and ``Entry_Name`` set to the entry's ``id``.

    Args:
        seq: Passed unchanged to ``uniprot.fetch_uniprot_metadata``.
        table: Name of the table to update. It is interpolated into the SQL
            text as-is, because identifiers cannot be bound as parameters,
            so it must come from trusted code and never from user input.
    """
    non_decimal = re.compile(r"[^\d.]+")
    results = uniprot.fetch_uniprot_metadata(seq)
    time.sleep(1)  # presumably throttles successive UniProt requests -- TODO confirm

    # Only identifiers are formatted into the statement text.  The values are
    # bound by the driver, so they are escaped properly instead of relying on
    # repr() to produce a valid SQL literal.
    # NOTE(review): assumes ``mdb`` is a DB-API driver using the "format"
    # paramstyle (%s), as MySQLdb does -- confirm against the file's imports.
    sql = "update {} set {}=%s where Accession_Number=%s"
    taxid_sql = sql.format(table, "`OrganismID`")
    name_sql = sql.format(table, "`Entry_Name`")

    connection = mdb.connect(
        host="localhost", db="FamilyCpolymerases", user="******"
    )
    try:
        cursor = connection.cursor()
        # .items() and single-argument print(...) behave the same under
        # Python 2 and Python 3.
        for accession, entry in results.items():
            tax_id = non_decimal.sub("", entry["TaxID"])
            entry_name = entry["id"]
            print("IDs:{} {} {}".format(repr(accession), tax_id, entry_name))
            for statement, value in ((taxid_sql, tax_id), (name_sql, entry_name)):
                params = (value, accession)
                print("{} {!r}".format(statement, params))
                cursor.execute(statement, params)
        connection.commit()
        time.sleep(1)
    finally:
        # Release the connection even when a lookup or an UPDATE fails.
        connection.close()
def fetch_and_assign_taxids(seq, table):
    """Copy UniProt taxonomy IDs and entry names into a MySQL table.

    Calls ``uniprot.fetch_uniprot_metadata(seq)`` and, for each returned
    ``key -> metadata`` pair, updates the rows of ``table`` whose
    ``Accession_Number`` equals ``key``:

    * ``OrganismID`` is set to ``metadata['TaxID']`` with every character
      that is not a digit or a dot removed.
    * ``Entry_Name`` is set to ``metadata['id']``.

    Args:
        seq: Passed unchanged to ``uniprot.fetch_uniprot_metadata``.
        table: Name of the table to update. Identifiers cannot be bound as
            query parameters, so this is formatted into the SQL text
            verbatim and must be a trusted value.
    """
    non_decimal = re.compile(r'[^\d.]+')
    results = uniprot.fetch_uniprot_metadata(seq)
    time.sleep(1)  # presumably rate-limits calls to UniProt -- TODO confirm

    # Values are passed as bound parameters rather than formatted into the
    # statement, so quoting and escaping are left to the database driver.
    # NOTE(review): assumes ``mdb`` uses the %s ("format") paramstyle, as
    # MySQLdb does -- confirm against the file's imports.
    template = 'update {} set {}=%s where Accession_Number=%s'
    updates = (
        (template.format(table, '`OrganismID`'), 'TaxID'),
        (template.format(table, '`Entry_Name`'), 'id'),
    )

    connection = mdb.connect(
        host='localhost', db='FamilyCpolymerases', user='******'
    )
    try:
        cursor = connection.cursor()
        # .items() and single-argument print(...) work on Python 2 and 3.
        for accession, metadata in results.items():
            values = {
                'TaxID': non_decimal.sub('', metadata['TaxID']),
                'id': metadata['id'],
            }
            print('IDs:{} {} {}'.format(
                repr(accession), values['TaxID'], values['id']))
            for statement, field in updates:
                params = (values[field], accession)
                print('{} {!r}'.format(statement, params))
                cursor.execute(statement, params)
        connection.commit()
        time.sleep(1)
    finally:
        # Always close the connection, including when an update raises.
        connection.close()
def fetch_and_assign_taxids(seq, table):
    """Store UniProt ``TaxID`` and ``id`` values in a MySQL table.

    The metadata mapping returned by ``uniprot.fetch_uniprot_metadata(seq)``
    is walked entry by entry. For each key, the rows of ``table`` whose
    ``Accession_Number`` equals that key are updated: ``OrganismID`` receives
    the entry's ``TaxID`` reduced to digits and dots, and ``Entry_Name``
    receives the entry's ``id``.

    Args:
        seq: Passed unchanged to ``uniprot.fetch_uniprot_metadata``.
        table: Name of the table to update. It is placed into the SQL text
            verbatim (identifiers cannot be bound as parameters), so callers
            must only pass trusted table names.
    """
    non_decimal = re.compile(r'[^\d.]+')
    results = uniprot.fetch_uniprot_metadata(seq)
    time.sleep(1)  # presumably spaces out UniProt requests -- TODO confirm

    # The column and table names are formatted in once, up front; the values
    # are bound per row by the driver instead of being quoted with repr().
    # NOTE(review): assumes ``mdb`` uses the %s ("format") paramstyle, as
    # MySQLdb does -- confirm against the file's imports.
    sql = 'update {} set {}=%s where Accession_Number=%s'
    set_taxid = sql.format(table, 'OrganismID')
    set_entry_name = sql.format(table, 'Entry_Name')

    connection = mdb.connect(
        host='localhost', db='FamilyCpolymerases', user='******'
    )
    try:
        cursor = connection.cursor()
        # .items() exists on both Python 2 and Python 3 dicts.
        for accession, metadata in results.items():
            tax_id = non_decimal.sub('', metadata['TaxID'])
            cursor.execute(set_taxid, (tax_id, accession))
            cursor.execute(set_entry_name, (metadata['id'], accession))
        connection.commit()
        time.sleep(1)
    finally:
        # Close the connection whether or not the updates succeeded.
        connection.close()
def fetch_uniprot_data(
        self, uniprot_ids: Union[str, List[str]]) -> List[Mapping[str, str]]:
    """
    Look up UniProt metadata for one or more UniProt IDs.

    Args:
        uniprot_ids: A single UniProt ID, or a list of distinct IDs.

    Returns:
        The values of the mapping returned by
        ``uniprot.fetch_uniprot_metadata``, as a list.

    Raises:
        MultipleMatchesError: If ``uniprot_ids`` contains duplicates.
        LookupError: If the lookup returned nothing, or returned a different
            number of entries than the number of IDs requested.
    """
    # A bare string is treated as a one-element list of IDs.
    requested = [uniprot_ids] if isinstance(uniprot_ids, str) else uniprot_ids
    n_requested = len(requested)

    # Reject duplicates up front. Per the original author's note, the fetch
    # returns one entry per unique ID, so duplicates would otherwise trip the
    # count check below and surface as a confusing "not found" error.
    if len(set(requested)) != n_requested:
        raise MultipleMatchesError(
            "Set of UniProt IDs cannot contain duplicates")

    with silenced(no_stderr=False):
        found = uniprot.fetch_uniprot_metadata(requested)

    complete = (
        found is not None
        and found != {}
        and len(found) == n_requested
    )
    if not complete:
        raise LookupError(
            f"At least one UniProt ID not found in {requested}")
    return list(found.values())