def save_structures(cfg, log, db, listItem):
    """Download each PDB entry in listItem and insert it as a candidate row.

    Args:
        cfg: configuration object (SQL templates, FullReload flag).
        log: logger.
        db: database wrapper exposing executeCommand/commit/rollback.
        listItem: iterable of PDB identifiers to import.

    On any failure the whole batch is logged and rolled back.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateCandidate)
        for item in listItem:
            log.info('Saving Candidate: %s...' % item)
            if PDB.get_file(cfg, log, item):
                pdb = PDB.parse_header(cfg, item)
                # Hoist the sub-dicts once; optional keys fall back to ''.
                compound = pdb['compound']['1']
                source = pdb['source']['1']
                db.executeCommand(
                    cfg.sqlInsertCandidate,
                    (item,
                     MySQLdb.escape_string(PDB.get_content()),
                     MySQLdb.escape_string(pdb['name']),
                     MySQLdb.escape_string(pdb['author']),
                     pdb['deposition_date'],
                     pdb['release_date'],
                     '0',
                     pdb['resolution'],
                     pdb['head'],
                     pdb['structure_method'],
                     compound.get('chain', ''),
                     compound.get('ec_number', ''),
                     source.get('organism_taxid', ''),
                     source.get('organism_scientific', ''),
                     source.get('expression_system_taxid', ''),
                     source.get('expression_system', '')))
        db.commit()
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        log.error(traceback.format_exc())
        db.rollback()
def save_structures(cfg, log, db, listItem):
    """Download each PDB entry in listItem and insert it as a candidate row.

    NOTE(review): this file contains two definitions of save_structures;
    the later one wins at import time — consolidate them.

    On any failure the whole batch is logged and rolled back.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateCandidate)
        for item in listItem:
            log.info('Saving Candidate: %s...' % item)
            if PDB.get_file(cfg, log, item):
                pdb = PDB.parse_header(cfg, item)
                compound = pdb['compound']['1']
                source = pdb['source']['1']
                # FIX: the chain/ec_number/organism/expression keys are
                # optional in PDB headers; direct indexing raised KeyError
                # and aborted the batch. Fall back to '' like the sibling
                # implementation does.
                db.executeCommand(
                    cfg.sqlInsertCandidate,
                    (item,
                     MySQLdb.escape_string(PDB.get_content()),
                     MySQLdb.escape_string(pdb['name']),
                     MySQLdb.escape_string(pdb['author']),
                     pdb['deposition_date'],
                     pdb['release_date'],
                     pdb['version'],
                     pdb['resolution'],
                     pdb['head'],
                     pdb['structure_method'],
                     compound.get('chain', ''),
                     compound.get('ec_number', ''),
                     source.get('organism_taxid', ''),
                     source.get('organism_scientific', ''),
                     source.get('expression_system_taxid', ''),
                     source.get('expression_system', '')))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
def retrieve_structures(cfg, log):
    """Run every configured PDB advanced query and intersect the results.

    Returns:
        A list of non-empty PDB identifiers, or None if anything failed.
    """
    final_list = None
    try:
        resList = []
        for item in cfg.pdbQueries:
            res = PDB.execute_advanced_query(log, cfg.pdbAdvancedSearchURL,
                                             item)
            resList.append(res.split('\n'))
        log.info('Intersecting results...')
        # SECURITY(review): merge_results() output is passed to eval();
        # if that string can contain externally influenced data this is
        # arbitrary code execution. Prefer ast.literal_eval or computing
        # the set intersection directly.
        final_list = list(eval(PDB.merge_results(resList)))
        # Drop empty identifiers; a list comprehension (not filter())
        # guarantees a real list is returned on Python 3 as well.
        final_list = [entry for entry in final_list if len(entry)]
    except Exception:
        log.error(traceback.format_exc())
    return final_list
def retrieve_genbank_info(cfg, log, db):
    """Augment each candidate structure with GenBank data.

    For every candidate row, fetch its GenBank XML record, extract the
    sequence, taxonomy, sequence length and GI number, and update the
    database. A failed fetch is logged and skipped; a record that was
    fetched but could not be parsed aborts the whole run.
    """
    try:
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        html = None
        for item in structs:
            html = None
            try:
                html = PDB.get_genbank_info(cfg, log, item['pdbID'])
                tree = ElementTree.fromstring(html)
                gb_sequence = tree.findall('.//GBSeq/GBSeq_sequence')[0].text
                gb_taxonomy = tree.findall('.//GBSeq/GBSeq_taxonomy')[0].text
                gb_seq_length = tree.findall('.//GBSeq/GBSeq_length')[0].text
                gb_seqids = tree.findall(
                    './/GBSeq/GBSeq_other-seqids/GBSeqid')
                gb_gi = None
                for node in gb_seqids:
                    if 'gi|' in node.text:
                        gb_gi = node.text.replace('gi|', '')
                db.executeCommand(
                    cfg.sqlUpdateGenBank,
                    (gb_taxonomy, gb_sequence, gb_seq_length, gb_gi,
                     item['pdbID']))
                log.info(
                    'Updating information with GenBank data for structure %s.'
                    % item['pdbID'])
            except Exception:
                log.info('No information found on GenBank for structure %s.'
                         % item['pdbID'])
                # html is assigned only after a successful fetch, so getting
                # here with html set means the record was fetched but
                # unparsable — surface that instead of silently skipping.
                if html is not None:
                    raise
            time.sleep(2)  # throttle requests to the NCBI service
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def retrieve_go_terms(cfg, log, db):
    """Fetch GO term annotations for each candidate structure and store them.

    The upstream service returns a single term as a dict and multiple
    terms as a list; both shapes are normalised to one insert path.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateGoTerms)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for strut in structs:
            log.info('Getting GO terms for structure %s' % strut['pdbID'])
            json_string = PDB.get_go_terms(cfg, strut['pdbID'])
            if (json_string and json_string['goTerms']
                    and json_string['goTerms']['term']):
                terms = json_string['goTerms']['term']
                if isinstance(terms, dict):
                    terms = [terms]
                for item in terms:
                    detail = item['detail']
                    # FIX: '@synonyms' presence was tested on the wrong
                    # object (the term container / whole list instead of
                    # item['detail']), so synonyms were stored incorrectly
                    # or the lookup raised KeyError. detail.get() yields
                    # the synonyms when present and None otherwise.
                    db.executeCommand(
                        cfg.sqlInsertGoTerms,
                        (strut['pdbID'], item['@chainId'], item['@id'],
                         detail['@name'], detail['@definition'],
                         detail.get('@synonyms'), detail['@ontology']))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def relate_structures(cfg, log, db, pm):
    """Link candidate structures to their PubMed articles.

    Merges the training set into the literature set, then for each
    candidate whose PDB header carries a PubMed id records the
    originating article plus every related article from the link service.

    Raises:
        Exception: if the relation table could not be built (after rollback).
    """
    try:
        log.info('Merging Training and Result Sets...')
        rows = db.getData(cfg.sqlCopyTrainingSetIntoLiterature).fetchall()
        for row in rows:
            article = pm.get_pubmed_article(row['pubmed_id'])
            pm.save_pubmed_article(row['pubmed_id'], article, 'Literature')
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateRel)
        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()
        for addFile in candidates:
            key = addFile['pdbID']
            f = PDB.parse_prody(cfg, key)
            if f.journal.pmid != "":
                db.executeCommand(cfg.sqlInsertRel,
                                  (key, f.journal.pmid, 'Originator'))
                log.info('Opening article to find references to structures...')
                fileName = pm.get_related_pubmed_articles(f.journal.pmid)
                for item in fileName[0]['LinkSetDb'][0]['Link']:
                    db.executeCommand(cfg.sqlInsertRel,
                                      (key, item['Id'], 'Related'))
        db.commit()
    except Exception:
        db.rollback()
        log.error(traceback.format_exc())
        raise Exception('Training Set was not built.')
def retrieve_pathways(cfg, log, db):
    """Scrape KEGG pathway links for each candidate structure and store them.

    Parses the fetched HTML for anchors whose href contains
    'show_pathway' and inserts (structure id, absolute URL, link text).
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncatePathway)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            log.info('Finding pathway data for structure %s.' % item[0])
            # The original wrapped this body in try/except:raise, which is
            # a no-op — errors propagate to the outer handler either way.
            html = PDB.get_pathways_info(cfg, log, item[1])
            soup = BeautifulSoup(html)
            # NOTE(review): link.attrs as a list of (name, value) tuples is
            # the legacy BeautifulSoup 3 API — confirm installed version.
            for link in soup.findAll('a'):
                if ('href' in link.attrs[0]
                        and 'show_pathway' in link.attrs[0][1]):
                    db.executeCommand(
                        cfg.sqlInsertPathway,
                        (item[0], cfg.keggRootURL + link.attrs[0][1],
                         link.contents[0]))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def retrieve_genbank_info(cfg, log, db):
    """Augment each candidate structure with GenBank data (best effort).

    Fetches the GenBank XML record for every candidate, extracts the
    sequence, taxonomy, length and GI number, and updates the row.
    Any per-record failure is logged and the record is skipped.
    """
    try:
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            try:
                html = PDB.get_genbank_info(cfg, log, item[0])
                tree = ElementTree.fromstring(html)
                gb_sequence = tree.findall('.//GBSeq/GBSeq_sequence')[0].text
                gb_taxonomy = tree.findall('.//GBSeq/GBSeq_taxonomy')[0].text
                gb_seq_length = tree.findall('.//GBSeq/GBSeq_length')[0].text
                gb_seqids = tree.findall(
                    './/GBSeq/GBSeq_other-seqids/GBSeqid')
                gb_gi = None
                for node in gb_seqids:
                    if 'gi|' in node.text:
                        gb_gi = node.text.replace('gi|', '')
                db.executeCommand(
                    cfg.sqlUpdateGenBank,
                    (gb_taxonomy, gb_sequence, gb_seq_length, gb_gi,
                     item[0]))
                log.info(
                    'Updating information with GenBank data for structure %s.'
                    % item[0])
            except Exception:
                # Deliberately best-effort: the commented-out re-raise and
                # the now-unused html sentinel were removed as dead code.
                log.info('No information found on GenBank for structure %s.'
                         % item[0])
            time.sleep(2)  # throttle requests to the NCBI service
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def retrieve_structures(cfg, log):
    """Execute all configured PDB advanced queries and intersect the hits.

    Returns:
        A list of non-empty PDB identifiers, or None on failure.
    """
    final_list = None
    try:
        resList = []
        for query in cfg.pdbQueries:
            res = PDB.execute_advanced_query(log, cfg.pdbAdvancedSearchURL,
                                             query)
            resList.append(res.split('\n'))
        log.info('Intersecting results...')
        # SECURITY(review): eval() of merge_results() output is code
        # execution if that string is ever externally influenced —
        # prefer ast.literal_eval or a direct set intersection.
        final_list = list(eval(PDB.merge_results(resList)))
        # Comprehension instead of filter() so the return value is
        # always a concrete list, on Python 3 too.
        final_list = [pdb_id for pdb_id in final_list if len(pdb_id)]
    except Exception:
        log.error(traceback.format_exc())
    return final_list
def retrieve_ligands(cfg, log, db):
    """Fetch ligand metadata for each candidate structure and store it.

    The upstream service returns one ligand as a dict and several as a
    list; both shapes are normalised so a single insert path handles them.
    Rolls back on failure.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateLigands)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for strut in structs:
            json_string = PDB.get_ligands(cfg, strut[0])
            if (json_string and json_string['structureId']
                    and json_string['structureId']['ligandInfo']):
                ligands = json_string['structureId']['ligandInfo']['ligand']
                if isinstance(ligands, dict):
                    ligands = [ligands]
                for item in ligands:
                    db.executeCommand(
                        cfg.sqlInsertLigand,
                        (strut[0], item['@chemicalID'],
                         item['chemicalName'], item['@type'],
                         item['formula'], item['@molecularWeight']))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
def relate_structures(cfg, log, db, pm):
    """Relate candidate structures to originating and related PubMed articles.

    First copies the training set into the literature set, then walks the
    candidates; any candidate whose PDB header has a PubMed id gets an
    'Originator' relation plus one 'Related' relation per linked article.

    Raises:
        Exception: if the relation table could not be built (after rollback).
    """
    try:
        log.info('Merging Training and Result Sets...')
        rows = db.getData(cfg.sqlCopyTrainingSetIntoLiterature).fetchall()
        for row in rows:
            article = pm.get_pubmed_article(row['pubmed_id'])
            pm.save_pubmed_article(row['pubmed_id'], article, 'Literature')
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateRel)
        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()
        for addFile in candidates:
            key = addFile['pdbID']
            f = PDB.parse_prody(cfg, key)
            if f.journal.pmid != "":
                db.executeCommand(
                    cfg.sqlInsertRel, (key, f.journal.pmid, 'Originator'))
                log.info('Opening article to find references to structures...')
                fileName = pm.get_related_pubmed_articles(f.journal.pmid)
                for item in fileName[0]['LinkSetDb'][0]['Link']:
                    db.executeCommand(
                        cfg.sqlInsertRel, (key, item['Id'], 'Related'))
        db.commit()
    except Exception:
        db.rollback()
        log.error(traceback.format_exc())
        raise Exception('Training Set was not built.')
def retrieve_go_terms(cfg, log, db):
    """Fetch GO term annotations for each candidate and insert them.

    Normalises the single-term (dict) and multi-term (list) response
    shapes into one insert path.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateGoTerms)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for strut in structs:
            log.info('Getting GO terms for structure %s' % strut[0])
            json_string = PDB.get_go_terms(cfg, strut[0])
            if (json_string and json_string['goTerms']
                    and json_string['goTerms']['term']):
                terms = json_string['goTerms']['term']
                if isinstance(terms, dict):
                    terms = [terms]
                for item in terms:
                    detail = item['detail']
                    # FIX: the synonyms check previously looked at the term
                    # container (or the whole list) rather than
                    # item['detail'], storing synonyms incorrectly or
                    # raising KeyError. detail.get() handles both cases.
                    db.executeCommand(
                        cfg.sqlInsertGoTerms,
                        (strut[0], item['@chainId'], item['@id'],
                         detail['@name'], detail['@definition'],
                         detail.get('@synonyms'), detail['@ontology']))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def retrieve_ligands(cfg, log, db):
    """Fetch and store ligand metadata for every candidate structure.

    One ligand arrives as a dict, several as a list; both are normalised
    to a list so the insert logic is written once. Rolls back on failure.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateLigands)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for strut in structs:
            json_string = PDB.get_ligands(cfg, strut['pdbID'])
            if (json_string and json_string['structureId']
                    and json_string['structureId']['ligandInfo']):
                ligands = json_string['structureId']['ligandInfo']['ligand']
                if isinstance(ligands, dict):
                    ligands = [ligands]
                for item in ligands:
                    db.executeCommand(
                        cfg.sqlInsertLigand,
                        (strut['pdbID'], item['@chemicalID'],
                         item['chemicalName'], item['@type'],
                         item['formula'], item['@molecularWeight']))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
def retrieve_pathways(cfg, log, db):
    """Scrape KEGG pathway links per candidate and store them.

    NOTE(review): this revision calls PDB.get_pathways_info with only the
    EC number while the sibling revision passes (cfg, log, id) — confirm
    which signature the current PDB module expects.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncatePathway)
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            log.info('Finding pathway data for structure %s.' % item['pdbID'])
            # The original inner try/except:raise was a no-op wrapper —
            # exceptions reach the outer handler either way.
            html = PDB.get_pathways_info(item['pdbECnumber'])
            soup = BeautifulSoup(html)
            # link.attrs as a list of (name, value) tuples is the legacy
            # BeautifulSoup 3 API.
            for link in soup.findAll('a'):
                if ('href' in link.attrs[0]
                        and 'show_pathway' in link.attrs[0][1]):
                    db.executeCommand(
                        cfg.sqlInsertPathway,
                        (item['pdbID'], cfg.keggRootURL + link.attrs[0][1],
                         link.contents[0]))
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
def build_training_set(cfg, log, db, pm):
    """Download the reference PubMed article for each imported candidate.

    Each distinct PubMed id is fetched once and saved under the
    'Training' set label.

    Raises:
        Exception: if the training set could not be built (after rollback).
    """
    try:
        # A set gives O(1) de-duplication instead of O(n) list scans.
        seen_pmids = set()
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateTrainingSet)
        log.info('Getting Reference articles...')
        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()
        for addFile in candidates:
            key = addFile[0]
            f = PDB.parse_prody(cfg, key)
            pmid = f.journal.pmid
            if f.status == 'Imported' and pmid != "" and pmid not in seen_pmids:
                article = pm.get_pubmed_article(pmid)
                pm.save_pubmed_article(pmid, article, 'Training')
                seen_pmids.add(pmid)
        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
        raise Exception('Training Set was not built.')
try: listAdded = [] <<<<<<< HEAD ======= >>>>>>> origin/master if cfg.FullReload: db.executeCommand(cfg.sqlTruncateTrainingSet) log.info('Getting Reference articles...') candidates = db.getData(cfg.sqlSelectCandidates).fetchall() for addFile in candidates: key = addFile['pdbID'] f = PDB.parse_prody(cfg,key) if (f.journal.pmid not in listAdded) and f.journal.pmid != "": article = pm.get_pubmed_article(f.journal.pmid) pm.save_pubmed_article(f.journal.pmid,article,'Training') listAdded.append(f.journal.pmid) db.commit() except: log.error(traceback.format_exc()) db.rollback() raise Exception('Training Set was not built.') def search_literature(cfg,log,db,pm): try: if cfg.FullReload: db.executeCommand(cfg.sqlTruncateLiterature)