Beispiel #1
0
def save_structures(cfg, log, db, listItem):
    """Fetch, parse and store every candidate structure named in ``listItem``.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateCandidate`` is executed
    first.  For each item for which ``PDB.get_file`` returns a truthy value,
    the header returned by ``PDB.parse_header`` is inserted with
    ``cfg.sqlInsertCandidate`` and the transaction is committed right away,
    i.e. one commit per stored structure.

    Optional fields of the first compound/source entry (chain, EC number,
    organism and expression-system data) are stored as ``''`` when the header
    does not contain them.

    Any ``Exception`` is logged with its traceback and followed by
    ``db.rollback()``; it is not re-raised.  Interpreter-level signals such
    as ``KeyboardInterrupt`` are no longer swallowed.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateCandidate)

        for item in listItem:
            log.info('Saving Candidate: %s...' % item)
            if not PDB.get_file(cfg, log, item):
                continue

            pdb = PDB.parse_header(cfg, item)
            # Only the entry keyed '1' of each section is persisted.
            compound = pdb['compound']['1']
            source = pdb['source']['1']

            # NOTE(review): these values are escaped here and also handed over
            # as query parameters -- confirm executeCommand does not escape
            # them a second time.
            db.executeCommand(
                cfg.sqlInsertCandidate,
                (item,
                 MySQLdb.escape_string(PDB.get_content()),
                 MySQLdb.escape_string(pdb['name']),
                 MySQLdb.escape_string(pdb['author']),
                 pdb['deposition_date'],
                 pdb['release_date'],
                 '0',  # constant placeholder; presumably a version column -- TODO confirm
                 pdb['resolution'],
                 pdb['head'],
                 pdb['structure_method'],
                 compound.get('chain', ''),
                 compound.get('ec_number', ''),
                 source.get('organism_taxid', ''),
                 source.get('organism_scientific', ''),
                 source.get('expression_system_taxid', ''),
                 source.get('expression_system', '')))

            db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
Beispiel #2
0
def save_structures(cfg, log, db, listItem):
    """Fetch, parse and store every candidate structure named in ``listItem``.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateCandidate`` is executed
    first.  For each item for which ``PDB.get_file`` returns a truthy value,
    the header returned by ``PDB.parse_header`` is inserted with
    ``cfg.sqlInsertCandidate``.  A single ``db.commit()`` is issued after the
    whole list has been processed.

    Optional fields of the first compound/source entry (chain, EC number,
    organism and expression-system data) are stored as ``''`` when the header
    lacks them, so one sparse header no longer aborts the whole batch with a
    ``KeyError``.

    Any ``Exception`` is logged with its traceback and followed by
    ``db.rollback()``; it is not re-raised.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateCandidate)

        for item in listItem:
            log.info('Saving Candidate: %s...' % item)
            if not PDB.get_file(cfg, log, item):
                continue

            pdb = PDB.parse_header(cfg, item)
            # Only the entry keyed '1' of each section is persisted.
            compound = pdb['compound']['1']
            source = pdb['source']['1']

            # NOTE(review): these values are escaped here and also handed over
            # as query parameters -- confirm executeCommand does not escape
            # them a second time.
            db.executeCommand(
                cfg.sqlInsertCandidate,
                (item,
                 MySQLdb.escape_string(PDB.get_content()),
                 MySQLdb.escape_string(pdb['name']),
                 MySQLdb.escape_string(pdb['author']),
                 pdb['deposition_date'],
                 pdb['release_date'],
                 pdb['version'],
                 pdb['resolution'],
                 pdb['head'],
                 pdb['structure_method'],
                 compound.get('chain', ''),
                 compound.get('ec_number', ''),
                 source.get('organism_taxid', ''),
                 source.get('organism_scientific', ''),
                 source.get('expression_system_taxid', ''),
                 source.get('expression_system', '')))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
Beispiel #3
0
def retrieve_structures(cfg, log):
    """Run every query in ``cfg.pdbQueries`` and return the merged result.

    Each query is sent through ``PDB.execute_advanced_query`` against
    ``cfg.pdbAdvancedSearchURL``; every response is split into lines.  The
    per-query line lists are passed to ``PDB.merge_results``, whose return
    value is evaluated, converted to a list and stripped of empty entries.

    Returns:
        A list of the non-empty entries, or ``None`` when any step raised an
        ``Exception`` (the traceback is logged, nothing is re-raised).
    """
    final_list = None
    try:
        resList = [
            PDB.execute_advanced_query(
                log, cfg.pdbAdvancedSearchURL, item).split('\n')
            for item in cfg.pdbQueries
        ]

        log.info('Intersecting results...')
        # SECURITY: eval() executes whatever expression merge_results builds.
        # That expression is assembled from remote query responses, so this is
        # only safe if merge_results fully controls its output -- review
        # before exposing this to untrusted endpoints.
        final_list = list(eval(PDB.merge_results(resList)))
        # Build a real list: filter() would return a lazy iterator on Python 3.
        final_list = [entry for entry in final_list if len(entry)]
    except Exception:
        log.error(traceback.format_exc())

    return final_list
Beispiel #4
0
def retrieve_genbank_info(cfg, log, db):
    """Update every candidate structure with data parsed from GenBank XML.

    The candidates come from ``cfg.sqlSelectCandidates``.  For each one the
    document returned by ``PDB.get_genbank_info`` is parsed and the first
    sequence, taxonomy and length elements plus the ``gi|`` identifier (the
    last matching ``GBSeqid`` wins) are written with ``cfg.sqlUpdateGenBank``.
    The loop sleeps two seconds after each successfully handled or skipped
    structure; the commit happens once after the loop.

    Error handling:
        * If fetching fails (``html`` is still ``None``) the structure is
          skipped with an info message.
        * If a document was fetched but processing it failed, the exception
          is re-raised, which stops the loop; the outer handler logs the
          traceback and the function returns without committing.
    """
    try:
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            html = None
            try:
                html = PDB.get_genbank_info(cfg, log, item['pdbID'])
                tree = ElementTree.fromstring(html)
                gb_sequence = tree.findall('.//GBSeq/GBSeq_sequence')[0].text
                gb_taxonomy = tree.findall('.//GBSeq/GBSeq_taxonomy')[0].text
                gb_seq_length = tree.findall('.//GBSeq/GBSeq_length')[0].text
                gb_seqids = tree.findall('.//GBSeq/GBSeq_other-seqids/GBSeqid')
                gb_gi = None
                for node in gb_seqids:
                    # An element without text has ``text is None``; skip it
                    # instead of failing the membership test with TypeError.
                    if node.text and 'gi|' in node.text:
                        gb_gi = node.text.replace('gi|', '')

                db.executeCommand(
                    cfg.sqlUpdateGenBank,
                    (gb_taxonomy, gb_sequence, gb_seq_length, gb_gi,
                     item['pdbID']))
                log.info('Updating information with GenBank data for structure  %s.' % item['pdbID'])
            except Exception:
                log.info('No information found on GenBank for structure  %s.' % item['pdbID'])
                # A fetched-but-unusable document is treated as a real error.
                if html is not None:
                    raise
            time.sleep(2)

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #5
0
def retrieve_go_terms(cfg, log, db):
    """Store the GO terms of every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateGoTerms`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` the mapping returned
    by ``PDB.get_go_terms`` is inspected; ``['goTerms']['term']`` may be a
    single mapping or a sequence of mappings, and one row per term is
    inserted with ``cfg.sqlInsertGoTerms``.  The commit happens once at the
    end.

    The synonyms value is read from the term's ``detail`` mapping, the same
    place the name, definition and ontology come from, and is stored as
    ``None`` when absent.

    Any ``Exception`` is logged with its traceback and not re-raised.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateGoTerms)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()

        for strut in structs:
            pdb_id = strut['pdbID']
            log.info('Getting GO terms for structure %s' % pdb_id)
            json_string = PDB.get_go_terms(cfg, pdb_id)
            if not (json_string and json_string['goTerms']
                    and json_string['goTerms']['term']):
                continue

            terms = json_string['goTerms']['term']
            # A lone term arrives as a mapping rather than a list of mappings;
            # isinstance also accepts dict subclasses such as OrderedDict.
            if isinstance(terms, dict):
                terms = [terms]

            for item in terms:
                detail = item['detail']
                db.executeCommand(
                    cfg.sqlInsertGoTerms,
                    (pdb_id, item['@chainId'], item['@id'],
                     detail['@name'], detail['@definition'],
                     detail.get('@synonyms'), detail['@ontology']))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #6
0
def relate_structures(cfg, log, db, pm):
    """Link each candidate structure to its originating and related articles.

    Steps, in order:
      1. Every row of ``cfg.sqlCopyTrainingSetIntoLiterature`` is fetched via
         ``pm.get_pubmed_article`` and saved with the tag ``'Literature'``.
      2. If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateRel`` is executed.
      3. For every candidate whose parsed structure has a non-empty
         ``journal.pmid``, an ``'Originator'`` relation is inserted, followed
         by one ``'Related'`` relation per link reported by
         ``pm.get_related_pubmed_articles``.
      4. The transaction is committed.

    A structure whose article has no link set is kept with only its
    ``'Originator'`` relation instead of failing the whole run.

    Raises:
        Exception: with the message ``'Training Set was not built.'`` after a
            rollback and a logged traceback, whenever any step fails.
            NOTE(review): the message text looks copied from the training-set
            builder; it is left unchanged in case callers rely on it.
    """
    try:
        log.info('Merging Training and Result Sets...')
        rows = db.getData(cfg.sqlCopyTrainingSetIntoLiterature).fetchall()
        for row in rows:
            article = pm.get_pubmed_article(row['pubmed_id'])
            pm.save_pubmed_article(row['pubmed_id'], article, 'Literature')

        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateRel)

        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()

        for addFile in candidates:
            pdb_id = addFile['pdbID']
            pmid = PDB.parse_prody(cfg, pdb_id).journal.pmid
            if pmid == "":
                continue

            db.executeCommand(cfg.sqlInsertRel, (pdb_id, pmid, 'Originator'))
            log.info('Opening article to find references to structures...')
            related = pm.get_related_pubmed_articles(pmid)
            link_sets = related[0]['LinkSetDb']
            # No link set means no related articles; indexing [0] would raise
            # IndexError and roll back everything gathered so far.
            if not link_sets:
                continue
            for item in link_sets[0]['Link']:
                db.executeCommand(
                    cfg.sqlInsertRel, (pdb_id, item['Id'], 'Related'))

        db.commit()
    except Exception:
        db.rollback()
        log.error(traceback.format_exc())
        raise Exception('Training Set was not built.')
Beispiel #7
0
def retrieve_pathways(cfg, log, db):
    """Store pathway links found for every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncatePathway`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` the page returned by
    ``PDB.get_pathways_info(cfg, log, item[1])`` is parsed with
    BeautifulSoup.  Every anchor whose first attribute is ``href`` and whose
    value contains ``'show_pathway'`` is inserted with
    ``cfg.sqlInsertPathway`` as (``item[0]``, ``cfg.keggRootURL`` + href,
    first child of the anchor).  The commit happens once at the end.

    Any ``Exception`` is logged with its traceback and not re-raised; the
    first failure ends the loop and nothing is committed.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncatePathway)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            log.info('Finding pathway data for structure  %s.' % item[0])
            html = PDB.get_pathways_info(cfg, log, item[1])
            soup = BeautifulSoup(html)
            for link in soup.findAll('a'):
                # Assumes attrs is a sequence of (name, value) pairs, as the
                # indexing below implies -- TODO confirm parser version.
                # An anchor without attributes has nothing to inspect.
                if not link.attrs:
                    continue
                first_attr = link.attrs[0]
                if 'href' in first_attr and 'show_pathway' in first_attr[1]:
                    db.executeCommand(
                        cfg.sqlInsertPathway,
                        (item[0], cfg.keggRootURL + first_attr[1],
                         link.contents[0]))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #8
0
def retrieve_genbank_info(cfg, log, db):
    """Update every candidate structure with data parsed from GenBank XML.

    The candidates come from ``cfg.sqlSelectCandidates``; the first column of
    each row is used as the structure id.  For each one the document returned
    by ``PDB.get_genbank_info`` is parsed and the first sequence, taxonomy
    and length elements plus the ``gi|`` identifier (the last matching
    ``GBSeqid`` wins) are written with ``cfg.sqlUpdateGenBank``.  The loop
    sleeps two seconds after every structure and commits once at the end.

    Error handling is best effort: any ``Exception`` while handling a single
    structure is reported with an info message and the loop moves on.  An
    ``Exception`` outside the per-structure handling is logged with its
    traceback and not re-raised.
    """
    try:
        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            pdb_id = item[0]
            try:
                html = PDB.get_genbank_info(cfg, log, pdb_id)
                tree = ElementTree.fromstring(html)
                gb_sequence = tree.findall('.//GBSeq/GBSeq_sequence')[0].text
                gb_taxonomy = tree.findall('.//GBSeq/GBSeq_taxonomy')[0].text
                gb_seq_length = tree.findall('.//GBSeq/GBSeq_length')[0].text
                gb_seqids = tree.findall('.//GBSeq/GBSeq_other-seqids/GBSeqid')
                gb_gi = None
                for node in gb_seqids:
                    # An element without text has ``text is None``; skip it
                    # instead of failing the membership test with TypeError.
                    if node.text and 'gi|' in node.text:
                        gb_gi = node.text.replace('gi|', '')

                db.executeCommand(
                    cfg.sqlUpdateGenBank,
                    (gb_taxonomy, gb_sequence, gb_seq_length, gb_gi, pdb_id))
                log.info(
                    'Updating information with GenBank data for structure  %s.'
                    % pdb_id)
            except Exception:
                # Deliberately not re-raised: one bad or missing record must
                # not stop the remaining structures from being updated.
                log.info('No information found on GenBank for structure  %s.' %
                         pdb_id)
            time.sleep(2)

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #9
0
def retrieve_structures(cfg, log):
    """Run every query in ``cfg.pdbQueries`` and return the merged result.

    Each query is sent through ``PDB.execute_advanced_query`` against
    ``cfg.pdbAdvancedSearchURL``; every response is split into lines.  The
    per-query line lists are passed to ``PDB.merge_results``, whose return
    value is evaluated, converted to a list and stripped of empty entries.

    Returns:
        A list of the non-empty entries, or ``None`` when any step raised an
        ``Exception`` (the traceback is logged, nothing is re-raised).
    """
    final_list = None
    try:
        resList = []
        for query in cfg.pdbQueries:
            response = PDB.execute_advanced_query(
                log, cfg.pdbAdvancedSearchURL, query)
            resList.append(response.split('\n'))

        log.info('Intersecting results...')
        # SECURITY: eval() executes whatever expression merge_results builds.
        # That expression is assembled from remote query responses, so this is
        # only safe if merge_results fully controls its output -- review
        # before exposing this to untrusted endpoints.
        merged = eval(PDB.merge_results(resList))
        # Build a real list: filter() would return a lazy iterator on Python 3.
        final_list = [entry for entry in list(merged) if len(entry)]
    except Exception:
        log.error(traceback.format_exc())

    return final_list
Beispiel #10
0
def retrieve_ligands(cfg, log, db):
    """Store the ligands of every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateLigands`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` (first column is the
    structure id) the mapping returned by ``PDB.get_ligands`` is inspected;
    ``['structureId']['ligandInfo']['ligand']`` may be a single mapping or a
    sequence of mappings, and one row per ligand is inserted with
    ``cfg.sqlInsertLigand``.  The commit happens once at the end.

    Any ``Exception`` is logged with its traceback and followed by
    ``db.rollback()``; it is not re-raised.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateLigands)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()

        for strut in structs:
            pdb_id = strut[0]
            json_string = PDB.get_ligands(cfg, pdb_id)
            if not (json_string and json_string['structureId']
                    and json_string['structureId']['ligandInfo']):
                continue

            ligands = json_string['structureId']['ligandInfo']['ligand']
            # A lone ligand arrives as a mapping rather than a list; use
            # isinstance so dict subclasses (e.g. OrderedDict) are not
            # mistaken for a list and iterated key by key.
            if isinstance(ligands, dict):
                ligands = [ligands]

            for item in ligands:
                db.executeCommand(
                    cfg.sqlInsertLigand,
                    (pdb_id, item['@chemicalID'], item['chemicalName'],
                     item['@type'], item['formula'],
                     item['@molecularWeight']))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
Beispiel #11
0
def relate_structures(cfg, log, db, pm):
    """Link each candidate structure to its originating and related articles.

    Steps, in order:
      1. Every row of ``cfg.sqlCopyTrainingSetIntoLiterature`` is fetched via
         ``pm.get_pubmed_article`` and saved with the tag ``'Literature'``.
      2. If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateRel`` is executed.
      3. For every candidate whose parsed structure has a non-empty
         ``journal.pmid``, an ``'Originator'`` relation is inserted, followed
         by one ``'Related'`` relation per link reported by
         ``pm.get_related_pubmed_articles``.
      4. The transaction is committed.

    A structure whose article has no link set is kept with only its
    ``'Originator'`` relation instead of failing the whole run.

    Raises:
        Exception: with the message ``'Training Set was not built.'`` after a
            rollback and a logged traceback, whenever any step fails.
            NOTE(review): the message text looks copied from the training-set
            builder; it is left unchanged in case callers rely on it.
    """
    try:
        log.info('Merging Training and Result Sets...')
        rows = db.getData(cfg.sqlCopyTrainingSetIntoLiterature).fetchall()
        for row in rows:
            pubmed_id = row['pubmed_id']
            article = pm.get_pubmed_article(pubmed_id)
            pm.save_pubmed_article(pubmed_id, article, 'Literature')

        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateRel)

        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()

        for addFile in candidates:
            pdb_id = addFile['pdbID']
            pmid = PDB.parse_prody(cfg, pdb_id).journal.pmid
            if pmid == "":
                continue

            db.executeCommand(cfg.sqlInsertRel, (pdb_id, pmid, 'Originator'))
            log.info('Opening article to find references to structures...')
            related = pm.get_related_pubmed_articles(pmid)
            link_sets = related[0]['LinkSetDb']
            # No link set means no related articles; indexing [0] would raise
            # IndexError and roll back everything gathered so far.
            if not link_sets:
                continue
            for item in link_sets[0]['Link']:
                db.executeCommand(
                    cfg.sqlInsertRel, (pdb_id, item['Id'], 'Related'))

        db.commit()
    except Exception:
        db.rollback()
        log.error(traceback.format_exc())
        raise Exception('Training Set was not built.')
Beispiel #12
0
def retrieve_go_terms(cfg, log, db):
    """Store the GO terms of every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateGoTerms`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` (first column is the
    structure id) the mapping returned by ``PDB.get_go_terms`` is inspected;
    ``['goTerms']['term']`` may be a single mapping or a sequence of
    mappings, and one row per term is inserted with ``cfg.sqlInsertGoTerms``.
    The commit happens once at the end.

    The synonyms value is read from the term's ``detail`` mapping, the same
    place the name, definition and ontology come from, and is stored as
    ``None`` when absent.

    Any ``Exception`` is logged with its traceback and not re-raised.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateGoTerms)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()

        for strut in structs:
            pdb_id = strut[0]
            log.info('Getting GO terms for structure %s' % pdb_id)
            json_string = PDB.get_go_terms(cfg, pdb_id)
            if not (json_string and json_string['goTerms']
                    and json_string['goTerms']['term']):
                continue

            terms = json_string['goTerms']['term']
            # A lone term arrives as a mapping rather than a list of mappings;
            # isinstance also accepts dict subclasses such as OrderedDict.
            if isinstance(terms, dict):
                terms = [terms]

            for item in terms:
                detail = item['detail']
                db.executeCommand(
                    cfg.sqlInsertGoTerms,
                    (pdb_id, item['@chainId'], item['@id'],
                     detail['@name'],
                     detail['@definition'],
                     detail.get('@synonyms'),
                     detail['@ontology']))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #13
0
def retrieve_ligands(cfg, log, db):
    """Store the ligands of every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateLigands`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` the mapping returned
    by ``PDB.get_ligands`` for the row's ``'pdbID'`` is inspected;
    ``['structureId']['ligandInfo']['ligand']`` may be a single mapping or a
    sequence of mappings, and one row per ligand is inserted with
    ``cfg.sqlInsertLigand``.  The commit happens once at the end.

    Any ``Exception`` is logged with its traceback and followed by
    ``db.rollback()``; it is not re-raised.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateLigands)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()

        for strut in structs:
            pdb_id = strut['pdbID']
            json_string = PDB.get_ligands(cfg, pdb_id)
            if not (json_string and json_string['structureId']
                    and json_string['structureId']['ligandInfo']):
                continue

            ligands = json_string['structureId']['ligandInfo']['ligand']
            # A lone ligand arrives as a mapping rather than a list; use
            # isinstance so dict subclasses (e.g. OrderedDict) are not
            # mistaken for a list and iterated key by key.
            if isinstance(ligands, dict):
                ligands = [ligands]

            for item in ligands:
                db.executeCommand(
                    cfg.sqlInsertLigand,
                    (pdb_id, item['@chemicalID'], item['chemicalName'],
                     item['@type'], item['formula'],
                     item['@molecularWeight']))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
Beispiel #14
0
def retrieve_pathways(cfg, log, db):
    """Store pathway links found for every candidate structure.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncatePathway`` is executed
    first.  For each row of ``cfg.sqlSelectCandidates`` the page returned by
    ``PDB.get_pathways_info`` for the row's ``'pdbECnumber'`` is parsed with
    BeautifulSoup.  Every anchor whose first attribute is ``href`` and whose
    value contains ``'show_pathway'`` is inserted with
    ``cfg.sqlInsertPathway`` as (``'pdbID'``, ``cfg.keggRootURL`` + href,
    first child of the anchor).  The commit happens once at the end.

    Any ``Exception`` is logged with its traceback and not re-raised; the
    first failure ends the loop and nothing is committed.
    """
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncatePathway)

        structs = db.getData(cfg.sqlSelectCandidates).fetchall()
        for item in structs:
            log.info('Finding pathway data for structure  %s.' % item['pdbID'])
            html = PDB.get_pathways_info(item['pdbECnumber'])
            soup = BeautifulSoup(html)
            for link in soup.findAll('a'):
                # Assumes attrs is a sequence of (name, value) pairs, as the
                # indexing below implies -- TODO confirm parser version.
                # An anchor without attributes has nothing to inspect.
                if not link.attrs:
                    continue
                first_attr = link.attrs[0]
                if 'href' in first_attr and 'show_pathway' in first_attr[1]:
                    db.executeCommand(
                        cfg.sqlInsertPathway,
                        (item['pdbID'], cfg.keggRootURL + first_attr[1],
                         link.contents[0]))

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
Beispiel #15
0
def build_training_set(cfg, log, db, pm):
    """Save the reference article of every imported candidate as training data.

    If ``cfg.FullReload`` is truthy, ``cfg.sqlTruncateTrainingSet`` is
    executed first.  Each row of ``cfg.sqlSelectCandidates`` is parsed with
    ``PDB.parse_prody`` (first column is the structure id).  When the parsed
    structure has status ``'Imported'`` and a non-empty ``journal.pmid`` that
    was not handled earlier in this run, the article is fetched with
    ``pm.get_pubmed_article`` and saved with the tag ``'Training'``.  The
    commit happens once at the end.

    Raises:
        Exception: with the message ``'Training Set was not built.'`` after
            the traceback was logged and the transaction rolled back,
            whenever any step fails.
    """
    try:
        # A set gives constant-time "already handled" checks.
        seen_pmids = set()

        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateTrainingSet)

        log.info('Getting Reference articles...')
        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()

        for addFile in candidates:
            f = PDB.parse_prody(cfg, addFile[0])
            if f.status != 'Imported':
                continue
            pmid = f.journal.pmid
            if pmid in seen_pmids or pmid == "":
                continue

            article = pm.get_pubmed_article(pmid)
            pm.save_pubmed_article(pmid, article, 'Training')
            seen_pmids.add(pmid)

        db.commit()
    except Exception:
        log.error(traceback.format_exc())
        db.rollback()
        raise Exception('Training Set was not built.')
Beispiel #16
0
    try:
        listAdded = []
<<<<<<< HEAD

=======
            
>>>>>>> origin/master
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateTrainingSet)

        log.info('Getting Reference articles...')
        candidates = db.getData(cfg.sqlSelectCandidates).fetchall()

        for addFile in candidates:
            key = addFile['pdbID']
            f = PDB.parse_prody(cfg,key)
            if (f.journal.pmid not in listAdded) and f.journal.pmid != "":
                article = pm.get_pubmed_article(f.journal.pmid)
                pm.save_pubmed_article(f.journal.pmid,article,'Training')
                listAdded.append(f.journal.pmid)

        db.commit()
    except:
        log.error(traceback.format_exc())
        db.rollback()
        raise Exception('Training Set was not built.')

def search_literature(cfg,log,db,pm):
    try:
        if cfg.FullReload:
            db.executeCommand(cfg.sqlTruncateLiterature)