def insert_referencedbentity(nex_session, pmid, source_id, record):
    """Create a Referencedbentity row from a parsed PubMed record.

    Args:
        nex_session: database session used to add and flush the new row.
        pmid: PubMed ID stored on the new reference.
        source_id: source identifier stored on the new reference and
            forwarded to ``insert_abstract``.
        record: dict-like PubMed record; the Medline-style keys
            'DP', 'TI', 'AU', 'VI', 'IP', 'PG' and 'PMC' are read here.

    Returns:
        A list ``[dbentity_id, authors, doi_url, pmc_url]`` where
        ``dbentity_id`` is the id of the newly flushed row.
    """
    pubstatus, date_revised = get_pubstatus_date_revised(record)
    journal_id, journal, journal_title, issn_print = get_journal_id(nex_session, record)

    pubdate = record.get('DP', '')
    # The year is the first space-separated token of the publication date.
    year = pubdate.split(' ')[0]
    title = record.get('TI', '')
    authors = record.get('AU', [])
    volume = record.get('VI', '')
    issue = record.get('IP', '')
    pages = record.get('PG', '')
    citation = set_cite(title, authors, year, journal, volume, issue, pages)

    doi, doi_url = get_doi(record)
    pmcid = record.get('PMC', '')
    pmc_url = pmc_root + pmcid + '/' if pmcid else ''

    # Module-level defaults are used unless the record is flagged as
    # ahead-of-print, in which case the epub variants are stored instead.
    publication_status = status
    fulltext_status = pdf_status
    if pubstatus == 'aheadofprint':
        publication_status = epub_status
        fulltext_status = epub_pdf_status

    x = Referencedbentity(display_name = citation.split(')')[0] + ')',
                          source_id = source_id,
                          subclass = 'REFERENCE',
                          dbentity_status = 'Active',
                          method_obtained = 'Curator triage',
                          publication_status = publication_status,
                          fulltext_status = fulltext_status,
                          citation = citation,
                          year = year,
                          pmid = pmid,
                          pmcid = pmcid,
                          date_published = pubdate,
                          date_revised = date_revised,
                          issue = issue,
                          # Store the page range; this used to be passed the
                          # year by mistake.
                          page = pages,
                          volume = volume,
                          title = title,
                          doi = doi,
                          journal_id = journal_id,
                          created_by = CREATED_BY)
    nex_session.add(x)
    # Flush and refresh so the database-assigned dbentity_id is available.
    nex_session.flush()
    nex_session.refresh(x)
    dbentity_id = x.dbentity_id

    ## insert into REFERENCEDOCUMENT
    insert_abstract(nex_session, pmid, dbentity_id, record, source_id, journal, journal_title, issn_print)

    return [dbentity_id, authors, doi_url, pmc_url]
def update_reference(nex_session, fw, pmid, record, x, journal_id_to_abbrev, source_id, date_revised, published_status): journal = journal_id_to_abbrev[x.journal_id] authors = record.get('AU', []) title = record.get('TI', '') pubdate = record.get('DP', '') # 'PubDate': '2012 Mar 20' year = int(pubdate.split(' ')[0]) if year is None: year = x.year volume = record.get('VI', '') issue = record.get('IP', '') page = record.get('PG', '') citation = set_cite(title, authors, year, journal, volume, issue, page) doi, doi_url = get_doi(record) pmcid = record.get('PMC', '') update_str = "" ### update reference table if published_status: x.publication_status = published_status print "UPDATE:", pmid, "publication_status=", published_status if citation != x.citation: x.citation = citation print "UPDATE:", pmid, "citation=", citation if title != x.title: x.title = title print "UPDATE:", pmid, "title=", title if year != x.year: x.year = year print "UPDATE:", pmid, "year=", year if volume != x.volume: x.volume = volume print "UPDATE:", pmid, "volume=", volume if issue != issue: x.issue = issue print "UPDATE:", pmid, "issue=", issue if page != page: x.page = page print "UPDATE:", pmid, "page=", page if doi and doi != x.doi: x.doi = doi print "UPDATE:", pmid, "doi=", doi if pmcid and pmcid != x.pmcid and pmcid != "PMC4502675": x.pmcid = pmcid print "UPDATE:", pmid, "pmcid=", pmcid if date_revised: x.date_revised = date_revised print "UPDATE:", pmid, "date_revised=", date_revised nex_session.add(x) nex_session.commit()
def update_reference(nex_session, fw, pmid, record, x, reference_id_to_authors, journal_id_to_abbrev, source_id, date_revised, published_status): journal = journal_id_to_abbrev[x.journal_id] authors = record.get('authors', []) title = record.get('title', '') year = record.get('year') if year is None: year = x.year else: year = int(year) volume = record.get('volume', '') issue = record.get('issue', '') page = record.get('page', '') citation = set_cite(title, authors, year, journal, volume, issue, page) doi = record.get('doi', '') pmcid = record.get('pmc', '') ### update author table update_authors(nex_session, fw, pmid, x.dbentity_id, authors, reference_id_to_authors.get(x.dbentity_id), source_id) update_str = "" ### update reference table has_update = 0 if published_status: x.publication_status = published_status print "UPDATE:", pmid, "publication_status=", published_status has_update = 1 if citation != x.citation: x.citation = citation print "UPDATE:", pmid, "citation=", citation has_update = 1 if title != x.title: x.title = title print "UPDATE:", pmid, "title=", title has_update = 1 if year != x.year: x.year = year print "UPDATE:", pmid, "year=", year has_update = 1 if volume != x.volume: x.volume = volume print "UPDATE:", pmid, "volume=", volume has_update = 1 if issue != x.issue: x.issue = issue print "UPDATE:", pmid, "issue=", issue has_update = 1 if page != x.page: x.page = page print "UPDATE:", pmid, "page=", page has_update = 1 if doi and doi != x.doi: x.doi = doi print "UPDATE:", pmid, "doi=", doi has_update = 1 if pmcid and pmcid != x.pmcid and pmcid != "PMC4502675": x.pmcid = pmcid print "UPDATE:", pmid, "pmcid=", pmcid has_update = 1 if date_revised: date_revised_db = None if x.date_revised: date_revised_db = str(x.date_revised).split(' ')[0] if date_revised_db is None or date_revised != date_revised_db: x.date_revised = date_revised print "UPDATE:", pmid, "date_revised=", date_revised has_update = 1 if has_update == 1: nex_session.add(x) 
nex_session.commit() else: print pmid, "No change"
def load_references(log_file): nex_session = get_dev_session() pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()]) pmid_to_curation_id = dict([(x.pmid, x.curation_id) for x in nex_session.query(Referencetriage).all()]) pmid_to_refdeleted_id = dict([(x.pmid, x.referencedeleted_id) for x in nex_session.query(Referencedeleted).all()]) gene_list = [] all_loci = nex_session.query(Locusdbentity).all() for x in all_loci: if len(x.systematic_name) > 12 or len(x.systematic_name) < 4: continue gene_list.append(str(x.systematic_name.upper())) if x.gene_name and x.gene_name != x.systematic_name: gene_list.append(str(x.gene_name.upper())) alias_to_name = {} for x in nex_session.query(LocusAlias).all(): if x.alias_type not in ['Uniform', 'Non-uniform']: continue if len(x.display_name) < 4: continue name = x.locus.gene_name if x.locus.gene_name else x.locus.systematic_name alias_to_name[x.display_name] = name fw = open(log_file,"w") fw.write(str(datetime.now()) + "\n") fw.write("Getting PMID list...\n") print datetime.now() print "Getting PMID list..." pmid_list = get_pmid_list(TERMS, RETMAX, DAY) pmids = [] for pmid in pmid_list: if int(pmid) in pmid_to_reference_id: continue if int(pmid) in pmid_to_curation_id: continue if int(pmid) in pmid_to_refdeleted_id: continue pmids.append(pmid) if len(pmids) == 0: fw.write("No new papers\n") print "No new papers" return fw.write(str(datetime.now()) + "\n") fw.write("Getting Pubmed records...\n") print datetime.now() print "Getting Pubmed records and inserting references..." 
records = get_pubmed_record(','.join(pmids)) i = 1 for rec in records: rec_file = StringIO(rec) record = Medline.read(rec_file) pmid = record.get('PMID') pubmed_url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + str(pmid) doi_url = "" if record.get('AID'): # ['S0167-7012(17)30042-8 [pii]', '10.1016/j.mimet.2017.02.002 [doi]'] doi = None for id in record['AID']: if id.endswith('[doi]'): doi = id.replace(' [doi]', '') break if doi: doi_url = "/".join(['http://dx.doi.org', doi]) title = record.get('TI', '') authors = record.get('AU', []) pubdate = record.get('DP', '') # 'PubDate': '2012 Mar 20' year = pubdate.split(' ')[0] journal = record.get('TA', '') volume = record.get('VI', '') issue = record.get('IP', '') pages = record.get('PG', '') citation = set_cite(title, authors, year, journal, volume, issue, pages) # print "CITE=", citation # print "URL=", doi_url abstract = record.get('AB', '') gene_names = extract_gene_names(abstract, gene_list, alias_to_name) # print "gene_name=", gene_names, "\n" # print abstract, "\n" insert_reference(nex_session, fw, pmid, citation, doi_url, abstract, "| ".join(gene_names)) fw.close() print "Done!"