예제 #1
0
def insert_referencedbentity(nex_session, pmid, source_id, record):
    """Create a Referencedbentity row from ``record`` and store its abstract.

    The bibliographic fields are read from ``record`` with ``.get`` using the
    keys 'DP', 'TI', 'AU', 'VI', 'IP', 'PG' and 'PMC' (presumably a parsed
    Medline/PubMed record -- confirm against the caller).

    Args:
        nex_session: database session used to add, flush and refresh the row.
        pmid: PubMed ID stored on the new row.
        source_id: source identifier stored on the new row and forwarded to
            ``insert_abstract``.
        record: mapping holding the fields listed above.

    Returns:
        The list ``[dbentity_id, authors, doi_url, pmc_url]`` where
        ``dbentity_id`` is the id of the newly inserted row.
    """
    pubstatus, date_revised = get_pubstatus_date_revised(record)
    journal_id, journal, journal_title, issn_print = get_journal_id(nex_session, record)
    pubdate = record.get('DP', '')
    # The year is the first space-separated token of the publication date.
    year = pubdate.split(' ')[0]
    title = record.get('TI', '')
    authors = record.get('AU', [])
    volume = record.get('VI', '')
    issue = record.get('IP', '')
    pages = record.get('PG', '')
    citation = set_cite(title, authors, year, journal, volume, issue, pages)
    doi, doi_url = get_doi(record)
    pmcid = record.get('PMC', '')
    pmc_url = pmc_root + pmcid + '/' if pmcid else ''

    # Records flagged 'aheadofprint' get the epub statuses; everything else
    # gets the default publication / full-text statuses.
    publication_status = status
    fulltext_status = pdf_status
    if pubstatus == 'aheadofprint':
        publication_status = epub_status
        fulltext_status = epub_pdf_status

    # display_name is the citation text up to and including its first ')'.
    x = Referencedbentity(display_name = citation.split(')')[0] + ')',
                          source_id = source_id,
                          subclass = 'REFERENCE',
                          dbentity_status = 'Active',
                          method_obtained = 'Curator triage',
                          publication_status = publication_status,
                          fulltext_status = fulltext_status,
                          citation = citation,
                          year = year,
                          pmid = pmid,
                          pmcid = pmcid,
                          date_published = pubdate,
                          date_revised = date_revised,
                          issue = issue,
                          # Store the page range, not the publication year.
                          page = pages,
                          volume = volume,
                          title = title,
                          doi = doi,
                          journal_id = journal_id,
                          created_by = CREATED_BY)

    # Flush and refresh so the database-assigned dbentity_id can be read back.
    nex_session.add(x)
    nex_session.flush()
    nex_session.refresh(x)
    dbentity_id = x.dbentity_id

    ## insert into REFERENCEDOCUMENT
    insert_abstract(nex_session, pmid, dbentity_id, record,
                    source_id, journal, journal_title, issn_print)

    return [dbentity_id, authors, doi_url, pmc_url]
def update_reference(nex_session, fw, pmid, record, x, journal_id_to_abbrev, source_id, date_revised, published_status):

    journal = journal_id_to_abbrev[x.journal_id]
    authors = record.get('AU', [])
    title = record.get('TI', '')
    pubdate = record.get('DP', '')  # 'PubDate': '2012 Mar 20'  
    year = int(pubdate.split(' ')[0])
    if year is None:
        year = x.year
    volume = record.get('VI', '')
    issue = record.get('IP', '')
    page = record.get('PG', '')
    citation = set_cite(title, authors, year, journal, volume, issue, page)    
    doi, doi_url = get_doi(record)
    pmcid = record.get('PMC', '')

    update_str = ""
    ### update reference table
    if published_status:
        x.publication_status = published_status
        print "UPDATE:", pmid, "publication_status=", published_status
    if citation != x.citation:
        x.citation = citation
        print "UPDATE:", pmid, "citation=", citation
    if title != x.title:
        x.title = title
        print "UPDATE:", pmid, "title=", title
    if year != x.year:
        x.year = year
        print "UPDATE:", pmid, "year=", year
    if volume != x.volume:
        x.volume = volume
        print "UPDATE:", pmid, "volume=", volume
    if issue != issue:
        x.issue = issue
        print "UPDATE:", pmid, "issue=", issue
    if page != page:
        x.page = page
        print "UPDATE:", pmid, "page=", page
    if doi and doi != x.doi:
        x.doi = doi
        print "UPDATE:", pmid, "doi=", doi
    if pmcid and pmcid != x.pmcid and pmcid != "PMC4502675":
        x.pmcid = pmcid
        print "UPDATE:", pmid, "pmcid=", pmcid
    if date_revised:
        x.date_revised = date_revised
        print "UPDATE:", pmid, "date_revised=", date_revised
    nex_session.add(x)
    nex_session.commit()
def update_reference(nex_session, fw, pmid, record, x, reference_id_to_authors,
                     journal_id_to_abbrev, source_id, date_revised,
                     published_status):

    journal = journal_id_to_abbrev[x.journal_id]
    authors = record.get('authors', [])
    title = record.get('title', '')
    year = record.get('year')
    if year is None:
        year = x.year
    else:
        year = int(year)
    volume = record.get('volume', '')
    issue = record.get('issue', '')
    page = record.get('page', '')
    citation = set_cite(title, authors, year, journal, volume, issue, page)
    doi = record.get('doi', '')
    pmcid = record.get('pmc', '')

    ### update author table
    update_authors(nex_session, fw, pmid, x.dbentity_id, authors,
                   reference_id_to_authors.get(x.dbentity_id), source_id)

    update_str = ""
    ### update reference table
    has_update = 0
    if published_status:
        x.publication_status = published_status
        print "UPDATE:", pmid, "publication_status=", published_status
        has_update = 1
    if citation != x.citation:
        x.citation = citation
        print "UPDATE:", pmid, "citation=", citation
        has_update = 1
    if title != x.title:
        x.title = title
        print "UPDATE:", pmid, "title=", title
        has_update = 1
    if year != x.year:
        x.year = year
        print "UPDATE:", pmid, "year=", year
        has_update = 1
    if volume != x.volume:
        x.volume = volume
        print "UPDATE:", pmid, "volume=", volume
        has_update = 1
    if issue != x.issue:
        x.issue = issue
        print "UPDATE:", pmid, "issue=", issue
        has_update = 1
    if page != x.page:
        x.page = page
        print "UPDATE:", pmid, "page=", page
        has_update = 1
    if doi and doi != x.doi:
        x.doi = doi
        print "UPDATE:", pmid, "doi=", doi
        has_update = 1
    if pmcid and pmcid != x.pmcid and pmcid != "PMC4502675":
        x.pmcid = pmcid
        print "UPDATE:", pmid, "pmcid=", pmcid
        has_update = 1
    if date_revised:
        date_revised_db = None
        if x.date_revised:
            date_revised_db = str(x.date_revised).split(' ')[0]
        if date_revised_db is None or date_revised != date_revised_db:
            x.date_revised = date_revised
            print "UPDATE:", pmid, "date_revised=", date_revised
            has_update = 1

    if has_update == 1:
        nex_session.add(x)
        nex_session.commit()
    else:
        print pmid, "No change"
예제 #4
0
def load_references(log_file):
 
    nex_session = get_dev_session()

    pmid_to_reference_id =  dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])
    pmid_to_curation_id =  dict([(x.pmid, x.curation_id) for x in nex_session.query(Referencetriage).all()])
    pmid_to_refdeleted_id = dict([(x.pmid, x.referencedeleted_id) for x in nex_session.query(Referencedeleted).all()])

    gene_list = []
    all_loci = nex_session.query(Locusdbentity).all()
    for x in all_loci:
        if len(x.systematic_name) > 12 or len(x.systematic_name) < 4:
            continue
        gene_list.append(str(x.systematic_name.upper()))
        if x.gene_name and x.gene_name != x.systematic_name:
            gene_list.append(str(x.gene_name.upper()))
    
    alias_to_name = {}
    for x in nex_session.query(LocusAlias).all():
        if x.alias_type not in ['Uniform', 'Non-uniform']:
            continue
        if len(x.display_name) < 4:
            continue
        name = x.locus.gene_name if x.locus.gene_name else x.locus.systematic_name 
        alias_to_name[x.display_name] = name
        
    fw = open(log_file,"w")

    fw.write(str(datetime.now()) + "\n")
    fw.write("Getting PMID list...\n")
    
    print datetime.now()
    print "Getting PMID list..."

    pmid_list = get_pmid_list(TERMS, RETMAX, DAY)

    pmids = []
    for pmid in pmid_list:
        if int(pmid) in pmid_to_reference_id:
            continue
        if int(pmid) in pmid_to_curation_id:
            continue
        if int(pmid) in pmid_to_refdeleted_id:
            continue
        pmids.append(pmid)

    if len(pmids) == 0:
        fw.write("No new papers\n")
        print "No new papers"
        return

    fw.write(str(datetime.now()) + "\n")
    fw.write("Getting Pubmed records...\n")
    
    print datetime.now()
    print "Getting Pubmed records and inserting references..."

    records = get_pubmed_record(','.join(pmids))

    i = 1
    for rec in records:
        rec_file = StringIO(rec)
        record = Medline.read(rec_file)
        pmid = record.get('PMID')
        pubmed_url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + str(pmid)
        doi_url = ""
        if record.get('AID'):
            # ['S0167-7012(17)30042-8 [pii]', '10.1016/j.mimet.2017.02.002 [doi]']
            doi = None
            for id in record['AID']:
                if id.endswith('[doi]'):
                    doi = id.replace(' [doi]', '')
                    break
            if doi:
                doi_url = "/".join(['http://dx.doi.org', doi])
        title = record.get('TI', '')
        authors = record.get('AU', [])
        pubdate = record.get('DP', '')  # 'PubDate': '2012 Mar 20'  
        year = pubdate.split(' ')[0]
        journal = record.get('TA', '')
        volume = record.get('VI', '')
        issue = record.get('IP', '')
        pages = record.get('PG', '')
                
        citation = set_cite(title, authors, year, journal, volume, issue, pages)  

        # print "CITE=", citation
        # print "URL=", doi_url

        abstract = record.get('AB', '')

        gene_names = extract_gene_names(abstract, gene_list, alias_to_name)
        
        # print "gene_name=", gene_names, "\n"
        # print abstract, "\n"
    
        insert_reference(nex_session, fw, pmid, citation, doi_url, abstract, "| ".join(gene_names))

    fw.close()

    print "Done!"