Beispiel #1
0
def insert_urls(pmid, reference_id, doi_url, pmc_url, source_id, created_by):
    """Add the external link rows (ReferenceUrl) for one reference.

    A PubMed link built from ``pubmed_root`` and ``pmid`` is always added.
    A 'DOI full text' and a 'PMC full text' link are added only when
    ``doi_url`` / ``pmc_url`` are truthy. The session is flushed afterwards
    and the last added row is refreshed.

    This function does not return anything.
    """
    # (label, url) pairs; the label is used as both display_name and url_type.
    links = [('PubMed', pubmed_root + str(pmid))]
    if doi_url:
        links.append(('DOI full text', doi_url))
    if pmc_url:
        links.append(('PMC full text', pmc_url))

    for label, url in links:
        url_row = ReferenceUrl(display_name=label,
                               obj_url=url,
                               reference_id=reference_id,
                               url_type=label,
                               source_id=source_id,
                               created_by=created_by)
        DBSession.add(url_row)

    DBSession.flush()
    DBSession.refresh(url_row)
Beispiel #2
0
def insert_abstract(pmid, reference_id, record, source_id, journal_abbrev,
                    journal_title, issn_print, created_by):
    """ Add abstract and Medline documents to the Referencedocument table

    When the record carries an abstract ('AB'), two Referencedocument rows
    are added: one of type 'Abstract' (plain text plus the HTML produced by
    ``link_gene_names``) and one of type 'Medline' built from the entries
    returned by ``create_bibentry``. When there is no abstract, nothing is
    inserted at all (neither row).

    Parameters
    ----------
    pmid: int
    reference_id: int
    record: dict
        Medline record; only the 'AB' key is read directly here
    source_id: int
    journal_abbrev: str
    journal_title: str
    issn_print: str
    created_by: str

    Return
    ------
    empty
        does not return anything

    """

    text = record.get('AB', '')

    # Covers both a missing/empty abstract and an explicit None value.
    if not text:
        return

    locus_names_ids = DBSession.query(Locusdbentity.display_name,
                                      Locusdbentity.sgdid).all()
    html = link_gene_names(text, locus_names_ids)
    abstract_doc = Referencedocument(document_type='Abstract',
                                     source_id=source_id,
                                     reference_id=reference_id,
                                     text=text,
                                     html=html,
                                     created_by=created_by)
    DBSession.add(abstract_doc)

    entries = create_bibentry(pmid, record, journal_abbrev, journal_title,
                              issn_print)
    # Built once and reused: text and html of the Medline document are the
    # same string, and a single pass also works if `entries` can only be
    # iterated once.
    medline_text = '\n'.join(key + ' - ' + str(value)
                             for key, value in entries
                             if value is not None)
    medline_doc = Referencedocument(document_type='Medline',
                                    source_id=source_id,
                                    reference_id=reference_id,
                                    text=medline_text,
                                    html=medline_text,
                                    created_by=created_by)
    DBSession.add(medline_doc)
    DBSession.flush()
    DBSession.refresh(abstract_doc)
Beispiel #3
0
def insert_pubtypes(pmid, reference_id, pubtypes, source_id, created_by):
    """Add one Referencetype row per publication type of a reference.

    Parameters
    ----------
    pmid: int
        not used by this function
    reference_id: int
    pubtypes: iterable of str, or None
        publication type names; spaces are replaced by underscores to build
        each row's obj_url
    source_id: int
    created_by: str

    Returns
    -------
    None
        Nothing is added, flushed or refreshed when ``pubtypes`` is empty
        or None.
    """
    last_row = None
    for pubtype in pubtypes or ():
        last_row = Referencetype(display_name=pubtype,
                                 obj_url='/referencetype/' +
                                 pubtype.replace(' ', '_'),
                                 source_id=source_id,
                                 reference_id=reference_id,
                                 created_by=created_by)
        DBSession.add(last_row)

    # Without this guard an empty input reached refresh() with no row bound.
    if last_row is None:
        return

    DBSession.flush()
    DBSession.refresh(last_row)
Beispiel #4
0
def get_journal_id(record, created_by):
    """Look up the Journal row for a Medline record, creating it if needed.

    The lookup is tried by print ISSN first, then by the Medline
    abbreviation ('TA'). If neither matches, a new Journal is added and
    flushed. If the record has no abbreviation and the ISSN lookup found
    nothing, no journal is created.

    Returns
    -------
    tuple
        (journal_id, med_abbr, journal full name from 'JT', print ISSN),
        or (None, '', '', '') when no journal could be found or created.
    """
    med_abbr = record.get('TA', '')
    full_name = record.get('JT', '')

    # The 'IS' field looks like:
    #   1469-221X (Print) 1469-221X (Linking)
    #   1573-6881 (Electronic) 0145-479X (Linking)
    issn_print = ''
    issn_electronic = ''
    for chunk in record.get('IS', '').split(') '):
        number = chunk.split(' ')[0]
        if "Print" in chunk or "Linking" in chunk:
            issn_print = number
        if "Electronic" in chunk:
            issn_electronic = number

    if issn_print:
        by_issn = DBSession.query(Journal).filter_by(
            issn_print=issn_print).all()
        if by_issn:
            hit = by_issn[0]
            return hit.journal_id, hit.med_abbr, full_name, issn_print

    if med_abbr == '':
        return None, '', '', ''

    if med_abbr:
        by_abbr = DBSession.query(Journal).filter_by(
            med_abbr=med_abbr).all()
        if by_abbr:
            hit = by_abbr[0]
            return hit.journal_id, hit.med_abbr, full_name, issn_print

    # No existing journal: build a new one.
    source_id = 824  # 'PubMed'

    # Titles longer than 200 characters are cut to 197 plus an ellipsis.
    short_title = full_name
    if len(full_name) > 200:
        short_title = full_name[:197] + '...'

    slug = full_name.replace(' ', '_') + med_abbr.replace(' ', '_')
    # format_name is cut to 100 characters; obj_url keeps the full slug.
    short_slug = slug
    if len(slug) > 100:
        short_slug = slug[:97] + '...'

    new_journal = Journal(issn_print=issn_print,
                          issn_electronic=issn_electronic,
                          display_name=short_title,
                          format_name=short_slug,
                          title=short_title,
                          med_abbr=med_abbr,
                          source_id=source_id,
                          obj_url='/journal/' + slug,
                          created_by=created_by)
    DBSession.add(new_journal)
    DBSession.flush()
    DBSession.refresh(new_journal)

    return new_journal.journal_id, new_journal.med_abbr, full_name, issn_print
Beispiel #5
0
def insert_authors(reference_id, authors, source_id, created_by):
    """Add one Referenceauthor row per author, numbered from 1 in list order.

    Returns without doing anything when ``authors`` is empty. Otherwise the
    session is flushed and the last added row is refreshed.

    This function does not return anything.
    """
    if not len(authors):
        return

    for position, name in enumerate(authors, start=1):
        author_row = Referenceauthor(display_name=name,
                                     obj_url='/author/' +
                                     name.replace(' ', '_'),
                                     source_id=source_id,
                                     reference_id=reference_id,
                                     author_order=position,
                                     author_type='Author',
                                     created_by=created_by)
        DBSession.add(author_row)

    DBSession.flush()
    DBSession.refresh(author_row)
Beispiel #6
0
def _first_relation_text(record, tags, tag_to_type):
    """Return (text, relation type) for the first of *tags* set in *record*.

    When the field value is a list only its first element is used.
    Returns (None, None) when none of the tags has a truthy value.
    """
    for tag in tags:
        if record.get(tag):
            text = record[tag]
            if isinstance(text, list):
                text = text[0]
            return text, tag_to_type[tag]
    return None, None


def _extract_pmid(text):
    """Return the digits following 'PMID:' in *text* as a str, or None."""
    import re  # local import keeps this block self-contained

    if not isinstance(text, str):
        return None
    match = re.search(r'PMID:\s*(\d+)', text)
    return match.group(1) if match else None


def insert_relations(pmid, reference_id, record, created_by):
    """Add ReferenceRelation rows linking this reference to related papers.

    The record's comment/correction fields are scanned in two groups:

    * "in" tags (CIN, EIN, CRI, PRIN, RPI, RIN, UIN, SPIN, ORI): this
      reference becomes the parent and the cited PMID the child.
    * "on/for" tags (CON, EFR, CRF, PROF, RPF, ROF, UOF): this reference
      becomes the child and the cited PMID the parent.

    Within each group only the first tag present is used. A relation is
    created only when the field contains a PMID and ``get_reference_id``
    returns an id for it.

    Parameters
    ----------
    pmid: int
        not used by this function
    reference_id: int
    record: dict
    created_by: str

    Returns
    -------
    None
        The session is flushed (and the new rows refreshed) only when at
        least one relation was added.
    """
    tag_to_type = {
        "CON": "Comment",
        "CIN": "Comment",
        "EIN": "Erratum",
        "EFR": "Erratum",
        "CRI": "Corrected and Republished",
        "CRF": "Corrected and Republished",
        "PRIN": "Partial retraction",
        "PROF": "Partial retraction",
        "RPI": "Republished",
        "RPF": "Republished",
        "RIN": "Retraction",
        "ROF": "Retraction",
        "UIN": "Update",
        "UOF": "Update",
        "SPIN": "Summary for patients",
        "ORI": "Original report"
    }

    # Each direction keeps its own relation type, so a record carrying both
    # an "in" and an "on" tag no longer labels the first relation with the
    # second tag's type.
    in_text, in_type = _first_relation_text(
        record,
        ['CIN', 'EIN', 'CRI', 'PRIN', 'RPI', 'RIN', 'UIN', 'SPIN', 'ORI'],
        tag_to_type)
    on_text, on_type = _first_relation_text(
        record, ['CON', 'EFR', 'CRF', 'PROF', 'RPF', 'ROF', 'UOF'],
        tag_to_type)

    if in_text is None and on_text is None:
        return

    source_id = 834  # 'SGD'
    added = []

    child_pmid = _extract_pmid(in_text)
    if child_pmid is not None:
        print(in_text)
        print(child_pmid)
        child_reference_id = get_reference_id(int(child_pmid))
        print('is there a child?')
        print((child_pmid, child_reference_id))
        if child_reference_id is not None:
            relation = ReferenceRelation(parent_id=reference_id,
                                         child_id=child_reference_id,
                                         source_id=source_id,
                                         correction_type=in_type,
                                         created_by=created_by)
            DBSession.add(relation)
            added.append(relation)

    parent_pmid = _extract_pmid(on_text)
    if parent_pmid is not None:
        parent_reference_id = get_reference_id(int(parent_pmid))
        print('is there a parent?')
        print((parent_pmid, parent_reference_id))
        if parent_reference_id is not None:
            relation = ReferenceRelation(parent_id=parent_reference_id,
                                         child_id=reference_id,
                                         source_id=source_id,
                                         correction_type=on_type,
                                         created_by=created_by)
            DBSession.add(relation)
            added.append(relation)

    # Nothing created: previously this path reached refresh() with no row
    # bound and raised.
    if not added:
        return

    DBSession.flush()
    for relation in added:
        DBSession.refresh(relation)
Beispiel #7
0
def insert_referencedbentity(pmid,
                             source_id,
                             record,
                             created_by,
                             method_obtained="Curator triage"):
    """ Inserts referencedbentity object into table referencedbentity

    Also resolves (or creates) the journal through ``get_journal_id`` and,
    once the reference row has been flushed, adds its abstract documents
    through ``insert_abstract``.

    Parameters
    ----------
    pmid: int
    source_id: int
    record: dict
        Medline record; the keys DP, TI, AU, VI, IP, PG and PMC are read
        here
    created_by: str
    method_obtained: str, optional

    Returns
    --------
    list
        [dbentity_id, authors, doi_url, pmc_url, sgdid, the new
        Referencedbentity object]

    """

    pubstatus, date_revised = get_pubstatus_date_revised(record)
    journal_id, journal, journal_title, issn_print = get_journal_id(
        record, created_by)
    pubdate = record.get('DP')
    # The year is the first space-separated token of the publication date.
    # A record without 'DP' gives an empty year instead of failing on
    # None.split().
    year_text = pubdate.split(' ')[0] if pubdate else ''
    title = record.get('TI')
    authors = record.get('AU', [])
    volume = record.get('VI')
    issue = record.get('IP')
    pages = record.get('PG')
    citation = set_cite(title, authors, year_text, journal, volume, issue,
                        pages)
    doi, doi_url = get_doi(record)
    pmcid = record.get('PMC')
    pmc_url = pmc_root + pmcid + '/' if pmcid else None

    # Default statuses; ahead-of-print records use the epub variants.
    publication_status = status
    fulltext_status = pdf_status
    if pubstatus == 'aheadofprint':
        publication_status = epub_status
        fulltext_status = epub_pdf_status

    # Stored as an int, or None when no year is available.
    year = int(year_text) if year_text else None
    if journal_id:
        journal_id = int(journal_id)

    # display_name is the citation up to and including its first ')'.
    x = Referencedbentity(display_name=citation.split(')')[0] + ')',
                          source_id=source_id,
                          subclass='REFERENCE',
                          dbentity_status='Active',
                          method_obtained=method_obtained,
                          publication_status=publication_status,
                          fulltext_status=fulltext_status,
                          citation=citation,
                          year=year,
                          pmid=int(pmid),
                          pmcid=pmcid,
                          date_published=pubdate,
                          date_revised=date_revised,
                          issue=issue,
                          page=pages,
                          volume=volume,
                          title=title,
                          doi=doi,
                          journal_id=journal_id,
                          created_by=created_by)

    DBSession.add(x)
    DBSession.flush()
    DBSession.refresh(x)
    dbentity_id = x.dbentity_id
    ## insert into REFERENCEDOCUMENT
    insert_abstract(pmid, dbentity_id, record, source_id, journal,
                    journal_title, issn_print, created_by)

    return [dbentity_id, authors, doi_url, pmc_url, x.sgdid, x]