def test_replace_refs_correct_sources(get_db_rec, get_es_rec):
    """``replace_refs`` must return the DB value for 'db' and the ES value for 'es'.

    ``get_db_rec`` and ``get_es_rec`` are presumably mocks injected by
    decorators outside this view; each is configured with a distinct return
    value so the two sources can be told apart.
    """
    expected_from_db = {'DB': 'DB'}
    expected_from_es = {'ES': 'ES'}

    get_db_rec.return_value = expected_from_db
    get_es_rec.return_value = expected_from_es

    resolved_from_db = replace_refs({'$ref': _build_url()}, 'db')
    resolved_from_es = replace_refs({'$ref': _build_url()}, 'es')

    assert resolved_from_db == expected_from_db
    assert resolved_from_es == expected_from_es
Esempio n. 2
0
def test_replace_refs_correct_sources(get_db_rec, get_es_rec):
    """Verify that the ``source`` argument of ``replace_refs`` selects the getter.

    Each getter (presumably a mock supplied from outside this view) is given
    its own sentinel record; resolving the same kind of ``$ref`` with 'db'
    and 'es' must yield the matching sentinel.
    """
    sentinels = {
        'db': {'DB': 'DB'},
        'es': {'ES': 'ES'},
    }

    get_es_rec.return_value = sentinels['es']
    get_db_rec.return_value = sentinels['db']

    from_db = replace_refs({'$ref': _build_url()}, 'db')
    from_es = replace_refs({'$ref': _build_url()}, 'es')

    assert from_db == sentinels['db']
    assert from_es == sentinels['es']
Esempio n. 3
0
def test_replace_refs_correct_sources(get_db_rec, get_es_rec, app):
    """``replace_refs`` returns the DB or ES value depending on the source.

    Same check as the context-free variant, but everything that touches the
    resolved objects runs inside ``app.app_context()``.
    """
    db_sentinel = {'DB': 'DB'}
    es_sentinel = {'ES': 'ES'}

    get_db_rec.return_value = db_sentinel
    get_es_rec.return_value = es_sentinel

    with app.app_context():
        resolved_db = replace_refs({'$ref': _build_url(app)}, 'db')
        resolved_es = replace_refs({'$ref': _build_url(app)}, 'es')

        # The comparisons stay inside the context: lazy objects need to be
        # evaluated in app_context.
        assert resolved_db == db_sentinel
        assert resolved_es == es_sentinel
Esempio n. 4
0
def populate_journal_coverage(obj, eng):
    """Populate ``journal_coverage`` from the Journals DB.

    Resolves the journal records referenced in ``publication_info`` and sets
    ``extra_data['journal_coverage']`` to ``'full'`` if at least one of them
    has ``_harvesting_info.coverage == 'full'``, ``'partial'`` otherwise.
    Nothing is written when no journal record can be resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journal_refs = get_value(obj.data, 'publication_info.journal_record')
    journals = replace_refs(journal_refs, 'db')
    if not journals:
        return

    coverage = 'partial'
    for journal in journals:
        if get_value(journal, '_harvesting_info.coverage') == 'full':
            # One fully harvested journal is enough.
            coverage = 'full'
            break

    obj.extra_data['journal_coverage'] = coverage
Esempio n. 5
0
def get_conference_record(record, default=None):
    """Return the first Conference record associated with a record.

    Resolves, against the database, the first ``conference_record``
    reference found in the ``publication_info`` of the record.

    Args:
        record(InspireRecord): a record.
        default: value returned when the reference is absent or resolves to
            something falsy.

    Returns:
        InspireRecord: the first Conference record associated with the
        record, or ``default``.

    Examples:
        >>> record = {
        ...     'publication_info': [
        ...         {
        ...             'conference_record': {
        ...                 '$ref': '/api/conferences/972464',
        ...             },
        ...         },
        ...     ],
        ... }
        >>> conference_record = get_conference_record(record)
        >>> conference_record['control_number']
        972464

    """
    conference_ref = get_value(record, 'publication_info.conference_record[0]')
    conference = replace_refs(conference_ref, 'db')
    return conference if conference else default
Esempio n. 6
0
def get_conference_record(record):
    """Return the first Conference record associated with a record.

    Looks up the first ``conference_record`` reference in the record's
    ``publication_info`` and resolves it against the database.

    Args:
        record(InspireRecord): a record.

    Returns:
        InspireRecord: whatever ``replace_refs`` yields for the first
        conference reference (the reference is ``None`` when absent).

    Examples:
        >>> record = {
        ...     'publication_info': [
        ...         {
        ...             'conference_record': {
        ...                 '$ref': '/api/conferences/972464',
        ...             },
        ...         },
        ...     ],
        ... }
        >>> conference_record = get_conference_record(record)
        >>> conference_record['control_number']
        972464

    """
    first_conference_ref = get_value(
        record, 'publication_info.conference_record[0]', default=None)
    return replace_refs(first_conference_ref, 'db')
Esempio n. 7
0
    def conference_information(self):
        """Conference information.

        Builds one dict per ``publication_info`` entry with the recid and
        title of the referenced conference and parent records (resolved with
        the 'es' source) plus the entry's page/article identifiers.

        Returns:
            list: one dict per ``publication_info`` entry.
        """
        def _resolve(pub_info, key):
            """Return ``(recid, record)`` for the reference stored under ``key``.

            Falls back to ``(None, {})`` when the key is missing or the
            resolved record has no truthy ``control_number``.
            """
            if key not in pub_info:
                return None, {}
            resolved = replace_refs(pub_info[key], 'es')
            if resolved and resolved.get('control_number'):
                return resolved['control_number'], resolved
            return None, {}

        conf_info = []
        for pub_info in self['publication_info']:
            conference_recid, conference_rec = _resolve(
                pub_info, 'conference_record')
            parent_recid, parent_rec = _resolve(pub_info, 'parent_record')

            conference_title = LiteratureReader(conference_rec).title
            # Only the first "Proceedings, " occurrence is stripped.
            parent_title = LiteratureReader(parent_rec).title.replace(
                "Proceedings, ", "", 1)

            conf_info.append({
                "conference_recid": conference_recid,
                "conference_title": conference_title,
                "parent_recid": parent_recid,
                "parent_title": parent_title,
                "page_start": pub_info.get('page_start'),
                "page_end": pub_info.get('page_end'),
                "artid": pub_info.get('artid'),
            })

        return conf_info
Esempio n. 8
0
def set_refereed_and_fix_document_type(obj, eng):
    """Set the ``refereed`` field using the Journals DB.

    ``data['refereed']`` becomes ``True`` when the article appears in a
    refereed journal that does not publish proceedings, or when it is not a
    conference paper and appears in a refereed journal that also publishes
    proceedings. It becomes ``False`` when every journal is explicitly
    marked as not refereed. Otherwise the key is left untouched.

    Additionally, the ``article`` document type is replaced by
    ``conference paper`` when the paper was published only in non refereed
    proceedings.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journal_refs = get_value(obj.data, 'publication_info.journal_record')
    journals = replace_refs(journal_refs, 'db')
    if not journals:
        return

    in_refereed_non_proceedings = any(
        j.get('refereed') and not j.get('proceedings') for j in journals)
    in_refereed_proceedings = any(
        j.get('refereed') and j.get('proceedings') for j in journals)
    not_a_conference_paper = (
        'conference paper' not in obj.data['document_type'])
    # A missing ``refereed`` key counts as "unknown", not as "not refereed".
    all_explicitly_non_refereed = all(
        not j.get('refereed', True) for j in journals)

    if in_refereed_non_proceedings or (
            not_a_conference_paper and in_refereed_proceedings):
        obj.data['refereed'] = True
    elif all_explicitly_non_refereed:
        obj.data['refereed'] = False

    only_in_proceedings = all(j.get('proceedings') for j in journals)
    # Here a missing ``refereed`` key does count as non refereed.
    only_in_non_refereed = all(not j.get('refereed') for j in journals)

    if not (only_in_proceedings and only_in_non_refereed):
        return

    try:
        obj.data['document_type'].remove('article')
        obj.data['document_type'].append('conference paper')
    except ValueError:
        # 'article' was not among the document types: nothing to replace.
        pass
Esempio n. 9
0
def get_journal_coverage(obj, eng):
    """Store the journal coverage that this article belongs to.

    Resolves the journal records referenced in ``publication_info`` and
    writes ``extra_data['journal_coverage']``: ``'full'`` if any of them has
    ``_harvesting_info.coverage == 'full'``, ``'partial'`` otherwise. Nothing
    is written when no journal record can be resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(get_value(obj.data, 'publication_info.journal_record'), 'db')

    if not journals:
        return

    # ``get_value`` tolerates journals that carry no ``_harvesting_info`` at
    # all, which direct indexing would turn into a KeyError.
    if any(get_value(journal, '_harvesting_info.coverage') == 'full' for journal in journals):
        obj.extra_data['journal_coverage'] = 'full'
    else:
        obj.extra_data['journal_coverage'] = 'partial'
Esempio n. 10
0
    def conference_information(self):
        """Conference information.

        For every ``publication_info`` entry, resolves the optional
        ``conference_record`` and ``parent_record`` references with the 'es'
        source and collects their recids and titles together with the
        entry's page/article identifiers.

        Returns:
            list: one dict per ``publication_info`` entry.
        """
        entries = []
        for pub_info in self['publication_info']:
            conference_rec = {}
            conference_recid = None
            if 'conference_record' in pub_info:
                candidate = replace_refs(pub_info['conference_record'], 'es')
                # Keep the resolved record only if it carries a recid.
                if candidate and candidate.get('control_number'):
                    conference_rec = candidate
                    conference_recid = candidate['control_number']

            parent_rec = {}
            parent_recid = None
            if 'parent_record' in pub_info:
                candidate = replace_refs(pub_info['parent_record'], 'es')
                if candidate and candidate.get('control_number'):
                    parent_rec = candidate
                    parent_recid = candidate['control_number']

            conference_title = get_title(conference_rec)
            # Only the first "Proceedings, " occurrence is stripped.
            parent_title = get_title(parent_rec).replace(
                "Proceedings, ", "", 1)

            entries.append({
                "conference_recid": conference_recid,
                "conference_title": conference_title,
                "parent_recid": parent_recid,
                "parent_title": parent_title,
                "page_start": pub_info.get('page_start'),
                "page_end": pub_info.get('page_end'),
                "artid": pub_info.get('artid'),
            })

        return entries
Esempio n. 11
0
def set_refereed_and_fix_document_type(obj, eng):
    """Set the ``refereed`` field using the Journals DB.

    Looks at the journal records referenced by the article:

    * ``data['refereed']`` is set to ``True`` if one of them is refereed and
      does not publish proceedings, or if one of them is refereed, also
      publishes proceedings, and the article is not a conference paper;
    * it is set to ``False`` if all of them are explicitly not refereed;
    * otherwise it is left alone.

    Also replaces the ``article`` document type with ``conference paper`` if
    the paper was only published in non refereed proceedings.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(get_value(obj.data, 'publication_info.journal_record'), 'db')
    if not journals:
        return

    refereed = [bool(journal.get('refereed')) for journal in journals]
    proceedings = [bool(journal.get('proceedings')) for journal in journals]
    flags = list(zip(refereed, proceedings))

    refereed_without_proceedings = any(r and not p for r, p in flags)
    refereed_with_proceedings = any(r and p for r, p in flags)
    is_conference_paper = 'conference paper' in obj.data['document_type']
    # Journals lacking a ``refereed`` key are treated as unknown here.
    exclusively_non_refereed = all(
        not journal.get('refereed', True) for journal in journals)

    if refereed_without_proceedings:
        obj.data['refereed'] = True
    elif refereed_with_proceedings and not is_conference_paper:
        obj.data['refereed'] = True
    elif exclusively_non_refereed:
        obj.data['refereed'] = False

    only_proceedings = all(proceedings)
    none_refereed = not any(refereed)

    if only_proceedings and none_refereed:
        document_types = obj.data['document_type']
        try:
            document_types.remove('article')
            document_types.append('conference paper')
        except ValueError:
            # No 'article' entry to swap out.
            pass
Esempio n. 12
0
def _conference_data(conf):
    """Build the conference publication dict for a conference reference.

    Resolves ``conf`` against the database and extracts name, acronym and
    dates; every field stays an empty string when the reference does not
    resolve or the value is missing.
    """
    # FIXME: Add conference city, country, and country code fields
    data = {'type': "conference",
            'name': "",
            'acronym': "",
            'opening_date': "",
            'closing_date': ""}

    ref = replace_refs(conf, 'db')
    if ref:
        data['name'] = get_value(ref, "titles[0].title", "")
        data['acronym'] = get_value(ref, "acronym[0]", "")
        data['opening_date'] = get_value(ref, "opening_date", "")
        data['closing_date'] = get_value(ref, "closing_date", "")

    return data
Esempio n. 13
0
def _conference_data(conf):
    """Return a ``type: conference`` dict describing the referenced record.

    The reference is resolved with the 'db' source. Each output field is
    read from the resolved record, or is ``""`` when the record could not
    be resolved or lacks the value.
    """
    # FIXME: Add conference city, country, and country code fields
    field_paths = (
        ('name', "titles[0].title"),
        ('acronym', "acronym[0]"),
        ('opening_date', "opening_date"),
        ('closing_date', "closing_date"),
    )

    ref = replace_refs(conf, 'db')

    result = {'type': "conference"}
    for field, path in field_paths:
        result[field] = get_value(ref, path, "") if ref else ""
    return result
Esempio n. 14
0
def _conference_data(conf_record):
    """Build the conference publication dict from a conference reference.

    Resolves ``conf_record`` against the database and reads every field by
    direct indexing, so a missing field raises.

    NOTE(review): city and country are taken as the first two
    space-separated tokens of ``original_address`` (the first one with its
    last character dropped), and month/year as tokens 1 and 2 of ``date``.
    This presumably expects strings shaped like "City, Country" and
    "DD Mon YYYY"; confirm against real data.
    """
    ref = replace_refs(conf_record, 'db')

    address_tokens = ref['address'][0]['original_address'].split(" ")
    city = address_tokens[0][:-1]  # trim off comma
    country = address_tokens[1]

    date_tokens = ref['date'].split(" ")
    month = date_tokens[1]
    year = date_tokens[2]

    conference = {
        'type': "conference",
        'name': ref['titles'][0]['title'],
        'acronym': ref['acronym'][0],
        'opening_date': ref['opening_date'],
        'closing_date': ref['closing_date'],
        'month': month,
        'year': year,
        'city': city,
        'country': country,
        'country_code': ref['address'][0]['country_code'],
    }
    return conference
Esempio n. 15
0
def populate_journal_coverage(obj, eng):
    """Populate ``journal_coverage`` from the Journals DB.

    Checks whether the current article was published in a journal whose
    ``_harvesting_info.coverage`` is ``'full'`` and stores ``'full'`` in the
    ``journal_coverage`` key of ``extra_data`` if so, ``'partial'``
    otherwise. Does nothing when no journal record can be resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(get_value(obj.data, 'publication_info.journal_record'), 'db')
    if not journals:
        return

    fully_harvested = any(
        get_value(journal, '_harvesting_info.coverage') == 'full'
        for journal in journals
    )
    obj.extra_data['journal_coverage'] = 'full' if fully_harvested else 'partial'
Esempio n. 16
0
def tei_response(record):
    """Render ``record`` with the TEI template and print the result.

    Steps:

    * attach a ``parsed_name`` (a ``HumanName``) to every author that has a
      non-empty ``full_name`` (the author dicts of ``record`` are modified
      in place);
    * take the DOI from the first ``dois`` entry and the publication data
      from the first ``publication_info`` entry (journal data if it has a
      ``journal_title``, conference data if it has a ``conference_record``);
    * resolve every distinct author affiliation (by ``recid``) against the
      database and describe it as a structure;
    * render the template and print it.

    Args:
        record: dict-like record to convert.

    Returns:
        None: the rendered document is printed to standard output.
    """
    data = record
    env = Environment(loader=PackageLoader('inspirehep.modules.converttohal',
                                           'templates'), trim_blocks=True, lstrip_blocks=True)
    template = env.get_template(TEMPLATE)

    # Tolerate records without authors, as the affiliation loop below does.
    authors = data.get('authors') or []
    for author in authors:
        if author.get('full_name'):
            # handle first/last name
            author['parsed_name'] = HumanName(author['full_name'])

    titles = data.get('titles', [])

    # TODO: update the following line
    dois = data.get('dois') or []
    doi = dois[0]['value'] if dois else ""

    publication = None
    pub_infos = data.get('publication_info') or []
    if pub_infos:
        # Only the first publication_info entry is considered.
        pub_info = pub_infos[0]
        if 'journal_title' in pub_info:
            page_start = pub_info.get('page_start')
            page_end = pub_info.get('page_end')
            if 'page_artid' in pub_info:
                pp = pub_info['page_artid']
            elif page_start and page_end:
                pp = page_start + "-" + page_end
            else:
                # At most one of the two is available; use whichever it is.
                pp = page_start or page_end or ""

            publication = {'type': "journal",
                           'name': pub_info['journal_title'],
                           'year': pub_info.get('year', ""),
                           'volume': pub_info.get('journal_volume', ""),
                           'issue': pub_info.get('journal_issue', ""),
                           'pp': pp}
        elif 'conference_record' in pub_info:
            publication = _conference_data(pub_info['conference_record'])

    # Collect each affiliation once, keyed by its recid, in first-seen order.
    my_affiliations = []
    recids = []
    for author in (data.get('authors') or []):
        for affiliation in (author.get('affiliations') or []):
            if 'recid' in affiliation and affiliation['recid'] not in recids:
                my_affiliations.append(affiliation)
                recids.append(affiliation['recid'])

    structures = []
    for affiliation in my_affiliations:
        ref = replace_refs(affiliation, 'db')

        if 'record' in ref and 'collections' in ref['record']:
            institution = ref['record']
            collections = institution['collections']
            address = institution['address'][0]
            structures.append({
                # The structure type comes from the second collection entry.
                'type': collections[1]['primary'].lower()
                        if len(collections) >= 2 else "",
                'name': institution['institution'][0],
                'address': address['original_address'],
                'country': address['country_code'],
                # Last colon-separated component of the OAI-PMH identifier.
                'recid': institution['oai_pmh'][0]['id'].split(":")[-1],
            })

    # Parenthesised single-argument print behaves the same as a Python 2
    # print statement and is also valid Python 3.
    print(template.render(titles=titles, doi=doi, authors=authors,
                          publication=publication, structures=structures))
Esempio n. 17
0
def publication_info(record):
    """Displays inline publication and conference information.

    Args:
        record: dict-like record; only its ``publication_info`` list is read.

    Returns:
        dict: may contain

        * ``pub_info``: a list with one HTML snippet per entry that has a
          ``journal_title``; if there is none, a one-element list holding
          the first ``pubinfo_freetext`` found;
        * ``conf_info``: the rendered conference macro for an entry that
          references a conference and/or parent record.

        Empty when the record has no ``publication_info``.
    """
    result = {}
    if 'publication_info' not in record:
        return result

    out = []
    for pub_info in record['publication_info']:
        if 'journal_title' not in pub_info:
            continue
        # Reset the optional parts for every entry so that values from a
        # previous entry never leak into this one.
        journal_volume = year = journal_issue = pages = ''
        journal_title = '<i>' + pub_info['journal_title'] + '</i>'
        if 'journal_volume' in pub_info:
            journal_volume = ' ' + pub_info['journal_volume']
        if 'year' in pub_info:
            year = ' (' + str(pub_info['year']) + ')'
        if 'journal_issue' in pub_info:
            journal_issue = ' ' + pub_info['journal_issue'] + ', '
        if 'page_start' in pub_info and 'page_end' in pub_info:
            pages = ' ' + '{page_start}-{page_end}'.format(**pub_info)
        elif 'page_start' in pub_info:
            pages = ' ' + '{page_start}'.format(**pub_info)
        elif 'artid' in pub_info:
            pages = ' ' + '{artid}'.format(**pub_info)
        out.append(journal_title + journal_volume +
                   year + journal_issue + pages)

    if out:
        result['pub_info'] = out
    else:
        # No journal information: fall back to the first free-text entry.
        for field in record['publication_info']:
            if 'pubinfo_freetext' in field:
                out.append(field['pubinfo_freetext'])
                result['pub_info'] = out
                break

    # Conference info line
    macros_template = \
        "inspirehep_theme/format/record/Conference_info_macros.tpl"
    for pub_info in record['publication_info']:
        conference_recid = None
        parent_recid = None
        if 'conference_record' in pub_info:
            conference_rec = replace_refs(pub_info['conference_record'],
                                          'es')
            if conference_rec and conference_rec.get('control_number'):
                conference_recid = conference_rec['control_number']
        if 'parent_record' in pub_info:
            parent_rec = replace_refs(pub_info['parent_record'], 'es')
            if parent_rec and parent_rec.get('control_number'):
                parent_recid = parent_rec['control_number']

        # NOTE(review): the TypeError handlers below look like a deliberate
        # best-effort guard around title lookup/rendering; kept as is.
        if conference_recid and parent_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec)
                }
                if result:
                    result['conf_info'] = render_macro_from_template(
                        name="conf_with_pub_info",
                        template=macros_template,
                        ctx=ctx)
                    break
                else:
                    ctx.update(dict(
                        page_start=pub_info.get('page_start'),
                        page_end=pub_info.get('page_end'),
                        artid=pub_info.get('artid')
                    ))
                    result['conf_info'] = render_macro_from_template(
                        name="conf_without_pub_info",
                        template=macros_template,
                        ctx=ctx)
                    break
            except TypeError:
                pass
        elif conference_recid and not parent_recid:
            try:
                ctx = {
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="conference_only",
                    template=macros_template,
                    ctx=ctx)
            except TypeError:
                pass
        elif parent_recid and not conference_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "parent_title":
                        parent_rec['titles'][0]['title'].replace(
                            "Proceedings, ", "", 1),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="proceedings_only",
                    template=macros_template,
                    ctx=ctx)
            except TypeError:
                pass
    return result