Ejemplo n.º 1
0
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    Runs ``extract_journal_reference`` on the ``pubinfo_freetext`` key of each
    ``publication_info``, if it exists, and uses the extracted information to
    populate the other keys.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    for publication_info in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                extracted_publication_info = extract_journal_reference(
                    publication_info['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not extracted_publication_info:
                continue

            if extracted_publication_info.get('title'):
                publication_info['journal_title'] = extracted_publication_info[
                    'title']

            if extracted_publication_info.get('volume'):
                publication_info[
                    'journal_volume'] = extracted_publication_info['volume']

            if extracted_publication_info.get('page'):
                page_start, page_end, artid = split_page_artid(
                    extracted_publication_info['page'])
                if page_start:
                    publication_info['page_start'] = page_start
                if page_end:
                    publication_info['page_end'] = page_end
                if artid:
                    publication_info['artid'] = artid

            if extracted_publication_info.get('year'):
                year = maybe_int(extracted_publication_info['year'])
                if year:
                    publication_info['year'] = year
        except KeyError:
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        obj.data['publication_info'])
Ejemplo n.º 2
0
def refextract_url():
    """Run refextract on a URL."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_url(
            request.json['url'],
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}')
    references = map_refextract_to_schema(extracted_references)

    return jsonify(references)
Ejemplo n.º 3
0
def extract_references_from_text(text, source=None, custom_kbs_file=None):
    """Extract references from text and return in INSPIRE format."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_string(
            text,
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}',
        )

    return map_refextract_to_schema(extracted_references, source=source)
Ejemplo n.º 4
0
def extract_references_from_text(text, source=None, custom_kbs_file=None):
    """Extract references from text and return in INSPIRE format."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_string(
            text,
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}',
        )

    return map_refextract_to_schema(extracted_references, source=source)
Ejemplo n.º 5
0
def refextract_text():
    """Run refextract on a piece of text."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_string(
            request.json['text'],
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}')
    references = map_refextract_to_schema(extracted_references)
    references = match_references(references)
    return jsonify(references)
Ejemplo n.º 6
0
def refextract_text():
    """Run refextract on a piece of text."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_string(
            request.json['text'],
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}'
        )
    references = map_refextract_to_schema(extracted_references)

    return jsonify(references)
Ejemplo n.º 7
0
def refextract_url():
    """Run refextract on a URL."""
    with local_refextract_kbs_path() as kbs_path:
        extracted_references = extract_references_from_url(
            request.json['url'],
            override_kbs_files=kbs_path,
            reference_format=u'{title},{volume},{page}'
        )
    references = map_refextract_to_schema(extracted_references)
    references = match_references(references)
    return jsonify(references)
Ejemplo n.º 8
0
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    Runs ``extract_journal_reference`` on the ``pubinfo_freetext`` key of each
    ``publication_info``, if it exists, and uses the extracted information to
    populate the other keys.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    for publication_info in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                extracted_publication_info = extract_journal_reference(
                    publication_info['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not extracted_publication_info:
                continue

            if extracted_publication_info.get('title'):
                publication_info['journal_title'] = extracted_publication_info['title']

            if extracted_publication_info.get('volume'):
                publication_info['journal_volume'] = extracted_publication_info['volume']

            if extracted_publication_info.get('page'):
                page_start, page_end, artid = split_page_artid(extracted_publication_info['page'])
                if page_start:
                    publication_info['page_start'] = page_start
                if page_end:
                    publication_info['page_end'] = page_end
                if artid:
                    publication_info['artid'] = artid

            if extracted_publication_info.get('year'):
                year = maybe_int(extracted_publication_info['year'])
                if year:
                    publication_info['year'] = year
        except KeyError:
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(obj.data['publication_info'])