コード例 #1
0
ファイル: tasks.py プロジェクト: nikpap/inspire-next
def extract_journal_info(obj, eng):
    """Fill structured journal fields from free-text publication notes.

    Every entry of ``obj.data["publication_info"]`` that has a non-empty
    ``pubinfo_freetext`` is passed to ``extract_journal_reference``; the
    volume, title, year and page keys present in the result are copied onto
    the entry.  ``publication_info`` is then replaced by a new list holding
    the same entries in their original order.

    Args:
        obj: workflow object whose ``data`` mapping is read and updated.
        eng: workflow engine (not used by this function).

    Returns:
        None
    """
    notes = obj.data.get("publication_info")
    if not notes:
        return

    # (key in the extraction result, key written on the entry)
    field_map = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    updated_notes = []
    for note in notes:
        text = note.get("pubinfo_freetext")
        # No ``override_kbs_files`` argument is passed to the extractor here.
        extracted = extract_journal_reference(text) if text else None
        if extracted:
            for source_key, target_key in field_map:
                if source_key in extracted:
                    note[target_key] = extracted.get(source_key)
        # Entries are kept whether or not anything was extracted.
        updated_notes.append(note)

    obj.data["publication_info"] = updated_notes
コード例 #2
0
def extract_journal_info(obj, eng):
    """Populate structured publication fields from ``pubinfo_freetext``.

    For each entry of ``obj.data['publication_info']`` the free text is fed
    to ``extract_journal_reference`` (with the knowledge-base path yielded by
    ``local_refextract_kbs_path``).  Truthy ``title``, ``volume``, ``page``
    and ``year`` values in the result are written onto the entry; the page
    value is split with ``split_page_artid`` and the year goes through
    ``maybe_int``.  Finally the whole list is passed through
    ``convert_old_publication_info_to_new`` and stored back.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used by this function).

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    entries = obj.data['publication_info']
    for entry in entries:
        try:
            with local_refextract_kbs_path() as kbs_path:
                extracted = extract_journal_reference(
                    entry['pubinfo_freetext'], override_kbs_files=kbs_path)

            if extracted:
                title = extracted.get('title')
                if title:
                    entry['journal_title'] = title

                volume = extracted.get('volume')
                if volume:
                    entry['journal_volume'] = volume

                page = extracted.get('page')
                if page:
                    page_start, page_end, artid = split_page_artid(page)
                    if page_start:
                        entry['page_start'] = page_start
                    if page_end:
                        entry['page_end'] = page_end
                    if artid:
                        entry['artid'] = artid

                raw_year = extracted.get('year')
                if raw_year:
                    year = maybe_int(raw_year)
                    if year:
                        entry['year'] = year
        except KeyError:
            # An entry without ``pubinfo_freetext`` (or any other KeyError
            # raised while processing it) leaves the rest of that entry as is.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        entries)
コード例 #3
0
ファイル: refextract.py プロジェクト: harunurhan/inspire-next
def extract_journal_info(obj, eng):
    """Derive journal fields of each publication note from its free text.

    Each item of ``obj.data['publication_info']`` has its
    ``pubinfo_freetext`` run through ``extract_journal_reference``, using the
    path yielded by ``local_refextract_kbs_path`` as ``override_kbs_files``.
    Truthy results are copied onto the item: title and volume directly, the
    page after ``split_page_artid`` and the year after ``maybe_int``.  The
    list is then replaced by the output of
    ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used by this function).

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    # (key in the extraction result, key written on the item)
    direct_fields = (
        ('title', 'journal_title'),
        ('volume', 'journal_volume'),
    )

    for item in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                found = extract_journal_reference(
                    item['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not found:
                continue

            for source_key, target_key in direct_fields:
                if found.get(source_key):
                    item[target_key] = found[source_key]

            if found.get('page'):
                page_start, page_end, artid = split_page_artid(found['page'])
                page_parts = (
                    ('page_start', page_start),
                    ('page_end', page_end),
                    ('artid', artid),
                )
                for target_key, part in page_parts:
                    if part:
                        item[target_key] = part

            if found.get('year'):
                year = maybe_int(found['year'])
                if year:
                    item['year'] = year
        except KeyError:
            # Items lacking ``pubinfo_freetext`` raise here and are skipped;
            # any other KeyError from the body is swallowed the same way.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        obj.data['publication_info'])
コード例 #4
0
ファイル: refextract.py プロジェクト: kaplun/inspire-next
def extract_journal_info(obj, eng):
    """Extract journal, volume, year and pages from free-text publication info.

    Reads ``publication_info`` from ``obj.data`` via ``get_value``.  Falsy
    entries are dropped.  For entries with a ``pubinfo_freetext`` (a list or
    tuple is first joined with ``". "``), ``extract_journal_reference`` is
    called and the keys it returns are copied onto the entry: volume and
    title as is, the year through ``maybe_int`` (kept unless ``None``) and
    the page through ``split_page_artid``.  Only entries with at least one
    truthy value are written back to ``obj.data["publication_info"]``.

    Args:
        obj: workflow object whose ``data`` is read and updated.
        eng: workflow engine (not used by this function).

    Returns:
        None
    """
    notes = get_value(obj.data, "publication_info")
    if not notes:
        return

    direct_fields = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
    )

    kept_notes = []
    for note in notes:
        if not note:
            continue

        # Stays empty when there is no free text or nothing was extracted.
        extracted = {}
        text = note.get("pubinfo_freetext")
        if text:
            if isinstance(text, (list, tuple)):
                text = ". ".join(text)
            # No ``override_kbs_files`` argument is passed to the extractor.
            extracted = extract_journal_reference(text) or {}

        for source_key, target_key in direct_fields:
            if source_key in extracted:
                note[target_key] = extracted.get(source_key)

        if "year" in extracted:
            year = maybe_int(extracted.get('year'))
            if year is not None:
                note['year'] = year

        if "page" in extracted:
            page_start, page_end, artid = split_page_artid(
                extracted.get("page"))
            if page_start:
                note["page_start"] = page_start
            if page_end:
                note["page_end"] = page_end
            if artid:
                note["artid"] = artid

        if any(note.values()):
            kept_notes.append(note)

    obj.data["publication_info"] = kept_notes
コード例 #5
0
ファイル: tasks.py プロジェクト: liamkirsh/inspire-next
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    Reads ``publication_info`` from ``obj.data`` via ``get_value``.  For each
    non-empty entry that has a ``pubinfo_freetext`` (a list or tuple is first
    joined with ``". "``), ``extract_journal_reference`` is called and the
    volume, title, year and page keys it returns are copied onto the entry;
    the page value is split with ``split_page_artid``.

    Entries without free text are written back unchanged.  Previously they
    were skipped before the ``append`` and therefore removed from the record.

    Args:
        obj: workflow object whose ``data`` is read and updated.
        eng: workflow engine (not used by this function).

    Returns:
        None
    """
    publication_info = get_value(obj.data, "publication_info")
    if not publication_info:
        return

    new_publication_info = []
    for pubnote in publication_info:
        if not pubnote:
            # Empty entries are dropped from the result.
            continue

        freetext = pubnote.get("pubinfo_freetext")
        if freetext:
            if isinstance(freetext, (list, tuple)):
                freetext = ". ".join(freetext)
            extracted_publication_info = extract_journal_reference(freetext)
            if extracted_publication_info:
                if "volume" in extracted_publication_info:
                    pubnote["journal_volume"] = extracted_publication_info.get(
                        "volume"
                    )
                if "title" in extracted_publication_info:
                    pubnote["journal_title"] = extracted_publication_info.get(
                        "title"
                    )
                if "year" in extracted_publication_info:
                    pubnote["year"] = extracted_publication_info.get(
                        "year"
                    )
                if "page" in extracted_publication_info:
                    page_start, page_end, artid = split_page_artid(
                        extracted_publication_info.get("page"))
                    if page_start:
                        pubnote["page_start"] = page_start
                    if page_end:
                        pubnote["page_end"] = page_end
                    if artid:
                        pubnote["artid"] = artid

        # Keep the entry even when it has no free text: it may already hold
        # structured data, and skipping it would delete it from the record.
        new_publication_info.append(pubnote)

    obj.data["publication_info"] = new_publication_info
コード例 #6
0
ファイル: tasks.py プロジェクト: gitter-badger/inspire-next
def extract_journal_info(obj, eng):
    """Fill journal fields of a record from its free-text publication notes.

    The record is obtained with ``get_record_from_model`` from the model that
    ``eng.workflow_definition.model(obj)`` returns.  Each ``publication_info``
    entry with a non-empty ``pubinfo_freetext`` is passed to
    ``extract_journal_reference``, with the journals knowledge base looked up
    by ``cfg['REFEXTRACT_KB_NAME']``; the volume, title, year and page keys
    present in the result are copied onto the entry.  The list is then stored
    back on the record and ``model.update()`` is called.  When the record has
    no publication info the function returns without calling ``update``.

    Args:
        obj: workflow object handed to the workflow definition's model.
        eng: workflow engine providing ``workflow_definition``.

    Returns:
        None
    """
    model = eng.workflow_definition.model(obj)
    record = get_record_from_model(model)

    notes = record.get("publication_info")
    if not notes:
        return

    # (key in the extraction result, key written on the entry)
    field_map = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    updated_notes = []
    for note in notes:
        text = note.get("pubinfo_freetext")
        if text:
            # The mapping is looked up again for every entry with free text.
            journals_kb = get_mappings_from_kbname(cfg['REFEXTRACT_KB_NAME'])
            extracted = extract_journal_reference(
                text,
                override_kbs_files={'journals': journals_kb},
            )
            if extracted:
                for source_key, target_key in field_map:
                    if source_key in extracted:
                        note[target_key] = extracted.get(source_key)
        updated_notes.append(note)

    record["publication_info"] = updated_notes
    model.update()