예제 #1
0
def extract_references(filepath):
    """Extract references from PDF and return in INSPIRE format.

    Calls ``extract_references_from_file`` on ``filepath`` with the
    journals knowledge base named by ``cfg['REFEXTRACT_KB_NAME']`` and
    renames the fields of every returned reference.

    :param filepath: path handed to ``extract_references_from_file``.
    :return: list with one dict per extracted reference. A dict only
        contains the fields whose extracted value is truthy; list values
        are joined into a single comma-separated string. The list is
        empty when the extractor reports no references.
    """
    # (output key, key in the extractor's result), in output order.
    field_map = (
        ("journal_pubnote", "journal_reference"),
        ("year", "year"),
        ("collaboration", "collaboration"),
        ("title", "title"),
        ("misc", "misc"),
        ("number", "linemarker"),
        ("authors", "author"),
        ("isbn", "isbn"),
        ("doi", "doi"),
        ("report_number", "reportnumber"),
        ("publisher", "publisher"),
        ("recid", "recid"),
    )

    extracted = extract_references_from_file(
        filepath,
        reference_format="{title},{volume},{page}",
        override_kbs_files={'journals': get_mappings_from_kbname(cfg['REFEXTRACT_KB_NAME'])}
    )

    mapped_references = []
    for ref in extracted.get('references') or ():
        # Build the dict from scratch instead of filling it and then
        # deleting empty keys: removing entries while iterating
        # ``dict.items()`` raises RuntimeError on Python 3.
        reference = {}
        for target_key, source_key in field_map:
            value = ref.get(source_key)
            if not value:
                continue
            if isinstance(value, list):
                value = ",".join(value)
            reference[target_key] = value
        mapped_references.append(reference)
    return mapped_references
예제 #2
0
def extract_journal_info(obj, eng):
    """Fill structured journal fields from free-text publication notes.

    Every entry of the record's ``publication_info`` that carries a
    ``pubinfo_freetext`` value is passed to ``extract_journal_reference``;
    any volume, title, year and page present in the result are copied
    onto that entry. The resulting list is stored back on the record and
    the model is updated. Returns early, without updating the model, when
    the record has no publication info.
    """
    model = eng.workflow_definition.model(obj)
    record = get_record_from_model(model)

    pubnotes = record.get("publication_info")
    if not pubnotes:
        return

    # (key in the extraction result, key written on the pubnote)
    copied_fields = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    updated_pubnotes = []
    for note in pubnotes:
        text = note.get("pubinfo_freetext")
        if text:
            parsed = extract_journal_reference(
                text,
                override_kbs_files={
                    'journals': get_mappings_from_kbname(cfg['REFEXTRACT_KB_NAME'])
                }
            )
            # A falsy result leaves the note exactly as it was.
            if parsed:
                for source_key, target_key in copied_fields:
                    if source_key in parsed:
                        note[target_key] = parsed.get(source_key)
        updated_pubnotes.append(note)

    record["publication_info"] = updated_pubnotes
    model.update()