def test_extracts_authors_from_multiple_citations():
    """Raw author names from several citations end up in one result set."""
    # given: one citation per raw author name
    raw_names = ('Ozkut Korkmaz', 'Smith, Jordan')
    citations = [Citation(authors=[Person(raw=name)]) for name in raw_names]

    # when
    known_authors = extract_known_authors(citations)

    # then: every raw name is present exactly once
    assert known_authors == set(raw_names)
Esempio n. 2
0
def test_parse_citation_fields():
    """Parsing the CITATION fixture yields the expected, fully populated Citation."""
    expected_editors = [
        Person(first='Klaus', last='Kreiser', raw='Klaus Kreiser'),
        Person(first='Werner', last='Diem', raw='Werner Diem'),
        Person(first='Hans', middle='Georg', last='Majer', raw='Hans Georg Majer'),
    ]
    expected_raw_text = (
        '1. Lexikon der islamischen Welt. '
        'Klaus Kreiser, Werner Diem, Hans Georg Majer ed. '
        '3 Bde., Stuttgart, 1974 (Urban-Taschenbücher, 200/1-3).'
    )
    # Placeholders mark where each recognised field was cut out of the raw text.
    expected_remaining_text = (
        '{{{ title }}}.  {{{ editors }}}   {{{ number_of_volumes }}} '
        '{{{ location }}} {{{ date_published }}}{{{ series }}}.'
    )

    actual = parse_fields_in_citation(CITATION)

    expected = Citation(
        volume=1,
        number=1,
        type=CitationType.COLLECTION,
        title='Lexikon der islamischen Welt',
        editors=expected_editors,
        keywords=[{'raw': 'A'}, {'raw': 'B'}],
        reviews=[],
        comments=[],
        date_published={'year': 1974},
        location='Stuttgart',
        series='Urban-Taschenbücher, 200/1-3',
        number_of_volumes='3',
        authors=[Person(first='L.', last='Bazin', raw='Bazin, L.')],
        raw_text=expected_raw_text,
        remaining_text=expected_remaining_text,
    )
    assert actual == expected
def parse_fields_in_citation(intermediate: IntermediateCitation) -> Citation:
    """Build a ``Citation`` from the fields of an ``IntermediateCitation``.

    Some fields are copied over unchanged, ``number`` is converted with
    ``int()``, and the remaining ones are passed through their ``parse_*``
    helper.

    Reviews are collected from three places, in this order: the single
    ``comment``, the ``amendments``, and the ``reviews`` field itself.
    """
    # The lone comment is wrapped in a list so it goes through the same
    # helper as the amendments; the helper returns a (reviews, rest) pair.
    collected_reviews, parsed_comments = parse_amendments_or_comments(
        [intermediate.comment]
    )
    amendment_reviews, parsed_amendments = parse_amendments_or_comments(
        intermediate.amendments
    )

    collected_reviews.extend(amendment_reviews)
    collected_reviews.extend(parse_reviews(intermediate.reviews))

    return Citation(
        # Identification and plain pass-through fields.
        volume=intermediate.volume,
        number=int(intermediate.number),
        type=intermediate.type,
        title=intermediate.title,
        location=intermediate.location,
        series=intermediate.series,
        keywords=[{'raw': raw_keyword} for raw_keyword in intermediate.keywords],
        number_of_volumes=intermediate.number_of_volumes,
        number_of_pages=intermediate.number_of_pages,
        # People attached to the citation.
        authors=parse_authors(intermediate.authors),
        editors=parse_editors_or_translators(intermediate.editors),
        translators=parse_editors_or_translators(intermediate.translators),
        # Annotations gathered above.
        comments=parsed_comments,
        reviews=collected_reviews,
        published_in=parse_reference(intermediate.published_in),
        amendments=parsed_amendments,
        date_published=parse_date_published(intermediate.date_published),
        raw_text=intermediate.raw_text,
        material=parse_material(intermediate.material),
        date=parse_date(intermediate.date),
        ta_references=parse_ta_references(intermediate.ta_references),
        remaining_text=intermediate.remaining_text,
    )
def test_normalize_keywords_with_super_keyword():
    """Keyword 'AB' is normalized and carries keyword 'A' as its ``super`` entry."""
    keyword_mapping = {
        'A': {'de': 'Allgemeines', 'en': 'General'},
        'AB': {'de': 'Spezielles', 'en': 'Specific'},
    }
    # The super keyword has no raw text and no further parent of its own.
    expected_super = {
        'code': 'A',
        'nameDE': 'Allgemeines',
        'nameEN': 'General',
        'raw': None,
        'super': None,
    }
    expected_keyword = {
        'code': 'AB',
        'nameDE': 'Spezielles',
        'nameEN': 'Specific',
        'raw': 'AB. IRGENDWAS',
        'super': expected_super,
    }

    normalized = normalize_keywords_for_citation(
        Citation(keywords=[{'raw': 'AB. IRGENDWAS'}]),
        keyword_mapping,
    )

    assert normalized.keywords == [expected_keyword]
def test_extracts_nothing_if_no_authors_present():
    """A citation with an empty author list yields no known authors."""
    # given: a single citation without any authors
    citation_without_authors = Citation(authors=[])

    # when
    known_authors = extract_known_authors([citation_without_authors])

    # then: the result is a sized, empty collection
    assert len(known_authors) == 0
def test_extracts_single_author_from_one_citation():
    """The raw name of a citation's only author is the sole extracted entry."""
    # given
    sole_author = Person(raw='Smith, Jordan')
    citation = Citation(authors=[sole_author])

    # when
    known_authors = extract_known_authors([citation])

    # then
    assert known_authors == {'Smith, Jordan'}
def test_normalize_keywords():
    """A raw keyword starting with code 'A' gains code and names, with no super keyword."""
    keyword_mapping = {'A': {'de': 'Allgemeines', 'en': 'General'}}
    expected_keyword = {
        'code': 'A',
        'nameDE': 'Allgemeines',
        'nameEN': 'General',
        'raw': 'A. ASDFERAWER',
        'super': None,
    }

    normalized = normalize_keywords_for_citation(
        Citation(keywords=[{'raw': 'A. ASDFERAWER'}]),
        keyword_mapping,
    )

    assert normalized.keywords == [expected_keyword]
def test_assign_duplicate():
    """Repeated volume/number pairs receive ids with an increasing numeric suffix."""
    # given: number 1 occurs three times within volume 1
    numbers = [1, 2, 1, 1]
    citations = [Citation(volume=1, number=number) for number in numbers]

    # when
    citations_with_ids = list(assign_citation_ids(citations))

    # then: the first occurrence keeps the plain id, later ones get -1, -2
    expected_ids = ['1-1', '1-2', '1-1-1', '1-1-2']
    assert citations_with_ids == [
        Citation(volume=1, number=number, id=citation_id)
        for number, citation_id in zip(numbers, expected_ids)
    ]
def test_insert_known_authors():
    """A known author is recognised in the raw text and the citation is enqueued.

    The raw citation is run through ``parse_citation`` and
    ``parse_fields_in_citation`` and then handed to ``insert_known_authors``
    together with the known author ``'Handžić, Adem'``. Exactly one citation
    is expected on the queue, with that author split into first/last name.
    """
    raw_text = ''.join((
        '12. Handžić, Adem ',
        'Problematika sakupljanja i izdavanja turskih '
        'istorij-skih izvora u radu Orijentalnog Instituta. ',
        'In: POF 20-21.1970/71 (1974).213-221. ',
        '[Die Problematik der Erfassung und Herausgabe der türkischen historischen Quellen ',
        'im Rahmen der Arbeiten des Orientalischen Instituts in Sarajevo, Jugoslavien.]',
    ))
    raw_citation = IntermediateCitation(volume=1, raw_text=raw_text)
    parsed_citation = parse_fields_in_citation(parse_citation(raw_citation))
    queue = Queue()
    insert_known_authors([parsed_citation], ['Handžić, Adem'], queue)

    # Bounded wait: if nothing was enqueued, fail with an Empty error instead
    # of blocking the whole test run forever on a plain ``get()``.
    assert queue.get(timeout=5) == Citation(
        volume=1,
        number=12,
        type=CitationType.ARTICLE,
        authors=[Person(last='Handžić', first='Adem', raw='Handžić, Adem')],
        title='Problematika sakupljanja i izdavanja turskih '
              'istorij-skih izvora u radu Orijentalnog Instituta',
        comments=[
            'Die Problematik der Erfassung und Herausgabe der türkischen historischen Quellen '
            'im Rahmen der Arbeiten des Orientalischen Instituts in Sarajevo, Jugoslavien'
        ],
        reviews=[],
        raw_text=raw_text,
        published_in={
            'journal': 'POF',
            'volumeStart': 20,
            'volumeEnd': 21,
            'yearStart': 1970,
            'yearEnd': 1971,
            'yearParentheses': 1974,
            'pageStart': 213,
            'pageEnd': 221,
            'type': 'journal',
            'raw': 'POF 20-21.1970/71 (1974).213-221'
        },
        ta_references=[],
        remaining_text='{{{ authors }}} {{{ title }}} {{{ in }}}'
    )
    # Nothing else may have been enqueued for the single input citation.
    assert queue.empty()
Esempio n. 10
0
 def _citation_as_dict(cls, citation: Citation):
     """Convert ``citation`` to a dict and add a ``fullyParsed`` entry.

     The ``type`` field is replaced by its ``.value`` (or ``None`` when the
     type is falsy) before conversion, and ``cls._to_dict`` is used as the
     ``dict_factory`` for ``asdict``.
     """
     type_value = citation.type.value if citation.type else None
     citation_with_plain_type = replace(citation, type=type_value)
     result = asdict(citation_with_plain_type, dict_factory=cls._to_dict)
     # Computed on the original citation, not on the copy with the plain type.
     result['fullyParsed'] = citation.fully_parsed()
     return result