def test_extracts_authors_from_multiple_citations():
    """Authors spread over several citations are all reported as known authors."""
    # given
    raw_names = ('Ozkut Korkmaz', 'Smith, Jordan')
    citations = [Citation(authors=[Person(raw=raw_name)]) for raw_name in raw_names]

    # when
    known_authors = extract_known_authors(citations)

    # then
    expected_authors = {'Ozkut Korkmaz', 'Smith, Jordan'}
    assert known_authors == expected_authors
def test_parse_citation_fields():
    """Parsing the fields of the CITATION fixture yields the expected Citation."""
    actual = parse_fields_in_citation(CITATION)

    expected_editors = [
        Person(first='Klaus', last='Kreiser', raw='Klaus Kreiser'),
        Person(first='Werner', last='Diem', raw='Werner Diem'),
        Person(first='Hans', middle='Georg', last='Majer', raw='Hans Georg Majer'),
    ]
    expected_authors = [Person(first='L.', last='Bazin', raw='Bazin, L.')]
    expected_raw_text = (
        '1. Lexikon der islamischen Welt. '
        'Klaus Kreiser, Werner Diem, Hans Georg Majer ed. '
        '3 Bde., Stuttgart, 1974 (Urban-Taschenbücher, 200/1-3).'
    )
    expected_remaining_text = (
        '{{{ title }}}. {{{ editors }}} {{{ number_of_volumes }}} '
        '{{{ location }}} {{{ date_published }}}{{{ series }}}.'
    )
    expected = Citation(
        volume=1,
        number=1,
        type=CitationType.COLLECTION,
        title='Lexikon der islamischen Welt',
        editors=expected_editors,
        keywords=[{'raw': 'A'}, {'raw': 'B'}],
        reviews=[],
        comments=[],
        date_published={'year': 1974},
        location='Stuttgart',
        series='Urban-Taschenbücher, 200/1-3',
        number_of_volumes='3',
        authors=expected_authors,
        raw_text=expected_raw_text,
        remaining_text=expected_remaining_text,
    )

    assert actual == expected
def parse_fields_in_citation(intermediate: IntermediateCitation) -> Citation:
    """Turn an ``IntermediateCitation`` into a ``Citation`` by parsing each field.

    Reviews are gathered from three places, in this order: the single
    ``comment``, the ``amendments``, and the dedicated ``reviews`` field.

    :param intermediate: the citation whose raw fields are to be parsed
    :return: a ``Citation`` built from the parsed field values
    """
    # The lone comment is wrapped in a list so it can go through the same
    # helper that handles the amendments.
    collected_reviews, parsed_comments = parse_amendments_or_comments(
        [intermediate.comment])
    amendment_reviews, parsed_amendments = parse_amendments_or_comments(
        intermediate.amendments)

    collected_reviews.extend(amendment_reviews)
    collected_reviews.extend(parse_reviews(intermediate.reviews))

    # Entries are evaluated top to bottom, so the parsers run in this order.
    field_values = {
        'volume': intermediate.volume,
        'number': int(intermediate.number),
        'type': intermediate.type,
        'title': intermediate.title,
        'location': intermediate.location,
        'series': intermediate.series,
        'keywords': [{'raw': raw_keyword} for raw_keyword in intermediate.keywords],
        'number_of_volumes': intermediate.number_of_volumes,
        'number_of_pages': intermediate.number_of_pages,
        'authors': parse_authors(intermediate.authors),
        'editors': parse_editors_or_translators(intermediate.editors),
        'translators': parse_editors_or_translators(intermediate.translators),
        'comments': parsed_comments,
        'reviews': collected_reviews,
        'published_in': parse_reference(intermediate.published_in),
        'amendments': parsed_amendments,
        'date_published': parse_date_published(intermediate.date_published),
        'raw_text': intermediate.raw_text,
        'material': parse_material(intermediate.material),
        'date': parse_date(intermediate.date),
        'ta_references': parse_ta_references(intermediate.ta_references),
        'remaining_text': intermediate.remaining_text,
    }
    return Citation(**field_values)
def test_normalize_keywords_with_super_keyword():
    """Keyword 'AB' is normalized and carries keyword 'A' as its ``super`` entry."""
    keyword_mapping = {
        'A': {'de': 'Allgemeines', 'en': 'General'},
        'AB': {'de': 'Spezielles', 'en': 'Specific'},
    }
    raw_citation = Citation(keywords=[{'raw': 'AB. IRGENDWAS'}])

    normalized = normalize_keywords_for_citation(raw_citation, keyword_mapping)

    expected_super = {
        'code': 'A',
        'nameDE': 'Allgemeines',
        'nameEN': 'General',
        'raw': None,
        'super': None,
    }
    expected_keyword = {
        'code': 'AB',
        'nameDE': 'Spezielles',
        'nameEN': 'Specific',
        'raw': 'AB. IRGENDWAS',
        'super': expected_super,
    }
    assert normalized.keywords == [expected_keyword]
def test_extracts_nothing_if_no_authors_present():
    """A citation with an empty author list yields no known authors."""
    # given
    citation_without_authors = Citation(authors=[])
    citations = [citation_without_authors]

    # when
    known_authors = extract_known_authors(citations)

    # then
    author_count = len(known_authors)
    assert author_count == 0
def test_extracts_single_author_from_one_citation():
    """A single citation with one author yields exactly that author's raw name."""
    # given
    only_author = Person(raw='Smith, Jordan')
    citations = [Citation(authors=[only_author])]

    # when
    known_authors = extract_known_authors(citations)

    # then
    expected_authors = {'Smith, Jordan'}
    assert known_authors == expected_authors
def test_normalize_keywords():
    """Keyword 'A' is enriched with its code and names and has no ``super`` entry."""
    keyword_mapping = {'A': {'de': 'Allgemeines', 'en': 'General'}}
    raw_citation = Citation(keywords=[{'raw': 'A. ASDFERAWER'}])

    normalized = normalize_keywords_for_citation(raw_citation, keyword_mapping)

    expected_keyword = {
        'code': 'A',
        'nameDE': 'Allgemeines',
        'nameEN': 'General',
        'raw': 'A. ASDFERAWER',
        'super': None,
    }
    assert normalized.keywords == [expected_keyword]
def test_assign_duplicate():
    """Citations repeating a volume/number pair get a counter appended to their id."""
    # given
    numbers = (1, 2, 1, 1)
    citations = [Citation(volume=1, number=number) for number in numbers]

    # when
    citations_with_ids = list(assign_citation_ids(citations))

    # then
    expected_ids = ('1-1', '1-2', '1-1-1', '1-1-2')
    expected = [
        Citation(volume=1, number=number, id=expected_id)
        for number, expected_id in zip(numbers, expected_ids)
    ]
    assert citations_with_ids == expected
def test_insert_known_authors():
    """Inserting a known author puts exactly one fully populated Citation on the queue.

    The raw citation text is run through ``parse_citation`` and
    ``parse_fields_in_citation`` before ``insert_known_authors`` is called
    with the single known author 'Handžić, Adem'.
    """
    raw_text = ''.join((
        '12. Handžić, Adem ',
        'Problematika sakupljanja i izdavanja turskih '
        'istorij-skih izvora u radu Orijentalnog Instituta. ',
        'In: POF 20-21.1970/71 (1974).213-221. ',
        '[Die Problematik der Erfassung und Herausgabe der türkischen historischen Quellen ',
        'im Rahmen der Arbeiten des Orientalischen Instituts in Sarajevo, Jugoslavien.]',
    ))
    raw_citation = IntermediateCitation(volume=1, raw_text=raw_text)
    parsed_citation = parse_fields_in_citation(parse_citation(raw_citation))
    queue = Queue()

    insert_known_authors([parsed_citation], ['Handžić, Adem'], queue)

    # Bounded wait: a bare get() blocks forever when nothing was enqueued,
    # which would hang the test run instead of failing this test.
    assert queue.get(timeout=5) == Citation(
        volume=1,
        number=12,
        type=CitationType.ARTICLE,
        authors=[Person(last='Handžić', first='Adem', raw='Handžić, Adem')],
        title='Problematika sakupljanja i izdavanja turskih '
              'istorij-skih izvora u radu Orijentalnog Instituta',
        comments=[
            'Die Problematik der Erfassung und Herausgabe der türkischen historischen Quellen '
            'im Rahmen der Arbeiten des Orientalischen Instituts in Sarajevo, Jugoslavien'
        ],
        reviews=[],
        raw_text=raw_text,
        published_in={
            'journal': 'POF',
            'volumeStart': 20,
            'volumeEnd': 21,
            'yearStart': 1970,
            'yearEnd': 1971,
            'yearParentheses': 1974,
            'pageStart': 213,
            'pageEnd': 221,
            'type': 'journal',
            'raw': 'POF 20-21.1970/71 (1974).213-221'
        },
        ta_references=[],
        remaining_text='{{{ authors }}} {{{ title }}} {{{ in }}}'
    )
    # Only one citation went in, so nothing else may be left on the queue.
    assert queue.empty()
def _citation_as_dict(cls, citation: Citation):
    """Convert a citation into a dict and add a ``fullyParsed`` entry.

    :param citation: the citation to convert
    :return: the dict produced by ``asdict`` with ``cls._to_dict`` as the
        dict factory, extended by the ``fullyParsed`` key
    """
    # Store the type's ``.value`` (or None when no type is set) on a copy,
    # leaving the citation passed in untouched.
    type_value = citation.type.value if citation.type else None
    citation_copy = replace(citation, type=type_value)

    as_dict = asdict(citation_copy, dict_factory=cls._to_dict)
    as_dict['fullyParsed'] = citation.fully_parsed()
    return as_dict