def test_subjects_for_work(self):
    """
    Run sample 650 subfield lists through subjects_for_work().

    Each case pairs a list of (subfield code, value) tuples with the dict
    the function is expected to return for a mock record holding that
    single 650 field.
    """
    cases = [
        (
            [('a', 'Authors, American'), ('y', '19th century'), ('x', 'Biography.')],
            {
                'subject_times': ['19th century'],
                'subjects': ['American Authors', 'Biography'],
            },
        ),
        (
            [('a', 'Western stories'), ('x', 'History and criticism.')],
            {
                'subjects': ['Western stories', 'History and criticism'],
            },
        ),
        # 'United States -- History -- Revolution, 1775-1783 -- Influence.'
        # TODO: this expectation does not capture the intent or ordering
        # of the original MARC, investigate x subfield!
        (
            [
                ('a', 'United States'),
                ('x', 'History'),
                ('y', 'Revolution, 1775-1783'),
                ('x', 'Influence.'),
            ],
            {
                'subject_times': ['Revolution, 1775-1783'],
                'subjects': ['United States', 'Influence', 'History'],
            },
        ),
        # 'West Indies, British -- History -- 18th century.'
        (
            [('a', 'West Indies, British'), ('x', 'History'), ('y', '18th century.')],
            {
                'subject_times': ['18th century'],
                'subjects': ['British West Indies', 'History'],
            },
        ),
        # 'Great Britain -- Relations -- West Indies, British.'
        (
            [('a', 'Great Britain'), ('x', 'Relations'), ('z', 'West Indies, British.')],
            {
                'subject_places': ['British West Indies'],
                'subjects': ['Great Britain', 'Relations'],
            },
        ),
        # 'West Indies, British -- Relations -- Great Britain.'
        (
            [('a', 'West Indies, British'), ('x', 'Relations'), ('z', 'Great Britain.')],
            {
                'subject_places': ['Great Britain'],
                'subjects': ['British West Indies', 'Relations'],
            },
        ),
    ]
    for subfields, expected in cases:
        actual = subjects_for_work(MockRecord('650', subfields))
        self.assertEqual(expected, actual)
def test_subjects_for_work(self):
    """
    Table-driven check of subjects_for_work() on mock 650 fields.

    Every entry is (subfields, expected): the subfields are wrapped in a
    MockRecord tagged '650' and the resulting dict must equal `expected`.
    """
    authors_american = (
        [('a', 'Authors, American'), ('y', '19th century'), ('x', 'Biography.')],
        {'subject_times': ['19th century'],
         'subjects': ['American Authors', 'Biography']},
    )
    western_stories = (
        [('a', 'Western stories'), ('x', 'History and criticism.')],
        {'subjects': ['Western stories', 'History and criticism']},
    )
    # 'United States -- History -- Revolution, 1775-1783 -- Influence.'
    # TODO: this expectation does not capture the intent or ordering of
    # the original MARC, investigate x subfield!
    us_revolution = (
        [('a', 'United States'), ('x', 'History'),
         ('y', 'Revolution, 1775-1783'), ('x', 'Influence.')],
        {'subject_times': ['Revolution, 1775-1783'],
         'subjects': ['United States', 'Influence', 'History']},
    )
    # 'West Indies, British -- History -- 18th century.'
    west_indies_history = (
        [('a', 'West Indies, British'), ('x', 'History'), ('y', '18th century.')],
        {'subject_times': ['18th century'],
         'subjects': ['British West Indies', 'History']},
    )
    # 'Great Britain -- Relations -- West Indies, British.'
    britain_relations = (
        [('a', 'Great Britain'), ('x', 'Relations'), ('z', 'West Indies, British.')],
        {'subject_places': ['British West Indies'],
         'subjects': ['Great Britain', 'Relations']},
    )
    # 'West Indies, British -- Relations -- Great Britain.'
    west_indies_relations = (
        [('a', 'West Indies, British'), ('x', 'Relations'), ('z', 'Great Britain.')],
        {'subject_places': ['Great Britain'],
         'subjects': ['British West Indies', 'Relations']},
    )

    samples = [
        authors_american,
        western_stories,
        us_revolution,
        west_indies_history,
        britain_relations,
        west_indies_relations,
    ]
    for subfields, expected in samples:
        self.assertEqual(expected, subjects_for_work(MockRecord('650', subfields)))
def _parse_008(raw_008):
    """
    Pull edition attributes out of the fixed-width 008 control field.

    :param str raw_008: content of the record's 008 field
    :rtype: dict
    :return: any of publish_date, copyright_date, publish_country, languages
    :raises BadMARC: if the field is empty
    """
    f = re_bad_char.sub(' ', raw_008)
    if not f:
        raise BadMARC("'008' field must not be blank")

    found = {}

    publish_date = f[7:11]
    if publish_date.isdigit() and publish_date != '0000':
        found["publish_date"] = publish_date

    # Slice instead of indexing: every other access here tolerates a
    # truncated field, so position 6 should not raise IndexError either.
    if f[6:7] == 't':
        found["copyright_date"] = f[11:15]

    # Blank codes are detected with strip() rather than by comparing to a
    # literal of one particular width, so all-blank and truncated slices
    # are both skipped instead of being stored as empty/whitespace values.
    publish_country = f[15:18]
    if publish_country.strip() and publish_country not in ('|||', '\x01\x01\x01', '???'):
        found["publish_country"] = publish_country.strip()

    lang = f[35:38]
    if lang.strip() and lang not in ('|||', '???', 'zxx'):
        # diebrokeradical400poll
        # 'eng' found one position early (34-36) is accepted as English.
        if f[34:37].lower() == 'eng':
            lang = 'eng'
        else:
            lang = lang.lower()
        found['languages'] = [lang_map.get(lang, lang)]

    return found


def read_edition(rec):
    """
    Converts MARC record object into a dict representation of an edition
    suitable for importing into Open Library.

    :param (MarcBinary | MarcXml) rec:
    :rtype: dict
    :return: Edition representation
    :raises BadMARC: if the 008 field is present but empty
    :raises NoTitle: if no title is found and there is no work title to use
    """
    handle_missing_008 = True
    rec.build_fields(want)
    edition = {}

    tag_008 = rec.get_fields('008')
    if not tag_008 and not handle_missing_008:
        raise BadMARC("single '008' field required")

    if len(tag_008) > 1:
        # Several 008 fields: prefer the ones with the standard 40-character
        # length, then keep the candidate containing the fewest spaces.
        full_length = [field for field in tag_008 if len(field) == 40]
        candidates = full_length or tag_008
        tag_008 = [min(candidates, key=lambda field: field.count(' '))]

    if len(tag_008) == 1:
        edition.update(_parse_008(tag_008[0]))
    else:
        assert handle_missing_008
        # No 008 field: take language and date from the other fields.
        # NOTE(review): these two fallbacks are taken to belong to this
        # else branch -- confirm against the original layout.
        update_edition(rec, edition, read_languages, 'languages')
        update_edition(rec, edition, read_pub_date, 'publish_date')

    # Each reader is applied through update_edition for its edition key;
    # the order is unchanged from the original sequence of calls.
    readers = (
        (read_lccn, 'lccn'),
        (read_dnb, 'identifiers'),
        (read_authors, 'authors'),
        (read_oclc, 'oclc_numbers'),
        (read_lc_classification, 'lc_classifications'),
        (read_dewey, 'dewey_decimal_class'),
        (read_work_titles, 'work_titles'),
        (read_other_titles, 'other_titles'),
        (read_edition_name, 'edition_name'),
        (read_series, 'series'),
        (read_notes, 'notes'),
        (read_description, 'description'),
        (read_location, 'location'),
        (read_toc, 'table_of_contents'),
        (read_url, 'links'),
    )
    for reader, field_name in readers:
        update_edition(rec, edition, reader, field_name)

    edition.update(read_contributions(rec))
    edition.update(subjects_for_work(rec))

    try:
        edition.update(read_title(rec))
    except NoTitle:
        # Fall back to the single work title, if there is one.
        if 'work_titles' not in edition:
            raise
        assert len(edition['work_titles']) == 1
        edition['title'] = edition['work_titles'][0]
        del edition['work_titles']

    for func in (read_publisher, read_isbn, read_pagination):
        v = func(rec)
        if v:
            edition.update(v)

    return edition