Beispiel #1
0
 def test_subjects_for_work(self):
     """Compare subjects_for_work() results against expected dicts.

     Every case pairs a list of ``(subfield code, value)`` tuples, wrapped
     in a ``MockRecord`` for tag '650', with the dict that
     ``subjects_for_work`` is expected to return for that record.
     """
     cases = [
         # 'Authors, American -- 19th century -- Biography.'
         (
             [
                 ('a', 'Authors, American'),
                 ('y', '19th century'),
                 ('x', 'Biography.'),
             ],
             {
                 'subject_times': ['19th century'],
                 'subjects': ['American Authors', 'Biography'],
             },
         ),
         # 'Western stories -- History and criticism.'
         (
             [
                 ('a', 'Western stories'),
                 ('x', 'History and criticism.'),
             ],
             {
                 'subjects': ['Western stories', 'History and criticism'],
             },
         ),
         # 'United States -- History -- Revolution, 1775-1783 -- Influence.'
         # TODO: this expectation does not capture the intent or ordering
         # of the original MARC, investigate x subfield!
         (
             [
                 ('a', 'United States'),
                 ('x', 'History'),
                 ('y', 'Revolution, 1775-1783'),
                 ('x', 'Influence.'),
             ],
             {
                 'subject_times': ['Revolution, 1775-1783'],
                 'subjects': ['United States', 'Influence', 'History'],
             },
         ),
         # 'West Indies, British -- History -- 18th century.'
         (
             [
                 ('a', 'West Indies, British'),
                 ('x', 'History'),
                 ('y', '18th century.'),
             ],
             {
                 'subject_times': ['18th century'],
                 'subjects': ['British West Indies', 'History'],
             },
         ),
         # 'Great Britain -- Relations -- West Indies, British.'
         (
             [
                 ('a', 'Great Britain'),
                 ('x', 'Relations'),
                 ('z', 'West Indies, British.'),
             ],
             {
                 'subject_places': ['British West Indies'],
                 'subjects': ['Great Britain', 'Relations'],
             },
         ),
         # 'West Indies, British -- Relations -- Great Britain.'
         (
             [
                 ('a', 'West Indies, British'),
                 ('x', 'Relations'),
                 ('z', 'Great Britain.'),
             ],
             {
                 'subject_places': ['Great Britain'],
                 'subjects': ['British West Indies', 'Relations'],
             },
         ),
     ]
     for subfields, expected in cases:
         record = MockRecord('650', subfields)
         actual = subjects_for_work(record)
         self.assertEqual(expected, actual)
Beispiel #2
0
 def test_subjects_for_work(self):
     """Run subjects_for_work() over sample '650' subfield lists.

     The subfield lists and the expected result dicts are kept in two
     parallel lists; each subfield list is wrapped in a ``MockRecord``
     and the returned dict must equal the expectation at the same index.
     """
     subfield_sets = [
         # 'Authors, American -- 19th century -- Biography.'
         [('a', 'Authors, American'), ('y', '19th century'), ('x', 'Biography.')],
         # 'Western stories -- History and criticism.'
         [('a', 'Western stories'), ('x', 'History and criticism.')],
         # 'United States -- History -- Revolution, 1775-1783 -- Influence.'
         [('a', 'United States'), ('x', 'History'),
          ('y', 'Revolution, 1775-1783'), ('x', 'Influence.')],
         # 'West Indies, British -- History -- 18th century.'
         [('a', 'West Indies, British'), ('x', 'History'), ('y', '18th century.')],
         # 'Great Britain -- Relations -- West Indies, British.'
         [('a', 'Great Britain'), ('x', 'Relations'), ('z', 'West Indies, British.')],
         # 'West Indies, British -- Relations -- Great Britain.'
         [('a', 'West Indies, British'), ('x', 'Relations'), ('z', 'Great Britain.')],
     ]
     expectations = [
         dict(subject_times=['19th century'],
              subjects=['American Authors', 'Biography']),
         dict(subjects=['Western stories', 'History and criticism']),
         # TODO: this expectation does not capture the intent or ordering
         # of the original MARC, investigate x subfield!
         dict(subject_times=['Revolution, 1775-1783'],
              subjects=['United States', 'Influence', 'History']),
         dict(subject_times=['18th century'],
              subjects=['British West Indies', 'History']),
         dict(subject_places=['British West Indies'],
              subjects=['Great Britain', 'Relations']),
         dict(subject_places=['Great Britain'],
              subjects=['British West Indies', 'Relations']),
     ]
     for subfields, expected in zip(subfield_sets, expectations):
         result = subjects_for_work(MockRecord('650', subfields))
         self.assertEqual(expected, result)
Beispiel #3
0
def read_edition(rec):
    """
    Converts MARC record object into a dict representation of an edition
    suitable for importing into Open Library.

    When an '008' control field is present, publish date, copyright date,
    publish country and language are sliced out of it by position. When it
    is absent, languages and publish date are collected through
    ``read_languages`` and ``read_pub_date`` instead. The remaining keys
    are filled in by the other ``read_*`` helpers.

    :param (MarcBinary | MarcXml) rec:
    :rtype: dict
    :return: Edition representation
    :raises BadMARC: if the chosen '008' field is empty once bad
        characters have been replaced
    :raises NoTitle: re-raised from ``read_title`` when no 'work_titles'
        entry was collected to fall back on
    """
    # Missing 008 fields are tolerated; the flag is kept so the strict
    # behaviour can be restored by flipping it.
    handle_missing_008 = True
    rec.build_fields(want)
    edition = {}
    tag_008 = rec.get_fields('008')
    if len(tag_008) == 0 and not handle_missing_008:
        raise BadMARC("single '008' field required")
    if len(tag_008) > 1:
        # Several 008 fields: restrict to those of length 40 when any
        # exist, then keep the single candidate with the fewest spaces.
        len_40 = [f for f in tag_008 if len(f) == 40]
        if len_40:
            tag_008 = len_40
        tag_008 = [min(tag_008, key=lambda f: f.count(' '))]
    if len(tag_008) == 1:
        # The field is not required to be exactly 40 characters long, so
        # every access below must cope with a truncated value.
        f = re_bad_char.sub(' ', tag_008[0])
        if not f:
            raise BadMARC("'008' field must not be blank")

        publish_date = f[7:11]
        if publish_date.isdigit() and publish_date != '0000':
            edition["publish_date"] = publish_date

        # Slice instead of index: f[6] would raise IndexError on a field
        # shorter than 7 characters, while the slice just yields ''.
        if f[6:7] == 't':
            edition["copyright_date"] = f[11:15]

        publish_country = f[15:18]
        # An empty slice (field shorter than 16 characters) carries no
        # country and must not be stored as ''.
        if publish_country and publish_country not in (
                '|||', '   ', '\x01\x01\x01', '???'):
            edition["publish_country"] = publish_country.strip()

        lang = f[35:38]
        if lang not in ('   ', '|||', '', '???', 'zxx'):
            # diebrokeradical400poll
            # NOTE(review): presumably the identifier of a record whose
            # language code sits one position early -- confirm.
            if f[34:37].lower() == 'eng':
                lang = 'eng'
            else:
                lang = lang.lower()
            edition['languages'] = [lang_map.get(lang, lang)]
    else:
        # Only reachable with no 008 field at all: the multi-field case
        # above always narrows the list down to exactly one entry.
        assert handle_missing_008
        update_edition(rec, edition, read_languages, 'languages')
        update_edition(rec, edition, read_pub_date, 'publish_date')

    update_edition(rec, edition, read_lccn, 'lccn')
    update_edition(rec, edition, read_dnb, 'identifiers')
    update_edition(rec, edition, read_authors, 'authors')
    update_edition(rec, edition, read_oclc, 'oclc_numbers')
    update_edition(rec, edition, read_lc_classification, 'lc_classifications')
    update_edition(rec, edition, read_dewey, 'dewey_decimal_class')
    update_edition(rec, edition, read_work_titles, 'work_titles')
    update_edition(rec, edition, read_other_titles, 'other_titles')
    update_edition(rec, edition, read_edition_name, 'edition_name')
    update_edition(rec, edition, read_series, 'series')
    update_edition(rec, edition, read_notes, 'notes')
    update_edition(rec, edition, read_description, 'description')
    update_edition(rec, edition, read_location, 'location')
    update_edition(rec, edition, read_toc, 'table_of_contents')
    update_edition(rec, edition, read_url, 'links')

    edition.update(read_contributions(rec))
    edition.update(subjects_for_work(rec))

    try:
        edition.update(read_title(rec))
    except NoTitle:
        # Without a title, promote the single work title to 'title';
        # if there is none, let NoTitle propagate to the caller.
        if 'work_titles' in edition:
            assert len(edition['work_titles']) == 1
            edition['title'] = edition['work_titles'][0]
            del edition['work_titles']
        else:
            raise

    # These readers return a dict (or a falsy value when nothing was
    # found) that is merged into the edition as-is.
    for func in (read_publisher, read_isbn, read_pagination):
        v = func(rec)
        if v:
            edition.update(v)
    return edition