def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition record
    suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif root.tag in ('{http://www.loc.gov/MARC21/slim}record',
                          '{http://www.loc.gov/MARC21/slim}collection'):
            # Fix: previously this branch matched only }record, so the
            # }collection unwrap below was unreachable and collection-wrapped
            # MARCXML fell through to "unrecognized XML format".
            # A MARCXML <collection> wraps <record> elements; use the first.
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
            format = 'marcxml'
        else:
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
        format = 'json'
    else:  # binary MARC
        # The MARC leader's first MARC_LENGTH_POS bytes declare the record
        # length; a mismatch means this is not a usable MARC record.
        if len(data) < MARC_LENGTH_POS or len(data) != int(data[:MARC_LENGTH_POS]):
            raise DataError('no-marc-record')
        rec = MarcBinary(data)
        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        format = 'marc'
    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), format
def test_from_marc_fields(self, mock_site, add_languages):
    """Verify the core MARC fields survive a load() round trip."""
    ia = 'isbn_9781419594069'
    marc = MarcBinary(open_test_data(ia + '_meta.mrc').read())
    edition_rec = read_edition(marc)
    edition_rec['source_records'] = ['ia:' + ia]
    reply = load(edition_rec)
    assert reply['success'] is True
    # Author comes from MARC field 100.
    assert reply['authors'][0]['name'] == 'Adam Weiner'
    edition = mock_site.get(reply['edition']['key'])
    # Publish place, publisher & publish date come from 260 $a, $b, $c.
    assert edition['publishers'][0] == 'Kaplan Publishing'
    assert edition['publish_date'] == '2007'
    assert edition['publish_places'][0] == 'New York'
    # Pagination comes from field 300.
    assert edition['number_of_pages'] == 264
    assert edition['pagination'] == 'viii, 264 p.'
    # Eight subjects, from repeated 650 fields.
    expected_subjects = [
        'Action and adventure films',
        'Cinematography',
        'Miscellanea',
        'Physics',
        'Physics in motion pictures',
        'Popular works',
        'Science fiction films',
        'Special effects',
    ]
    assert len(edition['subjects']) == 8
    assert sorted(edition['subjects']) == expected_subjects
    # Description comes from field 520, on both the edition and the work.
    desc = (
        'Explains the basic laws of physics, covering such topics '
        'as mechanics, forces, and energy, while deconstructing '
        'famous scenes and stunts from motion pictures, including '
        '"Apollo 13" and "Titanic," to determine if they are possible.')
    assert isinstance(edition['description'], Text)
    assert edition['description'] == desc
    work = mock_site.get(reply['work']['key'])
    assert isinstance(work['description'], Text)
    assert work['description'] == desc
def test_binary(self, i):
    """Parse a binary MARC test record and compare the resulting edition
    dict against the stored JSON expectations file of the same name."""
    expect_filename = "%s/bin_expect/%s" % (test_data, i)
    # Fix: use context managers so file handles are closed promptly
    # (the originals leaked the handles from open(...).read()).
    with open("%s/bin_input/%s" % (test_data, i)) as f:
        data = f.read()
    if len(data) != int(data[:5]):
        #TODO: Why are we fixing this in test expectations? Investigate.
        # affects histoirereligieu05cr_meta.mrc and zweibchersatir01horauoft_meta.mrc
        data = data.decode('utf-8').encode('raw_unicode_escape')
        assert len(data) == int(data[:5])
    rec = MarcBinary(data)
    edition_marc_bin = read_edition(rec)
    assert edition_marc_bin
    with open(expect_filename) as f:
        j = simplejson.load(f)
    assert j, "Unable to open test data: %s" % expect_filename
    assert sorted(edition_marc_bin.keys()) == sorted(j.keys())
    for k in edition_marc_bin.keys():
        if isinstance(j[k], list):
            # Compare list items pairwise first for clearer failure output.
            for item1, item2 in zip(edition_marc_bin[k], j[k]):
                assert item1 == item2
        assert edition_marc_bin[k] == j[k]
    assert edition_marc_bin == j
def get_work_subjects(w, do_get_mc=True):
    """
    Collect subjects for a work by reading the MARC source records of its
    editions.

    :param dict w: work dict with an 'editions' list
    :param bool do_get_mc: when True, editions without usable source_records
        fall back to a marc-cache lookup via get_mc()
    :return: combined subjects (see combine_subjects)
    """
    found = set()
    for e in w['editions']:
        sr = e.get('source_records', [])
        if sr:
            for i in sr:
                if i.endswith('initial import'):
                    bad_source_record(e, i)
                    continue
                if i.startswith(('ia:', 'marc:')):
                    found.add(i)
        else:
            mc = None
            if do_get_mc:
                # assumes e['key'] always matches re_edition_key — TODO confirm
                m = re_edition_key.match(e['key'])
                mc = get_mc('/b/' + m.group(1))
            if mc:
                if mc.endswith('initial import'):
                    bad_source_record(e, mc)
                    continue
                if not mc.startswith('amazon:') and not re_ia_marc.match(mc):
                    found.add('marc:' + mc)
    subjects = []
    for sr in found:
        if sr.startswith('marc:ia:'):
            subjects.append(get_subjects_from_ia(sr[8:]))
        elif sr.startswith('marc:'):
            loc = sr[5:]
            data = get_from_archive(loc)
            rec = MarcBinary(data)
            try:
                subjects.append(read_subjects(rec))
            except Exception:
                # Fix: narrowed the bare except (which also swallowed
                # KeyboardInterrupt) and repaired the 2to3 artifacts that
                # printed tuples like ('bad MARC:', loc).
                print('bad MARC:', loc)
                print('data:', repr(data))
                raise
        else:
            assert sr.startswith('ia:')
            subjects.append(get_subjects_from_ia(sr[3:]))
    return combine_subjects(subjects)
def test_extra_author(mock_site):
    """Loading the same MARC record twice must not create a duplicate
    author: the record should match the pre-saved Bancroft author/work."""
    add_languages(mock_site)
    # Pre-existing author the MARC import should match.
    mock_site.save({
        "name": "Hubert Howe Bancroft",
        "death_date": "1918.",
        "alternate_names": ["HUBERT HOWE BANCROFT", "Hubert Howe Bandcroft"],
        "key": "/authors/OL563100A",
        "birth_date": "1832",
        "personal_name": "Hubert Howe Bancroft",
        "type": {"key": "/type/author"},
    })
    # Pre-existing work attributed to that author.
    mock_site.save({
        "title": "The works of Hubert Howe Bancroft",
        "covers": [6060295, 5551343],
        "first_sentence": {"type": "/type/text", "value": "When it first became known to Europe that a new continent had been discovered, the wise men, philosophers, and especially the learned ecclesiastics, were sorely perplexed to account for such a discovery."},
        "subject_places": ["Alaska", "America", "Arizona", "British Columbia", "California", "Canadian Northwest", "Central America", "Colorado", "Idaho", "Mexico", "Montana", "Nevada", "New Mexico", "Northwest Coast of North America", "Northwest boundary of the United States", "Oregon", "Pacific States", "Texas", "United States", "Utah", "Washington (State)", "West (U.S.)", "Wyoming"],
        "excerpts": [{"excerpt": "When it first became known to Europe that a new continent had been discovered, the wise men, philosophers, and especially the learned ecclesiastics, were sorely perplexed to account for such a discovery."}],
        "first_publish_date": "1882",
        "key": "/works/OL3421434W",
        "authors": [{"type": {"key": "/type/author_role"}, "author": {"key": "/authors/OL563100A"}}],
        "subject_times": ["1540-1810", "1810-1821", "1821-1861", "1821-1951", "1846-1850", "1850-1950", "1859-", "1859-1950", "1867-1910", "1867-1959", "1871-1903", "Civil War, 1861-1865", "Conquest, 1519-1540", "European intervention, 1861-1867", "Spanish colony, 1540-1810", "To 1519", "To 1821", "To 1846", "To 1859", "To 1867", "To 1871", "To 1889", "To 1912", "Wars of Independence, 1810-1821"],
        "type": {"key": "/type/work"},
        "subjects": ["Antiquities", "Archaeology", "Autobiography", "Bibliography", "California Civil War, 1861-1865", "Comparative Literature", "Comparative civilization", "Courts", "Description and travel", "Discovery and exploration", "Early accounts to 1600", "English essays", "Ethnology", "Foreign relations", "Gold discoveries", "Historians", "History", "Indians", "Indians of Central America", "Indians of Mexico", "Indians of North America", "Languages", "Law", "Mayas", "Mexican War, 1846-1848", "Nahuas", "Nahuatl language", "Oregon question", "Political aspects of Law", "Politics and government", "Religion and mythology", "Religions", "Social life and customs", "Spanish", "Vigilance committees", "Writing", "Zamorano 80", "Accessible book", "Protected DAISY"]
    })
    ia = 'workshuberthowe00racegoog'
    src = ia + '_meta.mrc'
    marc = MarcBinary(open_test_data(src).read())
    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]
    reply = load(rec)
    assert reply['success'] == True
    w = mock_site.get(reply['work']['key'])
    # Load the identical record a second time; the work must still resolve
    # (the test name says "extra author": the second load must not add one).
    reply = load(rec)
    assert reply['success'] == True
    w = mock_site.get(reply['work']['key'])
def parse_data(data):
    # Determine the format of POSTed data (RDF/OPDS/MARCXML, JSON, or
    # binary MARC), build an edition via the matching parser, and return
    # (edition dict, format name) — or (None, None) for unrecognized XML.
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        #print root.tag
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            # NOTE(review): this elif only matches }record, so the
            # }collection check below can never be true — collection-wrapped
            # MARCXML is not handled here; confirm whether that is intended.
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        #Marc Binary
        # The first 5 leader bytes declare the MARC record's total length.
        # NOTE(review): returning a JSON string here diverges from the
        # (dict, format) tuple returned on every other path — callers must
        # special-case it; confirm this is intentional.
        if len(data) != int(data[:5]):
            return json.dumps({'success': False, 'error': 'Bad MARC length'})
        rec = MarcBinary(data)
        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'
    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), format
def get_marc_record_from_ia(identifier):
    """Takes IA identifiers and returns MARC record instance.

    Prefers the item's MARCXML file; falls back to binary MARC when the
    XML is absent or malformed. Returns None when neither file yields a
    usable record.
    """
    metadata = ia.get_metadata(identifier)
    filenames = metadata['_filenames']
    item_base = base + "/" + identifier + "/"
    xml_name = identifier + "_marc.xml"
    bin_name = identifier + "_meta.mrc"
    # MARCXML, when present and starting with an XML declaration, wins.
    if xml_name in filenames:
        xml_data = urlopen_keep_trying(item_base + xml_name).read()
        if xml_data[:10].find('<?xml') != -1:
            return MarcXml(etree.fromstring(xml_data))
    # Otherwise use the binary record, if its declared length checks out.
    if bin_name in filenames:
        bin_data = urlopen_keep_trying(item_base + bin_name).read()
        if len(bin_data) == int(bin_data[:5]):
            return MarcBinary(bin_data)
def test_binary(self, i):
    """Parse a binary MARC test record and compare it against its JSON
    expectations file, generating a template file (and failing) when the
    expectations file is missing."""
    expect_filename = "%s/bin_expect/%s" % (test_data, i)
    # Fix: use context managers so file handles are closed promptly
    # (the originals leaked the handles from open(...)).
    with open("%s/bin_input/%s" % (test_data, i)) as f:
        data = f.read()
    if len(data) != int(data[:5]):
        #TODO: Why are we fixing this in test expectations? Investigate.
        # affects histoirereligieu05cr_meta.mrc and zweibchersatir01horauoft_meta.mrc
        data = data.decode('utf-8').encode('raw_unicode_escape')
        assert len(data) == int(data[:5])
    rec = MarcBinary(data)
    edition_marc_bin = read_edition(rec)
    assert edition_marc_bin
    if not os.path.exists(expect_filename):
        # Missing test expectations file. Create a template from the input,
        # but fail the current test.
        with open(expect_filename, 'w') as f:
            simplejson.dump(edition_marc_bin, f, indent=2)
        assert False, 'Expectations file %s not found: template generated in %s. Please review and commit this file.' % (expect_filename, '/bin_expect')
    with open(expect_filename) as f:
        j = simplejson.load(f)
    assert j, 'Unable to open test data: %s' % expect_filename
    assert sorted(edition_marc_bin.keys()) == sorted(j.keys()), 'Processed binary MARC fields do not match expectations in %s' % expect_filename
    for k in edition_marc_bin.keys():
        if isinstance(j[k], list):
            # Compare list items pairwise first for clearer failure output.
            for item1, item2 in zip(edition_marc_bin[k], j[k]):
                assert item1 == item2
        assert edition_marc_bin[k] == j[k], 'Processed binary MARC values do not match expectations in %s' % expect_filename
    assert edition_marc_bin == j
def test_binary(self, i):
    """Parse a binary MARC test record and compare it against its JSON
    expectations file, generating a template file (and failing) when the
    expectations file is missing."""
    expect_filename = '%s/bin_expect/%s' % (test_data, i)
    with open('%s/bin_input/%s' % (test_data, i), 'rb') as f:
        rec = MarcBinary(f.read())
    edition_marc_bin = read_edition(rec)
    assert edition_marc_bin
    if not os.path.exists(expect_filename):
        # Missing test expectations file. Create a template from the input,
        # but fail the current test.
        # Fix: close the template file via a context manager (was leaked).
        with open(expect_filename, 'w') as f:
            json.dump(edition_marc_bin, f, indent=2)
        assert False, 'Expectations file %s not found: template generated in %s. Please review and commit this file.' % (
            expect_filename, '/bin_expect')
    with open(expect_filename) as f:
        j = json.load(f)
    assert j, 'Unable to open test data: %s' % expect_filename
    assert sorted(edition_marc_bin) == sorted(j), (
        'Processed binary MARC fields do not match expectations in %s' % expect_filename)
    msg = ('Processed binary MARC values do not match expectations in %s'
           % expect_filename)
    for key, value in edition_marc_bin.items():
        # Fix: exclude str from the Iterable branch. Strings are Iterable,
        # and the length + per-item membership check below would accept
        # unequal anagram strings (e.g. 'ab' vs 'ba') as matching.
        if isinstance(value, Iterable) and not isinstance(value, str):
            # can not sort a list of dicts, so compare by length + membership
            assert len(value) == len(j[key]), msg
            assert all(item in value for item in j[key]), msg
        else:
            assert value == j[key], msg
def test_no_extra_author(mock_site, add_languages):
    """Importing a MARC record matching an existing edition/work must modify
    them in place without adding an extra author."""
    # Pre-existing author, work, and edition the import should match.
    author = {
        "name": "Paul Michael Boothe",
        "key": "/authors/OL1A",
        "type": {"key": "/type/author"},
    }
    mock_site.save(author)
    work = {
        "title": "A Separate Pension Plan for Alberta",
        "covers": [1644794],
        "key": "/works/OL1W",
        "authors": [{"type": "/type/author_role", "author": {"key": "/authors/OL1A"}}],
        "type": {"key": "/type/work"},
    }
    mock_site.save(work)
    edition = {
        "number_of_pages": 90,
        "subtitle": "Analysis and Discussion (Western Studies in Economic Policy, No. 5)",
        "weight": "6.2 ounces",
        "covers": [1644794],
        "latest_revision": 6,
        "title": "A Separate Pension Plan for Alberta",
        "languages": [{"key": "/languages/eng"}],
        "subjects": [
            "Economics", "Alberta", "Political Science / State & Local Government", "Government policy", "Old age pensions", "Pensions", "Social security"
        ],
        "type": {"key": "/type/edition"},
        "physical_dimensions": "9 x 6 x 0.2 inches",
        "publishers": ["The University of Alberta Press"],
        "physical_format": "Paperback",
        "key": "/books/OL1M",
        "authors": [{"key": "/authors/OL1A"}],
        "identifiers": {"goodreads": ["4340973"], "librarything": ["5580522"]},
        "isbn_13": ["9780888643513"],
        "isbn_10": ["0888643519"],
        "publish_date": "May 1, 2000",
        "works": [{"key": "/works/OL1W"}]
    }
    mock_site.save(edition)
    src = 'v39.i34.records.utf8--186503-1413'
    marc = MarcBinary(open_test_data(src).read())
    rec = read_edition(marc)
    rec['source_records'] = ['marc:' + src]
    reply = load(rec)
    assert reply['success'] is True
    # Both records should be updated, not re-created.
    assert reply['edition']['status'] == 'modified'
    assert reply['work']['status'] == 'modified'
    # No author entry in the reply means no author was created or touched.
    assert 'authors' not in reply
    assert reply['edition']['key'] == edition['key']
    assert reply['work']['key'] == work['key']
    e = mock_site.get(reply['edition']['key'])
    w = mock_site.get(reply['work']['key'])
    assert 'source_records' in e
    assert 'subjects' in w
    assert len(e['authors']) == 1
    assert len(w['authors']) == 1
def test_missing_source_records(mock_site, add_languages):
    """Importing a MARC record matching an edition that lacks
    source_records must add the source_records field to that edition."""
    mock_site.save({
        'key': '/authors/OL592898A',
        'name': 'Michael Robert Marrus',
        'personal_name': 'Michael Robert Marrus',
        'type': {'key': '/type/author'}
    })
    mock_site.save({
        'authors': [{'author': '/authors/OL592898A', 'type': {'key': '/type/author_role'}}],
        'key': '/works/OL16029710W',
        'subjects': [
            'Nuremberg Trial of Major German War Criminals, Nuremberg, Germany, 1945-1946',
            'Protected DAISY',
            'Lending library'
        ],
        'title': 'The Nuremberg war crimes trial, 1945-46',
        'type': {'key': '/type/work'},
    })
    # Note: this edition deliberately has no 'source_records' field.
    mock_site.save({
        "number_of_pages": 276,
        "subtitle": "a documentary history",
        "series": ["The Bedford series in history and culture"],
        "covers": [6649715, 3865334, 173632],
        "lc_classifications": ["D804.G42 N87 1997"],
        "ocaid": "nurembergwarcrim00marr",
        "contributions": ["Marrus, Michael Robert."],
        "uri_descriptions": ["Book review (H-Net)"],
        "title": "The Nuremberg war crimes trial, 1945-46",
        "languages": [{"key": "/languages/eng"}],
        "subjects": ["Nuremberg Trial of Major German War Criminals, Nuremberg, Germany, 1945-1946"],
        "publish_country": "mau",
        "by_statement": "[compiled by] Michael R. Marrus.",
        "type": {"key": "/type/edition"},
        "uris": ["http://www.h-net.org/review/hrev-a0a6c9-aa"],
        "publishers": ["Bedford Books"],
        "ia_box_id": ["IA127618"],
        "key": "/books/OL1023483M",
        "authors": [{"key": "/authors/OL592898A"}],
        "publish_places": ["Boston"],
        "pagination": "xi, 276 p. :",
        "lccn": ["96086777"],
        "notes": {
            "type": "/type/text",
            "value": "Includes bibliographical references (p. 262-268) and index."
        },
        "identifiers": {"goodreads": ["326638"], "librarything": ["1114474"]},
        "url": ["http://www.h-net.org/review/hrev-a0a6c9-aa"],
        "isbn_10": ["031216386X", "0312136919"],
        "publish_date": "1997",
        "works": [{"key": "/works/OL16029710W"}]
    })
    ia = 'nurembergwarcrim1997marr'
    src = ia + '_meta.mrc'
    marc = MarcBinary(open_test_data(src).read())
    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]
    reply = load(rec)
    assert reply['success'] is True
    # The matched edition must now carry the source_records field.
    e = mock_site.get(reply['edition']['key'])
    assert 'source_records' in e
def POST(self):
    """Import a book by Archive.org identifier (or by bulk-MARC locator
    ``ocaid/filename:offset:length``), returning a JSON status response."""
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')
    i = web.input()
    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'
    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier
    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # Fix: initialize next_data before the try block. The except clause
        # references it, and a MarcException raised by get_from_archive_bulk
        # (before next_data was assigned) caused a NameError instead of the
        # intended error response. Also made the regex a raw string.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)
        #TODO: Look up URN prefixes to support more sources, extend openlibrary/catalog/marc/sources?
        if ocaid == 'OpenLibraries-Trent-MARCs':
            prefix = 'trent'
            edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')]
        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)
    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")
    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        edition_data = self.get_ia_record(metadata)
        edition_data["openlibrary"] = metadata["openlibrary"]
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)
    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata, item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")
    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        self.reject_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s", identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")
    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def POST(self):
    """Import a book by Archive.org identifier (or by bulk-MARC locator
    ``ocaid/filename:offset:length``), returning a JSON status response."""
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')
    i = web.input()
    require_marc = not (i.get('require_marc') == 'false')
    force_import = i.get('force_import') == 'true'
    bulk_marc = i.get('bulk_marc') == 'true'
    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier
    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # Fix: initialize next_data before the try block. The except clause
        # references it, and a MarcException raised by get_from_archive_bulk
        # (before next_data was assigned) caused a NameError instead of the
        # intended error response.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(
                identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(
                identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = f"{identifier}: {str(e)}"
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid,
            filename,
            offset,
            actual_length,
        )
        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            force_import = True
            # id_location is of the form 'FIELD$SUBFIELD', e.g. '001$a'.
            id_field, id_subfield = local_id_type.id_location.split('$')

            def get_subfield(field, id_subfield):
                # Control fields come back as plain strings; data fields as
                # (tag, field) pairs with subfield accessors.
                if isinstance(field, str):
                    return field
                subfields = field[1].get_subfield_values(id_subfield)
                return subfields[0] if subfields else None

            _ids = [
                get_subfield(f, id_subfield)
                for f in rec.read_fields([id_field])
                if f and get_subfield(f, id_subfield)
            ]
            edition['local_id'] = [f'urn:{prefix}:{_id}' for _id in _ids]
        # Don't add the book if the MARC record is a non-monograph item,
        # unless it is a scanning partner record and/or force_import is set.
        if not force_import:
            try:
                raise_non_book_marc(rec, **next_data)
            except BookImportError as e:
                return self.error(e.error_code, e.error, **e.kwargs)
        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)
    try:
        return self.ia_import(identifier, require_marc=require_marc, force_import=force_import)
    except BookImportError as e:
        return self.error(e.error_code, e.error, **e.kwargs)
def test_raises_no_title(self):
    """A MARC record with no title field must raise NoTitle."""
    filename = "%s/bin_input/talis_no_title2.mrc" % test_data
    # Fix: open binary MARC in binary mode ('rb', as the other binary MARC
    # tests in this file do); text mode can mis-decode the record bytes.
    with open(filename, 'rb') as f:
        rec = MarcBinary(f.read())
    with pytest.raises(NoTitle):
        read_edition(rec)
def test_don_quixote(mock_site):
    """
    All of these items are by 'Miguel de Cervantes Saavedra',
    only one Author should be created. Some items have bad MARC length,
    others are missing binary MARC altogether and raise BadMARC exceptions.
    """
    # Skipped by default: fetches MARC records live from archive.org.
    pytest.skip("This test make live requests to archive.org")
    dq = [u'lifeexploitsofin01cerv', u'cu31924096224518',
        u'elingeniosedcrit04cerv', u'ingeniousgentlem01cervuoft',
        u'historyofingenio01cerv', u'lifeexploitsofin02cerviala',
        u'elingeniosohidal03cervuoft', u'nybc209000',
        u'elingeniosohidal11cerv', u'elingeniosohidal01cervuoft',
        u'elingeniosoh01cerv', u'donquixotedelama00cerviala',
        u'1896elingeniosohid02cerv', u'ingeniousgentlem04cervuoft',
        u'cu31924027656978', u'histoiredeladmir01cerv',
        u'donquijotedelama04cerv', u'cu31924027657075',
        u'donquixotedelama03cervuoft', u'aventurasdedonqu00cerv',
        u'p1elingeniosohid03cerv', u'geshikhefundonik01cervuoft',
        u'historyofvalorou02cerviala', u'ingeniousgentlem01cerv',
        u'donquixotedelama01cervuoft', u'ingeniousgentlem0195cerv',
        u'firstpartofdelig00cervuoft', u'p4elingeniosohid02cerv',
        u'donquijote00cervuoft', u'cu31924008863924',
        u'c2elingeniosohid02cerv', u'historyofvalorou03cerviala',
        u'historyofingenio01cerviala', u'historyadventure00cerv',
        u'elingeniosohidal00cerv', u'lifeexploitsofin01cervuoft',
        u'p2elingeniosohid05cerv', u'nybc203136',
        u'elingeniosohidal00cervuoft', u'donquixotedelama02cervuoft',
        u'lingnieuxcheva00cerv', u'ingeniousgentlem03cerv',
        u'vidayhechosdeli00siscgoog', u'lifeandexploits01jarvgoog',
        u'elingeniosohida00puiggoog', u'elingeniosohida00navagoog',
        u'donquichottedel02florgoog', u'historydonquixo00cogoog',
        u'vidayhechosdeli01siscgoog', u'elingeniosohida28saavgoog',
        u'historyvalorous00brangoog', u'elingeniosohida01goog',
        u'historyandadven00unkngoog', u'historyvalorous01goog',
        u'ingeniousgentle11saavgoog', u'elingeniosohida10saavgoog',
        u'adventuresdonqu00jarvgoog', u'historydonquixo04saavgoog',
        u'lingnieuxcheval00rouxgoog', u'elingeniosohida19saavgoog',
        u'historyingeniou00lalagoog', u'elingeniosohida00ormsgoog',
        u'historyandadven01smolgoog', u'elingeniosohida27saavgoog',
        u'elingeniosohida21saavgoog', u'historyingeniou00mottgoog',
        u'historyingeniou03unkngoog', u'lifeandexploits00jarvgoog',
        u'ingeniousgentle00conggoog', u'elingeniosohida00quixgoog',
        u'elingeniosohida01saavgoog', u'donquixotedelam02saavgoog',
        u'adventuresdonqu00gilbgoog', u'historyingeniou02saavgoog',
        u'donquixotedelam03saavgoog', u'elingeniosohida00ochogoog',
        u'historyingeniou08mottgoog', u'lifeandexploits01saavgoog',
        u'firstpartdeligh00shelgoog', u'elingeniosohida00castgoog',
        u'elingeniosohida01castgoog', u'adventofdonquixo00cerv',
        u'portablecervante00cerv', u'firstpartofdelig14cerv',
        u'donquixotemanofl00cerv', u'firstpartofdelig00cerv']
    bad_length = []
    bad_marc = []
    add_languages(mock_site)
    edition_status_counts = defaultdict(int)
    work_status_counts = defaultdict(int)
    author_status_counts = defaultdict(int)
    for ocaid in dq:
        marc_url = 'https://archive.org/download/%s/%s_meta.mrc' % (ocaid, ocaid)
        data = urlopen(marc_url).read()
        try:
            marc = MarcBinary(data)
        except BadLength:
            bad_length.append(ocaid)
            continue
        except BadMARC:
            bad_marc.append(ocaid)
            continue
        rec = read_edition(marc)
        rec['source_records'] = ['ia:' + ocaid]
        reply = load(rec)
        q = {
            'type': '/type/work',
            'authors.author': '/authors/OL1A',
        }
        work_keys = list(mock_site.things(q))
        author_keys = list(mock_site.things({'type': '/type/author'}))
        print("\nReply for %s: %s" % (ocaid, reply))
        print("Work keys: %s" % work_keys)
        # Only one author should ever exist, regardless of how many items load.
        assert author_keys == ['/authors/OL1A']
        assert reply['success'] is True
        # Increment status counters
        edition_status_counts[reply['edition']['status']] += 1
        work_status_counts[reply['work']['status']] += 1
        if (reply['work']['status'] != 'matched') and (reply['edition']['status'] != 'modified'):
            # No author key in response if work is 'matched'
            # No author key in response if edition is 'modified'
            author_status_counts[reply['authors'][0]['status']] += 1
    print("BAD MARC LENGTH items: %s" % bad_length)
    print("BAD MARC items: %s" % bad_marc)
    print("Edition status counts: %s" % edition_status_counts)
    print("Work status counts: %s" % work_status_counts)
    print("Author status counts: %s" % author_status_counts)
def POST(self):
    """Import a book by Archive.org identifier (or by bulk-MARC locator
    ``ocaid/filename:offset:length``), returning a JSON status response."""
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')
    i = web.input()
    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'
    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier
    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # Fix: initialize next_data before the try block. The except clause
        # references it, and a MarcException raised by get_from_archive_bulk
        # (before next_data was assigned) caused a NameError instead of the
        # intended error response. Also made the regex a raw string.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)
        actual_length = int(rec.leader()[:5])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)
        #TODO: Look up URN prefixes to support more sources
        prefix = 'trent'
        edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')]
        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)
    # Case 0 - Is the item already loaded
    key = self.find_edition(identifier)
    if key:
        return self.status_matched(key)
    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")
    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        d = {
            "title": metadata['title'],
            "openlibrary": "/books/" + metadata["openlibrary"]
        }
        d = self.populate_edition_data(d, identifier)
        return self.load_book(d)
    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata, item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")
    # Gio - April 2016
    # items with metadata no_ol_import=true will be not imported
    if metadata.get("no_ol_import", '').lower() == 'true':
        return self.error("no-ol-import")
    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        # Is the item a serial instead of a book?
        marc_leaders = marc_record.leader()
        if marc_leaders[7] == 's':
            return self.error("item-is-serial")
        # insider note: follows Archive.org's approach of
        # Item::isMARCXMLforMonograph() which excludes non-books
        if not (marc_leaders[7] == 'm' and marc_leaders[6] == 'a'):
            return self.error("item-not-book")
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s", identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")
    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def test_don_quixote(mock_site):
    # Loads many archive.org Don Quixote editions (all by the same author);
    # only one Author record should ever be created, the first load
    # creating it and every later load matching/modifying it.
    # NOTE(review): Python 2 code (print statements, dict.iteritems) that
    # also makes live requests to archive.org.
    dq = [
        u'lifeexploitsofin01cerv', u'cu31924096224518',
        u'elingeniosedcrit04cerv', u'ingeniousgentlem01cervuoft',
        u'historyofingenio01cerv', u'lifeexploitsofin02cerviala',
        u'elingeniosohidal03cervuoft', u'nybc209000',
        u'elingeniosohidal11cerv', u'elingeniosohidal01cervuoft',
        u'elingeniosoh01cerv', u'donquixotedelama00cerviala',
        u'1896elingeniosohid02cerv', u'ingeniousgentlem04cervuoft',
        u'cu31924027656978', u'histoiredeladmir01cerv',
        u'donquijotedelama04cerv', u'cu31924027657075',
        u'donquixotedelama03cervuoft', u'aventurasdedonqu00cerv',
        u'p1elingeniosohid03cerv', u'geshikhefundonik01cervuoft',
        u'historyofvalorou02cerviala', u'ingeniousgentlem01cerv',
        u'donquixotedelama01cervuoft', u'ingeniousgentlem0195cerv',
        u'firstpartofdelig00cervuoft', u'p4elingeniosohid02cerv',
        u'donquijote00cervuoft', u'cu31924008863924',
        u'c2elingeniosohid02cerv', u'historyofvalorou03cerviala',
        u'historyofingenio01cerviala', u'historyadventure00cerv',
        u'elingeniosohidal00cerv', u'lifeexploitsofin01cervuoft',
        u'p2elingeniosohid05cerv', u'nybc203136',
        u'elingeniosohidal00cervuoft', u'donquixotedelama02cervuoft',
        u'lingnieuxcheva00cerv', u'ingeniousgentlem03cerv',
        u'vidayhechosdeli00siscgoog', u'lifeandexploits01jarvgoog',
        u'elingeniosohida00puiggoog', u'elingeniosohida00navagoog',
        u'donquichottedel02florgoog', u'historydonquixo00cogoog',
        u'vidayhechosdeli01siscgoog', u'elingeniosohida28saavgoog',
        u'historyvalorous00brangoog', u'elingeniosohida01goog',
        u'historyandadven00unkngoog', u'historyvalorous01goog',
        u'ingeniousgentle11saavgoog', u'elingeniosohida10saavgoog',
        u'adventuresdonqu00jarvgoog', u'historydonquixo04saavgoog',
        u'lingnieuxcheval00rouxgoog', u'elingeniosohida19saavgoog',
        u'historyingeniou00lalagoog', u'elingeniosohida00ormsgoog',
        u'historyandadven01smolgoog', u'elingeniosohida27saavgoog',
        u'elingeniosohida21saavgoog', u'historyingeniou00mottgoog',
        u'historyingeniou03unkngoog', u'lifeandexploits00jarvgoog',
        u'ingeniousgentle00conggoog', u'elingeniosohida00quixgoog',
        u'elingeniosohida01saavgoog', u'donquixotedelam02saavgoog',
        u'adventuresdonqu00gilbgoog', u'historyingeniou02saavgoog',
        u'donquixotedelam03saavgoog', u'elingeniosohida00ochogoog',
        u'historyingeniou08mottgoog', u'lifeandexploits01saavgoog',
        u'firstpartdeligh00shelgoog', u'elingeniosohida00castgoog',
        u'elingeniosohida01castgoog', u'adventofdonquixo00cerv',
        u'portablecervante00cerv', u'firstpartofdelig14cerv',
        u'donquixotemanofl00cerv', u'firstpartofdelig00cerv'
    ]
    add_languages(mock_site)
    edition_status_counts = defaultdict(int)
    work_status_counts = defaultdict(int)
    author_status_counts = defaultdict(int)
    for num, ia in enumerate(dq):
        marc_url = 'http://archive.org/download/%s/%s_meta.mrc' % (ia, ia)
        data = urlopen(marc_url).read()
        # Items missing their binary MARC file return an HTML 404 page.
        if '<title>Internet Archive: Page Not Found</title>' in data:
            continue
        marc = MarcBinary(data)
        rec = read_edition(marc)
        reply = load(rec)
        q = {
            'type': '/type/work',
            'authors.author': '/authors/OL1A',
        }
        work_keys = list(mock_site.things(q))
        assert work_keys
        pprint(reply)
        assert reply['success'] == True
        astatus = reply['authors'][0]['status']
        wstatus = reply['work']['status']
        estatus = reply['edition']['status']
        # The very first load creates the author; every later one matches it.
        if num == 0:
            assert astatus == 'created'
        else:
            assert astatus == 'modified'
        edition_status_counts[estatus] += 1
        work_status_counts[wstatus] += 1
        author_status_counts[astatus] += 1
    for k, v in edition_status_counts.iteritems():
        print 'edition %8s: %d' % (k, v)
    print
    for k, v in work_status_counts.iteritems():
        print 'work %8s: %d' % (k, v)
    print
    for k, v in author_status_counts.iteritems():
        print 'author %8s: %d' % (k, v)
def POST(self):
    """
    Import an Archive.org item (or one record of a bulk MARC file) into
    Open Library.

    Web input parameters:
        identifier: an ocaid, or for bulk MARC 'ocaid/filename:offset:length'
        require_marc: unless the string 'false', items without a MARC record fail
        bulk_marc: 'true' to treat identifier as a bulk MARC pointer

    :return: JSON string with the import result, or an error response.
    """
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        # (raw string so \d is a regex escape, not a string escape)
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # Initialize before the try: the except handler below expands
        # next_data, which would otherwise be unbound if match() or
        # get_from_archive_bulk() fails before it is assigned.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # Use the length from the record leader, not the requested length,
        # so the source_records pointer matches the actual record.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)

        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')]

        result = add_book.load(edition)

        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        edition_data = self.get_ia_record(metadata)
        edition_data["openlibrary"] = metadata["openlibrary"]
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata,
                                item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")

    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        self.reject_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s", identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition record
    suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            fmt = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            fmt = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            # A MARCXML file may wrap the <record> in a <collection>;
            # unwrap one level if so.
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
            fmt = 'marcxml'
        else:
            # print() form works on both Python 2 and 3 (py2 print
            # statement is a SyntaxError on py3).
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
        fmt = 'json'
    else:
        # Special case to load IA records, DEPRECATED: use import/ia endpoint
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]
            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")
            # see ia_importapi to address `imagecount` limitations
            status = ia.get_item_status(itemid, metadata)
            if status != 'ok':
                raise DataError(status)
            try:
                rec = get_marc_record_from_ia(itemid)
                # skip serials
                if rec and rec.leader()[7] == 's':
                    raise DataError("item-is-serial")
            except IOError:
                raise DataError("no-marc-record")
            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None

            # Marc Binary: the record leader starts with its own length;
            # use the file-level MARC_LENGTH_POS constant instead of a
            # magic 5, consistent with the other parse_data/POST code.
            if len(data) != int(data[:MARC_LENGTH_POS]):
                return json.dumps({'success': False, 'error': 'Bad MARC length'})

            rec = MarcBinary(data)

        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        fmt = 'marc'

    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), fmt
rec = MarcBinary(data)
assert read_subjects(rec) == expected

# Collect per-record subject dicts from the XML samples, then the
# binary samples, and tally them all together.
subjects = []
for item, expect in xml_samples:
    filename = os.path.dirname(
        __file__) + '/test_data/xml_input/' + item + '_marc.xml'
    element = etree.parse(filename).getroot()
    # A MARCXML file may wrap its <record> in a <collection>; unwrap it.
    if element.tag != record_tag and element[0].tag == record_tag:
        element = element[0]
    rec = MarcXml(element)
    subjects.append(read_subjects(rec))

for item, expect in bin_samples:
    filename = os.path.dirname(__file__) + '/test_data/bin_input/' + item
    data = open(filename).read()
    # If the length in the leader disagrees with the data length, the
    # fixture presumably needs re-encoding so byte offsets line up
    # — TODO confirm against how these fixtures were saved.
    if len(data) != int(data[:5]):
        data = data.decode('utf-8').encode('raw_unicode_escape')
    rec = MarcBinary(data)
    subjects.append(read_subjects(rec))

# Merge into a nested tally: all_subjects[subject_type][name] -> count
all_subjects = defaultdict(lambda: defaultdict(int))
for a in subjects:
    for b, c in a.items():
        for d, e in c.items():
            all_subjects[b][d] += e

# print() with a single argument behaves identically on Python 2 and 3;
# the bare py2 print statement is a SyntaxError on py3.
print(four_types(dict((k, dict(v)) for k, v in all_subjects.items())))
def test_raises_see_also(self):
    """read_edition() must raise SeeAlsoAsTitle when the record's title is a 'see also' reference."""
    filename = "%s/bin_input/talis_see_also.mrc" % test_data
    # Open in binary mode: MARC is binary data and text mode would
    # decode/mangle it on Python 3 (matches test_subjects_bin).
    with open(filename, 'rb') as f:
        rec = MarcBinary(f.read())
    with pytest.raises(SeeAlsoAsTitle):
        read_edition(rec)
def parse_data(data):
    """
    Determine the format of POSTed import data and build an Edition record.

    :param str data: raw POST body (XML, JSON, binary MARC, or 'ia:<itemid>')
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            fmt = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            fmt = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            # A MARCXML file may wrap the <record> in a <collection>;
            # unwrap one level if so.
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            fmt = 'marcxml'
        else:
            # print() form works on both Python 2 and 3 (py2 print
            # statement is a SyntaxError on py3).
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        fmt = 'json'
    else:
        # Special case to load IA records
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]
            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")
            if not ia.edition_from_item_metadata(itemid, metadata):
                raise DataError("item-not-a-book")
            try:
                rec = get_marc_record_from_ia(itemid)
            except IOError:
                raise DataError("no-marc-record")
            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None

            # Marc Binary: leader begins with the record's own length.
            if len(data) != int(data[:5]):
                return json.dumps({
                    'success': False,
                    'error': 'Bad MARC length'
                })

            rec = MarcBinary(data)

        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        fmt = 'marc'

    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), fmt
def test_subjects_bin(self, item, expected):
    """Subjects read from a binary MARC fixture match the expected mapping."""
    marc_path = os.path.dirname(__file__) + '/test_data/bin_input/' + item
    with open(marc_path, mode='rb') as marc_file:
        raw = marc_file.read()
    record = MarcBinary(raw)
    assert read_subjects(record) == expected
def test_bad_binary_data(self):
    """MarcBinary must raise BadMARC for input that is not valid binary MARC."""
    # No need to bind the return value: the constructor itself raises,
    # so the previous `result = ...` local was never used.
    with pytest.raises(BadMARC):
        MarcBinary('nonMARCdata')