def test_incorrect_length_marcs(self, monkeypatch):
    """If a Binary MARC has a different length than stated in the MARC leader,
    it is probably due to bad character conversions."""
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(
        ia, 'get_metadata',
        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    # Each item below has known character-conversion damage that makes the
    # record body disagree with the length declared in its MARC leader.
    bad_marcs = [
        # Binary MARC reports len=734, but actually=742. Badly converted
        # unicode -- original unicode converted as if it were MARC8.
        '1733mmoiresdel00vill',
        # Same as zweibchersatir01horauoft: binary representation of
        # unicode interpreted as unicode codepoints.
        'dasrmischepriv00rein',
        # C3A2 in this file should be single byte MARC8 combining
        # acute 0xE2.
        'histoirereligieu05cr',
        # Original MARC8 0xE2 interpreted as u00E2 => \xC3\xA2,
        # leader still MARC8.
        'lesabndioeinas00sche',
        # Junk / unexpected character at end of publishers in field 260.
        'poganucpeoplethe00stowuoft',
        # Possible extra chars at end of field 505?
        'scrapbooksofmoun03tupp',
        # Leader is unicode; chars '\xc3\x83\xc2\xbc' in mrc should be
        # '\xc3\xbc' -- original '\xc3\xb3' became '\u00c3\u00b3'.
        'zweibchersatir01horauoft',
    ]
    for bad_marc in bad_marcs:
        # TODO: get_marc_record_from_ia() currently returns None for
        # these; MarcBinary should raise a BadMarc exception, or similar.
        assert get_ia.get_marc_record_from_ia(bad_marc) is None
def test_no_marc_xml(self):
    """When no XML MARC is listed in _filenames, the Binary MARC should be
    fetched."""
    self.m.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    self.m.setattr(ia, 'get_metadata',
                   lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    # Items whose metadata advertises only a *_meta.mrc (binary) file.
    bin_items = [
        '0descriptionofta1682unit',
        '13dipolarcycload00burk',
        'bijouorannualofl1828cole',
        'cu31924091184469',
        'diebrokeradical400poll',
        'engineercorpsofh00sher',
        'flatlandromanceo00abbouoft',
        'henrywardbeecher00robauoft',
        'lincolncentenary00horn',
        'livrodostermosh00bragoog',
        'mytwocountries1954asto',
        'onquietcomedyint00brid',
        'secretcodeofsucc00stjo',
        'thewilliamsrecord_vol29b',
        'warofrebellionco1473unit',
    ]
    for item in bin_items:
        result = get_ia.get_marc_record_from_ia(item)
        self.assertIsInstance(
            result, MarcBinary,
            "%s: expected instanceof MarcBinary, got %s" % (item, type(result)))
def test_no_marc_xml(self, monkeypatch):
    """When no XML MARC is listed in _filenames, the Binary MARC should be
    fetched."""
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(
        ia, 'get_metadata',
        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    # Items whose metadata advertises only a *_meta.mrc (binary) file.
    bin_items = [
        '0descriptionofta1682unit',
        '13dipolarcycload00burk',
        'bijouorannualofl1828cole',
        'cu31924091184469',
        'diebrokeradical400poll',
        'engineercorpsofh00sher',
        'flatlandromanceo00abbouoft',
        'henrywardbeecher00robauoft',
        'lincolncentenary00horn',
        'livrodostermosh00bragoog',
        'mytwocountries1954asto',
        'onquietcomedyint00brid',
        'secretcodeofsucc00stjo',
        'thewilliamsrecord_vol29b',
        'warofrebellionco1473unit',
    ]
    for item in bin_items:
        result = get_ia.get_marc_record_from_ia(item)
        assert isinstance(result, MarcBinary), (
            "%s: expected instanceof MarcBinary, got %s" % (item, type(result)))
def test_incorrect_length_marcs(self, bad_marc, monkeypatch):
    """If a Binary MARC has a different length than stated in the MARC leader,
    it is probably due to bad character conversions.

    :param str bad_marc: parametrized IA identifier of a known-bad record
    """
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(ia, 'get_metadata',
                        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    # FIX: dropped the unused `result =` binding -- the return value is
    # irrelevant here; the assertion is that BadLength is raised.
    with pytest.raises(BadLength):
        get_ia.get_marc_record_from_ia(bad_marc)
def test_get_marc_record_from_ia(self):
    """Tests the method returning MARC records from IA used by the
    import API. It should return an XML MARC if one exists."""
    self.m.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_xml)
    self.m.setattr(ia, 'get_metadata',
                   lambda itemid: {'_filenames': [itemid + '_marc.xml',
                                                  itemid + '_meta.mrc']})
    # Items whose metadata lists a *_marc.xml file; XML should win over
    # the binary *_meta.mrc.
    xml_items = [
        '1733mmoiresdel00vill',  # no <?xml
        '0descriptionofta1682unit',  # has <?xml
        'cu31924091184469',  # is <collection>
        #'1893manualofharm00jadauoft',  # 0 byte xml file
        '00schlgoog',
        '13dipolarcycload00burk',
        '39002054008678.yale.edu',
        'abhandlungender01ggoog',
        'bijouorannualofl1828cole',
        'dasrmischepriv00rein',
        'diebrokeradical400poll',
        'engineercorpsofh00sher',
        'flatlandromanceo00abbouoft',
        'lesabndioeinas00sche',
        'lincolncentenary00horn',
        'livrodostermosh00bragoog',
        'mytwocountries1954asto',
        'nybc200247',
        'onquietcomedyint00brid',
        'scrapbooksofmoun03tupp',
        'secretcodeofsucc00stjo',
        'soilsurveyrepor00statgoog',
        'warofrebellionco1473unit',
        'zweibchersatir01horauoft',
    ]
    for item in xml_items:
        result = get_ia.get_marc_record_from_ia(item)
        self.assertIsInstance(
            result, MarcXml,
            "%s: expected instanceof MarcXml, got %s" % (item, type(result)))
def test_get_marc_record_from_ia(self, item, monkeypatch):
    """Tests the method returning MARC records from IA used by the
    import API. It should return an XML MARC if one exists."""
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_xml)
    # Advertise both an XML and a binary MARC; XML should be preferred.
    monkeypatch.setattr(
        ia, 'get_metadata',
        lambda itemid: {'_filenames': [itemid + '_marc.xml',
                                       itemid + '_meta.mrc']})
    result = get_ia.get_marc_record_from_ia(item)
    assert isinstance(result, MarcXml), (
        "%s: expected instanceof MarcXml, got %s" % (item, type(result)))
def test_get_marc_record_from_ia(self, item, monkeypatch):
    """Tests the method returning MARC records from IA used by the
    import API. It should return an XML MARC if one exists."""
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_xml)
    # Advertise both an XML and a binary MARC; XML should be preferred.
    monkeypatch.setattr(
        ia, 'get_metadata',
        lambda itemid: {'_filenames': [itemid + '_marc.xml',
                                       itemid + '_meta.mrc']})
    result = get_ia.get_marc_record_from_ia(item)
    assert isinstance(result, MarcXml), \
        f"{item}: expected instanceof MarcXml, got {type(result)}"
def ia_import(cls, identifier, require_marc=True, force_import=False):
    """
    Performs logic to fetch archive.org item + metadata, produces a data
    dict, then loads into Open Library

    :param str identifier: archive.org ocaid
    :param bool require_marc: require archive.org item have MARC record?
    :param bool force_import: force import of this record
    :rtype: dict
    :returns: the data of the imported book or raises BookImportError
    """
    # Case 1 - Is this a valid Archive.org item?
    metadata = ia.get_metadata(identifier)
    if not metadata:
        raise BookImportError('invalid-ia-identifier',
                              '%s not found' % identifier)

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get('mediatype') == 'texts' and metadata.get('openlibrary'):
        edition_data = cls.get_ia_record(metadata)
        edition_data['openlibrary'] = metadata['openlibrary']
        edition_data = cls.populate_edition_data(edition_data, identifier)
        return cls.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata)
    if status != 'ok' and not force_import:
        raise BookImportError(status, 'Prohibited Item %s' % identifier)

    # Case 4 - Does this item have a marc record?
    marc_record = get_marc_record_from_ia(identifier)
    if require_marc and not marc_record:
        raise BookImportError('no-marc-record')

    if marc_record:
        # Non-book MARC records are rejected unless the caller forces it.
        if not force_import:
            raise_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error('failed to read from MARC record %s: %s',
                         identifier, str(e))
            raise BookImportError('invalid-marc-record')
    else:
        # No MARC (and not required): fall back to the raw IA metadata.
        try:
            edition_data = cls.get_ia_record(metadata)
        except KeyError:
            raise BookImportError('invalid-ia-metadata')

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = cls.populate_edition_data(edition_data, identifier)
    return cls.load_book(edition_data)
def test_no_marc_xml(self, item, monkeypatch):
    """When no XML MARC is listed in _filenames, the Binary MARC should be
    fetched.

    :param str item: parametrized IA identifier with binary MARC only
    """
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(ia, 'get_metadata',
                        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    result = get_ia.get_marc_record_from_ia(item)
    assert isinstance(result, MarcBinary), \
        "%s: expected instanceof MarcBinary, got %s" % (item, type(result))
    # FIX: the original used Python-2-only `.next()` and `unicode.encode`,
    # which raise AttributeError/NameError on Python 3; use the next()
    # builtin and str.encode instead.
    field_245 = next(result.read_fields(['245']))
    title = next(field_245[1].get_all_subfields())[1].encode('utf8')
    print("%s:\n\tUNICODE: [%s]\n\tTITLE: %s"
          % (item, result.leader()[9], title))
def test_no_marc_xml(self, item, monkeypatch):
    """When no XML MARC is listed in _filenames, the Binary MARC should be
    fetched."""
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(ia, 'get_metadata',
                        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    result = get_ia.get_marc_record_from_ia(item)
    assert isinstance(result, MarcBinary), \
        f"{item}: expected instanceof MarcBinary, got {type(result)}"
    # Debug output: leader byte 9 (encoding flag) and the 245 title.
    tag_245 = next(result.read_fields(['245']))
    title = next(tag_245[1].get_all_subfields())[1].encode('utf8')
    print(f"{item}:\n\tUNICODE: [{result.leader()[9]}]\n\tTITLE: {title}")
def test_no_marc_xml(self, item, monkeypatch):
    """When no XML MARC is listed in _filenames, the Binary MARC should be
    fetched.

    :param str item: parametrized IA identifier with binary MARC only
    """
    monkeypatch.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    monkeypatch.setattr(
        ia, 'get_metadata',
        lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    result = get_ia.get_marc_record_from_ia(item)
    assert isinstance(result, MarcBinary), \
        "%s: expected instanceof MarcBinary, got %s" % (item, type(result))
    # FIX: the original used Python-2-only `.next()` and `unicode.encode`,
    # which raise AttributeError/NameError on Python 3; use the next()
    # builtin and str.encode instead.
    field_245 = next(result.read_fields(['245']))
    title = next(field_245[1].get_all_subfields())[1].encode('utf8')
    print("%s:\n\tUNICODE: [%s]\n\tTITLE: %s"
          % (item, result.leader()[9], title))
def test_incorrect_length_marcs(self):
    """If a Binary MARC has a different length than stated in the MARC leader,
    it is probably due to bad character conversions."""
    self.m.setattr(get_ia, 'urlopen_keep_trying', return_test_marc_bin)
    self.m.setattr(ia, 'get_metadata',
                   lambda itemid: {'_filenames': [itemid + "_meta.mrc"]})
    # Each item below has known character-conversion damage that makes the
    # record body disagree with the length declared in its MARC leader.
    bad_marcs = [
        # Binary MARC reports len=734, but actually=742. Badly converted
        # unicode -- original unicode converted as if it were MARC8.
        '1733mmoiresdel00vill',
        # Same as zweibchersatir01horauoft: binary representation of
        # unicode interpreted as unicode codepoints.
        'dasrmischepriv00rein',
        # C3A2 in this file should be single byte MARC8 combining
        # acute 0xE2.
        'histoirereligieu05cr',
        # Original MARC8 0xE2 interpreted as u00E2 => \xC3\xA2,
        # leader still MARC8.
        'lesabndioeinas00sche',
        # Junk / unexpected character at end of publishers in field 260.
        'poganucpeoplethe00stowuoft',
        # Possible extra chars at end of field 505?
        'scrapbooksofmoun03tupp',
        # Leader is unicode; chars '\xc3\x83\xc2\xbc' in mrc should be
        # '\xc3\xbc' -- original '\xc3\xb3' became '\u00c3\u00b3'.
        'zweibchersatir01horauoft',
    ]
    for bad_marc in bad_marcs:
        # TODO: get_marc_record_from_ia() currently returns None for
        # these; MarcBinary should raise a BadMarc exception, or similar.
        self.assertIsNone(get_ia.get_marc_record_from_ia(bad_marc))
def parse_data(data):
    """Determine the format of POSTed import data and build an edition.

    :param str data: raw POSTed data (XML, JSON, "ia:<id>", or binary MARC)
    :rtype: (dict|None, str|None)
    :returns: (edition dict, format) or (None, None) for unrecognized XML
    :raises DataError: for invalid/unsuitable "ia:" items
    """
    data = data.strip()
    MARC_NS = '{http://www.loc.gov/MARC21/slim}'
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif root.tag in (MARC_NS + 'record', MARC_NS + 'collection'):
            # FIX: the <collection> check was previously nested inside the
            # <record> branch and therefore unreachable, so MARC XML
            # wrapped in <collection> fell through to "unrecognized".
            # Accept the wrapper and unwrap its first <record>.
            if root.tag == MARC_NS + 'collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            # FIX: print statement -> print() call (Python 3 compatible).
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]
            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")
            if not ia.edition_from_item_metadata(itemid, metadata):
                raise DataError("item-not-a-book")
            try:
                rec = get_marc_record_from_ia(itemid)
                # skip serials
                if rec.leader()[7] == 's':
                    raise DataError("item-is-serial")
            except IOError:
                raise DataError("no-marc-record")
            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None
            # MARC binary: leader bytes 0-4 declare the record length.
            if len(data) != int(data[:5]):
                # NOTE(review): this returns a JSON string while every
                # other path returns a (dict, format) tuple -- looks like
                # a bug; behavior preserved, confirm callers before
                # changing.
                return json.dumps({'success':False, 'error':'Bad MARC length'})
            rec = MarcBinary(data)
        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'
    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), format
def parse_data(data):
    """Determine the format of POSTed import data and build an edition.

    :param str data: raw POSTed data (XML, JSON, "ia:<id>", or binary MARC)
    :rtype: (dict|None, str|None)
    :returns: (edition dict, format) or (None, None) for unrecognized XML
    :raises DataError: for invalid/unsuitable "ia:" items
    """
    data = data.strip()
    MARC_NS = '{http://www.loc.gov/MARC21/slim}'
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif root.tag in (MARC_NS + 'record', MARC_NS + 'collection'):
            # FIX: the <collection> check was previously nested inside the
            # <record> branch and therefore unreachable, so MARC XML
            # wrapped in <collection> fell through to "unrecognized".
            # Accept the wrapper and unwrap its first <record>.
            if root.tag == MARC_NS + 'collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            # FIX: print statement -> print() call (Python 3 compatible).
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]
            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")
            if not ia.edition_from_item_metadata(itemid, metadata):
                raise DataError("item-not-a-book")
            try:
                rec = get_marc_record_from_ia(itemid)
            except IOError:
                raise DataError("no-marc-record")
            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None
            # MARC binary: leader bytes 0-4 declare the record length.
            if len(data) != int(data[:5]):
                # NOTE(review): this returns a JSON string while every
                # other path returns a (dict, format) tuple -- looks like
                # a bug; behavior preserved, confirm callers before
                # changing.
                return json.dumps({
                    'success': False,
                    'error': 'Bad MARC length'
                })
            rec = MarcBinary(data)
        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'
    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), format
def get_marc_record(self, identifier):
    """Fetch the MARC record for an IA identifier, or None on I/O failure."""
    try:
        record = get_marc_record_from_ia(identifier)
    except IOError:
        return None
    return record
def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition
    record suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc))
             or (None, None)
    """
    data = data.strip()
    MARC_NS = '{http://www.loc.gov/MARC21/slim}'
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif root.tag in (MARC_NS + 'record', MARC_NS + 'collection'):
            # FIX: the <collection> check was previously nested inside the
            # <record> branch and therefore unreachable, so MARC XML
            # wrapped in <collection> fell through to "unrecognized".
            # Accept the wrapper and unwrap its first <record>.
            if root.tag == MARC_NS + 'collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            # FIX: print statement -> print() call (Python 3 compatible).
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records, DEPRECATED: use import/ia endpoint
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]
            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")
            # see ia_importapi to address `imagecount` limitations
            status = ia.get_item_status(itemid, metadata)
            if status != 'ok':
                raise DataError(status)
            try:
                rec = get_marc_record_from_ia(itemid)
                # skip serials
                if rec and rec.leader()[7] == 's':
                    raise DataError("item-is-serial")
            except IOError:
                raise DataError("no-marc-record")
            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None
            # MARC binary: leader bytes 0-4 declare the record length.
            if len(data) != int(data[:5]):
                # NOTE(review): this returns a JSON string while every
                # other path returns a (dict, format) tuple -- looks like
                # a bug; behavior preserved, confirm callers before
                # changing.
                return json.dumps({'success':False, 'error':'Bad MARC length'})
            rec = MarcBinary(data)
        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'
    parse_meta_headers(edition_builder)
    return edition_builder.get_dict(), format
def POST(self):
    """Import a single item (or a bulk-MARC slice) into Open Library.

    Returns a JSON response describing the import result or an error.
    """
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')
    i = web.input()
    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'
    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        # FIX: raw string for the regex so `\d` is not a (deprecated)
        # string escape.
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # FIX: initialize next_data before the try block. Previously it
        # was only assigned mid-try, so a MarcException raised earlier
        # (e.g. from get_from_archive_bulk) caused a NameError in the
        # handler's `**next_data`.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(
                identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(
                identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid, filename, offset, actual_length)

        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            id_field, id_subfield = local_id_type.id_location.split('$')

            def get_subfield(field, id_subfield):
                # A field may be a bare string (controlfield) or a
                # (tag, field) pair carrying subfields.
                if isinstance(field, str):
                    return field
                subfields = field[1].get_subfield_values(id_subfield)
                return subfields[0] if subfields else None

            _ids = [
                get_subfield(f, id_subfield)
                for f in rec.read_fields([id_field])
                if f and get_subfield(f, id_subfield)
            ]
            edition['local_id'] = [
                'urn:%s:%s' % (prefix, _id) for _id in _ids
            ]

        # Don't add the book if the MARC record is a non-book item
        self.reject_non_book_marc(rec, **next_data)
        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    metadata = ia.get_metadata(identifier)
    if not metadata:
        return self.error('invalid-ia-identifier',
                          '%s not found' % identifier)

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get('mediatype') == 'texts' and metadata.get('openlibrary'):
        edition_data = self.get_ia_record(metadata)
        edition_data['openlibrary'] = metadata['openlibrary']
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata)
    if status != 'ok':
        return self.error(status, 'Prohibited Item %s' % identifier)

    # Case 4 - Does this item have a marc record?
    marc_record = get_marc_record_from_ia(identifier)
    if marc_record:
        self.reject_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error('failed to read from MARC record %s: %s',
                         identifier, str(e))
            return self.error('invalid-marc-record')
    elif require_marc:
        return self.error('no-marc-record')
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)