예제 #1
0
def test_wrapped_lines():
    """Check the two 520 fields read from the 'wrapped_lines' fixture.

    The record is expected to yield exactly two '520' fields; the value of
    the first subfield of each must be 2290 and 243 characters long
    respectively.
    """
    filename = '%s/wrapped_lines' % test_data
    # The fixture is binary MARC, so read it as bytes: text mode would decode
    # the data and can fail on (or alter the length of) non-ASCII content.
    with open(filename, 'rb') as f:
        rec = MarcBinary(f.read())

    # The file contents are fully read above, so the checks do not need the
    # file handle to stay open.
    ret = list(rec.read_fields(['520']))
    assert len(ret) == 2
    a, b = ret
    assert a[0] == '520' and b[0] == '520'
    a_content = list(a[1].get_all_subfields())[0][1]
    assert len(a_content) == 2290
    b_content = list(b[1].get_all_subfields())[0][1]
    assert len(b_content) == 243
예제 #2
0
    def POST(self):
        """Import a book from a bulk MARC record or from an identifier.

        Request parameters (read through ``web.input()``):

        * ``identifier`` (required) -- for bulk MARC imports it must look like
          ``ocaid/filename:offset:length``; otherwise it is handed to
          ``self.ia_import``.
        * ``require_marc`` -- true unless the value is the string ``'false'``.
        * ``force_import`` -- true only for the string ``'true'``; it is also
          switched on whenever ``local_id`` is supplied.
        * ``bulk_marc`` -- true only for the string ``'true'``.
        * ``local_id`` -- optional; looked up under ``/local_ids/`` to find
          the URN prefix and the MARC field/subfield holding the local id.

        Returns:
            For bulk MARC imports, a JSON string with the result of
            ``add_book.load`` extended with the ``next_record_offset`` and
            ``next_record_length`` of the following record. Otherwise the
            result of ``self.ia_import`` or of ``self.error``.

        Raises:
            web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
        """
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        i = web.input()

        require_marc = not (i.get('require_marc') == 'false')
        force_import = i.get('force_import') == 'true'
        bulk_marc = i.get('bulk_marc') == 'true'

        if 'identifier' not in i:
            return self.error('bad-input', 'identifier not provided')
        identifier = i.identifier

        # First check whether this is a non-book, bulk-marc item
        if bulk_marc:
            # Get binary MARC by identifier = ocaid/filename:offset:length
            re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
            match = re_bulk_identifier.match(identifier)
            if match is None:
                # A malformed identifier used to surface as an uncaught
                # AttributeError on ``None.groups()``.
                return self.error(
                    'bad-input', f'invalid bulk MARC identifier: {identifier}'
                )
            # The requested length is not used below; the length found in the
            # record leader is recorded instead.
            ocaid, filename, offset, _requested_length = match.groups()

            # Bound before the ``try`` so the error handler can always expand
            # it, even if the failure happens before the next-record location
            # is known.
            next_data = {}
            try:
                data, next_offset, next_length = get_from_archive_bulk(
                    identifier)
                next_data = {
                    'next_record_offset': next_offset,
                    'next_record_length': next_length,
                }
                rec = MarcBinary(data)
                edition = read_edition(rec)
            except MarcException as e:
                details = f"{identifier}: {str(e)}"
                logger.error("failed to read from bulk MARC record %s",
                             details)
                return self.error('invalid-marc-record', details, **next_data)

            actual_length = int(rec.leader()[:MARC_LENGTH_POS])
            edition['source_records'] = 'marc:%s/%s:%s:%d' % (
                ocaid,
                filename,
                offset,
                actual_length,
            )

            local_id = i.get('local_id')
            if local_id:
                # NOTE(review): if the lookup returns None for an unknown
                # local_id the attribute access below fails -- confirm whether
                # that case needs an explicit error response.
                local_id_type = web.ctx.site.get('/local_ids/' + local_id)
                prefix = local_id_type.urn_prefix
                force_import = True
                id_field, id_subfield = local_id_type.id_location.split('$')

                def get_subfield(field, id_subfield):
                    """Return `field` itself if it is a string, else its first
                    `id_subfield` value (or None when there is none)."""
                    if isinstance(field, str):
                        return field
                    subfields = field[1].get_subfield_values(id_subfield)
                    return subfields[0] if subfields else None

                # Keep only non-empty ids; each field is examined once.
                _ids = []
                for f in rec.read_fields([id_field]):
                    if not f:
                        continue
                    value = get_subfield(f, id_subfield)
                    if value:
                        _ids.append(value)
                edition['local_id'] = [f'urn:{prefix}:{_id}' for _id in _ids]

            # Don't add the book if the MARC record is a non-monograph item,
            # unless it is a scanning partner record and/or force_import is set.
            if not force_import:
                try:
                    raise_non_book_marc(rec, **next_data)
                except BookImportError as e:
                    return self.error(e.error_code, e.error, **e.kwargs)
            result = add_book.load(edition)

            # Add next_data to the response as location of next record:
            result.update(next_data)
            return json.dumps(result)

        try:
            return self.ia_import(identifier,
                                  require_marc=require_marc,
                                  force_import=force_import)
        except BookImportError as e:
            return self.error(e.error_code, e.error, **e.kwargs)
예제 #3
0
    def POST(self):
        """Import a book from a bulk MARC record or from an Archive.org item.

        Request parameters (read through ``web.input()``):

        * ``identifier`` (required) -- for bulk MARC imports it must look like
          ``ocaid/filename:offset:length``; otherwise it is used to fetch
          metadata through ``ia.get_metadata``.
        * ``require_marc`` -- true unless the value is the string ``'false'``.
        * ``bulk_marc`` -- true only for the string ``'true'``.
        * ``local_id`` -- optional; looked up under ``/local_ids/`` to find
          the URN prefix and the MARC field/subfield holding the local id.

        Returns:
            For bulk MARC imports, a JSON string with the result of
            ``add_book.load`` extended with the ``next_record_offset`` and
            ``next_record_length`` of the following record. Otherwise the
            result of ``self.load_book`` or of ``self.error``.

        Raises:
            web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
        """
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        i = web.input()

        require_marc = not (i.get('require_marc') == 'false')
        bulk_marc = i.get('bulk_marc') == 'true'

        if 'identifier' not in i:
            return self.error('bad-input', 'identifier not provided')
        identifier = i.identifier

        # First check whether this is a non-book, bulk-marc item
        if bulk_marc:
            # Get binary MARC by identifier = ocaid/filename:offset:length
            # Raw string: "\d" in a plain literal is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
            match = re_bulk_identifier.match(identifier)
            if match is None:
                # A malformed identifier used to surface as an uncaught
                # AttributeError on ``None.groups()``.
                return self.error(
                    'bad-input',
                    'invalid bulk MARC identifier: %s' % identifier)
            # The requested length is not used below; the length found in the
            # record leader is recorded instead.
            ocaid, filename, offset, _requested_length = match.groups()

            # Bound before the ``try`` so the error handler can always expand
            # it, even if the failure happens before the next-record location
            # is known.
            next_data = {}
            try:
                data, next_offset, next_length = get_from_archive_bulk(
                    identifier)
                next_data = {
                    'next_record_offset': next_offset,
                    'next_record_length': next_length
                }
                rec = MarcBinary(data)
                edition = read_edition(rec)
            except MarcException as e:
                details = "%s: %s" % (identifier, str(e))
                logger.error("failed to read from bulk MARC record %s",
                             details)
                return self.error('invalid-marc-record', details, **next_data)

            actual_length = int(rec.leader()[:MARC_LENGTH_POS])
            edition['source_records'] = 'marc:%s/%s:%s:%d' % (
                ocaid, filename, offset, actual_length)

            local_id = i.get('local_id')
            if local_id:
                # NOTE(review): if the lookup returns None for an unknown
                # local_id the attribute access below fails -- confirm whether
                # that case needs an explicit error response.
                local_id_type = web.ctx.site.get('/local_ids/' + local_id)
                prefix = local_id_type.urn_prefix
                id_field, id_subfield = local_id_type.id_location.split('$')

                def get_subfield(field, id_subfield):
                    """Return `field` itself if it is a string, else its first
                    `id_subfield` value (or None when there is none)."""
                    if isinstance(field, str):
                        return field
                    subfields = field[1].get_subfield_values(id_subfield)
                    return subfields[0] if subfields else None

                # Keep only non-empty ids; each field is examined once.
                _ids = []
                for f in rec.read_fields([id_field]):
                    if not f:
                        continue
                    value = get_subfield(f, id_subfield)
                    if value:
                        _ids.append(value)
                edition['local_id'] = [
                    'urn:%s:%s' % (prefix, _id) for _id in _ids
                ]

            # Don't add the book if the MARC record is a non-book item
            self.reject_non_book_marc(rec, **next_data)
            result = add_book.load(edition)

            # Add next_data to the response as location of next record:
            result.update(next_data)
            return json.dumps(result)

        # Case 1 - Is this a valid Archive.org item?
        metadata = ia.get_metadata(identifier)
        if not metadata:
            return self.error('invalid-ia-identifier',
                              '%s not found' % identifier)

        # Case 2 - Does the item have an openlibrary field specified?
        # The scan operators search OL before loading the book and add the
        # OL key if a match is found. We can trust them and attach the item
        # to that edition.
        if metadata.get('mediatype') == 'texts' and metadata.get(
                'openlibrary'):
            edition_data = self.get_ia_record(metadata)
            edition_data['openlibrary'] = metadata['openlibrary']
            edition_data = self.populate_edition_data(edition_data, identifier)
            return self.load_book(edition_data)

        # Case 3 - Can the item be loaded into Open Library?
        status = ia.get_item_status(identifier, metadata)
        if status != 'ok':
            return self.error(status, 'Prohibited Item %s' % identifier)

        # Case 4 - Does this item have a marc record?
        marc_record = get_marc_record_from_ia(identifier)
        if marc_record:
            self.reject_non_book_marc(marc_record)
            try:
                edition_data = read_edition(marc_record)
            except MarcException as e:
                logger.error('failed to read from MARC record %s: %s',
                             identifier, str(e))
                return self.error('invalid-marc-record')
        elif require_marc:
            return self.error('no-marc-record')
        else:
            # No MARC record and none required: fall back to the item metadata.
            try:
                edition_data = self.get_ia_record(metadata)
            except KeyError:
                return self.error("invalid-ia-metadata")

        # Add IA specific fields: ocaid, source_records, and cover
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)
예제 #4
0
    def POST(self):
        """Import a book from a bulk MARC record or from an Archive.org item.

        Request parameters (read through ``web.input()``):

        * ``identifier`` (required) -- for bulk MARC imports it must look like
          ``ocaid/filename:offset:length``; otherwise it is used to fetch the
          item JSON through ``ia.get_item_json``.
        * ``require_marc`` -- true unless the value is the string ``'false'``.
        * ``bulk_marc`` -- true only for the string ``'true'``.
        * ``local_id`` -- optional; looked up under ``/local_ids/`` to find
          the URN prefix and the MARC field/subfield holding the local id.

        Returns:
            For bulk MARC imports, a JSON string with the result of
            ``add_book.load`` extended with the ``next_record_offset`` and
            ``next_record_length`` of the following record. Otherwise the
            result of ``self.load_book`` or of ``self.error``.

        Raises:
            web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
        """
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        i = web.input()

        require_marc = not (i.get('require_marc') == 'false')
        bulk_marc = i.get('bulk_marc') == 'true'

        if 'identifier' not in i:
            return self.error('bad-input', 'identifier not provided')
        identifier = i.identifier

        # First check whether this is a non-book, bulk-marc item
        if bulk_marc:
            # Get binary MARC by identifier = ocaid/filename:offset:length
            # Raw string: "\d" in a plain literal is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
            match = re_bulk_identifier.match(identifier)
            if match is None:
                # A malformed identifier used to surface as an uncaught
                # AttributeError on ``None.groups()``.
                return self.error('bad-input', 'invalid bulk MARC identifier: %s' % identifier)
            # The requested length is not used below; the length found in the
            # record leader is recorded instead.
            ocaid, filename, offset, _requested_length = match.groups()

            # Bound before the ``try`` so the error handler can always expand
            # it, even if the failure happens before the next-record location
            # is known.
            next_data = {}
            try:
                data, next_offset, next_length = get_from_archive_bulk(identifier)
                next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
                rec = MarcBinary(data)
                edition = read_edition(rec)
            except MarcException as e:
                details = "%s: %s" % (identifier, str(e))
                logger.error("failed to read from bulk MARC record %s", details)
                return self.error('invalid-marc-record', details, **next_data)

            actual_length = int(rec.leader()[:MARC_LENGTH_POS])
            edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)

            local_id = i.get('local_id')
            if local_id:
                # NOTE(review): if the lookup returns None for an unknown
                # local_id the attribute access below fails -- confirm whether
                # that case needs an explicit error response.
                local_id_type = web.ctx.site.get('/local_ids/' + local_id)
                prefix = local_id_type.urn_prefix
                id_field, id_subfield = local_id_type.id_location.split('$')

                def get_subfield(field, id_subfield):
                    """Return `field` itself if it is a string, else its first
                    `id_subfield` value (or None when there is none)."""
                    if isinstance(field, str):
                        return field
                    subfields = field[1].get_subfield_values(id_subfield)
                    return subfields[0] if subfields else None

                # Skip fields without a usable id instead of indexing [0] on a
                # possibly empty list of subfield values (IndexError).
                _ids = []
                for f in rec.read_fields([id_field]):
                    if not f:
                        continue
                    value = get_subfield(f, id_subfield)
                    if value:
                        _ids.append(value)
                edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in _ids]

            result = add_book.load(edition)

            # Add next_data to the response as location of next record:
            result.update(next_data)
            return json.dumps(result)

        # Case 1 - Is this a valid Archive.org item?
        try:
            item_json = ia.get_item_json(identifier)
            item_server = item_json['server']
            item_path = item_json['dir']
        except KeyError:
            return self.error("invalid-ia-identifier", "%s not found" % identifier)
        metadata = ia.extract_item_metadata(item_json)
        if not metadata:
            return self.error("invalid-ia-identifier")

        # Case 2 - Does the item have an openlibrary field specified?
        # The scan operators search OL before loading the book and add the
        # OL key if a match is found. We can trust them and attach the item
        # to that edition.
        if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
            edition_data = self.get_ia_record(metadata)
            edition_data["openlibrary"] = metadata["openlibrary"]
            edition_data = self.populate_edition_data(edition_data, identifier)
            return self.load_book(edition_data)

        # Case 3 - Can the item be loaded into Open Library?
        status = ia.get_item_status(identifier, metadata,
                                    item_server=item_server, item_path=item_path)
        if status != 'ok':
            return self.error(status, "Prohibited Item")

        # Case 4 - Does this item have a marc record?
        marc_record = self.get_marc_record(identifier)
        if marc_record:
            self.reject_non_book_marc(marc_record)

            try:
                edition_data = read_edition(marc_record)
            except MarcException as e:
                logger.error("failed to read from MARC record %s: %s", identifier, str(e))
                return self.error("invalid-marc-record")

        elif require_marc:
            return self.error("no-marc-record")

        else:
            # No MARC record and none required: fall back to the item metadata.
            try:
                edition_data = self.get_ia_record(metadata)
            except KeyError:
                return self.error("invalid-ia-metadata")

        # Add IA specific fields: ocaid, source_records, and cover
        edition_data = self.populate_edition_data(edition_data, identifier)

        return self.load_book(edition_data)