def test_wrapped_lines():
    """A 520 field whose data spans wrapped lines is reassembled intact."""
    path = '%s/wrapped_lines' % test_data
    with open(path, 'r') as marc_file:
        record = MarcBinary(marc_file.read())
    fields = list(record.read_fields(['520']))
    assert len(fields) == 2
    # Each entry is a (tag, field) pair; the full unwrapped content should
    # come back as the first subfield value with these exact lengths.
    expected_lengths = (2290, 243)
    for (tag, field), expected in zip(fields, expected_lengths):
        assert tag == '520'
        content = list(field.get_all_subfields())[0][1]
        assert len(content) == expected
def POST(self):
    """Import an edition from an Internet Archive identifier.

    Accepts either a plain IA ocaid (handed off to ``self.ia_import``) or,
    with ``bulk_marc=true``, a bulk MARC locator of the form
    ``ocaid/filename:offset:length`` addressing one binary MARC record
    inside an archived MARC dump.

    Returns a JSON string. Bulk responses also carry
    ``next_record_offset``/``next_record_length`` so callers can iterate
    through the dump record by record.
    """
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    force_import = i.get('force_import') == 'true'
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # FIX: guard against a malformed locator; previously a failed match
        # raised an unhandled AttributeError on .groups().
        m = re_bulk_identifier.match(identifier)
        if not m:
            return self.error(
                'bad-input',
                'bulk_marc identifier must be ocaid/filename:offset:length',
            )
        # FIX: initialize before the try block. If get_from_archive_bulk()
        # raises MarcException, the except branch below used to hit a
        # NameError because next_data was never bound.
        next_data = {}
        try:
            ocaid, filename, offset, length = m.groups()
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = f"{identifier}: {str(e)}"
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # Record the length actually read from the MARC leader, which can
        # differ from the length requested in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid,
            filename,
            offset,
            actual_length,
        )

        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            # Scanning-partner records with a local_id are always imported.
            force_import = True
            id_field, id_subfield = local_id_type.id_location.split('$')

            def get_subfield(field, id_subfield):
                # field is either a raw string (control field) or a
                # (tag, Field) tuple; only the latter carries subfields.
                if isinstance(field, str):
                    return field
                subfields = field[1].get_subfield_values(id_subfield)
                return subfields[0] if subfields else None

            _ids = [
                get_subfield(f, id_subfield)
                for f in rec.read_fields([id_field])
                if f and get_subfield(f, id_subfield)
            ]
            edition['local_id'] = [f'urn:{prefix}:{_id}' for _id in _ids]

        # Don't add the book if the MARC record is a non-monograph item,
        # unless it is a scanning partner record and/or force_import is set.
        if not force_import:
            try:
                raise_non_book_marc(rec, **next_data)
            except BookImportError as e:
                return self.error(e.error_code, e.error, **e.kwargs)

        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    try:
        return self.ia_import(
            identifier, require_marc=require_marc, force_import=force_import
        )
    except BookImportError as e:
        return self.error(e.error_code, e.error, **e.kwargs)
def POST(self):
    """Import an edition from an Internet Archive identifier.

    Handles a bulk MARC locator (``bulk_marc=true``, identifier of the form
    ``ocaid/filename:offset:length``) or a plain IA ocaid, which is resolved
    through IA metadata and, when available, its MARC record.

    Returns a JSON string; bulk responses include the offset/length of the
    next record in the dump.
    """
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        # FIX: raw string so \d is a regex digit class, not an invalid
        # string escape (a warning, and eventually an error, in Python 3).
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # FIX: bind before the try block; the except branch references
        # next_data and raised NameError when get_from_archive_bulk()
        # failed before next_data was assigned.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(
                identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(
                identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)
        # Use the record length from the MARC leader, which may differ from
        # the length requested in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid, filename, offset, actual_length)

        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            id_field, id_subfield = local_id_type.id_location.split('$')

            def get_subfield(field, id_subfield):
                # field is either a raw string (control field) or a
                # (tag, Field) tuple; only the latter carries subfields.
                if isinstance(field, str):
                    return field
                subfields = field[1].get_subfield_values(id_subfield)
                return subfields[0] if subfields else None

            _ids = [
                get_subfield(f, id_subfield)
                for f in rec.read_fields([id_field])
                if f and get_subfield(f, id_subfield)
            ]
            edition['local_id'] = [
                'urn:%s:%s' % (prefix, _id) for _id in _ids
            ]

        # Don't add the book if the MARC record is a non-book item
        self.reject_non_book_marc(rec, **next_data)
        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    metadata = ia.get_metadata(identifier)
    if not metadata:
        return self.error('invalid-ia-identifier',
                          '%s not found' % identifier)

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get('mediatype') == 'texts' and metadata.get('openlibrary'):
        edition_data = self.get_ia_record(metadata)
        edition_data['openlibrary'] = metadata['openlibrary']
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata)
    if status != 'ok':
        return self.error(status, 'Prohibited Item %s' % identifier)

    # Case 4 - Does this item have a marc record?
    marc_record = get_marc_record_from_ia(identifier)
    if marc_record:
        self.reject_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error('failed to read from MARC record %s: %s',
                         identifier, str(e))
            return self.error('invalid-marc-record')
    elif require_marc:
        return self.error('no-marc-record')
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def POST(self):
    """Import an edition from an Internet Archive identifier.

    Handles a bulk MARC locator (``bulk_marc=true``, identifier of the form
    ``ocaid/filename:offset:length``) or a plain IA ocaid resolved through
    the item's JSON metadata and, when available, its MARC record.

    Returns a JSON string; bulk responses include the offset/length of the
    next record in the dump.
    """
    web.header('Content-Type', 'application/json')
    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        # FIX: raw string so \d is a regex digit class, not an invalid
        # string escape (a warning, and eventually an error, in Python 3).
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        # FIX: bind before the try block; the except branch references
        # next_data and raised NameError when get_from_archive_bulk()
        # failed before next_data was assigned.
        next_data = {}
        try:
            ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {'next_record_offset': next_offset,
                         'next_record_length': next_length}
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)
        # Use the record length from the MARC leader, which may differ from
        # the length requested in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)

        local_id = i.get('local_id')
        if local_id:
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            id_field, id_subfield = local_id_type.id_location.split('$')
            # FIX: previously indexed get_subfield_values(...)[0]
            # unconditionally, raising IndexError for a field with no
            # matching subfield; skip such fields instead.
            _ids = []
            for f in rec.read_fields([id_field]):
                if isinstance(f, str):
                    # Control fields come back as raw strings.
                    _ids.append(f)
                    continue
                values = f[1].get_subfield_values(id_subfield)
                if values:
                    _ids.append(values[0])
            edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in _ids]

        result = add_book.load(edition)
        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        edition_data = self.get_ia_record(metadata)
        edition_data["openlibrary"] = metadata["openlibrary"]
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata,
                                item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")

    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        self.reject_non_book_marc(marc_record)
        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s",
                         identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)