def dump(self):
    """Translate ``self.data`` from MARCXML to JSON.

    :returns: ``(timestamp, record)`` where ``timestamp`` is the naive UTC
        time taken before the conversion starts and ``record`` is the
        output of ``self.dojson_model.do``.
    :raises LossyConversion: when ``self.dojson_model.missing`` reports
        fields that no rule converted. Logged before being re-raised.
    :raises Exception: any other error raised during the conversion is
        logged together with the MARC record and re-raised.
    """
    started_at = datetime.datetime.utcnow()
    # Every recoverable dojson error is routed to the same importer handler.
    handlers = dict.fromkeys(
        (UnexpectedValue, MissingRequiredField, ManualImportRequired),
        importer_exception_handler,
    )
    marc_record = create_record(self.data)

    try:
        # MARCXML -> JSON fields translation
        json_record = self.dojson_model.do(
            marc_record, exception_handlers=handlers)
        # check for missing rules
        unconverted = self.dojson_model.missing(marc_record)
        if unconverted:
            raise LossyConversion(missing=unconverted)
    except LossyConversion as lossy:
        template = "MIGRATION RULE MISSING {0} - {1}"
        current_app.logger.error(
            template.format(lossy.missing, marc_record))
        raise lossy
    except Exception as error:
        template = "Impossible to convert to JSON {0} - {1}"
        current_app.logger.error(template.format(error, marc_record))
        raise error
    else:
        return started_at, json_record
def _prepare_revision(self, data):
    """Build the ``(modification datetime, record)`` pair for a revision.

    When ``self.source_type`` is ``"marcxml"`` the ``data["marcxml"]``
    payload is converted through ``self.dojson_model``; otherwise
    ``data["json"]`` is returned untouched.

    :raises LossyConversion: when the model reports unconverted fields.
        Logged before being re-raised.
    :raises Exception: any other conversion error is logged and re-raised.
    """
    modified_at = arrow.get(data["modification_datetime"]).datetime
    # Every recoverable dojson error is routed to the migration handler.
    handlers = dict.fromkeys(
        (UnexpectedValue, MissingRequiredField, ManualImportRequired),
        migration_exception_handler,
    )

    if self.source_type != "marcxml":
        return modified_at, data["json"]

    marc_record = create_record(data["marcxml"])
    try:
        json_record = self.dojson_model.do(
            marc_record, exception_handlers=handlers)
        unconverted = self.dojson_model.missing(marc_record)
        if unconverted:
            raise LossyConversion(missing=unconverted)
    except LossyConversion as lossy:
        template = "MIGRATION RULE MISSING {0} - {1}"
        current_app.logger.error(
            template.format(lossy.missing, marc_record))
        raise lossy
    except Exception as error:
        template = "Impossible to convert to JSON {0} - {1}"
        current_app.logger.error(template.format(error, marc_record))
        raise error
    else:
        return modified_at, json_record
def _prepare_final_revision(self, data):
    """Build the ``(modification datetime, record)`` pair for the last revision.

    The record comes either from converting ``data['marcxml']`` with
    ``self.dojson_model`` (when ``self.source_type`` is ``'marcxml'``) or
    straight from ``data['json']``. In both cases ``update_access`` is
    applied to it with ``self.collection_access`` before returning.

    :raises LossyConversion: when the model reports unconverted fields.
    :raises Exception: any other error raised inside the conversion block
        (including one raised by ``update_access``) is logged and re-raised.
    """
    modified_at = arrow.get(data['modification_datetime']).datetime
    # Every recoverable dojson error is routed to the migration handler.
    handlers = dict.fromkeys(
        (UnexpectedValue, MissingRequiredField, ManualMigrationRequired),
        migration_exception_handler,
    )

    if self.source_type != 'marcxml':
        record = data['json']
        # Calculate the _access key
        update_access(record, self.collection_access)
        return modified_at, record

    marc_record = create_record(data['marcxml'])
    try:
        record = self.dojson_model.do(
            marc_record, exception_handlers=handlers)
        unconverted = self.dojson_model.missing(marc_record)
        if unconverted:
            raise LossyConversion(missing=unconverted)
        update_access(record, self.collection_access)
        return modified_at, record
    except LossyConversion as lossy:
        # Lossy conversions propagate without being logged here.
        raise lossy
    except Exception as error:
        template = 'Impossible to convert to JSON {0} - {1}'
        current_app.logger.error(template.format(error, marc_record))
        raise error
def check_transformation(marcxml_body, json_body):
    """Assert that ``marcxml_body`` converts to ``json_body`` plus defaults.

    The module-level ``model`` gets its ``_default_fields`` overwritten
    with the journal ``_migration`` defaults, and the same defaults are
    expected in the converted record alongside ``json_body``.
    """

    def _migration_defaults():
        # A fresh dict on every call so the model and the expectation
        # never share mutable state.
        return {
            "_migration": {
                "is_multipart": False,
                "has_related": False,
                "related": [],
                "record_type": "journal",
                "volumes": [],
            }
        }

    blob = create_record(marcxml.format(marcxml_body))
    model._default_fields = _migration_defaults()
    converted = model.do(blob, ignore_missing=False)

    expected = dict(_migration_defaults(), **json_body)
    assert converted == expected
def check_transformation(marcxml_body, json_body, model=None):
    """Assert that ``marcxml_body`` converts exactly to ``json_body``.

    ``model`` must be an object exposing ``do``; the ``None`` default is
    not usable as-is because ``model.do`` is called unconditionally.
    """
    source_xml = marcxml.format(marcxml_body)
    blob = create_record(source_xml)
    converted = model.do(blob, ignore_missing=False)

    expected = dict(**json_body)
    assert converted == expected
def check_transformation(marcxml_body, json_body):
    """Assert that ``marcxml_body`` converts to ``json_body`` plus defaults.

    The module-level ``model`` gets its ``_default_fields`` overwritten
    with a ``_migration`` entry copied from ``get_helper_dict()``, and the
    same entry is expected in the converted record alongside ``json_body``.
    """

    def _migration_defaults():
        # Shallow-copy the helper dict on every call, as the original did.
        return {"_migration": {**get_helper_dict()}}

    blob = create_record(marcxml.format(marcxml_body))
    model._default_fields = _migration_defaults()
    converted = model.do(blob, ignore_missing=False)

    expected = dict(_migration_defaults(), **json_body)
    assert converted == expected
def check_transformation(marcxml_body, json_body):
    """Assert that ``marcxml_body`` converts to ``json_body`` plus ``$schema``.

    The expected record is ``json_body`` merged over the project schema
    URL; the conversion uses the module-level ``model``.
    """
    source_xml = marcxml.format(marcxml_body)
    converted = model.do(create_record(source_xml))

    schema_url = (
        'https://cds.cern.ch/schemas/'
        'records/videos/project/project-v1.0.0.json'
    )
    expected = dict({'$schema': schema_url}, **json_body)
    assert converted == expected
def test_base_model(app):
    """Check the base model against the ``fixtures/base.xml`` record."""
    fixture_path = os.path.join('fixtures', 'base.xml')
    fixture_xml = pkg_resources.resource_string(__name__, fixture_path)

    with app.app_context():
        blob = create_record(fixture_xml)

        # Before the conversion the control fields are reported as missing.
        assert model.missing(blob) == {'001', '003', '005'}

        converted = model.do(blob)
        for field, value in (('recid', 1495143),
                             ('agency_code', 'SzGeCERN')):
            assert converted[field] == value

        # After the conversion nothing is reported as missing any more.
        assert not model.missing(blob)
def _prepare_intermediate_revision(self, data):
    """Build the ``(modification datetime, payload)`` pair for an
    intermediate revision.

    For a ``'marcxml'`` source the payload is the MARC record parsed by
    ``create_record`` (no JSON conversion is applied here). For any other
    source it is ``data['json']``, whose ``'_access'`` key is overwritten
    in place.
    """
    modified_at = arrow.get(data['modification_datetime']).datetime

    if self.source_type == 'marcxml':
        return modified_at, create_record(data['marcxml'])

    payload = data['json']
    # MARC21 versions of the record are only accessible to admins
    admin_only = {
        'read': ['*****@*****.**'],
        'update': ['*****@*****.**'],
    }
    payload['_access'] = admin_only
    return modified_at, payload
def _prepare_final_revision(self, data):
    """Build the ``(modification datetime, record)`` pair for the last revision.

    A ``'marcxml'`` source is converted with ``self.dojson_model``; any
    other source uses ``data['json']`` directly. ``update_access`` is then
    applied to the record with ``self.collection_access``.

    :raises RuntimeError: when the model reports unconverted fields.
    :raises Exception: a failure of ``dojson_model.do`` is logged and
        re-raised unchanged.
    """
    modified_at = arrow.get(data['modification_datetime']).datetime

    if self.source_type != 'marcxml':
        record = data['json']
    else:
        marc_record = create_record(data['marcxml'])
        try:
            record = self.dojson_model.do(marc_record)
        except Exception as error:
            template = 'Impossible to convert to JSON {0} - {1}'
            current_app.logger.error(template.format(error, marc_record))
            raise

        unconverted = self.dojson_model.missing(marc_record, _json=record)
        if unconverted:
            raise RuntimeError(
                'Lossy conversion: {0}'.format(unconverted))

    # Calculate the _access key
    update_access(record, self.collection_access)
    return (modified_at, record)
def test_required_fields(app):
    """Convert the project fixture and validate it against its schema."""
    fixture_xml = load_fixture_file('videos_project.xml')

    expected = {
        '$schema': 'https://cds.cern.ch/schemas/'
        'records/videos/project/project-v1.0.0.json',
        '_access': {'update': ['*****@*****.**']},
        'category': 'CERN',
        'contributors': [
            {'name': 'CERN Video Productions', 'role': 'Producer'},
            {'name': 'CERN Video Productions', 'role': 'Director'}
        ],
        'keywords': [{'name': 'Higgs', 'source': 'CERN'},
                     {'name': 'anniversary', 'source': 'CERN'}],
        'recid': 2272969,
        'report_number': ['CERN-MOVIE-2017-023'],
        'title': {'title': 'Higgs anniversary 5Y'},
        'type': 'MOVIE',
        'videos': [{'$ref': 'https://cds.cern.ch/record/1'},
                   {'$ref': 'https://cds.cern.ch/record/2'}],
        'external_system_identifiers': [
            {'schema': 'AVW', 'value': 'AVW.project.2963'}
        ],
        'modified_by': '*****@*****.**',
    }

    # Add required fields calculated by post-process tasks.
    post_processed = (
        ('publication_date', '2017-07-04'),
        ('date', '2017-07-04'),
    )

    with app.app_context():
        converted = model.do(create_record(fixture_xml))
        assert converted == expected

        for field, value in post_processed:
            converted[field] = value

        validate(
            converted,
            schema={'schema': converted['$schema']},
            types={'array': (list, tuple)},
        )
def _prepare_revision(self, data):
    """Build the ``(modification datetime, record)`` pair for a revision.

    A ``"marcxml"`` source is converted with ``self.dojson_model``; any
    other source returns ``data["json"]`` untouched.

    :raises JSONConversionException: wraps any error raised by
        ``dojson_model.do``.
    :raises LossyConversion: when the model reports unconverted fields.
    """
    modified_at = arrow.get(data["modification_datetime"]).datetime
    # Every recoverable dojson error is routed to the migration handler.
    handlers = dict.fromkeys(
        (UnexpectedValue, MissingRequiredField, ManualImportRequired),
        migration_exception_handler,
    )

    if self.source_type != "marcxml":
        return modified_at, data["json"]

    marc_record = create_record(data["marcxml"])
    try:
        converted = self.dojson_model.do(
            marc_record, exception_handlers=handlers)
    except Exception as error:
        raise JSONConversionException(error)

    unconverted = self.dojson_model.missing(marc_record)
    if unconverted:
        raise LossyConversion(missing=unconverted)
    return modified_at, converted
def dump(self):
    """Translate ``self.data`` from MARCXML to JSON.

    :returns: ``(timestamp, record, is_deletable)`` where ``timestamp`` is
        the naive UTC time taken before parsing, ``record`` is the output
        of ``self.dojson_model.do`` and ``is_deletable`` tells whether
        ``"d"`` occurs in the MARC record's ``"leader"`` entry.
    :raises RecordModelMissing: when the conversion call raises
        ``AttributeError``.
    :raises LossyConversion: when ``self.ignore_missing`` is falsy and the
        model reports unconverted fields.
    """
    started_at = datetime.datetime.utcnow()
    marc_record = create_record(self.data)
    is_deletable = "d" in marc_record.get("leader", [])

    # MARCXML -> JSON fields translation
    try:
        converted = self.dojson_model.do(
            marc_record, exception_handlers=xml_import_handlers)
    except AttributeError:
        raise RecordModelMissing

    if not self.ignore_missing:
        # check for missing rules
        unconverted = self.dojson_model.missing(marc_record)
        if unconverted:
            raise LossyConversion(missing=unconverted)

    return started_at, converted, is_deletable
def test_required_fields(app):
    """Convert the video fixture and validate it against its schema."""
    fixture_xml = load_fixture_file('videos_video.xml')

    # Master file, subformats, frames and poster, in the order the model
    # is expected to emit them.
    expected_files = [
        {
            'filepath': 'MediaArchive/Video/Masters/Movies/CERN/2017/CERN-MOVIE-2017-023/Final_Output/CERN-MOVIE-2017-023-001.mov',
            'key': 'CERN-MOVIE-2017-023-001.mov',
            'tags': {
                'media_type': 'video',
                'content_type': 'mov',
                'context_type': 'master',
                'preview': True,
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-5872-kbps-1920x1080-audio-128-kbps-stereo.mp4',
            'key': 'CERN-MOVIE-2017-023-001-5872-kbps-1920x1080-audio-128-kbps-stereo.mp4',
            'tags_to_guess_preset': {'preset': '1080p',
                                     'video_bitrate': 5872},
            'tags': {
                'media_type': 'video',
                'content_type': 'mp4',
                'context_type': 'subformat',
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-2672-kbps-1280x720-audio-128-kbps-stereo.mp4',
            'key': 'CERN-MOVIE-2017-023-001-2672-kbps-1280x720-audio-128-kbps-stereo.mp4',
            'tags_to_guess_preset': {'preset': '720p',
                                     'video_bitrate': 2672},
            'tags': {
                'media_type': 'video',
                'content_type': 'mp4',
                'context_type': 'subformat',
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-1436-kbps-853x480-audio-64-kbps-stereo.mp4',
            'key': 'CERN-MOVIE-2017-023-001-1436-kbps-853x480-audio-64-kbps-stereo.mp4',
            'tags_to_guess_preset': {'preset': '480p',
                                     'video_bitrate': 1436},
            'tags': {
                'media_type': 'video',
                'content_type': 'mp4',
                'context_type': 'subformat',
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-836-kbps-640x360-audio-64-kbps-stereo.mp4',
            'key': 'CERN-MOVIE-2017-023-001-836-kbps-640x360-audio-64-kbps-stereo.mp4',
            'tags_to_guess_preset': {'preset': '360p',
                                     'video_bitrate': 836},
            'tags': {
                'media_type': 'video',
                'content_type': 'mp4',
                'context_type': 'subformat',
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-386-kbps-426x240-audio-64-kbps-stereo.mp4',
            'key': 'CERN-MOVIE-2017-023-001-386-kbps-426x240-audio-64-kbps-stereo.mp4',
            'tags_to_guess_preset': {'preset': '240p',
                                     'video_bitrate': 386},
            'tags': {
                'media_type': 'video',
                'content_type': 'mp4',
                'context_type': 'subformat',
            },
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-5-percent.jpg',
            'key': 'frame-1.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 5}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-5-percent.jpg',
            'key': 'posterframe.jpg',
            'tags': {
                'media_type': 'image',
                'height': '360',
                'width': '640',
                'content_type': 'jpg',
                'context_type': 'poster',
            },
            'tags_to_transform': {'timestamp': 5},
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-15-percent.jpg',
            'key': 'frame-2.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 15}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-25-percent.jpg',
            'key': 'frame-3.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 25}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-35-percent.jpg',
            'key': 'frame-4.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 35}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-45-percent.jpg',
            'key': 'frame-5.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 45}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-55-percent.jpg',
            'key': 'frame-6.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 55}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-65-percent.jpg',
            'key': 'frame-7.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 65}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-75-percent.jpg',
            'key': 'frame-8.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 75}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-85-percent.jpg',
            'key': 'frame-9.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 85}
        },
        {
            'filepath': 'MediaArchive/Video/Public/Movies/CERN/2017/CERN-MOVIE-2017-023/CERN-MOVIE-2017-023-001/CERN-MOVIE-2017-023-001-posterframe-640x360-at-95-percent.jpg',
            'key': 'frame-10.jpg',
            'tags': {'content_type': 'jpg',
                     'context_type': 'frame',
                     'media_type': 'image'},
            'tags_to_transform': {'timestamp': 95}
        }
    ]

    expected_contributors = [
        {'name': 'CERN Video Productions', 'role': 'Producer'},
        {'name': 'CERN Video Productions', 'role': 'Director'},
        {'affiliations': (u'CERN',),
         'email': u'*****@*****.**',
         'ids': [{'source': 'CERN', 'value': u'755568'},
                 {'source': 'CDS', 'value': u'2090563'}],
         'name': 'Madsen, Christoph Martin',
         'role': 'Director'},
        {'affiliations': (u'CERN',),
         'email': u'*****@*****.**',
         'ids': [{'source': 'CERN', 'value': u'380837'},
                 {'source': 'CDS', 'value': u'2050975'}],
         'name': 'Catapano, Paola',
         'role': 'Director'},
        {'affiliations': (u'CERN',),
         'email': u'*****@*****.**',
         'ids': [{'source': 'CERN', 'value': u'755568'},
                 {'source': 'CDS', 'value': u'2090563'}],
         'name': 'Madsen, Christoph Martin',
         'role': 'Editor'}]

    expected = {
        '$schema': 'https://cds.cern.ch/schemas/records/videos/video/'
        'video-v1.0.0.json',
        '_access': {'read': ['*****@*****.**',
                             '*****@*****.**',
                             '*****@*****.**',
                             '*****@*****.**'],
                    'update': ['*****@*****.**',
                               '*****@*****.**']},
        '_files': expected_files,
        '_project_id': 'https://cds.cern.ch/record/1',
        'category': 'CERN',
        'contributors': expected_contributors,
        'copyright': {'holder': 'CERN', 'year': '2017'},
        'date': '2017-07-04',
        'description': ('Where were you on 4 July 2012, the day in which '
                        'the Higgs boson discovery was announced?'),
        'duration': '00:01:09',
        'keywords': [
            {'name': 'higgs', 'source': 'CERN'},
            {'name': 'anniversary', 'source': 'CERN'}
        ],
        'language': u'en',
        'recid': 2272973,
        'report_number': ['CERN-MOVIE-2017-023-001'],
        'title': {'title': 'Happy 5th anniversary, Higgs boson!'},
        'type': 'MOVIE',
        'external_system_identifiers': [
            {'schema': 'AVW', 'value': 'AVW.clip.3447'}
        ],
        'modified_by': '*****@*****.**',
    }

    # Add required fields calculated by post-process tasks.
    post_processed = (
        ('publication_date', '2017-07-04'),
        ('doi', 'CERN/2272973'),
        ('license', [
            {'license': 'CC BY 4.0',
             'url': 'https://creativecommons.org/licenses/by/4.0/'}
        ]),
    )

    with app.app_context():
        converted = model.do(create_record(fixture_xml))
        assert converted == expected

        for field, value in post_processed:
            converted[field] = value

        validate(
            converted,
            schema={'schema': converted['$schema']},
            types={'array': (list, tuple)},
        )