def test_checks_records_error():
    """Store one record whose package declares an unknown schema version, run the
    checks on it, and expect the checks to be marked done with an error row
    recorded in the record check error table."""
    setup_main_database()

    with tempfile.TemporaryDirectory() as scratch_dir:
        meta = MetadataDB(scratch_dir)
        meta.create_session_metadata("Test", True, "http://www.test.com", "2018-01-01-10-00-00")
        file_entry = {
            'filename': 'test1.json',
            'url': 'http://www.test.com',
            'data_type': 'record_package',
        }
        meta.add_filestatus(file_entry)

        # Store phase: one record per listed file, with a package version
        # that is deliberately not a real one.
        session_id = database.start_store("test_source", "2018-01-01-10-00-00", True, meta)
        for file_status in meta.list_filestatus():
            with database.add_file(session_id, file_status) as stored_file:
                row = {'record': 'totally'}
                package = {'version': '0.1-does-not-exist', 'extensions': []}
                stored_file.insert_record(row, package)
        database.end_store(session_id)

        # Hard-coded on purpose: relies on the database assigning ID 1 to the
        # first row inserted into a freshly set-up database.
        stored_record_id = 1

        # Nothing has been checked yet.
        assert not database.is_record_check_done(stored_record_id)
        assert not database.is_check_done(session_id)

        # Check phase.
        for file_status in meta.list_filestatus():
            checks.check_file(session_id, file_status)

        # Both the record and the whole session now count as checked.
        assert database.is_record_check_done(stored_record_id)
        assert database.is_check_done(session_id)

        with database.engine.begin() as connection:
            query = sa.sql.select([database.record_check_error_table])
            error_row = connection.execute(query).fetchone()
            assert 'The schema version in your data is not valid. Accepted values:' in error_row['error']
def test_checks_releases():
    """Store one minimal release, run the checks on it, and expect the release to
    be marked checked with a JSON check result that reports validation errors."""
    setup_main_database()

    with tempfile.TemporaryDirectory() as scratch_dir:
        meta = MetadataDB(scratch_dir)
        meta.create_session_metadata("Test", True, "http://www.test.com", "2018-01-01-10-00-00")
        file_entry = {
            'filename': 'test1.json',
            'url': 'http://www.test.com',
            'data_type': 'release_package',
        }
        meta.add_filestatus(file_entry)

        # Store phase: one release per listed file.
        session_id = database.start_store("test_source", "2018-01-01-10-00-00", True, meta)
        for file_status in meta.list_filestatus():
            with database.add_file(session_id, file_status) as stored_file:
                row = {'release': 'totally'}
                package = {'extensions': []}
                stored_file.insert_release(row, package)
        database.end_store(session_id)

        # Hard-coded on purpose: relies on the database assigning ID 1 to the
        # first row inserted into a freshly set-up database.
        stored_release_id = 1

        # Nothing has been checked yet.
        assert not database.is_release_check_done(stored_release_id)

        # Check phase.
        for file_status in meta.list_filestatus():
            checks.check_file(session_id, file_status)

        # The release now counts as checked.
        assert database.is_release_check_done(stored_release_id)

        with database.engine.begin() as connection:
            query = sa.sql.select([database.release_check_table])
            check_row = connection.execute(query).fetchone()
            cove_output = check_row['cove_output']
            assert cove_output['file_type'] == 'json'
            assert len(cove_output['validation_errors']) > 0
def run_store(self):
    """Store the fetched files of this source in the database.

    Returns immediately if ``database.is_store_done`` reports that this
    source id, data version and sample flag have already been stored.
    Otherwise a store session is started and every file listed by the
    metadata database is processed, except files whose ``data_type`` starts
    with ``'meta'`` and files for which ``database.is_store_file_done``
    returns true. Each processed file is parsed as JSON, split into one or
    more packages according to its ``data_type``, and every release/record
    row is inserted together with the package-level keys (everything except
    ``'releases'`` and ``'records'``). The store session is ended once all
    files have been processed.

    Raises:
        Exception: if the fetch was not successful, if a file's
            ``data_type`` is not a known type, if a package is not a JSON
            object, if its release/record list is missing or is not a list,
            or if a row in that list is not an object.

    Errors from opening or parsing a file (``open`` / ``json.load``)
    propagate unchanged.
    """
    metadata = self.metadata_db.get_session()
    if not metadata['fetch_success']:
        raise Exception('Can not run store without a successful fetch')

    if database.is_store_done(self.source_id, self.data_version, self.sample):
        return

    release_types = (
        'release_package',
        'release_package_list_in_results',
        'release_package_list',
    )
    record_types = (
        'record_package',
        'record_package_list_in_results',
        'record_package_list',
    )
    # File layouts that hold several packages rather than a single one.
    in_results_types = ('record_package_list_in_results', 'release_package_list_in_results')
    bare_list_types = ('record_package_list', 'release_package_list')

    source_session_id = database.start_store(
        self.source_id, self.data_version, self.sample, self.metadata_db)

    for data in self.metadata_db.list_filestatus():
        data_type = data['data_type']

        if data_type.startswith('meta'):
            continue

        if database.is_store_file_done(source_session_id, data):
            continue

        with database.add_file(source_session_id, data) as database_file:
            # Read/parse failures are deliberately left to propagate to the
            # caller. TODO better way of dealing with this?
            file_path = os.path.join(self.full_directory, data['filename'])
            with open(file_path, encoding=data['encoding']) as f:
                file_json_data = json.load(f)

            objects_list = []
            if data_type in in_results_types:
                objects_list.extend(file_json_data['results'])
            elif data_type in bare_list_types:
                objects_list.extend(file_json_data)
            else:
                objects_list.append(file_json_data)
            # Drop the extra reference to the parsed document; the packages
            # are now reachable through objects_list.
            del file_json_data

            is_record_file = data_type in record_types

            for json_data in objects_list:
                if data_type in release_types:
                    list_key, label = 'releases', 'Release'
                elif is_record_file:
                    list_key, label = 'records', 'Record'
                else:
                    raise Exception("data_type not a known type")

                # Validate the package shape before touching it, raising as
                # soon as a problem is found so that the intended message is
                # reported rather than a later KeyError/TypeError.
                if not isinstance(json_data, dict):
                    raise Exception(
                        "Can not process data in file {} as JSON is not an object".format(
                            data['filename']))
                if list_key not in json_data:
                    raise Exception(
                        "{} list not found in file {}".format(label, data['filename']))
                data_list = json_data[list_key]
                if not isinstance(data_list, list):
                    raise Exception(
                        "{} list which is not a list found in file {}".format(
                            label, data['filename']))

                # Package-level data is everything except the row lists.
                package_data = {
                    key: value
                    for key, value in json_data.items()
                    if key not in ('releases', 'records')
                }

                for row in data_list:
                    if not isinstance(row, dict):
                        raise Exception(
                            "Row in data is not a object {}".format(data['filename']))
                    if is_record_file:
                        database_file.insert_record(row, package_data)
                    else:
                        database_file.insert_release(row, package_data)

    database.end_store(source_session_id)