def dispatch_request(self):
    """Handle a Submit File API V1 request.

    Stores an uploaded file (or a server-local file named in the form) into
    the collection identified by the request's collection fields.

    Returns an error tuple on failed authorization / missing collection
    fields, otherwise a success message string. Raises if neither a file
    upload nor a local file name was sent.
    """
    if not self._check_authorization(request):
        return "ACCESS DENIED", 401
    if not self._load_collection_variables(request):
        return "COLLECTION FIELDS NOT SPECIFIED", 400

    # TODO check all required fields are there!

    store = Store(config=current_app.kingfisher_config, database=current_app.kingfisher_database)
    store.load_collection(
        self.collection_source,
        self.collection_data_version,
        self.collection_sample,
    )

    current_app.kingfisher_web_logger.info(
        "Submit File API V1 called for collection " + str(store.collection_id))

    store.add_collection_note(request.form.get('collection_note'))

    file_filename = request.form.get('file_name', '')
    file_url = request.form.get('url', '')
    file_data_type = request.form.get('data_type')
    file_encoding = request.form.get('encoding', 'utf-8')

    if 'file' in request.files:
        # Spool the upload to a temp file so the Store can read it from disk.
        (tmp_file, tmp_filename) = tempfile.mkstemp(prefix="ocdskf-")
        os.close(tmp_file)
        request.files['file'].save(tmp_filename)
        try:
            store.store_file_from_local(file_filename, file_url, file_data_type, file_encoding, tmp_filename)
        finally:
            # Always remove the temp copy, even if storing raised — the
            # original code leaked the file on failure.
            os.remove(tmp_filename)
    elif 'local_file_name' in request.form:
        store.store_file_from_local(file_filename, file_url, file_data_type, file_encoding,
                                    request.form.get('local_file_name'))
    else:
        raise Exception('Did not send file data')

    return "OCDS Kingfisher APIs V1 Submit"
def __init__(self, config, database, destination_collection, run_until_timestamp=None):
    """Set up shared transform state.

    Resolves the source collection this transform reads from and prepares a
    Store pointed at the destination collection it writes to.
    """
    self.config = config
    self.database = database
    self.run_until_timestamp = run_until_timestamp
    self.destination_collection = destination_collection
    # The collection this transform consumes data from.
    source_id = destination_collection.transform_from_collection_id
    self.source_collection = self.database.get_collection(source_id)
    # All writes go through a Store bound to the destination.
    store = Store(config, database)
    store.set_collection(destination_collection)
    self.store = store
def dispatch_request(self):
    """Handle an End Collection Store API V1 request.

    Marks the collection's store as ended; calling it again for an
    already-ended collection is a no-op with a distinct message.
    """
    if not self._check_authorization(request):
        return "ACCESS DENIED", 401
    if not self._load_collection_variables(request):
        return "COLLECTION FIELDS NOT SPECIFIED", 400

    # TODO check all required fields are there!

    store = Store(config=current_app.kingfisher_config, database=current_app.kingfisher_database)
    store.load_collection(
        self.collection_source,
        self.collection_data_version,
        self.collection_sample,
    )

    current_app.kingfisher_web_logger.info(
        "End Collection API V1 Store called for collection " + str(store.collection_id))

    # Guard: nothing to do if the store was already ended.
    if store.is_collection_store_ended():
        return "OCDS Kingfisher APIs V1 Submit - Already Done!"

    store.end_collection_store()
    return "OCDS Kingfisher APIs V1 Submit"
class BaseTransform():
    """Common plumbing shared by collection transforms: source/destination
    collection resolution and a Store bound to the destination."""

    def __init__(self, config, database, destination_collection, run_until_timestamp=None):
        self.config = config
        self.database = database
        self.destination_collection = destination_collection
        # The collection this transform reads from.
        self.source_collection = self.database.get_collection(
            destination_collection.transform_from_collection_id)
        # Writes are funnelled through a Store bound to the destination.
        self.store = Store(config, database)
        self.store.set_collection(destination_collection)
        self.run_until_timestamp = run_until_timestamp

    def process(self):
        """Run the transform — child classes are expected to implement this."""
        pass
def dispatch_request(self):
    """Handle a Submit Item API V1 request.

    Stores one JSON item into the collection; a failure while storing is
    recorded against the item instead of failing the request.
    """
    if not self._check_authorization(request):
        return "ACCESS DENIED", 401
    if not self._load_collection_variables(request):
        return "COLLECTION FIELDS NOT SPECIFIED", 400

    # TODO check all required fields are there!

    store = Store(config=current_app.kingfisher_config, database=current_app.kingfisher_database)
    store.load_collection(
        self.collection_source,
        self.collection_data_version,
        self.collection_sample,
    )

    current_app.kingfisher_web_logger.info(
        "Submit Item API V1 called for collection " + str(store.collection_id))

    store.add_collection_note(request.form.get('collection_note'))

    form = request.form
    file_filename = form.get('file_name', '')
    file_url = form.get('url', '')
    file_data_type = form.get('data_type')
    item_number = int(form.get('number'))
    data = json.loads(form.get('data'))

    try:
        store.store_file_item(
            file_filename,
            file_url,
            file_data_type,
            data,
            item_number,
        )
    except Exception as e:
        # Record the storage failure against the item rather than crashing.
        store.store_file_item_errors(file_filename, item_number, file_url, [str(e)])

    return "OCDS Kingfisher APIs V1 Submit"
def _setup_collections_and_data_run_transform(self, filename, load_a_second_time=False):
    """Create a source collection, load the given fixture into it, create a
    compile-releases destination collection, and run the transform twice
    (once before and once after the source is marked finished).

    Returns (source_collection_id, source_collection,
    destination_collection_id, destination_collection).
    """
    # Source collection
    source_collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    source_collection = self.database.get_collection(source_collection_id)

    # Load the fixture (optionally a second time under another file name).
    store = Store(self.config, self.database)
    store.set_collection(source_collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'fixtures', filename)
    store.store_file_from_local("test.json", "http://example.com", "record_package", "utf-8", json_filename)
    if load_a_second_time:
        store.store_file_from_local("test2.json", "http://example.com", "record_package", "utf-8", json_filename)

    # Destination collection for the compile-releases transform.
    destination_collection_id = self.database.get_or_create_collection_id(
        source_collection.source_id,
        source_collection.data_version,
        source_collection.sample,
        transform_from_collection_id=source_collection_id,
        transform_type=TRANSFORM_TYPE_COMPILE_RELEASES)
    destination_collection = self.database.get_collection(destination_collection_id)

    # Running the transform now should be a no-op: the source is still open.
    CompileReleasesTransform(self.config, self.database, destination_collection).process()
    with self.database.get_engine().begin() as connection:
        result = connection.execute(sa.sql.select([self.database.compiled_release_table]))
        assert 0 == result.rowcount

    # Once the source is finished, the transform should do the real work.
    self.database.mark_collection_store_done(source_collection_id)
    CompileReleasesTransform(self.config, self.database, destination_collection).process()

    return source_collection_id, source_collection, destination_collection_id, destination_collection
def dispatch_request(self):
    """Handle a Submit File Error API V1 request.

    Records the errors reported in the form (a JSON-encoded list) against a
    file in the requested collection.
    """
    if not self._check_authorization(request):
        return "ACCESS DENIED", 401
    if not self._load_collection_variables(request):
        return "COLLECTION FIELDS NOT SPECIFIED", 400

    # TODO check all required fields are there!

    store = Store(config=current_app.kingfisher_config, database=current_app.kingfisher_database)
    store.load_collection(
        self.collection_source,
        self.collection_data_version,
        self.collection_sample,
    )

    current_app.kingfisher_web_logger.info(
        "Submit File Error API V1 called for collection " + str(store.collection_id))

    form = request.form
    file_filename = form.get('file_name', '')
    file_url = form.get('url', '')
    # The errors arrive as a JSON-encoded value in the form body.
    file_errors = json.loads(form.get('errors'))

    store.store_file_errors(file_filename, file_url, file_errors)

    return "OCDS Kingfisher APIs V1 Submit"
def test_releases(self):
    """Releases loaded as 1.1 need re-checking against 1.2 but not 1.1."""
    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data',
        'sample_1_1_releases_multiple_with_same_ocid.json')
    store.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", json_filename)

    to_check_1_1 = list(self.database.get_releases_to_check(collection_id, override_schema_version='1.1'))
    assert len(to_check_1_1) == 0

    # the file has 6 releases
    to_check_1_2 = list(self.database.get_releases_to_check(collection_id, override_schema_version='1.2'))
    assert len(to_check_1_2) == 6
def run_command(self, args):
    """Load every file in a directory into the selected collection.

    args must provide `filetype`, `directory` and `encoding`. Exits the
    process (SystemExit(-1)) when the file type is not allowed or the
    directory does not exist.
    """
    import sys  # local import: the file's top-level imports are not fully visible here

    file_type = args.filetype
    directory = os.path.abspath(args.directory)
    # Normalise to a trailing slash so relative names can be sliced off below.
    if not directory.endswith('/'):
        directory += '/'
    encoding = args.encoding

    self.run_command_for_selecting_existing_collection(args)

    if file_type not in Store.ALLOWED_DATA_TYPES:
        print("We can not find the file type that you requested!")
        # quit() is provided by the `site` module and intended for the
        # interactive interpreter; sys.exit is the correct API and raises
        # the same SystemExit(-1).
        sys.exit(-1)

    if not os.path.isdir(directory):
        print("We can not find the directory that you requested!")
        sys.exit(-1)

    store = Store(config=self.config, database=self.database)
    store.set_collection(self.collection)

    glob_path = os.path.join(directory, '*')
    for file_path in glob.glob(glob_path):
        print("Processing {}".format(file_path))
        # Store under the path relative to the directory, with a file:/ URL.
        store.store_file_from_local(file_path[len(directory):], 'file:/' + file_path, file_type, encoding, file_path)

    print("Done")
def test_releases(self):
    """With both check flags disabled, loading data and running Checks must
    produce no check rows at all."""
    self.config.default_value_collection_check_data = False
    self.config.default_value_collection_check_older_data_with_schema_version_1_1 = False

    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data',
        'sample_1_0_release.json')
    store.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", json_filename)

    def assert_no_check_results():
        # All four check/check-error tables must be empty.
        tables = [
            self.database.record_check_table,
            self.database.release_check_table,
            self.database.record_check_error_table,
            self.database.release_check_error_table,
        ]
        with self.database.get_engine().begin() as connection:
            for table in tables:
                result = connection.execute(sa.sql.select([table]))
                assert 0 == result.rowcount

    assert_no_check_results()

    # Call Checks
    checks = Checks(self.database, collection)
    checks.process_all_files()

    # Still nothing: checking is disabled for this collection.
    assert_no_check_results()
def test_compiled_releases(self):
    """Compile-releases transform runs only once the source is finished, and
    the cached collection columns reflect the results."""
    # Source collection
    source_collection_id = self.database.get_or_create_collection_id("test", datetime.datetime.now(), False)
    source_collection = self.database.get_collection(source_collection_id)

    # Load fixture data
    store = Store(self.config, self.database)
    store.set_collection(source_collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'fixtures', 'sample_1_1_releases_multiple_with_same_ocid.json')
    store.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", json_filename)

    # Destination collection for the compile-releases transform
    destination_collection_id = self.database.get_or_create_collection_id(
        source_collection.source_id,
        source_collection.data_version,
        source_collection.sample,
        transform_from_collection_id=source_collection_id,
        transform_type=TRANSFORM_TYPE_COMPILE_RELEASES)
    destination_collection = self.database.get_collection(destination_collection_id)

    # The transform must not produce anything while the source is still open.
    CompileReleasesTransform(self.config, self.database, destination_collection).process()
    with self.database.get_engine().begin() as connection:
        result = connection.execute(sa.sql.select([self.database.compiled_release_table]))
        assert 0 == result.rowcount

    # Mark the source finished; now the transform should do the work.
    self.database.mark_collection_store_done(source_collection_id)
    CompileReleasesTransform(self.config, self.database, destination_collection).process()

    # update_collection_cached_columns
    self.database.update_collection_cached_columns(source_collection_id)
    self.database.update_collection_cached_columns(destination_collection_id)

    # Source caches 6 releases; destination caches 1 compiled release.
    with self.database.get_engine().begin() as connection:
        s = sa.sql.select([self.database.collection_table]).order_by(self.database.collection_table.columns.id)
        result = connection.execute(s)
        assert 2 == result.rowcount
        source_row = result.fetchone()
        assert 6 == source_row['cached_releases_count']
        assert 0 == source_row['cached_compiled_releases_count']
        destination_row = result.fetchone()
        assert 0 == destination_row['cached_releases_count']
        assert 1 == destination_row['cached_compiled_releases_count']
def test_records_via_process_all_files_method(self):
    """A 1.1 record needs no re-check against 1.1 but one against 1.2."""
    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'fixtures',
        'sample_1_1_record.json')
    store.store_file_from_local("test.json", "http://example.com", "record", "utf-8", json_filename)

    records_for_1_1 = list(self.database.get_records_to_check(collection_id, override_schema_version='1.1'))
    assert len(records_for_1_1) == 0
    records_for_1_2 = list(self.database.get_records_to_check(collection_id, override_schema_version='1.2'))
    assert len(records_for_1_2) == 1
def test_bad_data_with_control_codes(self):
    """Storing a file containing control codes records a warning on the
    collection file row."""
    # Source collection
    source_collection_id = self.database.get_or_create_collection_id("test", datetime.datetime.now(), False)
    source_collection = self.database.get_collection(source_collection_id)

    # Load a fixture whose JSON contains control codes.
    store = Store(self.config, self.database)
    store.set_collection(source_collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'data', 'sample_1_0_record_with_control_codes.json')
    store.store_file_from_local("test.json", "http://example.com", "record", "utf-8", json_filename)

    # Exactly one file row, carrying exactly one warning about the fix-up.
    with self.database.get_engine().begin() as connection:
        result = connection.execute(sa.sql.select([self.database.collection_file_table]))
        assert 1 == result.rowcount
        data = result.first()
        assert 1 == len(data['warnings'])
        assert 'We had to replace control codes: chr(16)' == data['warnings'][0]
def test_records(self):
    """update_collection_cached_columns reflects the one loaded record."""
    # Make collection
    collection_id = self.database.get_or_create_collection_id("test", datetime.datetime.now(), False)
    collection = self.database.get_collection(collection_id)

    # Load some data
    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'fixtures', 'sample_1_0_record.json')
    store.store_file_from_local("test.json", "http://example.com", "record_package", "utf-8", json_filename)

    # Recompute the cached columns.
    self.database.update_collection_cached_columns(collection_id)

    # The single loaded record should show up in the cached count.
    with self.database.get_engine().begin() as connection:
        result = connection.execute(sa.sql.select([self.database.collection_table]))
        assert 1 == result.rowcount
        row = result.fetchone()
        assert 1 == row['cached_records_count']
def test_two_collections_with_upgrade_1_0_to_1_1(self):
    """Deleting a source and its upgraded destination removes all dependent
    rows while keeping both collection rows."""
    def table_rowcount(connection, table):
        # Row count of a full select on the given table.
        return connection.execute(sa.sql.select([table])).rowcount

    # Source Collection
    source_collection_id = self.database.get_or_create_collection_id("test", datetime.datetime.now(), False)
    source_collection = self.database.get_collection(source_collection_id)
    store = Store(self.config, self.database)
    store.set_collection(source_collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data', 'sample_1_0_record.json')
    store.store_file_from_local("test.json", "http://example.com", "record", "utf-8", json_filename)
    self.database.mark_collection_store_done(source_collection_id)

    # Destination Collection: an upgrade-1.0-to-1.1 transform of the source.
    destination_collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False,
        transform_from_collection_id=source_collection_id,
        transform_type=TRANSFORM_TYPE_UPGRADE_1_0_TO_1_1)
    destination_collection = self.database.get_collection(destination_collection_id)
    transform = Upgrade10To11Transform(self.config, self.database, destination_collection)
    transform.process()

    # After the transform, both collections hold data.
    with self.database.get_engine().begin() as connection:
        assert 2 == table_rowcount(connection, self.database.collection_table)
        assert 2 == table_rowcount(connection, self.database.collection_file_table)
        assert 2 == table_rowcount(connection, self.database.collection_file_item_table)
        assert 2 == table_rowcount(connection, self.database.record_table)
        assert 1 == table_rowcount(connection, self.database.transform_upgrade_1_0_to_1_1_status_record_table)
        assert 0 < table_rowcount(connection, self.database.data_table)
        assert 0 < table_rowcount(connection, self.database.package_data_table)

    # Delete both collections.
    self.database.mark_collection_deleted_at(source_collection_id)
    self.database.delete_collection(source_collection_id)
    self.database.mark_collection_deleted_at(destination_collection_id)
    self.database.delete_collection(destination_collection_id)

    # Collection rows remain; all dependent rows are gone.
    with self.database.get_engine().begin() as connection:
        assert 2 == table_rowcount(connection, self.database.collection_table)
        assert 0 == table_rowcount(connection, self.database.collection_file_table)
        assert 0 == table_rowcount(connection, self.database.collection_file_item_table)
        assert 0 == table_rowcount(connection, self.database.record_table)
        assert 0 == table_rowcount(connection, self.database.transform_upgrade_1_0_to_1_1_status_record_table)
        assert 0 == table_rowcount(connection, self.database.data_table)
        assert 0 == table_rowcount(connection, self.database.package_data_table)
def test_a_single_collection(self):
    """Deleting a collection removes its dependent rows but keeps the
    collection row itself."""
    def table_rowcount(connection, table):
        # Row count of a full select on the given table.
        return connection.execute(sa.sql.select([table])).rowcount

    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data', 'sample_1_0_record.json')
    store.store_file_from_local("test.json", "http://example.com", "record", "utf-8", json_filename)

    # Loading one record file populates one row in each of these tables.
    with self.database.get_engine().begin() as connection:
        assert 1 == table_rowcount(connection, self.database.collection_table)
        assert 1 == table_rowcount(connection, self.database.collection_file_table)
        assert 1 == table_rowcount(connection, self.database.collection_file_item_table)
        assert 1 == table_rowcount(connection, self.database.record_table)
        assert 1 == table_rowcount(connection, self.database.data_table)
        assert 1 == table_rowcount(connection, self.database.package_data_table)

    # Delete
    self.database.mark_collection_deleted_at(collection_id)
    self.database.delete_collection(collection_id)

    # The collection row survives; all dependent rows are removed.
    with self.database.get_engine().begin() as connection:
        assert 1 == table_rowcount(connection, self.database.collection_table)
        assert 0 == table_rowcount(connection, self.database.collection_file_table)
        assert 0 == table_rowcount(connection, self.database.collection_file_item_table)
        assert 0 == table_rowcount(connection, self.database.record_table)
        assert 0 == table_rowcount(connection, self.database.data_table)
        assert 0 == table_rowcount(connection, self.database.package_data_table)
def test_release_1(self):
    """Upgrade transform is idempotent, and only closes the destination
    collection after the source is finished."""
    def table_rowcount(connection, table):
        # Row count of a full select on the given table.
        return connection.execute(sa.sql.select([table])).rowcount

    def assert_transform_results():
        with self.database.get_engine().begin() as connection:
            assert 4 == table_rowcount(connection, self.database.release_table)
            assert 0 == table_rowcount(connection, self.database.record_table)
            assert 0 == table_rowcount(connection, self.database.transform_upgrade_1_0_to_1_1_status_record_table)
            assert 2 == table_rowcount(connection, self.database.transform_upgrade_1_0_to_1_1_status_release_table)

    # Make source collection
    source_collection_id = self.database.get_or_create_collection_id("test", datetime.datetime.now(), False)
    source_collection = self.database.get_collection(source_collection_id)

    # Load some data
    store = Store(self.config, self.database)
    store.set_collection(source_collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'fixtures', 'sample_1_0_releases.json')
    store.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", json_filename)

    # Make destination collection
    destination_collection_id = self.database.get_or_create_collection_id(
        source_collection.source_id,
        source_collection.data_version,
        source_collection.sample,
        transform_from_collection_id=source_collection_id,
        transform_type=TRANSFORM_TYPE_UPGRADE_1_0_TO_1_1)
    destination_collection = self.database.get_collection(destination_collection_id)

    # transform!
    Upgrade10To11Transform(self.config, self.database, destination_collection).process()
    assert_transform_results()

    # transform again! This should be fine — results unchanged.
    Upgrade10To11Transform(self.config, self.database, destination_collection).process()
    assert_transform_results()

    # destination collection will not be closed (because source is still open!)
    destination_collection = self.database.get_collection(destination_collection_id)
    assert destination_collection.store_end_at is None

    # Mark source collection as finished
    self.database.mark_collection_store_done(source_collection_id)

    # transform!
    Upgrade10To11Transform(self.config, self.database, destination_collection).process()

    # destination collection should be closed
    destination_collection = self.database.get_collection(destination_collection_id)
    assert destination_collection.store_end_at is not None
def test_records(self):
    """Record checks run with the 1.1 schema override, and re-running
    Checks does not duplicate results."""
    def table_rowcount(connection, table):
        # Row count of a full select on the given table.
        return connection.execute(sa.sql.select([table])).rowcount

    def assert_one_record_check():
        # Exactly one record check, made against schema 1.1; nothing else.
        with self.database.get_engine().begin() as connection:
            result = connection.execute(sa.sql.select([self.database.record_check_table]))
            assert 1 == result.rowcount
            data = result.fetchone()
            assert '1.1' == data.override_schema_version
            assert 0 == table_rowcount(connection, self.database.release_check_table)
            assert 0 == table_rowcount(connection, self.database.record_check_error_table)
            assert 0 == table_rowcount(connection, self.database.release_check_error_table)

    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    self.database.mark_collection_check_older_data_with_schema_version_1_1(
        collection_id, True)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data', 'sample_1_0_record.json')
    store.store_file_from_local("test.json", "http://example.com", "record", "utf-8", json_filename)

    # No checks have run yet.
    with self.database.get_engine().begin() as connection:
        assert 0 == table_rowcount(connection, self.database.record_check_table)
        assert 0 == table_rowcount(connection, self.database.release_check_table)
        assert 0 == table_rowcount(connection, self.database.record_check_error_table)
        assert 0 == table_rowcount(connection, self.database.release_check_error_table)

    # Call Checks
    checks = Checks(self.database, collection)
    checks.process_all_files()
    assert_one_record_check()

    # Call Checks Again - that should be fine
    checks = Checks(self.database, collection)
    checks.process_all_files()
    assert_one_record_check()
def test_releases_via_process_file_item_id_method(self):
    """process_file_item_id creates release checks once and is safe to
    re-run without duplicating them."""
    def table_rowcount(connection, table):
        # Row count of a full select on the given table.
        return connection.execute(sa.sql.select([table])).rowcount

    def assert_release_check_counts(expected_release_checks):
        with self.database.get_engine().begin() as connection:
            assert 0 == table_rowcount(connection, self.database.record_check_table)
            assert expected_release_checks == table_rowcount(connection, self.database.release_check_table)
            assert 0 == table_rowcount(connection, self.database.record_check_error_table)
            assert 0 == table_rowcount(connection, self.database.release_check_error_table)

    collection_id = self.database.get_or_create_collection_id(
        "test", datetime.datetime.now(), False)
    self.database.mark_collection_check_data(collection_id, True)
    self.database.mark_collection_check_older_data_with_schema_version_1_1(
        collection_id, True)
    collection = self.database.get_collection(collection_id)

    store = Store(self.config, self.database)
    store.set_collection(collection)
    json_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'fixtures', 'sample_1_0_release.json')
    store.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", json_filename)

    # Pick out the single file item just stored.
    file_item = self.database.get_all_files_items_in_file(
        self.database.get_all_files_in_collection(collection_id)[0])[0]

    # No checks have run yet.
    assert_release_check_counts(0)

    # Call Checks
    checks = Checks(self.database, collection)
    checks.process_file_item_id(file_item.database_id)

    # Two release checks recorded, nothing else.
    assert_release_check_counts(2)

    # Call Checks Again - that should be fine
    checks = Checks(self.database, collection)
    checks.process_file_item_id(file_item.database_id)

    # Results unchanged.
    assert_release_check_counts(2)