def run_command(self, args):
        """Store every regular file found directly inside a local directory.

        Reads ``args.filetype``, ``args.directory`` and ``args.encoding``,
        calls ``run_command_for_selecting_existing_collection(args)`` and then
        stores each file into ``self.collection`` (presumably set by that
        call - confirm against the base command class).

        Prints a message and exits with status -1 when the file type is not in
        ``Store.ALLOWED_DATA_TYPES`` or when the directory does not exist.
        """
        # Imported locally so this block does not depend on a module-level import.
        import sys

        file_type = args.filetype
        directory = os.path.abspath(args.directory)
        # A trailing slash makes the prefix slice below yield a bare file name.
        if not directory.endswith('/'):
            directory += '/'
        encoding = args.encoding

        self.run_command_for_selecting_existing_collection(args)
        if file_type not in Store.ALLOWED_DATA_TYPES:
            print("We can not find the file type that you requested!")
            # sys.exit is always available; quit() is only installed by the
            # site module and is intended for interactive sessions.
            sys.exit(-1)

        if not os.path.isdir(directory):
            print("We can not find the directory that you requested!")
            sys.exit(-1)

        store = Store(config=self.config, database=self.database)
        store.set_collection(self.collection)

        glob_path = os.path.join(directory, '*')
        # Sorted so files are processed in a reproducible order.
        for file_path in sorted(glob.glob(glob_path)):
            # '*' also matches sub-directories; only regular files are data.
            if not os.path.isfile(file_path):
                continue
            print("Processing {}".format(file_path))
            store.store_file_from_local(file_path[len(directory):],
                                        'file:/' + file_path, file_type,
                                        encoding, file_path)

        print("Done")
Beispiel #2
0
    def test_releases(self):
        """Store a release package and count the releases reported for checking.

        With a schema-version override of '1.1' no release is expected to be
        returned; with '1.2' all six releases of the fixture are expected.
        """
        db = self.database

        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(
            here, 'data', 'sample_1_1_releases_multiple_with_same_ocid.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "release_package", "utf-8", fixture)

        pending_1_1 = list(db.get_releases_to_check(
            new_collection_id, override_schema_version='1.1'))
        assert len(pending_1_1) == 0

        # the file has 6 releases
        pending_1_2 = list(db.get_releases_to_check(
            new_collection_id, override_schema_version='1.2'))
        assert len(pending_1_2) == 6
Beispiel #3
0
    def test_releases(self):
        """With both check defaults switched off, running checks writes nothing.

        The four check/check-error tables are asserted to be empty both before
        and after ``Checks.process_all_files`` is called.
        """
        self.config.default_value_collection_check_data = False
        self.config.default_value_collection_check_older_data_with_schema_version_1_1 = False

        db = self.database
        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'data', 'sample_1_0_release.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "release_package", "utf-8", fixture)

        def assert_no_check_results():
            # Every check table and check-error table must be empty.
            table_names = (
                'record_check_table',
                'release_check_table',
                'record_check_error_table',
                'release_check_error_table',
            )
            with db.get_engine().begin() as connection:
                for table_name in table_names:
                    query = sa.sql.select([getattr(db, table_name)])
                    assert 0 == connection.execute(query).rowcount

        # Check Number of check results
        assert_no_check_results()

        # Call Checks
        runner = Checks(db, target)
        runner.process_all_files()

        # Check Number of check results
        assert_no_check_results()
    def test_compiled_releases(self):
        """Compile releases into a destination collection and check cached counts.

        The transform runs once before the source collection is marked as
        store-done (no compiled release rows expected) and once afterwards.
        The cached columns of both collections are then refreshed and compared
        against the expected release / compiled-release counts.
        """
        db = self.database

        # Source collection, loaded from a release package fixture
        src_id = db.get_or_create_collection_id("test", datetime.datetime.now(), False)
        src = db.get_collection(src_id)

        loader = Store(self.config, db)
        loader.set_collection(src)
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', 'sample_1_1_releases_multiple_with_same_ocid.json')
        loader.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", fixture)

        # Destination collection derived from the source
        dest_id = db.get_or_create_collection_id(
            src.source_id,
            src.data_version,
            src.sample,
            transform_from_collection_id=src_id,
            transform_type=TRANSFORM_TYPE_COMPILE_RELEASES)
        dest = db.get_collection(dest_id)

        # First run: nothing should happen because source is not finished
        first_run = CompileReleasesTransform(self.config, db, dest)
        first_run.process()

        with db.get_engine().begin() as connection:
            compiled = connection.execute(sa.sql.select([db.compiled_release_table]))
            assert 0 == compiled.rowcount

        # Finish the source collection; the next run should do the work
        db.mark_collection_store_done(src_id)
        second_run = CompileReleasesTransform(self.config, db, dest)
        second_run.process()

        # Refresh the cached columns of both collections
        for each_id in (src_id, dest_id):
            db.update_collection_cached_columns(each_id)

        # Rows come back ordered by id: source first, destination second
        with db.get_engine().begin() as connection:
            query = sa.sql.select([db.collection_table]).order_by(db.collection_table.columns.id)
            rows = connection.execute(query)
            assert 2 == rows.rowcount
            for releases, compiled_releases in ((6, 0), (0, 1)):
                row = rows.fetchone()
                assert releases == row['cached_releases_count']
                assert compiled_releases == row['cached_compiled_releases_count']
Beispiel #5
0
    def _setup_collections_and_data_run_transform(self,
                                                  filename,
                                                  load_a_second_time=False):
        """Build a source and a compile-releases destination collection and transform.

        The fixture ``filename`` is stored into the source collection as a
        record package (twice, under a second name, when
        ``load_a_second_time`` is true). The transform is run once before the
        source is marked store-done, asserting that no compiled release rows
        exist, and once afterwards.

        Returns a tuple ``(source_collection_id, source_collection,
        destination_collection_id, destination_collection)``.
        """
        db = self.database

        # Source collection
        src_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        src = db.get_collection(src_id)

        # Load the fixture once, or twice under different names
        loader = Store(self.config, db)
        loader.set_collection(src)
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', filename)
        stored_names = ["test.json"]
        if load_a_second_time:
            stored_names.append("test2.json")
        for stored_name in stored_names:
            loader.store_file_from_local(stored_name, "http://example.com",
                                         "record_package", "utf-8", fixture)

        # Destination collection
        dest_id = db.get_or_create_collection_id(
            src.source_id,
            src.data_version,
            src.sample,
            transform_from_collection_id=src_id,
            transform_type=TRANSFORM_TYPE_COMPILE_RELEASES)
        dest = db.get_collection(dest_id)

        # First run: nothing should happen because source is not finished
        early_run = CompileReleasesTransform(self.config, db, dest)
        early_run.process()

        with db.get_engine().begin() as connection:
            compiled = connection.execute(
                sa.sql.select([db.compiled_release_table]))
            assert 0 == compiled.rowcount

        # Finish the source, then run again; this should do the work
        db.mark_collection_store_done(src_id)
        real_run = CompileReleasesTransform(self.config, db, dest)
        real_run.process()

        return src_id, src, dest_id, dest
Beispiel #6
0
class BaseTransform:
    """Common state for transforms that write into a destination collection.

    The constructor looks up the source collection through the destination's
    ``transform_from_collection_id`` and prepares a ``Store`` bound to the
    destination collection.
    """

    def __init__(self,
                 config,
                 database,
                 destination_collection,
                 run_until_timestamp=None):
        """Remember the collaborators and set up the destination store."""
        self.config = config
        self.database = database
        self.destination_collection = destination_collection

        source_id = destination_collection.transform_from_collection_id
        self.source_collection = self.database.get_collection(source_id)

        destination_store = Store(config, database)
        self.store = destination_store
        destination_store.set_collection(destination_collection)

        self.run_until_timestamp = run_until_timestamp

    def process(self):
        """Do nothing; this is an "abstract" method child classes should implement."""
        return None
    def test_records_via_process_all_files_method(self):
        """Store a single record and count the records reported for checking.

        No record is expected for a schema-version override of '1.1', and
        exactly one for an override of '1.2'.
        """
        db = self.database

        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', 'sample_1_1_record.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "record", "utf-8", fixture)

        pending_1_1 = list(db.get_records_to_check(
            new_collection_id, override_schema_version='1.1'))
        assert len(pending_1_1) == 0

        pending_1_2 = list(db.get_records_to_check(
            new_collection_id, override_schema_version='1.2'))
        assert len(pending_1_2) == 1
    def test_bad_data_with_control_codes(self):
        """Storing a file containing control codes records one warning on the file row."""
        db = self.database

        # Source collection
        src_id = db.get_or_create_collection_id("test", datetime.datetime.now(), False)
        src = db.get_collection(src_id)

        # Load the fixture that contains control codes
        loader = Store(self.config, db)
        loader.set_collection(src)
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'data', 'sample_1_0_record_with_control_codes.json')
        loader.store_file_from_local("test.json", "http://example.com", "record", "utf-8", fixture)

        # The single collection file row must carry exactly one warning
        with db.get_engine().begin() as connection:
            file_rows = connection.execute(sa.sql.select([db.collection_file_table]))
            assert 1 == file_rows.rowcount
            file_row = file_rows.first()
            assert 1 == len(file_row['warnings'])
            assert 'We had to replace control codes: chr(16)' == file_row['warnings'][0]
    def test_records(self):
        """After refreshing cached columns the collection row reports one record."""
        db = self.database

        # Collection under test
        new_collection_id = db.get_or_create_collection_id("test", datetime.datetime.now(), False)
        target = db.get_collection(new_collection_id)

        # Load a record package fixture
        loader = Store(self.config, db)
        loader.set_collection(target)
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', 'sample_1_0_record.json')
        loader.store_file_from_local("test.json", "http://example.com", "record_package", "utf-8", fixture)

        # Operation under test
        db.update_collection_cached_columns(new_collection_id)

        # Exactly one collection row, with a cached record count of one
        with db.get_engine().begin() as connection:
            collection_rows = connection.execute(sa.sql.select([db.collection_table]))
            assert 1 == collection_rows.rowcount
            collection_row = collection_rows.fetchone()
            assert 1 == collection_row['cached_records_count']
Beispiel #10
0
    def test_a_single_collection(self):
        """Deleting a collection empties its data tables but keeps the collection row.

        One record is stored, every listed table is asserted to hold one row,
        then the collection is marked deleted and deleted, after which only the
        collection table is expected to still hold a row.
        """
        db = self.database

        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'data', 'sample_1_0_record.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "record", "utf-8", fixture)

        def assert_row_counts(expected_by_table):
            # expected_by_table: ordered (table attribute name, row count) pairs
            with db.get_engine().begin() as connection:
                for table_name, expected in expected_by_table:
                    query = sa.sql.select([getattr(db, table_name)])
                    assert expected == connection.execute(query).rowcount

        # Check Number of rows in various tables
        assert_row_counts((
            ('collection_table', 1),
            ('collection_file_table', 1),
            ('collection_file_item_table', 1),
            ('record_table', 1),
            ('data_table', 1),
            ('package_data_table', 1),
        ))

        # Delete
        db.mark_collection_deleted_at(new_collection_id)
        db.delete_collection(new_collection_id)

        # Check Number of rows in various tables
        assert_row_counts((
            ('collection_table', 1),
            ('collection_file_table', 0),
            ('collection_file_item_table', 0),
            ('record_table', 0),
            ('data_table', 0),
            ('package_data_table', 0),
        ))
Beispiel #11
0
    def test_records(self):
        """Checking a record with the older-data 1.1 option yields one record check.

        The check tables start empty; after ``process_all_files`` there is one
        record check row whose ``override_schema_version`` is '1.1'. Running
        the checks a second time leaves the same result.
        """
        db = self.database

        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        db.mark_collection_check_older_data_with_schema_version_1_1(
            new_collection_id, True)

        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'data', 'sample_1_0_record.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "record", "utf-8", fixture)

        def assert_check_results(record_checks):
            # record_checks: expected row count of the record check table;
            # the three other check tables must always be empty.
            with db.get_engine().begin() as connection:
                outcome = connection.execute(
                    sa.sql.select([db.record_check_table]))
                assert record_checks == outcome.rowcount
                if record_checks:
                    first_row = outcome.fetchone()
                    assert '1.1' == first_row.override_schema_version

                for table_name in ('release_check_table',
                                   'record_check_error_table',
                                   'release_check_error_table'):
                    query = sa.sql.select([getattr(db, table_name)])
                    assert 0 == connection.execute(query).rowcount

        # Check Number of check results
        assert_check_results(0)

        # Call Checks
        first_pass = Checks(db, target)
        first_pass.process_all_files()

        # Check Number of check results
        assert_check_results(1)

        # Call Checks Again - that should be fine
        second_pass = Checks(db, target)
        second_pass.process_all_files()

        # Check Number of check results
        assert_check_results(1)
    def test_release_1(self):
        """Upgrade 1.0 releases to 1.1 and verify row counts and collection closing.

        The transform is run twice while the source is still open (row counts
        must be identical both times and the destination must stay open), then
        once more after the source is marked store-done, after which the
        destination collection must have ``store_end_at`` set.
        """
        db = self.database

        # Source collection, loaded from a release package fixture
        src_id = db.get_or_create_collection_id("test", datetime.datetime.now(), False)
        src = db.get_collection(src_id)

        loader = Store(self.config, db)
        loader.set_collection(src)
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', 'sample_1_0_releases.json')
        loader.store_file_from_local("test.json", "http://example.com", "release_package", "utf-8", fixture)

        # Destination collection
        dest_id = db.get_or_create_collection_id(
            src.source_id,
            src.data_version,
            src.sample,
            transform_from_collection_id=src_id,
            transform_type=TRANSFORM_TYPE_UPGRADE_1_0_TO_1_1)
        dest = db.get_collection(dest_id)

        expected_rows = (
            ('release_table', 4),
            ('record_table', 0),
            ('transform_upgrade_1_0_to_1_1_status_record_table', 0),
            ('transform_upgrade_1_0_to_1_1_status_release_table', 2),
        )

        def assert_expected_rows():
            with db.get_engine().begin() as connection:
                for table_name, expected in expected_rows:
                    query = sa.sql.select([getattr(db, table_name)])
                    assert expected == connection.execute(query).rowcount

        # transform, then transform again - the second run should be fine
        for _ in range(2):
            upgrade = Upgrade10To11Transform(self.config, db, dest)
            upgrade.process()
            assert_expected_rows()

        # destination collection will not be closed (because source is still open!)
        dest = db.get_collection(dest_id)
        assert dest.store_end_at is None

        # Mark source collection as finished
        db.mark_collection_store_done(src_id)

        # transform!
        final_upgrade = Upgrade10To11Transform(self.config, db, dest)
        final_upgrade.process()

        # destination collection should be closed
        dest = db.get_collection(dest_id)
        assert dest.store_end_at is not None
Beispiel #13
0
    def test_two_collections_with_upgrade_1_0_to_1_1(self):
        """Deleting a source and its upgraded destination empties all data tables.

        A record is stored in the source, upgraded into the destination, the
        table row counts are verified, both collections are deleted, and the
        tables are verified again (only the two collection rows remain).
        """
        db = self.database

        def row_count(connection, table_name):
            # Number of rows reported for a full select of the named table.
            query = sa.sql.select([getattr(db, table_name)])
            return connection.execute(query).rowcount

        # Source Collection
        src_id = db.get_or_create_collection_id("test", datetime.datetime.now(), False)
        src = db.get_collection(src_id)

        loader = Store(self.config, db)
        loader.set_collection(src)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'data', 'sample_1_0_record.json')
        loader.store_file_from_local("test.json", "http://example.com", "record", "utf-8", fixture)

        db.mark_collection_store_done(src_id)

        # Destination Collection
        dest_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False,
            transform_from_collection_id=src_id,
            transform_type=TRANSFORM_TYPE_UPGRADE_1_0_TO_1_1)
        dest = db.get_collection(dest_id)

        upgrade = Upgrade10To11Transform(self.config, db, dest)
        upgrade.process()

        # Check Number of rows in various tables
        with db.get_engine().begin() as connection:
            for table_name, expected in (
                    ('collection_table', 2),
                    ('collection_file_table', 2),
                    ('collection_file_item_table', 2),
                    ('record_table', 2),
                    ('transform_upgrade_1_0_to_1_1_status_record_table', 1)):
                assert expected == row_count(connection, table_name)

            # These two only need to be non-empty
            for table_name in ('data_table', 'package_data_table'):
                assert 0 < row_count(connection, table_name)

        # Delete
        for doomed_id in (src_id, dest_id):
            db.mark_collection_deleted_at(doomed_id)
            db.delete_collection(doomed_id)

        # Check Number of rows in various tables
        with db.get_engine().begin() as connection:
            for table_name, expected in (
                    ('collection_table', 2),
                    ('collection_file_table', 0),
                    ('collection_file_item_table', 0),
                    ('record_table', 0),
                    ('transform_upgrade_1_0_to_1_1_status_record_table', 0),
                    ('data_table', 0),
                    ('package_data_table', 0)):
                assert expected == row_count(connection, table_name)
    def test_releases_via_process_file_item_id_method(self):
        """Checking one file item of a release package yields two release checks.

        Both check options are enabled on the collection. The check tables
        start empty; after ``process_file_item_id`` the release check table
        holds two rows, and a second call leaves the result unchanged.
        """
        db = self.database

        new_collection_id = db.get_or_create_collection_id(
            "test", datetime.datetime.now(), False)
        db.mark_collection_check_data(new_collection_id, True)
        db.mark_collection_check_older_data_with_schema_version_1_1(
            new_collection_id, True)

        target = db.get_collection(new_collection_id)

        loader = Store(self.config, db)
        loader.set_collection(target)

        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(here, 'fixtures', 'sample_1_0_release.json')
        loader.store_file_from_local("test.json", "http://example.com",
                                     "release_package", "utf-8", fixture)

        # First item of the first file in the collection
        first_file = db.get_all_files_in_collection(new_collection_id)[0]
        file_item = db.get_all_files_items_in_file(first_file)[0]

        def assert_check_results(release_checks):
            # release_checks: expected row count of the release check table;
            # the three other check tables must always be empty.
            with db.get_engine().begin() as connection:
                for table_name, expected in (
                        ('record_check_table', 0),
                        ('release_check_table', release_checks),
                        ('record_check_error_table', 0),
                        ('release_check_error_table', 0)):
                    query = sa.sql.select([getattr(db, table_name)])
                    assert expected == connection.execute(query).rowcount

        # Check Number of check results
        assert_check_results(0)

        # Call Checks
        first_pass = Checks(db, target)
        first_pass.process_file_item_id(file_item.database_id)

        # Check Number of check results
        assert_check_results(2)

        # Call Checks Again - that should be fine
        second_pass = Checks(db, target)
        second_pass.process_file_item_id(file_item.database_id)

        # Check Number of check results
        assert_check_results(2)