Example #1
0
def test_database_store():
    """Check the start/end lifecycle of a store and how it is looked up.

    A store should only be reported as done once it has been ended, and only
    for the exact source, data version and sample flag it was started with.
    """
    source = "test_source"
    version = "2018-01-01-10-00-00"

    setup_main_database()
    with tempfile.TemporaryDirectory() as workdir:
        meta = MetadataDB(workdir)
        meta.create_session_metadata("Test", True, "http://www.test.com",
                                     version)

        # Nothing has been started yet, so the store can not be done.
        assert not database.is_store_done(source, version, True)

        # Starting the store makes it discoverable by the same key.
        new_store_id = database.start_store(source, version, True, meta)
        assert database.get_id_of_store(source, version,
                                        True) == new_store_id

        # Ending the store marks it as done.
        database.end_store(new_store_id)
        assert database.is_store_done(source, version, True)

        # Stores differing in any one part of the key are not marked as done.
        other_stores = [
            (source, version, False),  # Not a Sample
            (source, "2027-01-01-10-00-00", True),  # Different version
            ("a_different_source", version, True),  # Different source
        ]
        for other_source, other_version, other_sample in other_stores:
            assert not database.is_store_done(other_source, other_version,
                                              other_sample)
Example #2
0
def test_checks_records_error():
    """Check that a record with an unknown schema version yields a check error.

    A single record whose package declares version ``0.1-does-not-exist`` is
    stored and then checked; the first row of the record check error table is
    expected to carry the "schema version ... not valid" message.
    """
    version = "2018-01-01-10-00-00"

    setup_main_database()
    with tempfile.TemporaryDirectory() as workdir:
        meta = MetadataDB(workdir)
        meta.create_session_metadata("Test", True, "http://www.test.com",
                                     version)
        meta.add_filestatus({
            'filename': 'test1.json',
            'url': 'http://www.test.com',
            'data_type': 'record_package'
        })

        # Store one record per listed file.
        session_id = database.start_store("test_source", version, True, meta)
        for file_status in meta.list_filestatus():
            with database.add_file(session_id, file_status) as stored_file:
                stored_file.insert_record(
                    {'record': 'totally'},
                    {'version': '0.1-does-not-exist', 'extensions': []})
        database.end_store(session_id)

        # Hard coded on purpose: relies on the DB assigning ID 1 to the first
        # row inserted into a freshly set up database.
        record_id = 1

        # Nothing has been checked yet.
        assert not database.is_record_check_done(record_id)
        assert not database.is_check_done(session_id)

        # Run the checks over every listed file.
        for file_status in meta.list_filestatus():
            checks.check_file(session_id, file_status)

        # Both the record and the whole session now count as checked.
        assert database.is_record_check_done(record_id)
        assert database.is_check_done(session_id)

        with database.engine.begin() as connection:
            query = sa.sql.select([database.record_check_error_table])
            first_row = connection.execute(query).fetchone()

        expected = 'The schema version in your data is not valid. Accepted values:'
        assert expected in first_row['error']
Example #3
0
def test_checks_releases():
    """Check that a stored release is checked and its result is saved.

    A single minimal release is stored and then checked; the first row of the
    release check table is expected to hold output for a JSON file with at
    least one validation error.
    """
    version = "2018-01-01-10-00-00"

    setup_main_database()
    with tempfile.TemporaryDirectory() as workdir:
        meta = MetadataDB(workdir)
        meta.create_session_metadata("Test", True, "http://www.test.com",
                                     version)
        meta.add_filestatus({
            'filename': 'test1.json',
            'url': 'http://www.test.com',
            'data_type': 'release_package'
        })

        # Store one release per listed file.
        session_id = database.start_store("test_source", version, True, meta)
        for file_status in meta.list_filestatus():
            with database.add_file(session_id, file_status) as stored_file:
                stored_file.insert_release({'release': 'totally'},
                                           {'extensions': []})
        database.end_store(session_id)

        # Hard coded on purpose: relies on the DB assigning ID 1 to the first
        # row inserted into a freshly set up database.
        release_id = 1

        # Nothing has been checked yet.
        assert not database.is_release_check_done(release_id)

        # Run the checks over every listed file.
        for file_status in meta.list_filestatus():
            checks.check_file(session_id, file_status)

        # The release now counts as checked.
        assert database.is_release_check_done(release_id)

        with database.engine.begin() as connection:
            query = sa.sql.select([database.release_check_table])
            first_row = connection.execute(query).fetchone()

        assert first_row['cove_output']['file_type'] == 'json'
        assert len(first_row['cove_output']['validation_errors']) > 0
Example #4
0
def test_database_store_file():
    """Check that a file known to the metadata DB can be looked up by name.

    The file status is registered before the store is started; afterwards the
    file's ID is requested by filename and expected to be 1 (presumably the
    first row of a freshly set up database).
    """
    version = "2018-01-01-10-00-00"

    setup_main_database()
    with tempfile.TemporaryDirectory() as workdir:
        meta = MetadataDB(workdir)
        meta.create_session_metadata("Test", True, "http://www.test.com",
                                     version)
        meta.add_filestatus({
            'filename': 'test1.json',
            'url': 'http://www.test.com',
            'data_type': 'record_package'
        })

        # Start the store so the file can be resolved against it.
        new_store_id = database.start_store("test_source", version, True,
                                            meta)

        # Look the file up by its filename only.
        lookup = {'filename': 'test1.json'}
        assert database.get_id_of_store_file(new_store_id, lookup) == 1
Example #5
0
    def run_store(self):
        """Store the fetched files for this source in the database.

        Returns without doing anything if ``database.is_store_done`` already
        reports this source / data version / sample combination as stored.
        Otherwise a store is started, every file listed by the metadata DB is
        loaded from ``self.full_directory`` and its releases or records are
        inserted one by one, and finally the store is ended.

        Files whose ``data_type`` starts with ``meta`` and files that
        ``database.is_store_file_done`` reports as done are skipped.

        Raises:
            Exception: if the fetch was not successful, if a file has an
                unknown ``data_type``, if a package in a file is not a JSON
                object, if its ``releases``/``records`` entry is missing or
                is not a list, or if a row in that list is not a JSON object.
                Errors raised while opening or parsing a file propagate
                unchanged.
        """
        metadata = self.metadata_db.get_session()

        if not metadata['fetch_success']:
            raise Exception('Can not run store without a successful fetch')

        if database.is_store_done(self.source_id, self.data_version,
                                  self.sample):
            return

        release_types = ('release_package',
                         'release_package_list_in_results',
                         'release_package_list')
        record_types = ('record_package',
                        'record_package_list_in_results',
                        'record_package_list')
        # File layouts: packages under a top-level 'results' key, or the file
        # itself being a list of packages. Anything else is a single package.
        in_results_types = ('record_package_list_in_results',
                            'release_package_list_in_results')
        bare_list_types = ('record_package_list', 'release_package_list')

        source_session_id = database.start_store(self.source_id,
                                                 self.data_version,
                                                 self.sample, self.metadata_db)

        for data in self.metadata_db.list_filestatus():
            data_type = data['data_type']

            if data_type.startswith('meta'):
                continue

            if database.is_store_file_done(source_session_id, data):
                continue

            with database.add_file(source_session_id, data) as database_file:
                filename = data['filename']

                # Open and parse errors are deliberately left to propagate.
                with open(os.path.join(self.full_directory, filename),
                          encoding=data['encoding']) as f:
                    file_json_data = json.load(f)

                # Normalise the file into a list of packages.
                if data_type in in_results_types:
                    objects_list = list(file_json_data['results'])
                elif data_type in bare_list_types:
                    objects_list = list(file_json_data)
                else:
                    objects_list = [file_json_data]

                del file_json_data

                # The data type is fixed per file, so classify it once here
                # rather than for every package and every row.
                if data_type in release_types:
                    list_key = 'releases'
                    missing_msg = "Release list not found in file {}"
                    not_list_msg = "Release list which is not a list found in file {}"
                    insert_row = database_file.insert_release
                elif data_type in record_types:
                    list_key = 'records'
                    missing_msg = "Record list not found in file {}"
                    not_list_msg = "Record list which is not a list found in file {}"
                    insert_row = database_file.insert_record
                else:
                    raise Exception("data_type not a known type")

                for json_data in objects_list:
                    # Each problem is raised as soon as it is detected, so
                    # the descriptive message surfaces instead of a KeyError
                    # or TypeError from a later lookup on the bad value.
                    if not isinstance(json_data, dict):
                        raise Exception(
                            "Can not process data in file {} as JSON is not an object".format(
                                filename))
                    if list_key not in json_data:
                        raise Exception(missing_msg.format(filename))
                    data_list = json_data[list_key]
                    if not isinstance(data_list, list):
                        raise Exception(not_list_msg.format(filename))

                    # Everything in the package except the rows themselves is
                    # passed along with each row as package-level data.
                    package_data = {
                        key: value
                        for key, value in json_data.items()
                        if key not in ('releases', 'records')
                    }

                    for row in data_list:
                        if not isinstance(row, dict):
                            raise Exception(
                                "Row in data is not a object {}".format(
                                    filename))
                        insert_row(row, package_data)

        database.end_store(source_session_id)