def test_returns_list_with_all_datasets(self):
    """The datasets property returns a list containing every stored dataset."""
    # Bind the factory to this test's session before creating records.
    DatasetFactory._meta.sqlalchemy_session = self.db.session
    created = [DatasetFactory() for _ in range(3)]

    # testing
    ret = self.db.datasets
    self.assertIsInstance(ret, list)
    self.assertEqual(len(ret), 3)
    for dataset in created:
        self.assertIn(dataset, ret)
def test_returns_true_if_dataset_is_already_indexed(self):
    """is_indexed() reports True for a dataset that was just indexed."""
    DatasetFactory._meta.sqlalchemy_session = self.my_library.database.session
    dataset = DatasetFactory()
    self.backend.dataset_index.index_one(dataset)

    # The freshly indexed document must be reported as indexed.
    self.assertTrue(self.backend.dataset_index.is_indexed(dataset))
def test_identity_fields_values(self):
    """ Test contacts group fields of the metadata config. """
    # Test both - setting and saving to db.
    top = Top()
    dataset = DatasetFactory()
    top.link_config(self.my_library.database.session, dataset)

    # Table of identity field -> value; set each one via the linked config.
    expected = {
        'bspace': 'b-space',
        'btime': 'b-time',
        'dataset': dataset.vid,
        'id': dataset.id,
        'revision': 7,
        'source': 'example.com',
        'subset': 'mortality',
        'type': '?',
        'variation': 1,
        'version': '0.0.7',
    }
    for field, value in expected.items():
        setattr(top.identity, field, value)

    # build from db and check
    new_top = Top()
    new_top.build_from_db(dataset)
    for field, value in expected.items():
        self.assertEqual(getattr(new_top.identity, field), value)
def test_creates_partition_table(self):
    """A partition row can be created and committed through the ORM."""
    # Both factories must share the test database session.
    DatasetFactory._meta.sqlalchemy_session = self.db.session
    PartitionFactory._meta.sqlalchemy_session = self.db.session

    owner = DatasetFactory()
    PartitionFactory(dataset=owner)
    self.db.commit()
def test_search_with(self):
    """ search by `* with cucumber` """
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session
    TableFactory._meta.sqlalchemy_session = session

    dataset = DatasetFactory()
    table = TableFactory(dataset=dataset, name='table2', description='table2')
    partition = PartitionFactory(dataset=dataset, table=table, time=1)
    self._my_library.database.commit()

    # The `with` clause matches column descriptions, so add a described column.
    partition.table.add_column('id')
    partition.table.add_column('column1', description='cucumber')
    self._my_library.database.commit()

    self._my_library.search.index_dataset(dataset)
    self._my_library.search.index_partition(partition)

    # find partition in the partition index.
    self._assert_finds_partition(partition, 'dataset with cucumber')

    # finds dataset extended with partition
    found = list(self._my_library.search.search('dataset with cucumber'))
    self.assertEqual(len(found), 1)
    self.assertEqual(len(found[0].partitions), 1)
    self.assertIn(partition.vid, found[0].partitions)
def test_creates_resource_for_each_external_documentation(self, fake_call):
    """export() issues one resource_create call per external documentation entry."""
    DatasetFactory._meta.sqlalchemy_session = self.sqlite_db.session
    ds1 = DatasetFactory()
    ds1.config.metadata.about.access = 'public'

    # create two external documentations.
    sites = {
        'site1': ('Descr1', 'http://example.com/1'),
        'site2': ('Descr2', 'http://example.com/2'),
    }
    for site_name, (site_descr, site_url) in sites.items():
        getattr(ds1.config.metadata.external_documentation, site_name).description = site_descr
        getattr(ds1.config.metadata.external_documentation, site_name).url = site_url

    bundle = _get_fake_bundle(ds1)
    export(bundle)

    # assert call was valid: collect resource_create payloads keyed by name.
    resource_create_calls = {}
    for _, args, kwargs in fake_call.mock_calls:
        if args[0] == 'resource_create':
            payload = kwargs['data_dict']
            resource_create_calls[payload['name']] = payload

    for site_name, (site_descr, site_url) in sites.items():
        self.assertIn(site_name, resource_create_calls)
        self.assertEqual(resource_create_calls[site_name]['url'], site_url)
        self.assertEqual(resource_create_calls[site_name]['description'], site_descr)
def test_dataset_config_operations(self):
    """Identity metadata can be set, persisted and re-read; invalid
    assignments to the metadata tree are rejected."""
    dataset = DatasetFactory()
    dataset.config.metadata.identity.id = 'd02'
    dataset.config.metadata.identity.version = '0.0.1'
    self._my_library.database.commit()

    # Refresh dataset after commit
    dataset = self._my_library.dataset(dataset.vid)
    self.assertEqual(dataset.config.metadata.identity.id, 'd02')
    self.assertEqual(dataset.config.metadata.identity.version, '0.0.1')
    self.assertEqual(10, len(dataset.config.metadata.identity))

    identity_keys = [
        'subset', 'variation', 'dataset', 'btime', 'source',
        'version', 'bspace', 'type', 'id', 'revision'
    ]
    self.assertEqual(
        sorted(identity_keys),
        sorted([v for v in dataset.config.metadata.identity]))

    with self.assertRaises(AttributeError):
        dataset.config.metadata = 'foo'

    # FIX: the original wrapped this assignment in try/except and only
    # checked the message when an AssertionError actually occurred, so the
    # test passed silently if no error was raised at all. assertRaises
    # makes a missing error a test failure.
    with self.assertRaises(AssertionError) as cm:
        dataset.config.metadata.identity = [1, 2, 3]
    self.assertIn('Dictionary is required', str(cm.exception))
def test_range_and_in(self):
    """ search by `table2 from 1978 to 1979 in california`
    (geographic bounds and temporal bounds) """
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    TableFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session

    dataset = DatasetFactory()
    table = TableFactory(dataset=dataset, description='table2', name='table2')
    partition = PartitionFactory(
        dataset=dataset, table=table, time=1,
        grain_coverage=['county'],
        space_coverage=['california'],
        time_coverage=['1978', '1979'])
    self._my_library.database.commit()

    self._my_library.search.index_dataset(dataset)
    self._my_library.search.index_partition(partition)

    # finds dataset extended with partition
    found = list(self._my_library.search.search(
        'table2 from 1978 to 1979 in california'))
    self.assertEqual(len(found), 1)
    self.assertEqual(len(found[0].partitions), 1)
    self.assertIn(partition.vid, found[0].partitions)
def test_unique(self):
    """ d_vid, type, group and key are unique together. """
    dataset = DatasetFactory()
    conf1 = Config(
        sequence_id=1, d_vid=dataset.vid, type='metadata',
        group='identity', key='key1', value='value1')
    self._my_library.database.session.add(conf1)
    self._my_library.database.session.commit()

    # A second config with the same (d_vid, type, group, key) must be
    # rejected by the database's unique constraint on commit.
    dupe = Config(
        sequence_id=2, d_vid=dataset.vid, type='metadata',
        group='identity', key='key1', value='value1')
    self._my_library.database.session.add(dupe)

    # FIX: replaced the manual commit / raise AssertionError / except
    # pattern with the idiomatic assertRaises context manager; a missing
    # IntegrityError still fails the test, with less machinery.
    with self.assertRaises(IntegrityError) as cm:
        self._my_library.database.session.commit()
    self.assertIn('UNIQUE constraint failed', str(cm.exception))
def test_search_by(self):
    """ search by `source example.com by county` (granularity search) """
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    TableFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session

    dataset = DatasetFactory(source='example.com')
    table = TableFactory(dataset=dataset, description='table2', name='table2')
    partition = PartitionFactory(
        dataset=dataset, table=table, grain_coverage=['county'])
    self._my_library.database.commit()

    self._my_library.search.index_dataset(dataset)
    self._my_library.search.index_partition(partition)

    # find partition in the partition index.
    self._assert_finds_partition(partition, 'by county')

    # finds dataset extended with partition
    found = list(self._my_library.search.search('source example.com by county'))
    self.assertEqual(len(found), 1)
    self.assertEqual(len(found[0].partitions), 1)
    self.assertIn(partition.vid, found[0].partitions)
def test_search_in(self):
    """ search by `source example.com in California` (geographic bounds) """
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session
    TableFactory._meta.sqlalchemy_session = session

    dataset = DatasetFactory()
    table = TableFactory(dataset=dataset, name='table2', description='table2')
    partition = PartitionFactory(
        dataset=dataset, table=table, time=1,
        space_coverage=['california'])
    # NOTE(review): sibling tests commit the database before indexing,
    # this one does not -- confirm whether that is intentional.

    self._my_library.search.index_dataset(dataset)
    self._my_library.search.index_partition(partition)

    # find partition in the partition index.
    self._assert_finds_partition(partition, 'in California')

    # finds dataset extended with partition
    found = list(self._my_library.search.search('source example.com in California'))
    self.assertEqual(len(found), 1)
    self.assertEqual(len(found[0].partitions), 1)
    self.assertIn(partition.vid, found[0].partitions)
def test_search_years_range(self):
    """ search by `source example.com from 1978 to 1979` (temporal bounds) """
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session
    TableFactory._meta.sqlalchemy_session = session

    dataset = DatasetFactory()
    table = TableFactory(dataset=dataset, name='table2', description='table2')
    partition = PartitionFactory(
        dataset=dataset, table=table, time=1,
        time_coverage=['1978', '1979'])
    self._my_library.database.commit()

    self._my_library.search.index_partition(partition)
    self._my_library.search.index_dataset(dataset)

    # find partition in the partition index.
    self._assert_finds_partition(partition, 'from 1978 to 1979')

    # find dataset extended with partition
    found = list(self._my_library.search.search(
        'source example.com from 1978 to 1979'))
    self.assertEqual(len(found), 1)
    self.assertEqual(len(found[0].partitions), 1)
    self.assertIn(partition.vid, found[0].partitions)
def test_adds_dataset_document_to_the_index(self):
    """index_one() stores a document whose vid matches the dataset's vid."""
    DatasetFactory._meta.sqlalchemy_session = self.my_library.database.session
    dataset = DatasetFactory()
    self.backend.dataset_index.index_one(dataset)

    # search just added document.
    documents = list(self.backend.dataset_index.index.searcher().documents())
    self.assertEqual(documents[0]['vid'], dataset.vid)
def test_search_dataset_by_vid(self):
    """An indexed dataset is findable by searching for its vid."""
    DatasetFactory._meta.sqlalchemy_session = self._my_library.database.session
    dataset = DatasetFactory()
    self._my_library.search.index_dataset(dataset)

    found = self._my_library.search.search(dataset.vid)
    self.assertIn(dataset.vid, [result.vid for result in found])
def test_raises_UnpublishedAccessError_error(self, fake_call):
    """Exporting a bundle whose access level is 'restricted' must fail."""
    DatasetFactory._meta.sqlalchemy_session = self.sqlite_db.session
    restricted_ds = DatasetFactory()
    restricted_ds.config.metadata.about.access = 'restricted'
    bundle = _get_fake_bundle(restricted_ds)

    with self.assertRaises(UnpublishedAccessError):
        export(bundle)
def test_search_partition_by_vname(self):
    """A partition is findable in the index by its versioned name."""
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session

    partition = PartitionFactory(dataset=DatasetFactory())
    session.commit()

    self._my_library.search.index_partition(partition)
    self._assert_finds_partition(partition, str(partition.identity.vname))
def test_add_dataset_to_the_index(self):
    """index_dataset() makes the dataset visible in the dataset index."""
    DatasetFactory._meta.sqlalchemy_session = self._my_library.database.session
    dataset = DatasetFactory()
    self._my_library.database.session.commit()

    self._my_library.search.index_dataset(dataset)

    indexed = self._my_library.search.backend.dataset_index.all()
    self.assertIn(dataset.vid, [doc.vid for doc in indexed])
def test_creates_table_table(self):
    """A table row can be created and committed through the ORM."""
    DatasetFactory._meta.sqlalchemy_session = self.db.session
    TableFactory._meta.sqlalchemy_session = self.db.session

    # Now all tables are created. Can we use ORM to create datasets?
    owner = DatasetFactory()
    self.db.commit()

    TableFactory(dataset=owner)
    self.db.commit()
def test_add_partition_to_the_index(self):
    """index_partition() makes the partition visible in the partition index."""
    session = self._my_library.database.session
    DatasetFactory._meta.sqlalchemy_session = session
    PartitionFactory._meta.sqlalchemy_session = session

    partition = PartitionFactory(dataset=DatasetFactory())
    session.commit()

    self._my_library.search.index_partition(partition)

    indexed = self._my_library.search.backend.partition_index.all()
    self.assertIn(partition.vid, [doc.vid for doc in indexed])
def test_returns_config_repr(self):
    """Config repr is a text string containing its identifying fields."""
    ds = DatasetFactory()
    self._my_library.database.session.commit()
    config1 = ConfigFactory(d_vid=ds.vid)

    # __repr__ is called directly rather than via repr() -- presumably so
    # the text_type check sees the raw return value (on Python 2, repr()
    # coerces a unicode result to bytes). TODO confirm.
    repr_str = config1.__repr__()

    self.assertIsInstance(repr_str, text_type)
    for attr in ('d_vid', 'group', 'key', 'value'):
        self.assertIn(getattr(config1, attr), repr_str)
def test_deletes_dataset_from_index(self):
    """_delete() removes a previously indexed dataset document."""
    DatasetFactory._meta.sqlalchemy_session = self.my_library.database.session
    dataset = DatasetFactory()
    self.backend.dataset_index.index_one(dataset)

    def indexed_vids():
        # Snapshot of every vid currently stored in the dataset index.
        docs = self.backend.dataset_index.index.searcher().documents()
        return [doc['vid'] for doc in docs]

    # look for just added document.
    self.assertIn(dataset.vid, indexed_vids())

    self.backend.dataset_index._delete(vid=dataset.vid)
    self.assertNotIn(dataset.vid, indexed_vids())
def test_does_not_add_dataset_twice(self):
    """Indexing the same dataset twice leaves exactly one document."""
    DatasetFactory._meta.sqlalchemy_session = self._my_library.database.session
    dataset = DatasetFactory()

    # Index the same dataset twice; the document count must stay at one.
    for _ in range(2):
        self._my_library.search.index_dataset(dataset)
        datasets = self._my_library.search.backend.dataset_index.all()
        self.assertEqual(len(datasets), 1)
def test_extends_notes_with_dataset_documentation(self):
    """_convert_bundle() folds documentation.md contents into 'notes'."""
    DatasetFactory._meta.sqlalchemy_session = self.sqlite_db.session
    FileFactory._meta.sqlalchemy_session = self.sqlite_db.session

    dataset = DatasetFactory()
    FileFactory(
        dataset=dataset, path='documentation.md',
        contents='### Dataset documentation.')
    self.sqlite_db.commit()

    bundle = _get_fake_bundle(dataset)
    converted = _convert_bundle(bundle)

    self.assertIn('### Dataset documentation.', converted['notes'])
def test_adds_dataset_document_to_the_index(self):
    """index_one() writes a row with the dataset vid to the dataset_index table."""
    DatasetFactory._meta.sqlalchemy_session = self.my_library.database.session
    dataset = DatasetFactory()
    self.backend.dataset_index.index_one(dataset)

    # search just added document directly in the backing table.
    query = 'SELECT vid FROM dataset_index;'
    rows = self.backend.library.database.connection.execute(query).fetchall()
    self.assertEqual(rows[0][0], dataset.vid)
def test_returns_found_dataset(self):
    """search() yields DatasetSearchResult items for an indexed dataset."""
    # add dataset to backend.
    DatasetFactory._meta.sqlalchemy_session = self.my_library.database.session
    dataset = DatasetFactory()
    self.backend.dataset_index.index_one(dataset)

    # search just added document.
    results = list(self.backend.dataset_index.search(dataset.vid))
    found_vids = [result.vid for result in results]

    self.assertIn(dataset.vid, found_vids)
    self.assertIsInstance(results[0], DatasetSearchResult)
def test_creates_column_table(self):
    """A column row can be created and committed through the ORM."""
    DatasetFactory._meta.sqlalchemy_session = self.db.session
    TableFactory._meta.sqlalchemy_session = self.db.session
    ColumnFactory._meta.sqlalchemy_session = self.db.session

    owner = DatasetFactory()
    self.db.commit()
    table = TableFactory(dataset=owner)

    # Now all tables are created. Can we use ORM to create columns?
    ColumnFactory(name='id', table=table)
    self.db.commit()
def test_populates_id_field(self, fake_before_update):
    """before_insert() generates the id and delegates to before_update once."""
    ds = DatasetFactory()
    config1 = ConfigFactory.build(d_vid=ds.vid)
    # FIX: use a unittest assertion instead of a bare `assert`, which is
    # silently stripped when Python runs with -O.
    self.assertIsNone(config1.id)

    mapper = Mock()
    conn = Mock()
    Config.before_insert(mapper, conn, config1)

    self.assertIsNotNone(config1.id)
    self.assertTrue(config1.id.startswith('Fds'))
    self.assertEqual(len(fake_before_update.mock_calls), 1)
def test_returns_dictionary_representation_of_the_config(self):
    """Config.dict exposes exactly the expected fields with live values."""
    ds = DatasetFactory()
    config1 = ConfigFactory(d_vid=ds.vid)

    expected_fields = [
        'id', 'sequence_id', 'dataset', 'd_vid', 'type', 'group',
        'key', 'value', 'modified', 'children', 'parent_id', 'parent'
    ]
    self.assertEqual(sorted(expected_fields), sorted(config1.dict))

    # Every entry in the dict mirrors the attribute of the same name.
    for field in expected_fields:
        self.assertEqual(getattr(config1, field), config1.dict[field])
def _test_converts_schema_to_resource_dict(self):
    """_convert_partition() builds a resource dict from a partition.

    NOTE(review): the leading underscore keeps this test disabled, and
    PartitionFactory's session is never bound here -- presumably done in
    setUp; verify before re-enabling.
    """
    DatasetFactory._meta.sqlalchemy_session = self.sqlite_db.session
    FileFactory._meta.sqlalchemy_session = self.sqlite_db.session
    ds1 = DatasetFactory()
    partition1 = PartitionFactory(dataset=ds1)
    self.sqlite_db.commit()

    # Stub the datafile so conversion does not touch real partition data.
    partition1._datafile = MagicMock()
    ret = _convert_partition(partition1)

    self.assertIn('package_id', ret)
    self.assertEqual(ret['package_id'], ds1.vid)
    self.assertEqual(ret['name'], partition1.name)
def test_removes_dataset(self):
    """remove_dataset() deletes the dataset row from the database."""
    # prepare state.
    DatasetFactory._meta.sqlalchemy_session = self.db.session
    dataset = DatasetFactory()
    self.db.session.commit()
    removed_vid = dataset.vid

    # testing
    self.db.remove_dataset(dataset)
    remaining = self.db.session.query(Dataset).filter_by(vid=removed_vid).all()
    self.assertEqual(remaining, [], 'Dataset was not removed.')