def test_minimal(self):
    """A dataset without an URL serializes to a single blank-node DCAT Dataset.

    Checks that the RDF graph contains exactly one ``dcat:Dataset`` subject,
    that it is a blank node (no URL to mint a URI from), and that the core
    Dublin Core terms mirror the model's fields.
    """
    dataset = DatasetFactory.build()  # Does not have an URL
    d = dataset_to_rdf(dataset)
    g = d.graph
    assert isinstance(d, RdfResource)
    # BUG FIX: was `is 1` — identity comparison with an int literal only
    # passes by CPython's small-integer caching; use `==` for value equality.
    assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1
    assert g.value(d.identifier, RDF.type) == DCAT.Dataset
    assert isinstance(d.identifier, BNode)
    assert d.value(DCT.identifier) == Literal(dataset.id)
    assert d.value(DCT.title) == Literal(dataset.title)
    assert d.value(DCT.issued) == Literal(dataset.created_at)
    assert d.value(DCT.modified) == Literal(dataset.last_modified)
def test_minimal(self):
    """A dataset without an URL serializes to a single blank-node DCAT Dataset.

    The resource must be typed ``dcat:Dataset``, identified by a blank node,
    and carry the core Dublin Core terms mirroring the model's fields.
    """
    dataset = DatasetFactory.build()  # Does not have an URL
    resource = dataset_to_rdf(dataset)
    graph = resource.graph

    self.assertIsInstance(resource, RdfResource)
    self.assertIsInstance(resource.identifier, BNode)
    self.assertEqual(len(list(graph.subjects(RDF.type, DCAT.Dataset))), 1)
    self.assertEqual(graph.value(resource.identifier, RDF.type), DCAT.Dataset)

    # Each Dublin Core predicate should expose the matching model field.
    expectations = (
        (DCT.identifier, Literal(dataset.id)),
        (DCT.title, Literal(dataset.title)),
        (DCT.issued, Literal(dataset.created_at)),
        (DCT.modified, Literal(dataset.last_modified)),
    )
    for predicate, expected in expectations:
        self.assertEqual(resource.value(predicate), expected)
def test_attach_does_not_duplicate(self):
    """``actions.attach`` must only bind the datasets listed in the CSV.

    Two datasets are pre-attached to the same harvest source; attaching a
    new batch of three must report exactly three successes and leave the
    database with only those three carrying a ``harvest:remote_id``.
    """
    # Datasets already attached to the harvest source before the run.
    already_attached = []
    for i in range(2):
        existing = DatasetFactory.build()
        existing.extras['harvest:domain'] = 'test.org'
        existing.extras['harvest:remote_id'] = str(i)
        existing.last_modified = datetime.now()
        existing.save()
        already_attached.append(existing)

    datasets = DatasetFactory.create_batch(3)

    with NamedTemporaryFile() as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['local', 'remote'],
                                delimiter=b';',
                                quotechar=b'"')
        writer.writeheader()
        for position, candidate in enumerate(datasets):
            row = {'local': str(candidate.id), 'remote': str(position)}
            writer.writerow(row)
        csvfile.flush()

        result = actions.attach('test.org', csvfile.name)

        query = {'extras__harvest:remote_id__exists': True}
        dbcount = Dataset.objects(**query).count()

        self.assertEqual(result.success, len(datasets))
        self.assertEqual(dbcount, result.success)

        for position, candidate in enumerate(datasets):
            candidate.reload()
            self.assertEqual(candidate.extras['harvest:domain'], 'test.org')
            self.assertEqual(candidate.extras['harvest:remote_id'],
                             str(position))
def test_attach_does_not_duplicate(self):
    """``actions.attach`` must only bind the datasets listed in the CSV.

    With two datasets already attached to the source, attaching a fresh
    batch of three must report three successes and leave exactly three
    datasets carrying a ``harvest:remote_id`` in the database.
    """
    # Seed the source with a couple of previously-attached datasets.
    attached_datasets = []
    for remote_id in range(2):
        seeded = DatasetFactory.build()
        seeded.extras['harvest:domain'] = 'test.org'
        seeded.extras['harvest:remote_id'] = str(remote_id)
        seeded.last_modified = datetime.now()
        seeded.save()
        attached_datasets.append(seeded)

    datasets = DatasetFactory.create_batch(3)

    with NamedTemporaryFile() as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['local', 'remote'],
                                delimiter=b';',
                                quotechar=b'"')
        writer.writeheader()
        rows = ({'local': str(ds.id), 'remote': str(idx)}
                for idx, ds in enumerate(datasets))
        for row in rows:
            writer.writerow(row)
        csvfile.flush()

        result = actions.attach('test.org', csvfile.name)

        dbcount = Dataset.objects(**{
            'extras__harvest:remote_id__exists': True
        }).count()

        assert result.success == len(datasets)
        assert dbcount == result.success

        for idx, ds in enumerate(datasets):
            ds.reload()
            assert ds.extras['harvest:domain'] == 'test.org'
            assert ds.extras['harvest:remote_id'] == str(idx)
def process(self, item):
    """Handle one harvest item and return a dataset named after it.

    Fires the ``mock_process`` signal so tests can observe that the item
    was processed, then builds (without saving) a dataset whose title
    embeds the item's remote identifier.
    """
    mock_process.send(self, item=item)
    title = 'dataset-{0}'.format(item.remote_id)
    return DatasetFactory.build(title=title)