def test_deletes_records_in_deleted_datasets(self):
    ids = [
        'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
        'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
        'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
    ]
    for id_ in ids:
        d = Dataset.objects.create(id=id_, data={})
        for i in range(3):
            ResolvableObject.objects.create(
                id='{}_{}'.format(id_, i), data={}, type='occurrence', dataset=d)

    # Sync only the first dataset; records in the other two should be soft deleted
    cache_data.sync_datasets([ids[0]])

    self.assertEqual(
        set(ResolvableObject.objects.filter(
            dataset__id=ids[0]).values_list('deleted_date', flat=True)),
        {None})
    self.assertEqual(
        set(ResolvableObject.objects.filter(
            dataset__id=ids[1]).values_list('deleted_date', flat=True)),
        {date.today()})
    self.assertEqual(
        set(ResolvableObject.objects.filter(
            dataset__id=ids[2]).values_list('deleted_date', flat=True)),
        {date.today()})
def handle(self, *args, **options):
    dataset_list = _gbif_api.get_dataset_list()

    # Skip some datasets
    big = {
        'crop wild relatives, global': '07044577-bd82-4089-9f3a-f4a9d2170b2e',
        'artsobs': 'b124e1e0-4755-430f-9eab-894f25a9b59c',
    }
    skip = False

    # Set up for import
    if not skip:
        create_duplicates_file()
        reset_import_table()
    dataset_ids = []
    overall_start = datetime.now()

    # Iterate over GBIF datasets
    for dataset in dataset_list:
        if skip or dataset['key'] in big.values():
            self.logger.info('skip')
            continue
        start = datetime.now()

        # Get dataset details
        dataset_details = _gbif_api.get_dataset_detailed_info(dataset['key'])
        endpoint = _gbif_api.get_dwc_endpoint(dataset_details['endpoints'])
        self.logger.info(dataset_details['title'])
        if not endpoint:
            self.logger.info('Metadata only dataset, skipping')
            continue
        if not sync_dataset(dataset_details):
            self.logger.info('Dataset is unchanged, skipping')
            continue

        # Download the darwin core archive and import it into the import table
        self.logger.info(endpoint['url'])
        _gbif_api.get_dwca_and_store_as_tmp_zip(endpoint['url'])
        _migration_processing.import_dwca(dataset['key'])
        dataset_ids.append(dataset['key'])
        log_time(start, 'finished inserting dataset {}'.format(dataset['key']))

    log_time(overall_start, 'finished all datasets, merging in starts next')

    # Soft delete datasets (and their records) that were not part of this import
    start = datetime.now()
    _cache_data.sync_datasets(dataset_ids)
    log_time(start, 'caching complete')

    # Merge the freshly imported records into the main tables
    start = datetime.now()
    _cache_data.merge_in_new_data(False)  # options['reset']
    log_time(start, 'merging complete')

    # Update the cached total record count
    start = datetime.now()
    total_count = Statistic.objects.set_total_count()
    log_time(start, 'finished! total count now set {}'.format(total_count))
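
# Hypothetical sketch (not the project's confirmed implementation) of what the
# sync_dataset(dataset_details) call above is assumed to do: upsert the Dataset row
# and report whether the GBIF metadata has changed since the last import. The
# 'modified' field and the use of Dataset.data as the stored metadata are
# assumptions based on the surrounding code, as are the imports of Dataset and the
# GBIF detail dict shape.
def sync_dataset(dataset_details):
    dataset, created = Dataset.objects.get_or_create(
        id=dataset_details['key'], defaults={'data': dataset_details})
    if created:
        return True  # new dataset, always import
    if dataset.data.get('modified') == dataset_details.get('modified'):
        return False  # unchanged since the previous import, caller skips it
    dataset.data = dataset_details  # refresh stored metadata before re-importing
    dataset.save()
    return True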
def test_does_no_deletions_if_none_deleted(self):
    ids = [
        'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
        'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
        'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
    ]
    for id_ in ids:
        Dataset.objects.create(id=id_, data={})

    cache_data.sync_datasets(ids)

    all_deleted_dates = Dataset.objects.all().values_list('deleted_date', flat=True)
    self.assertEqual(set(all_deleted_dates), {None})
    self.assertEqual(
        set(ResolvableObject.objects.all().values_list('deleted_date', flat=True)),
        set())
def test_sets_deleted_date_for_datasets_not_in_new_migration(self):
    ids = [
        'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
        'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
        'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
    ]
    Dataset.objects.create(id=ids[0], data={})
    Dataset.objects.create(id=ids[1], data={})
    Dataset.objects.create(id=ids[2], data={})

    # Only the second dataset appears in the new import
    cache_data.sync_datasets([ids[1]])

    self.assertEqual(Dataset.objects.get(id=ids[0]).deleted_date, date.today())
    self.assertIsNone(Dataset.objects.get(id=ids[1]).deleted_date)
    self.assertEqual(Dataset.objects.get(id=ids[2]).deleted_date, date.today())
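
# A minimal sketch of the sync_datasets behaviour the tests above expect: every
# dataset absent from the new import gets today's date as its deleted_date, and so
# do its resolvable objects; datasets present in the import are left untouched.
# Model and field names (Dataset, ResolvableObject, deleted_date) come from the
# tests; the real implementation in cache_data may differ, e.g. by using raw SQL
# for bulk updates. Assumes the same imports as the tests (date, Dataset,
# ResolvableObject).
def sync_datasets(imported_dataset_ids):
    today = date.today()
    missing = Dataset.objects.exclude(id__in=imported_dataset_ids)
    # Soft delete the records of the missing datasets first, then the datasets
    ResolvableObject.objects.filter(dataset__in=missing).update(deleted_date=today)
    missing.update(deleted_date=today)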