    def test_deletes_records_in_deleted_datasets(self):
        ids = [
            'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
            'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
            'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
        ]
        for id_ in ids:
            d = Dataset.objects.create(id=id_, data={})
            for i in range(3):
                ResolvableObject.objects.create(id='{}_{}'.format(id_, i),
                                                data={},
                                                type='occurrence',
                                                dataset=d)
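        # Only the first dataset is kept by the sync, so the records belonging
        # to the other two datasets should be marked as deleted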
        cache_data.sync_datasets([ids[0]])
        self.assertEqual(
            set(ResolvableObject.objects.filter(dataset__id=ids[0])
                .values_list('deleted_date', flat=True)),
            {None})
        self.assertEqual(
            set(ResolvableObject.objects.filter(dataset__id=ids[1])
                .values_list('deleted_date', flat=True)),
            {date.today()})
        self.assertEqual(
            set(ResolvableObject.objects.filter(dataset__id=ids[2])
                .values_list('deleted_date', flat=True)),
            {date.today()})
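
    # The management command below drives the full GBIF import: it walks the
    # dataset list, imports each Darwin Core archive, and finishes by calling
    # sync_datasets and merge_in_new_data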
    def handle(self, *args, **options):
        dataset_list = _gbif_api.get_dataset_list()

        # Datasets that are always skipped, plus a flag that skips the whole
        # import loop when set to True
        big = {
            'crop wild relatives, global':
            '07044577-bd82-4089-9f3a-f4a9d2170b2e',
            'artsobs': 'b124e1e0-4755-430f-9eab-894f25a9b59c',
        }
        skip = False

        # Set up for import
        if not skip:
            create_duplicates_file()
            reset_import_table()
        dataset_ids = []
        overall_start = datetime.now()

        # Iterate over GBIF datasets
        for dataset in dataset_list:
            if skip or dataset['key'] in big.values():
                self.logger.info('skip')
                continue
            start = datetime.now()

            # Get dataset details
            dataset_details = _gbif_api.get_dataset_detailed_info(
                dataset['key'])
            endpoint = _gbif_api.get_dwc_endpoint(dataset_details['endpoints'])
            self.logger.info(dataset_details['title'])

            if not endpoint:
                self.logger.info('Metadata only dataset, skipping')
                continue
            if not sync_dataset(dataset_details):
                self.logger.info('Dataset is unchanged, skipping')
                continue

            self.logger.info(endpoint['url'])
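            # Download and import the Darwin Core archive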
            _gbif_api.get_dwca_and_store_as_tmp_zip(endpoint['url'])
            _migration_processing.import_dwca(dataset['key'])
            dataset_ids.append(dataset['key'])
            log_time(start,
                     'finished inserting dataset {}'.format(dataset['key']))

        log_time(overall_start,
                 'finished all datasets; caching and merging start next')
        start = datetime.now()
        _cache_data.sync_datasets(dataset_ids)
        log_time(start, 'caching complete')
        start = datetime.now()
        # False is passed instead of options['reset']
        _cache_data.merge_in_new_data(False)
        log_time(start, 'merging complete')
        start = datetime.now()
        total_count = Statistic.objects.set_total_count()
        log_time(start,
                 'finished! total count now set to {}'.format(total_count))
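
# `log_time` is a small helper used throughout handle() above. A minimal
# sketch of what it might look like (an assumption, not the project's actual
# implementation):
import logging
from datetime import datetime

logger = logging.getLogger(__name__)


def log_time(start, message):
    # Log the message together with the time elapsed since `start`
    logger.info('%s (took %s)', message, datetime.now() - start)
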
    def test_does_no_deletions_if_none_deleted(self):
        ids = [
            'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
            'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
            'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
        ]
        for id_ in ids:
            Dataset.objects.create(id=id_, data={})
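        # Every dataset is included in the sync, so nothing should be marked
        # as deleted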
        cache_data.sync_datasets(ids)
        all_deleted_dates = Dataset.objects.all().values_list(
            'deleted_date', flat=True)
        self.assertEqual(set(all_deleted_dates), {None})
        self.assertEqual(
            set(ResolvableObject.objects.all().values_list(
                'deleted_date', flat=True)),
            set())
    def test_sets_deleted_date_for_datasets_not_in_new_migration(self):
        ids = [
            'a6c6cead-b5ce-4a4e-8cf5-1542ba708dec',
            'd6c6cead-b5ce-4a4e-8cf5-1542ba708ded',
            'f6c6cead-b5ce-4a4e-8cf5-1542ba708def'
        ]
        Dataset.objects.create(id=ids[0], data={})
        Dataset.objects.create(id=ids[1], data={})
        Dataset.objects.create(id=ids[2], data={})
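        # Only the second dataset is kept by the sync, so the other two should
        # get a deleted_date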
        cache_data.sync_datasets([ids[1]])
        self.assertEqual(
            Dataset.objects.get(id=ids[0]).deleted_date, date.today())
        self.assertEqual(Dataset.objects.get(id=ids[1]).deleted_date, None)
        self.assertEqual(
            Dataset.objects.get(id=ids[2]).deleted_date, date.today())
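
# Taken together, these tests pin down the behaviour expected from
# cache_data.sync_datasets(ids): datasets whose id is not in `ids`, and the
# resolvable objects belonging to them, get deleted_date set to today, while
# everything in `ids` is left untouched. A minimal sketch of that behaviour
# (inferred from the tests, not the project's actual implementation; the
# Dataset and ResolvableObject imports depend on the project layout):
from datetime import date


def sync_datasets(dataset_ids):
    today = date.today()
    # Mark every dataset missing from the new import as deleted...
    Dataset.objects.exclude(id__in=dataset_ids).update(deleted_date=today)
    # ...and do the same for the records belonging to those datasets
    ResolvableObject.objects.exclude(
        dataset__id__in=dataset_ids).update(deleted_date=today)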