def gather_stage(self, harvest_job):
    """Create one HarvestObject per endpoint reported by the source.

    The harvester configuration is loaded from ``harvest_job.source.config``
    and the endpoint list is fetched with ``self.get_endpoints`` using
    ``harvest_job.source.url``. Each endpoint is expected to unpack into
    ``(agency_id, _id, version)``. For every endpoint a HarvestObject is
    saved with the GUID ``"<agency_id>-<_id>"`` and two extras:
    ``stats_guid`` (the endpoint id) and ``version``.

    :param harvest_job: the job being gathered; its ``source.config`` and
        ``source.url`` attributes are read.
    :returns: a list with the ``id`` of every HarvestObject saved, or
        ``None`` when any step failed (in which case a gather error has
        been recorded through ``self._save_gather_error``).
    """
    log.debug('In DotStatHarvester gather_stage')

    # Bound before the ``try`` so the error handler at the bottom can always
    # format its message, even if the failure happens before the source URL
    # has been read (e.g. while parsing the configuration).
    base_url = None

    # For each row of data, use its ID as the GUID and save a harvest object
    # Return a list of all these new harvest jobs
    try:
        harvest_obj_ids = []
        self._set_config(harvest_job.source.config)
        base_url = harvest_job.source.url

        try:
            # Get list of endpoint ids
            endpoints = self.get_endpoints(base_url)
        except (AccessTypeNotAvailableError, KeyError):
            log.debug('Endpoint function failed')
            # Re-raise so the outer handler records the real cause. Falling
            # through would leave ``endpoints`` unbound and the loop below
            # would fail with an unrelated NameError instead.
            raise

        # Make a harvest object for each dataset
        # Set the GUID to the dataset's ID (DF_SDG etc.)
        for agency_id, _id, version in endpoints:
            harvest_obj = HarvestObject(
                guid="{}-{}".format(agency_id, _id),
                job=harvest_job
            )
            harvest_obj.extras = [
                HarvestObjectExtra(key='stats_guid', value=_id),
                HarvestObjectExtra(key='version', value=version)
            ]
            harvest_obj.save()
            harvest_obj_ids.append(harvest_obj.id)

        log.debug('IDs: {}'.format(harvest_obj_ids))
        return harvest_obj_ids
    except Exception as e:
        # Top-level boundary of the gather stage: record the failure against
        # the job rather than letting it propagate to the caller.
        self._save_gather_error(
            'Unable to get content for URL: %s: %s / %s'
            % (base_url, str(e), traceback.format_exc()),
            harvest_job)
        return None