.order_by("gather_finished desc").first() # We thought about using the document's modified date to see if it is # unchanged from the previous harvest, but it's hard to tell if the # previous harvest was not successful due to whatever reason, so don't # skip the doc because of its modified date. # We create a new HarvestObject for each inv:Dataset within the # Inventory document ids = [] harvested_identifiers = set() for dataset_node in doc.dataset_nodes(): dataset = doc.dataset_to_dict(dataset_node) if dataset['identifier'] in harvested_identifiers: HarvestGatherError.create( 'Dataset with duplicate identifier "%s" - discarding' % dataset['identifier'], harvest_job) continue harvested_identifiers.add(dataset['identifier']) guid = self.build_guid(doc_metadata['identifier'], dataset['identifier']) # Use the most recent modification date out of the doc and dataset, # since they might have forgotten to enter or update the dataset # date. dataset_last_modified = dataset['modified'] or doc_last_modified if dataset_last_modified and doc_last_modified: dataset_last_modified = max(dataset_last_modified, doc_last_modified) if previous: # object may be in the previous harvest, or an older one
.order_by("gather_finished desc").first() # We thought about using the document's modified date to see if it is # unchanged from the previous harvest, but it's hard to tell if the # previous harvest was not successful due to whatever reason, so don't # skip the doc because of its modified date. # We create a new HarvestObject for each inv:Dataset within the # Inventory document ids = [] harvested_identifiers = set() for dataset_node in doc.dataset_nodes(): dataset = doc.dataset_to_dict(dataset_node) if dataset['identifier'] in harvested_identifiers: HarvestGatherError.create( 'Dataset with duplicate identifier "%s" - discarding' % dataset['identifier'], harvest_job) continue harvested_identifiers.add(dataset['identifier']) guid = self.build_guid(doc_metadata['identifier'], dataset['identifier']) # Use the most recent modification date out of the doc and dataset, # since they might have forgotten to enter or update the dataset # date. dataset_last_modified = dataset['modified'] or doc_last_modified if dataset_last_modified and doc_last_modified: dataset_last_modified = max(dataset_last_modified, doc_last_modified) if previous: # object may be in the previous harvest, or an older one existing_object = model.Session.query(HarvestObject)\ .filter_by(guid=guid)\