Example #1
            .order_by("gather_finished desc").first()
        # We thought about using the document's modified date to see if it is
        # unchanged from the previous harvest, but it's hard to tell if the
        # previous harvest was not successful due to whatever reason, so don't
        # skip the doc because of its modified date.

        # We create a new HarvestObject for each inv:Dataset within the
        # Inventory document
        ids = []
        harvested_identifiers = set()
        for dataset_node in doc.dataset_nodes():
            dataset = doc.dataset_to_dict(dataset_node)

            if dataset['identifier'] in harvested_identifiers:
                HarvestGatherError.create(
                    'Dataset with duplicate identifier "%s" - discarding' %
                    dataset['identifier'], harvest_job)
                continue
            harvested_identifiers.add(dataset['identifier'])

            guid = self.build_guid(doc_metadata['identifier'],
                                   dataset['identifier'])
            # Use the most recent modification date out of the doc and dataset,
            # since they might have forgotten to enter or update the dataset
            # date.
            dataset_last_modified = dataset['modified'] or doc_last_modified
            if dataset_last_modified and doc_last_modified:
                dataset_last_modified = max(dataset_last_modified,
                                            doc_last_modified)
            if previous:
                # object may be in the previous harvest, or an older one
Example #2
            .order_by("gather_finished desc").first()
        # We thought about using the document's modified date to see if it is
        # unchanged from the previous harvest, but it's hard to tell if the
        # previous harvest was not successful due to whatever reason, so don't
        # skip the doc because of its modified date.

        # We create a new HarvestObject for each inv:Dataset within the
        # Inventory document
        ids = []
        harvested_identifiers = set()
        for dataset_node in doc.dataset_nodes():
            dataset = doc.dataset_to_dict(dataset_node)

            if dataset['identifier'] in harvested_identifiers:
                HarvestGatherError.create(
                    'Dataset with duplicate identifier "%s" - discarding'
                    % dataset['identifier'], harvest_job)
                continue
            harvested_identifiers.add(dataset['identifier'])

            guid = self.build_guid(doc_metadata['identifier'], dataset['identifier'])
            # Use the most recent modification date out of the doc and dataset,
            # since they might have forgotten to enter or update the dataset
            # date.
            dataset_last_modified = dataset['modified'] or doc_last_modified
            if dataset_last_modified and doc_last_modified:
                dataset_last_modified = max(dataset_last_modified, doc_last_modified)
            if previous:
                # object may be in the previous harvest, or an older one
                existing_object = model.Session.query(HarvestObject)\
                                       .filter_by(guid=guid)\