def normalize(raw_doc, harvester_name):
    """Normalize ``raw_doc`` with the harvester registered as ``harvester_name``.

    The start time is captured before the harvester runs and is handed to
    ``util.stamp_from_raw`` as ``normalizeStarted``; whatever that helper
    returns is stored on the result under the ``'timestamps'`` key.

    Args:
        raw_doc: The raw document; must support ``raw_doc['docID']``.
        harvester_name: Key used to look the harvester up in ``registry``.

    Returns:
        The object produced by the harvester's ``normalize``, with its
        ``'timestamps'`` entry set.

    Raises:
        events.Skip: If the harvester returns a falsy result.
    """
    # Take the timestamp first so it reflects when normalization began,
    # not when it finished.
    started_at = timestamp()
    document = registry[harvester_name].normalize(raw_doc)

    if document:
        document['timestamps'] = util.stamp_from_raw(
            raw_doc,
            normalizeStarted=started_at,
        )
        return document  # returns a single normalized document

    raise events.Skip(
        'Did not normalize document with id {}'.format(raw_doc['docID'])
    )
def test_logged_decorator_skipped(mock_dispatch):
    """A logged function that raises ``events.Skip`` yields None and dispatches SKIPPED.

    Verifies that the dispatcher sees a STARTED event followed by a SKIPPED
    event whose ``reason`` is the message of the raised ``events.Skip``.
    """
    skip = events.Skip('For Reasons')

    # The parameter name ``test`` is significant: it shows up as the
    # ``test='baz'`` keyword in the dispatched events asserted below.
    @events.logged('testing')
    def logged_func(test):
        raise skip

    result = logged_func('baz')
    assert result is None

    expected_calls = [
        mock.call('testing', events.STARTED, _index=None, test='baz'),
        mock.call(
            'testing',
            events.SKIPPED,
            _index=None,
            test='baz',
            reason='For Reasons',
        ),
    ]
    mock_dispatch.assert_has_calls(expected_calls)
def send_to_database(self, docID, source, **kwargs):
    """Create the document for ``(docID, source)`` or version and update it.

    * No stored document matches: a new one is created from the arguments.
    * A stored document matches and ``self.different`` reports a change:
      the stored fields are saved as a ``VersionModel`` under a fresh
      ``uuid4`` key, that key is appended to the version list, and the
      stored document is updated with the remaining keyword arguments.
    * A stored document matches and nothing differs: the call is skipped.

    Args:
        docID: Document identifier used for the lookup.
        source: Source name used for the lookup.
        **kwargs: Remaining document fields. An optional ``'versions'``
            entry is concatenated onto the stored document's versions.

    Returns:
        The result of ``DocumentModel.create`` or of ``document.update``.

    Raises:
        events.Skip: If the stored document does not differ from the input.
    """
    matches = DocumentModel.objects(docID=docID, source=source)
    if not matches:
        # create document
        return DocumentModel.create(docID=docID, source=source, **kwargs)

    current = matches[0]
    stored_fields = dict(current)
    incoming_fields = dict(docID=docID, source=source, **kwargs)
    if not self.different(stored_fields, incoming_fields):
        # NOTE(review): "changees" is misspelled in the message below; it is
        # left untouched here in case anything matches on the exact text.
        raise events.Skip("No changees detected for document with ID {0} and source {1}.".format(docID, source))

    # Create new version, get UUID of new version, update.
    # 'versions' is popped only after the comparison above, and must be
    # removed from kwargs so it is not passed twice to ``update`` below.
    version_keys = current.versions + kwargs.pop('versions', [])
    snapshot = VersionModel(key=uuid4(), **dict(current))
    snapshot.save()
    version_keys.append(snapshot.key)
    return current.update(versions=version_keys, **kwargs)
def process_normalized(normalized_doc, raw_doc, **kwargs):
    """Forward a normalized document to ``processing.process_normalized``.

    Args:
        normalized_doc: The normalized document; a falsy value means there
            is nothing to process.
        raw_doc: The raw document; must support ``raw_doc['docID']``.
        **kwargs: Extra options, forwarded as a single dict.

    Raises:
        events.Skip: If ``normalized_doc`` is falsy.
    """
    if normalized_doc:
        # NOTE(review): kwargs is passed as one positional dict, not
        # unpacked with ** — presumably what the callee expects; confirm.
        processing.process_normalized(raw_doc, normalized_doc, kwargs)
        return

    raise events.Skip(
        'Not processing document with id {}'.format(raw_doc['docID'])
    )