Esempio n. 1
0
def normalize(raw_doc, harvester_name):
    """Normalize a raw document with the harvester registered under a name.

    The start time is captured before the harvester is looked up, and is
    later handed to ``util.stamp_from_raw`` as ``normalizeStarted``.

    :param raw_doc: raw document; its ``'docID'`` entry is used in the
        skip message.
    :param harvester_name: key into ``registry`` selecting the harvester.
    :returns: the single normalized document, with a ``'timestamps'``
        entry set from ``util.stamp_from_raw``.
    :raises events.Skip: if the harvester returns a falsy result.
    """
    started_at = timestamp()
    result = registry[harvester_name].normalize(raw_doc)

    if result:
        result['timestamps'] = util.stamp_from_raw(
            raw_doc,
            normalizeStarted=started_at,
        )
        return result

    # The harvester produced nothing usable for this document.
    message = 'Did not normalize document with id {}'.format(raw_doc['docID'])
    raise events.Skip(message)
Esempio n. 2
0
def test_logged_decorator_skipped(mock_dispatch):
    """A function wrapped by ``events.logged`` that raises ``events.Skip``
    returns None and dispatches STARTED followed by SKIPPED with the reason.
    """
    skip = events.Skip('For Reasons')

    @events.logged('testing')
    def logged_func(test):
        raise skip

    result = logged_func('baz')
    assert result is None

    expected_calls = [
        mock.call('testing', events.STARTED, _index=None, test='baz'),
        mock.call(
            'testing',
            events.SKIPPED,
            _index=None,
            test='baz',
            reason='For Reasons',
        ),
    ]
    mock_dispatch.assert_has_calls(expected_calls)
Esempio n. 3
0
 def send_to_database(self, docID, source, **kwargs):
     """Create a document, or version and update it when it has changed.

     Looks up documents matching ``docID`` and ``source``. If none exist,
     a new one is created from the given fields. If one exists and
     ``self.different`` reports a change, the current state is saved as a
     new ``VersionModel`` and the document is updated with the new fields
     and the extended list of version keys.

     :param docID: document identifier used for the lookup.
     :param source: source name used for the lookup.
     :param kwargs: remaining document fields; an optional ``'versions'``
         list is popped and appended to the stored versions.
     :returns: the result of ``DocumentModel.create`` or ``document.update``.
     :raises events.Skip: if the stored document does not differ from the
         incoming fields.
     """
     documents = DocumentModel.objects(docID=docID, source=source)
     if not documents:
         # Nothing stored for this (docID, source) pair yet: create it.
         return DocumentModel.create(docID=docID, source=source, **kwargs)

     document = documents[0]
     incoming = dict(docID=docID, source=source, **kwargs)
     if not self.different(dict(document), incoming):
         raise events.Skip("No changes detected for document with ID {0} and source {1}.".format(docID, source))

     # Snapshot the current state as a new version before overwriting it.
     # 'versions' is popped so it is not passed twice to update().
     versions = document.versions + kwargs.pop('versions', [])
     version = VersionModel(key=uuid4(), **dict(document))
     version.save()
     versions.append(version.key)
     return document.update(versions=versions, **kwargs)
Esempio n. 4
0
def process_normalized(normalized_doc, raw_doc, **kwargs):
    """Forward a normalized document to ``processing.process_normalized``.

    :param normalized_doc: the normalized document; must be truthy.
    :param raw_doc: the raw document; its ``'docID'`` entry is used in the
        skip message.
    :param kwargs: extra options, passed on as a single dict argument.
    :raises events.Skip: if ``normalized_doc`` is falsy.
    """
    if normalized_doc:
        # Note the argument order: the raw document goes first downstream.
        processing.process_normalized(raw_doc, normalized_doc, kwargs)
        return

    message = 'Not processing document with id {}'.format(raw_doc['docID'])
    raise events.Skip(message)