def test_stamp_from_raw(self):
    """Check that util.stamp_from_raw returns a dict with the expected keys.

    A raw document with an empty 'timestamps' mapping is stamped with a
    single extra 'done' entry; the result must be a dict whose keys are
    exactly 'done' and 'normalizeFinished'.
    """
    source_doc = {'doc': 'Macho Man Story', 'timestamps': {}}

    stamped = util.stamp_from_raw(source_doc, done='now')

    assert isinstance(stamped, dict)
    assert set(stamped.keys()) == {'done', 'normalizeFinished'}
def normalize(raw_doc, harvester_name):
    """Normalize one raw document with the named harvester.

    The harvester is looked up in ``registry`` by ``harvester_name`` and its
    ``normalize`` method is applied to ``raw_doc``. On success the result's
    'timestamps' entry is set to the value returned by
    ``util.stamp_from_raw``, called with the time captured before
    normalization began as ``normalizeStarted``.

    Returns a single normalized document.

    Raises:
        events.Skip: if the harvester returns a falsy result.
    """
    # Capture the start time before any harvester work happens.
    started_at = timestamp()

    result = registry[harvester_name].normalize(raw_doc)
    if result:
        result['timestamps'] = util.stamp_from_raw(
            raw_doc, normalizeStarted=started_at
        )
        return result

    raise events.Skip(
        'Did not normalize document with id {}'.format(raw_doc['docID'])
    )