def produce_titles(self, match_status=None):
    """Lazily yield one merged title for every stored title match.

    Matches come from ``filmdata.sink.get_matches('title', ...)``, filtered
    by ``match_status`` (passed through as the ``status`` keyword).  For
    each match, every ``(source name, source id)`` pair produced by
    ``match_iter(match)`` is resolved through
    ``filmdata.sink.get_source_title_by_id`` and the results are handed,
    keyed by source name, to ``self._merge_source_titles`` as keyword
    arguments alongside the match's ``'id'``.

    This is a generator: the elapsed-time line is printed to stdout only
    once the caller has consumed every match.
    """
    started_at = time.time()
    title_matches = filmdata.sink.get_matches('title', status=match_status)
    for title_match in title_matches:
        # If a source name repeats within one match, the last pair wins.
        titles_by_source = {}
        for source_name, source_id in match_iter(title_match):
            titles_by_source[source_name] = (
                filmdata.sink.get_source_title_by_id(source_name, source_id))
        yield self._merge_source_titles(title_match['id'],
                                        **titles_by_source)
    elapsed = time.time() - started_at
    # Single parenthesised argument: prints the same text under the
    # Python 2 print statement this file uses.
    print('%f finished merging' % elapsed)
def consume_match(self, match, type='title'):
    """Store ``match`` in the ``'<type>_match'`` collection and return its id.

    The stored document is built from the ``(source name, source id)``
    pairs produced by ``match_iter(match)``.  When ``match`` carries a
    truthy ``'id'`` that id is reused and the document is flagged
    ``'updated'``; otherwise a new id is taken from
    ``self._get_seq_id(type)`` and the document is flagged ``'new'``.
    The document is then upserted by ``_id`` into ``self.m[collection]``
    (presumably a pymongo-style collection -- confirm), and every source
    referenced by the match is marked ``'matched'`` through
    ``self.update_source_status``.

    Raises:
        Exception: if ``match_iter(match)`` yields no pairs.
    """
    collection = '%s_match' % type
    doc = dict(match_iter(match))
    if not doc:
        raise Exception('Match is empty of ids')

    if match.get('id'):
        doc['_id'] = match['id']
        status = 'updated'
    else:
        doc['_id'] = self._get_seq_id(type)
        status = 'new'
    doc['_admin'] = {'status': status}

    selector = {'_id': doc['_id']}
    self.m[collection].update(selector, doc, upsert=True, multi=False)

    # Walk the original match again rather than ``doc``, which now also
    # holds the '_id' and '_admin' bookkeeping keys.
    for source_name, source_id in match_iter(match):
        self.update_source_status(source_name, type, source_id, 'matched')
    return doc['_id']
def produce_persons(self, match_status=None):
    """Lazily yield one merged person for every stored person match.

    Matches come from ``filmdata.sink.get_matches('person', ...)``,
    filtered by ``match_status`` (passed through as the ``status``
    keyword).  For each match, every ``(source name, source id)`` pair
    produced by ``match_iter(match)`` is resolved through
    ``filmdata.sink.get_source_person_by_id`` and the results are handed,
    keyed by source name, to ``self._merge_source_persons`` as keyword
    arguments alongside the match's ``'id'``.
    """
    person_matches = filmdata.sink.get_matches('person', status=match_status)
    for person_match in person_matches:
        # If a source name repeats within one match, the last pair wins.
        persons_by_source = {}
        for source_name, source_id in match_iter(person_match):
            persons_by_source[source_name] = (
                filmdata.sink.get_source_person_by_id(source_name, source_id))
        yield self._merge_source_persons(person_match['id'],
                                         **persons_by_source)