コード例 #1
0
ファイル: merge.py プロジェクト: mrowl/filmdata
 def produce_titles(self, match_status=None):
     """Yield one merged title per 'title' match found in the sink.

     For every match returned by
     ``filmdata.sink.get_matches('title', status=match_status)``, each
     (source name, source id) pair produced by ``match_iter(match)`` is
     resolved through ``filmdata.sink.get_source_title_by_id`` and the
     results are passed, keyed by source name, to
     ``self._merge_source_titles`` together with ``match['id']``.

     This is a generator: the elapsed-time line is printed only once the
     caller has consumed every match.

     match_status -- forwarded unchanged as the ``status`` keyword of
                     ``get_matches``; defaults to None.
     """
     began = time.time()
     for match in filmdata.sink.get_matches('title', status=match_status):
         # Map each source name to the title record that source holds.
         titles_by_source = {}
         for source_name, source_id in match_iter(match):
             titles_by_source[source_name] = \
                 filmdata.sink.get_source_title_by_id(source_name, source_id)
         yield self._merge_source_titles(match['id'], **titles_by_source)
     elapsed = time.time() - began
     # A single parenthesized expression prints identically whether `print`
     # is parsed as a statement or called as a function.
     print('%f finished merging' % elapsed)
コード例 #2
0
ファイル: mongo.py プロジェクト: mrowl/filmdata
    def consume_match(self, match, type='title'):
        """Store a match document and flag its source records as matched.

        The document is built from the (source name, source id) pairs that
        ``match_iter(match)`` produces. It is upserted into the
        ``'<type>_match'`` entry of ``self.m`` (presumably a MongoDB
        collection -- confirm against the class setup), keyed by ``_id``.

        match -- mapping describing the match; a truthy ``match['id']`` is
                 reused as the document id and the document is tagged
                 'updated'. Otherwise a fresh id is taken from
                 ``self._get_seq_id(type)`` and the document is tagged 'new'.
        type  -- kind of entity being matched; used for the collection
                 name, the id sequence and the source status update.

        Returns the ``_id`` of the stored document.
        Raises Exception when ``match_iter(match)`` yields no pairs.
        """
        collection_name = '%s_match' % type
        record = dict(match_iter(match))
        if not record:
            raise Exception('Match is empty of ids')

        if match.get('id'):
            # Existing match: keep its id and mark the document as updated.
            record['_id'] = match['id']
            record['_admin'] = {'status': 'updated'}
        else:
            # No usable id yet: allocate one and mark the document as new.
            record['_id'] = self._get_seq_id(type)
            record['_admin'] = {'status': 'new'}

        match_id = record['_id']
        selector = {'_id': match_id}
        self.m[collection_name].update(selector, record,
                                       upsert=True, multi=False)

        # Iterate the match again rather than `record`, which now also
        # carries the '_id' and '_admin' bookkeeping keys.
        for source_name, source_id in match_iter(match):
            self.update_source_status(source_name, type, source_id, 'matched')
        return match_id
コード例 #3
0
ファイル: merge.py プロジェクト: mrowl/filmdata
 def produce_persons(self, match_status=None):
     """Yield one merged person per 'person' match found in the sink.

     For every match returned by
     ``filmdata.sink.get_matches('person', status=match_status)``, each
     (source name, source id) pair produced by ``match_iter(match)`` is
     resolved through ``filmdata.sink.get_source_person_by_id`` and the
     results are passed, keyed by source name, to
     ``self._merge_source_persons`` together with ``match['id']``.

     match_status -- forwarded unchanged as the ``status`` keyword of
                     ``get_matches``; defaults to None.
     """
     for match in filmdata.sink.get_matches('person', status=match_status):
         # Map each source name to the person record that source holds.
         persons_by_source = {}
         for source_name, source_id in match_iter(match):
             persons_by_source[source_name] = \
                 filmdata.sink.get_source_person_by_id(source_name, source_id)
         yield self._merge_source_persons(match['id'], **persons_by_source)