Beispiel #1
0
    def real_dispatcher(self, from_where):
        """Queue a random sample of the loaded items.

        Draws up to ``self.dispatcher_cnt`` distinct entries from
        ``self.loaded_items``, fetches each one through
        ``self.cv_raw_store.get_one``, removes its ``'_id'`` key and puts the
        document on ``self.queue``. ``self.already_dispatched_cnt`` is
        incremented and logged after every queued document.

        Args:
            from_where: Not used by this implementation.
        """
        # random.sample raises ValueError when asked for more items than the
        # population holds (or for a negative count), so clamp the request to
        # what is actually available instead of crashing the dispatcher.
        sample_size = max(0, min(self.dispatcher_cnt, len(self.loaded_items)))
        to_dispatch_items = random.sample(self.loaded_items, sample_size)

        for item in to_dispatch_items:
            doc = self.cv_raw_store.get_one(item)
            # Drop the '_id' key before queueing; tolerate documents that do
            # not carry one rather than raising KeyError.
            doc.pop('_id', None)
            self.queue.put(doc)
            self.already_dispatched_cnt += 1

            Logger.default_log("current dispatched cnt: %d, total need cnt: %d" % (self.already_dispatched_cnt, self.dispatcher_cnt))
Beispiel #2
0
    def real_dispatcher(self, from_where):
        """Put every document returned by ``self.cv_raw_store.get_all()`` on the queue.

        Each document has its ``'_id'`` key deleted before it is handed to
        ``self.queue.put``. Progress is reported through
        ``Logger.printProgress`` once per 10000 queued documents, against the
        total obtained from ``self.cv_raw_store.count_all()``.

        Args:
            from_where: Not used by this implementation.
        """
        expected_total = self.cv_raw_store.count_all()

        # Count from 1 so the progress figure equals the number queued so far.
        for queued, record in enumerate(self.cv_raw_store.get_all(), 1):
            del record['_id']
            self.queue.put(record)
            if queued % 10000:
                continue
            Logger.printProgress(queued, expected_total)
Beispiel #3
0
    def run_job(self, job):
        """Parse one stored HTML page and check the fields of the result.

        ``job`` is read with ``.get`` for the keys ``'fn'`` (path of the file
        to open in binary mode) and ``'cvId'``. A falsy ``job`` is ignored.

        Any exception raised while parsing, building the raw CV object or
        checking its fields is logged with a traceback and not re-raised.
        Errors from opening or reading the file are outside that handler and
        propagate to the caller.
        """
        if not job:
            return

        html_path, cv_id = job.get('fn'), job.get('cvId')

        with open(html_path, 'rb') as page_file:
            raw_html = page_file.read()

        try:
            parsed = self.cv_parser.parser(htmlContent=raw_html, cvFrom=self.channel)
            raw_cv = constructCvRawObj(parsed)

            # Tag the object with its channel-qualified id and its origin.
            raw_cv.cvId = "%s://%s" % (self.channel, cv_id)
            raw_cv.cvFrom = self.channel

            # Success is silent; only failures are logged below.
            self._check_fields(raw_cv.to_json(), cv_id)
        except Exception:
            Logger.default_log("cvId: %s Fail" % cv_id)
            traceback.print_exc()
Beispiel #4
0
 def _bulk_execute(self):
     """Log the target db/collection, run ``self._bulk.execute()``, then call ``self._unset_bulk()``.

     The value returned by ``execute()`` is discarded. If ``execute()``
     raises, ``_unset_bulk()`` is not reached.
     """
     start_msg = "start bulk execute, db: %s, collection: %s" % (self.db, self.coll)
     Logger.default_log(start_msg)
     self._bulk.execute()
     self._unset_bulk()
Beispiel #5
0
 def _bulk_execute(self):
     """Run ``self._bulk.execute()``, logging before and after, then call ``self._unset_bulk()``.

     The first log line names ``self.db_name`` and ``self.coll_name``; the
     second logs the value returned by ``execute()``. If ``execute()``
     raises, neither the result log nor ``_unset_bulk()`` is reached.
     """
     start_msg = "start bulk execute, db: %s, collection: %s" % (self.db_name, self.coll_name)
     Logger.default_log(start_msg)
     outcome = self._bulk.execute()
     Logger.default_log("result : %s" % outcome)
     self._unset_bulk()