def real_dispatcher(self, from_where):
    """Queue a random sample of the loaded items.

    Draws up to ``self.dispatcher_cnt`` entries from ``self.loaded_items``,
    fetches each one through ``self.cv_raw_store.get_one``, removes its
    ``_id`` field and puts the document on ``self.queue``.  For every queued
    document ``self.already_dispatched_cnt`` is incremented and a progress
    line is logged.

    Args:
        from_where: Not used by this method; kept so the signature stays
            compatible with existing callers.
    """
    # random.sample raises ValueError when asked for more elements than the
    # population holds, so never request more than is actually loaded.
    sample_size = min(self.dispatcher_cnt, len(self.loaded_items))

    for item in random.sample(self.loaded_items, sample_size):
        doc = self.cv_raw_store.get_one(item)
        if doc is None:
            # NOTE(review): assumes get_one returns None when the item is
            # missing from the store -- confirm against the store's API.
            Logger.default_log("no document found for item: %s, skipped" % (item,))
            continue

        # pop() rather than del: a document without '_id' is still
        # dispatchable and must not abort the whole batch with KeyError.
        doc.pop('_id', None)
        self.queue.put(doc)
        self.already_dispatched_cnt += 1
        Logger.default_log("current dispatched cnt: %d, total need cnt: %d"
                           % (self.already_dispatched_cnt, self.dispatcher_cnt))
def run_job(self, job):
    """Parse one stored HTML page and validate the resulting CV object.

    ``job`` is a mapping whose ``'fn'`` entry is the path of the HTML file
    and whose ``'cvId'`` entry is the identifier of the CV.  A falsy ``job``
    is ignored.  Errors raised while opening or reading the file propagate
    to the caller; errors raised while parsing or checking fields are
    logged together with a traceback and then suppressed.
    """
    if not job:
        return

    html_path, cvId = job.get('fn'), job.get('cvId')

    # Read the raw bytes first; the file is closed before parsing starts.
    with open(html_path, 'rb') as handle:
        raw_html = handle.read()

    try:
        parsed = self.cv_parser.parser(htmlContent=raw_html, cvFrom=self.channel)
        cv_raw = constructCvRawObj(parsed)
        cv_raw.cvId = "%s://%s" % (self.channel, cvId)
        cv_raw.cvFrom = self.channel
        self._check_fields(cv_raw.to_json(), cvId)
        # The success path logs nothing; only failures are reported.
    except Exception:
        Logger.default_log("cvId: %s Fail" % cvId)
        traceback.print_exc()
def _bulk_execute(self):
    """Execute the pending bulk operation, then reset the bulk state.

    Logs the target ``self.db`` / ``self.coll``, calls
    ``self._bulk.execute()`` and finally ``self._unset_bulk()``.  Any
    exception raised by ``execute()`` still propagates to the caller.
    """
    Logger.default_log("start bulk execute, db: %s, collection: %s" % (self.db, self.coll))
    try:
        self._bulk.execute()
    finally:
        # Reset even when execute() fails, so a spent bulk object is never
        # left attached for the next caller.
        # NOTE(review): self._bulk looks like a PyMongo bulk-operation
        # builder, which may only be executed once -- confirm.
        self._unset_bulk()
def _bulk_execute(self):
    """Execute the pending bulk operation, log its result, reset the bulk.

    Logs the target ``self.db_name`` / ``self.coll_name``, calls
    ``self._bulk.execute()``, logs whatever it returns and finally calls
    ``self._unset_bulk()``.  Any exception raised by ``execute()`` still
    propagates to the caller.
    """
    Logger.default_log("start bulk execute, db: %s, collection: %s" % (self.db_name, self.coll_name))
    try:
        result = self._bulk.execute()
        # Wrap in a 1-tuple so the message formats correctly whatever type
        # execute() returns (a bare tuple would break '%s' formatting).
        Logger.default_log("result : %s" % (result,))
    finally:
        # Reset even when execute() fails, so a spent bulk object is never
        # left attached for the next caller.
        # NOTE(review): self._bulk looks like a PyMongo bulk-operation
        # builder, which may only be executed once -- confirm.
        self._unset_bulk()