def mongo2redis(self, skip=0):
    """Copy the documents of the job's MongoDB collection onto a Redis list.

    Connection settings, the database/collection names and the target list
    key are all read from ``self.config.CONFIG['GLOBAL']`` for ``self.job``.
    Documents are read in pages of 50 and appended to the list with
    ``RPUSH`` as JSON strings; each ``_id`` is converted to ``str`` first so
    the document can be serialized.

    :param skip: offset of the first document to copy (anything ``int()``
        accepts), e.g. to resume an interrupted run.
    :return: None
    """
    global_config = self.config.CONFIG['GLOBAL']
    job_config = global_config['JOB'][self.job]

    redis_schema = job_config.get('REDIS_SCHEMA', 'DEFAULT')
    redis_instance = ConnectionFactory.get_redis_connection(
        **global_config['REDIS'][redis_schema])
    redis_conn = redis_instance.connection

    mongo_schema = job_config.get('MONGO_SCHEMA', 'DEFAULT')
    mongo_instance = ConnectionFactory.get_mongo_connection(
        db=job_config['MONGO_DB'],
        **global_config['MONGO'][mongo_schema])

    try:
        # Plain attribute lookup instead of eval(): same result for a
        # collection name, without executing configuration text as code.
        mongo_collection = getattr(
            mongo_instance.db, job_config['MONGO_COLLECTION'])

        # NOTE(review): Collection.count() no longer exists in PyMongo 4;
        # switch to count_documents({}) once the minimum PyMongo version
        # used by this project is confirmed.
        count = mongo_collection.count()
        start, step = int(skip), 50

        # NOTE(review): paging with skip/limit and no sort relies on the
        # server returning documents in a stable order -- confirm.
        while start < count:
            print(start)
            page = mongo_collection.find().limit(step).skip(start)
            for doc in page:
                doc['_id'] = str(doc['_id'])
                redis_conn.rpush(job_config['PUSH_REDIS_KEY'], json.dumps(doc))
            start += step
    finally:
        # Close the connection even when reading or pushing fails midway.
        mongo_instance.connection.close()
def test_get_mongo_connection(self):
    """Two factory calls with identical MongoDB arguments must return equal
    wrappers that expose the very same connection object."""
    mongo_args = dict(
        host='192.168.8.28',
        db='d_weixin_robot',
        user='******',
        password='******',
    )
    first = ConnectionFactory.get_mongo_connection(**mongo_args)
    second = ConnectionFactory.get_mongo_connection(**mongo_args)

    self.assertEqual(first, second)
    self.assertEqual(id(first.connection), id(second.connection))
def test_get_redis_connection(self):
    """Entering two factory-built Redis wrappers made from the same settings
    must hand back the same connection object."""
    first_conn = second_conn = None
    redis_settings = DEMO.get('REDIS')
    print(redis_settings)

    first_instance = ConnectionFactory.get_redis_connection(**redis_settings)
    with first_instance as conn:
        # Write through the first wrapper ...
        print(conn.set('foo', 'bar'))
        first_conn = conn

    second_instance = ConnectionFactory.get_redis_connection(**redis_settings)
    with second_instance as conn:
        # ... and read the value back through the second one.
        print(conn.get('foo'))
        second_conn = conn

    print(first_instance, id(first_conn), second_instance, id(second_conn))
    self.assertEqual(id(first_conn), id(second_conn))
def test_get_mongo_connection(self):
    """Two factory-built MongoDB wrappers for the same database and settings
    must expose the same ``connection`` object."""
    first_conn = second_conn = None
    mongo_settings = DEMO.get('MONGO')
    print(mongo_settings)

    first_instance = ConnectionFactory.get_mongo_connection(
        db='d_ccinfo', **mongo_settings)
    with first_instance as database:
        print(database.zhaobiaozhongbiao.find_one())
        first_conn = first_instance.connection

    second_instance = ConnectionFactory.get_mongo_connection(
        db='d_ccinfo', **mongo_settings)
    with second_instance as database:
        print(database.zhaobiaozhongbiao.find_one())
        second_conn = second_instance.connection

    print(first_instance, id(first_conn), second_instance, id(second_conn))
    self.assertEqual(id(first_conn), id(second_conn))
def stat_by_redis(self, ret):
    """Record one push outcome in the job's per-day statistics hash.

    The hash lives under ``<PUSH_REDIS_KEY>_stat_<YYYY-MM-DD>`` (date taken
    from ``datetime.datetime.now()``) and holds the fields ``project``,
    ``task``, ``success``, ``fail`` and ``last_push``.

    :param ret: outcome of the push; truthy increments ``success``, falsy
        increments ``fail``.
    :return: ``False`` when no Redis connection is available, else ``True``.
    """
    global_config = self.config.CONFIG['GLOBAL']
    job_config = global_config['JOB'][self.job]

    redis_schema = job_config.get('REDIS_SCHEMA', 'DEFAULT')
    redis_instance = ConnectionFactory.get_redis_connection(
        **global_config['REDIS'][redis_schema])
    redis_conn = redis_instance.connection
    if not redis_conn:
        LOG.error("cannot connect to redis server")
        return False

    today = datetime.datetime.now().strftime("%Y-%m-%d")
    stat_key = job_config['PUSH_REDIS_KEY'] + '_stat_' + today

    # HINCRBY creates the hash and the field when they are missing, so the
    # counters can be bumped without first checking whether the key exists.
    # The previous "HGETALL, then HMSET a fresh map" sequence could lose a
    # count when two workers recorded the first result of the day together.
    redis_conn.hincrby(stat_key, "success", 1 if ret else 0)
    redis_conn.hincrby(stat_key, "fail", 0 if ret else 1)

    # Descriptive fields are simply (re)written; the values are constant for
    # a given job, only last_push actually changes.
    redis_conn.hmset(stat_key, {
        "project": self.config.NAME,
        "task": self.job,
        "last_push": int(time.time()),
    })
    return True
async def do_job(self):
    """Consume tasks from the ``test_task_list`` Redis list forever.

    Each popped entry is decoded and handed to ``run_task_api``; when the
    list is empty the coroutine sleeps for one second before polling again.
    """
    redis_instance = ConnectionFactory.get_redis_connection()
    while True:
        raw_task = redis_instance.connection.rpop('test_task_list')
        if raw_task is not None:
            await self.run_task_api(raw_task.decode())
        else:
            # Nothing queued: yield to the event loop before the next poll.
            await asyncio.sleep(1)
def write_back_mongo(self, ret, data, flag_name):
    """Set a flag field on the MongoDB document a processed record came from.

    Nothing is written unless ``ret`` is truthy and ``data`` is a dict that
    carries an ``_id``. The database, collection and connection schema are
    read from the configuration of ``self.job``.

    :param ret: outcome of the preceding step; a falsy value skips the update.
    :param data: the processed record; ``data['_id']`` is passed to
        ``ObjectId`` to locate the document.
    :param flag_name: name of the field that is ``$set`` to ``1``.
    :return: ``False`` when the preconditions are not met, otherwise the
        value returned by ``update_one``.
    """
    if not ret or not isinstance(data, dict) or '_id' not in data:
        return False

    global_config = self.config.CONFIG['GLOBAL']
    job_config = global_config['JOB'][self.job]
    mongo_schema = job_config.get('MONGO_SCHEMA', 'DEFAULT')
    mongo_instance = ConnectionFactory.get_mongo_connection(
        db=job_config['MONGO_DB'],
        **global_config['MONGO'][mongo_schema])

    with mongo_instance as db:
        # Plain attribute lookup instead of eval(): same result for a
        # collection name, without executing configuration text as code.
        mongo_collection = getattr(db, job_config['MONGO_COLLECTION'])
        ret = mongo_collection.update_one(
            {'_id': ObjectId(data['_id'])},
            {"$set": {flag_name: 1}})
    return ret
def push_broker(self, tasks):
    """Push tasks to the broker.

    Every task's instance attributes are serialized to JSON and pushed with
    ``LPUSH`` onto the ``test_task_list`` Redis list. The
    ``_sa_instance_state`` attribute is left out of the payload
    (presumably the SQLAlchemy bookkeeping object, which is not JSON
    serializable -- confirm against the task model).

    :param tasks: iterable of task objects; a falsy value pushes nothing.
    :return: None
    """
    redis = ConnectionFactory.get_redis_connection()
    if tasks:
        for task in tasks:
            # Serialize a filtered copy: popping the key from task.__dict__
            # would modify the caller's object and raise KeyError for a task
            # that lacks the attribute (e.g. one that was pushed before).
            payload = {
                key: value
                for key, value in vars(task).items()
                if key != '_sa_instance_state'
            }
            redis.connection.lpush('test_task_list', json.dumps(payload))
    logger.warning('push tasks to broker: {}'.format(tasks))
def get_message(self, message_queue):
    """Feed messages from the job's Redis list into ``message_queue`` forever.

    While the queue reports fewer than 10 items, one entry is popped from the
    ``PUSH_REDIS_KEY`` list with ``BLPOP``, decoded as UTF-8 JSON and put on
    the queue. Otherwise the producer logs and sleeps for one second.

    :param message_queue: queue object providing ``qsize()`` and ``put()``.
    """
    LOG.info('Start message producer for job {}'.format(self.job))
    global_config = self.config.CONFIG['GLOBAL']
    task_config = global_config['JOB'][self.job]
    schema_name = task_config.get('REDIS_SCHEMA', 'DEFAULT')
    redis_instance = ConnectionFactory.get_redis_connection(
        **global_config['REDIS'][schema_name])

    with redis_instance as redis_conn:
        while True:
            if not message_queue.qsize() < 10:
                # Consumers are behind; back off instead of piling up more.
                LOG.info('too busy, have a rest...')
                time.sleep(1)
                continue

            record = redis_conn.blpop(task_config['PUSH_REDIS_KEY'])
            if not record:
                continue

            LOG.info('put message into queue: {}'.format(record))
            # The payload is the second element of the BLPOP reply.
            payload = record[1].decode('utf-8')
            message_queue.put(json.loads(payload))
async def run_task_api(self, task_json_str):
    """POST one task to the local ``/run_task`` endpoint and report it back.

    The task is sent as JSON through ``Lcurl.post_json``; afterwards the
    original task string is pushed with ``LPUSH`` onto the
    ``test_task_result_list`` Redis list.

    :param task_json_str: the task, as a JSON-encoded string.
    """
    logger.warning('run task api: {}'.format(task_json_str))

    endpoint = 'http://127.0.0.1:5000/run_task'
    # NOTE(review): 'Content' is not a standard HTTP header name; this was
    # presumably meant to be 'Content-Type' -- confirm before changing.
    request_headers = {'Content': 'application/json'}

    async with aiohttp.ClientSession(loop=self._loop) as session:
        response = await Lcurl.post_json(
            session=session,
            url=endpoint,
            json=json.loads(task_json_str),
            headers=request_headers)
        logger.warning('task result: {}'.format(response))

        redis_instance = ConnectionFactory.get_redis_connection()
        redis_instance.connection.lpush('test_task_result_list', task_json_str)


# if __name__ == '__main__':
#     from multiprocessing import Process
#     worker = Worker()
#     worker_process = Process(target=worker.run)
#     worker_process.start()
#     worker_process.join()
def async_result(self):
    """Collect task execution results from the queue and call update_trigger.

    Blocks on ``BRPOP`` of the ``test_task_result_list`` Redis list in an
    endless loop; every entry is decoded from JSON, logged by its ``id`` and
    passed to ``self.update_trigger``.

    :return: never returns normally.
    """
    redis_instance = ConnectionFactory.get_redis_connection()
    while True:
        popped = redis_instance.connection.brpop('test_task_result_list')
        # The payload is the second element of the BRPOP reply.
        raw_task = popped[1]
        task = json.loads(raw_task.decode())
        logger.warning('async result...{}'.format(task.get('id')))
        self.update_trigger(task)


# if __name__ == '__main__':
#     from multiprocessing import Process
#     beat = Beat()
#     beat_process = Process(target=beat.run)
#     beat_process.start()
#     beat_process.join()
def run(self):
    """Start the job's worker coroutines and run the event loop until stopped.

    ``PROCESSOR_NUM`` (default 1) ``self.worker(redis_conn)`` coroutines are
    scheduled on the event loop, which is stored in ``self._loop`` and then
    run forever. The loop is always closed on the way out.

    :raises NetworkError: when no Redis connection is available.
    """
    global_config = self.config.CONFIG['GLOBAL']
    task_config = global_config['JOB'][self.job]
    redis_schema = task_config.get('REDIS_SCHEMA', 'DEFAULT')
    redis_instance = ConnectionFactory.get_redis_connection(
        **global_config['REDIS'][redis_schema])
    redis_conn = redis_instance.connection
    if not redis_conn:
        raise NetworkError("cannot connect to redis server")

    processor_num = int(task_config.get('PROCESSOR_NUM', 1))

    # Event loop shared by all workers.
    self._loop = asyncio.get_event_loop()
    try:
        for _ in range(processor_num):
            asyncio.ensure_future(self.worker(redis_conn), loop=self._loop)
        self._loop.run_forever()
    except Exception:
        # asyncio.Task.all_tasks() was removed in Python 3.9; calling it here
        # would raise AttributeError and hide the original error. Prefer
        # asyncio.all_tasks() and fall back to the old spelling on
        # interpreters that do not have it. The loop is passed explicitly
        # because it is no longer running at this point.
        all_tasks = getattr(asyncio, 'all_tasks', None) or asyncio.Task.all_tasks
        print(asyncio.gather(*all_tasks(self._loop)).cancel())
    finally:
        self._loop.close()
def test_get_redis_connection(self):
    """Two factory calls for the same Redis host must return equal wrappers
    that expose the very same connection object."""
    redis_host = '192.168.8.30'
    first = ConnectionFactory.get_redis_connection(host=redis_host)
    second = ConnectionFactory.get_redis_connection(host=redis_host)

    self.assertEqual(first, second)
    self.assertEqual(id(first.connection), id(second.connection))