def submit(refresh_task, urls, executed_end_time_timestamp=420, remain_time_return_timestamp=1800, remain_time_failed_timestamp=1200): ''' 提交任务到消息队列 Parameters ---------- refresh_task : 任务 ignore_result 设置任务存储状态,如果为True,不存状态,也查询不了返回值 default_retry_delay 设置重试提交到消息队列间隔时间,默认10 分钟,单位为秒 max_retries 设置重试次数,默认为3 remain_time_return_timestamp 返回给客户的剩余时间,默认1800s remain_time_failed_timestamp 任务下发失败,返回客户的时间重新估算, 默认1200s Returns ------- ------- 修饰符 @task 将submit函数变成了异步任务。在webapp中调用submit并不会立即执行该函数, 而是将函数名、 参数等打包成消息发送到消息队列中,再由worker执行实际的代码 ''' try: logger.debug('getUrls:%s' % urls) urls = getUrlsInLimit(urls) # 增加空格处理 urls = encode_balank(urls) if not urls: return # setOveload(refresh_task, urls) # for url in urls: # url['url'] = add_https_443(url.get('url', '')) logger.debug('submit: %s' % urls) db.url.insert(urls) # for autodesk, insert a special collection in addition # if urls[0].get('username') == 'autodesk2': # add autodesk_flag username = refresh_task.get('username') try: user_list = eval( config.get('refresh_redis_store_usernames', 'usernames')) except Exception, e: logger.debug('splitter_new submit error:%s' % traceback.format_exc(e)) user_list = [] try: if username in user_list: add_rid_url_info_into_redis(refresh_task.get('r_id'), urls) except Exception, e: logger.debug('insert result into redis error:%s' % traceback.formate_exc(e))
def encode_balank(urls):
    """
    Add a percent-encoded twin for every URL record whose URL has a space.

    Each input record is kept as-is in the output. When a record's ``url``
    contains a space, a second record is appended right after it whose
    ``url`` has every space replaced by ``%20``. The twin carries only the
    fields listed in ``copied_keys`` below; any other key of the source
    record is intentionally not copied.

    Args:
        urls: list of dict-like URL records (each read through ``.get``).
            A record is expected to hold keys such as ``r_id``, ``url``,
            ``status``, ``isdir``, ``username``, ``created_time``,
            ``action``, ``is_multilayer``, ``parent``, ``type``,
            ``channel_code``, ``executed_end_time`` and
            ``executed_end_time_timestamp``.

    Returns:
        A new list with the original records plus the encoded twins.
        An empty/None input yields ``[]``. If iterating the input fails
        unexpectedly, the input is returned unchanged.
    """
    # Fields duplicated onto the encoded twin; 'url' is set separately.
    copied_keys = (
        'r_id', 'ignore_case', 'status', 'isdir', 'username',
        'created_time', 'action', 'is_multilayer', 'parent', 'type',
        'channel_code', 'executed_end_time', 'executed_end_time_timestamp',
    )
    result = []
    try:
        for url_t in urls or ():
            url = url_t.get('url')
            result.append(url_t)
            try:
                # A missing/non-string url raises here and is logged below,
                # the record itself has already been kept.
                if ' ' in url:
                    url_new = dict(
                        (key, url_t.get(key)) for key in copied_keys)
                    url_new['url'] = url.replace(' ', '%20')
                    logger.debug('url old:%s, url new:%s'
                                 % (url, url_new['url']))
                    result.append(url_new)
            except Exception:
                logger.debug('copy error:%s, url:%s'
                             % (traceback.format_exc(), url))
                continue
    except Exception:
        # Best effort: fall back to the untouched input.
        logger.debug("encode_balank error:%s" % traceback.format_exc())
        return urls
    return result
def submit(refresh_task): ''' 提交任务到消息队列 Parameters ---------- refresh_task : 任务 ignore_result 设置任务存储状态,如果为True,不存状态,也查询不了返回值 default_retry_delay 设置重试提交到消息队列间隔时间,默认10 分钟,单位为秒 max_retries 设置重试次数,默认为3 Returns ------- ------- 修饰符 @task 将submit函数变成了异步任务。在webapp中调用submit并不会立即执行该函数, 而是将函数名、 参数等打包成消息发送到消息队列中,再由worker执行实际的代码 ''' try: urls = getUrlsInLimit(getUrls(refresh_task)) logger.debug('submit: %s' % urls) if not urls: return setOveload(refresh_task, urls) # https add port 443 # for url in urls: # logger.debug("before url not have 443:%s" % url) # url['url'] = add_https_443(url.get('url', '')) # logger.debug('end url have 443:%s' % url) # logger.debug('submit: %s' % urls) db.url.insert(urls) username = refresh_task.get('username') try: user_list = eval(config.get('refresh_redis_store_usernames', 'usernames')) except Exception, e: logger.debug('splitter_new submit error:%s' % traceback.format_exc(e)) user_list = [] try: if username in user_list: add_rid_url_info_into_redis(refresh_task.get('r_id'), urls) except Exception, e: logger.debug('insert result into redis error:%s' % traceback.formate_exc(e))
def submit(refresh_task, urls):
    '''
    Submit a refresh task's URLs to the message queues.

    Parameters
    ----------
    refresh_task : dict-like task description. Keys read here: username,
        r_id, parent, callback, request_time, remote_addr, serial_num,
        web_task.
    urls : list of dict-like URL records belonging to the task.

    Task options (set on the task decorator, not in this body):
        ignore_result         when True the task state is not stored and
                              the return value cannot be queried
        default_retry_delay   delay between retries in seconds
                              (10 minutes by default)
        max_retries           number of retries (3 by default)

    Returns
    -------
    None. Returns early, without touching the database, when no URL is
    left after filtering.

    Raises
    ------
    Whatever ``submit.retry`` raises: any failure in the main flow is
    logged and turned into a retry of this task.

    Notes
    -----
    The @task decorator turns submit into an asynchronous task. Calling it
    from the webapp does not run the function immediately; the function
    name and arguments are packed into a message, sent to the message
    queue, and the actual code is executed by a worker.
    '''
    try:
        logger.debug('getUrls:%s' % urls)
        urls = getUrlsInLimit(urls)

        # Per-user prefix rewriting only applies when a rewrite entry exists.
        username_t = 'prefix_username_' + refresh_task.get("username")
        if REWRITE_CACHE.exists(username_t):
            urls = prefixReplace(urls, username_t)
        urls = processChinese(urls)
        urls = get_physical_del_channels(urls)

        # Both filters are best effort: on failure keep the current urls.
        try:
            urls = domain_ignore(urls)
        except Exception:
            logger.debug('domain ignore error {}'.format(
                traceback.format_exc()))
        try:
            urls = dir_and_url(urls)
        except Exception:
            logger.debug('dir_and_url error {}'.format(
                traceback.format_exc()))

        if not urls:
            return

        logger.debug('submit: %s' % urls)
        db.url.insert(urls)

        username = refresh_task.get('username')
        try:
            # NOTE(review): eval() on a config value; ast.literal_eval would
            # be safer if the value is a plain list literal -- confirm.
            user_list = eval(
                config.get('refresh_redis_store_usernames', 'usernames'))
        except Exception:
            logger.debug('splitter_new submit error:%s'
                         % traceback.format_exc())
            user_list = []
        try:
            if username in user_list:
                add_rid_url_info_into_redis(refresh_task.get('r_id'), urls)
        except Exception:
            logger.debug('insert result into redis error:%s'
                         % traceback.format_exc())

        # Split the in-progress URLs by priority.
        messages = []
        messages_high = []
        for url in urls:
            if url.get("status") != 'PROGRESS':
                continue
            url_info = get_refreshurl(username, url)
            if url.get('high_priority', False):
                messages_high.append(url_info)
            else:
                messages.append(url_info)
        logger.debug("需要加入到url_queue中的messages: %s" % messages)
        logger.debug("需要加入到url_high_priority_queue中的messages: %s"
                     % messages_high)

        request_time = refresh_task.get('request_time')
        if request_time:
            created_time = datetime.strptime(request_time, '%Y-%m-%d %X')
        else:
            created_time = datetime.now()
        db.request.insert({
            "_id": refresh_task.get('r_id'),
            "username": refresh_task.get("username"),
            "parent": refresh_task.get("parent"),
            "callback": refresh_task.get("callback"),
            "status": "PROGRESS",
            # NOTE(review): high-priority messages are not counted here --
            # confirm this is intended.
            "unprocess": len(messages),
            "created_time": created_time,
            "remote_addr": refresh_task.get('remote_addr', ''),
            "serial_num": refresh_task.get('serial_num', '')
        })

        web_task = refresh_task.get('web_task')
        if web_task:
            # Reporting to webluker is best effort and must not block queuing.
            try:
                webluker_tools.post_data_to_webluker(
                    web_task.get('task_new'), web_task.get('task_all'),
                    str(web_task.get('r_id')))
            except Exception:
                logger.debug(
                    'splitter_new webluker task have r_id:%s, error:%s'
                    % (web_task.get('r_id'), traceback.format_exc()))

        queue.put_json2('url_queue', messages)
        if messages_high:
            queue.put_json2('url_high_priority_queue', messages_high)
        if refresh_task.get('callback'):
            noticeEmail(refresh_task)
    except Exception as e:
        # Bind the exception so it can actually be handed to retry().
        logger.warning('submit error! do retry. error:%s'
                       % traceback.format_exc())
        raise submit.retry(exc=e)
def submit(refresh_task):
    '''
    Submit a refresh task's URLs to the message queues.

    The URL records are derived from the task itself through
    ``getUrls`` and ``getUrlsInLimit``.

    Parameters
    ----------
    refresh_task : dict-like task description. Keys read here: username,
        r_id, callback, request_time, remote_addr, serial_num, devices.

    Task options (set on the task decorator, not in this body):
        ignore_result         when True the task state is not stored and
                              the return value cannot be queried
        default_retry_delay   delay between retries in seconds
                              (10 minutes by default)
        max_retries           number of retries (3 by default)

    Returns
    -------
    None. Returns early, without touching the database, when the task
    yields no URL.

    Raises
    ------
    Whatever ``submit.retry`` raises: any failure in the main flow is
    logged and turned into a retry of this task.

    Notes
    -----
    The @task decorator turns submit into an asynchronous task. Calling it
    from the webapp does not run the function immediately; the function
    name and arguments are packed into a message, sent to the message
    queue, and the actual code is executed by a worker.
    '''
    try:
        urls = getUrlsInLimit(getUrls(refresh_task))
        logger.debug('submit: %s' % urls)
        if not urls:
            return
        setOveload(refresh_task, urls)
        db.url.insert(urls)

        username = refresh_task.get('username')
        try:
            # NOTE(review): eval() on a config value; ast.literal_eval would
            # be safer if the value is a plain list literal -- confirm.
            user_list = eval(
                config.get('refresh_redis_store_usernames', 'usernames'))
        except Exception:
            logger.debug('splitter_new submit error:%s'
                         % traceback.format_exc())
            user_list = []
        try:
            if username in user_list:
                add_rid_url_info_into_redis(refresh_task.get('r_id'), urls)
        except Exception:
            logger.debug('insert result into redis error:%s'
                         % traceback.format_exc())

        # Attach the target devices to every URL record (done after the
        # db.url insert above, so only the queued messages can see them).
        devices = refresh_task.get('devices')
        for url_t in urls:
            url_t['devices'] = devices
            # The interface provides no channel_code, so the channel name
            # is used in its place.
            url_t['channel_code'] = get_channelname(url_t.get('url'))

        # Split the in-progress URLs by priority.
        messages = []
        messages_high = []
        for url in urls:
            if url.get("status") != 'PROGRESS':
                continue
            url_info = get_refreshurl(username, url)
            if url.get('high_priority', False):
                messages_high.append(url_info)
            else:
                messages.append(url_info)

        request_time = refresh_task.get('request_time')
        if request_time:
            created_time = datetime.strptime(request_time, '%Y-%m-%d %X')
        else:
            created_time = datetime.now()
        db.request.insert({
            "_id": refresh_task.get('r_id'),
            "username": refresh_task.get("username"),
            # NOTE(review): parent is filled from "username" here --
            # confirm this is intended rather than the "parent" key.
            "parent": refresh_task.get("username"),
            "callback": refresh_task.get("callback"),
            "status": "PROGRESS",
            # NOTE(review): high-priority messages are not counted here --
            # confirm this is intended.
            "unprocess": len(messages),
            "created_time": created_time,
            "remote_addr": refresh_task.get('remote_addr', ''),
            "serial_num": refresh_task.get('serial_num', '')
        })

        queue.put_json2('url_queue', messages)
        if messages_high:
            queue.put_json2('url_high_priority_queue', messages_high)
        if refresh_task.get('callback'):
            noticeEmail(refresh_task)
    except Exception as e:
        # Bind the exception so it can actually be handed to retry().
        logger.warning('submit error! do retry. error:%s'
                       % traceback.format_exc())
        raise submit.retry(exc=e)
# logger.debug('username:%s, urls:%s' % (refresh_task.get('username'), urls)) # for url in urls: # url['url'] = delete_zip(url.get('url')) logger.debug('submit: %s' % urls) db.url.insert(urls) username = refresh_task.get('username') try: user_list = eval(config.get('refresh_redis_store_usernames', 'usernames')) except Exception, e: logger.debug('splitter_new submit error:%s' % traceback.format_exc(e)) user_list = [] try: if username in user_list: add_rid_url_info_into_redis(refresh_task.get('r_id'), urls) except Exception, e: logger.debug('insert result into redis error:%s' % traceback.formate_exc(e)) #筛选优先级任务 messages = [] messages_high = [] for url in urls: if url.get("status") == 'PROGRESS': url_info = get_refreshurl(refresh_task.get('username'), url) if url.get('high_priority', False): messages_high.append(url_info) else: messages.append(url_info) #messages = [get_refreshurl(refresh_task.get('username'), url) for url in urls if url.get("status") == 'PROGRESS']
def submit(refresh_task, urls, executed_end_time_timestamp=420,
           remain_time_return_timestamp=1800,
           remain_time_failed_timestamp=1200):
    '''
    Submit a refresh task's URLs to the message queues.

    Parameters
    ----------
    refresh_task : dict-like task description. Keys read here: username,
        r_id, parent, callback, request_time, remote_addr, serial_num.
    urls : list of dict-like URL records belonging to the task.
    executed_end_time_timestamp : stored on the request record, and also
        converted with ``datetime.fromtimestamp`` into executed_end_time.
        NOTE(review): the default of 420 looks like a duration rather
        than an epoch timestamp -- confirm against callers.
    remain_time_return_timestamp : remaining time reported back to the
        customer, 1800s by default. Stored on the request record.
    remain_time_failed_timestamp : re-estimated time reported to the
        customer when dispatching the task failed, 1200s by default.
        Stored on the request record.

    Task options (set on the task decorator, not in this body):
        ignore_result         when True the task state is not stored and
                              the return value cannot be queried
        default_retry_delay   delay between retries in seconds
                              (10 minutes by default)
        max_retries           number of retries (3 by default)

    Returns
    -------
    None. Returns early, without touching the database, when no URL is
    left after filtering.

    Raises
    ------
    Whatever ``submit.retry`` raises: any failure in the main flow is
    logged and turned into a retry of this task.

    Notes
    -----
    The @task decorator turns submit into an asynchronous task. Calling it
    from the webapp does not run the function immediately; the function
    name and arguments are packed into a message, sent to the message
    queue, and the actual code is executed by a worker.
    '''
    try:
        logger.debug('getUrls:%s' % urls)
        urls = getUrlsInLimit(urls)
        # Handle URLs containing spaces.
        urls = encode_balank(urls)
        if not urls:
            return

        logger.debug('submit: %s' % urls)
        db.url.insert(urls)

        username = refresh_task.get('username')
        try:
            # NOTE(review): eval() on a config value; ast.literal_eval would
            # be safer if the value is a plain list literal -- confirm.
            user_list = eval(
                config.get('refresh_redis_store_usernames', 'usernames'))
        except Exception:
            logger.debug('splitter_new submit error:%s'
                         % traceback.format_exc())
            user_list = []
        try:
            if username in user_list:
                add_rid_url_info_into_redis(refresh_task.get('r_id'), urls)
        except Exception:
            logger.debug('insert result into redis error:%s'
                         % traceback.format_exc())

        # The records are additionally stored in a dedicated collection,
        # each flagged with autodesk_flag = 0.
        for url_temp in urls:
            url_temp['autodesk_flag'] = 0
        db.url_autodesk.insert(urls)

        # Split the in-progress URLs by priority.
        messages = []
        messages_high = []
        for url in urls:
            if url.get("status") != 'PROGRESS':
                continue
            url_info = get_refreshurl(username, url)
            if url.get('high_priority', False):
                messages_high.append(url_info)
            else:
                messages.append(url_info)
        logger.debug("需要加入到url_queue中的messages: %s" % messages)
        logger.debug("需要加入到url_high_priority_queue中的messages: %s"
                     % messages_high)

        request_time = refresh_task.get('request_time')
        if request_time:
            created_time = datetime.strptime(request_time, '%Y-%m-%d %X')
        else:
            created_time = datetime.now()
        executed_end_time = datetime.fromtimestamp(
            executed_end_time_timestamp)
        db.request.insert({
            "_id": refresh_task.get('r_id'),
            "username": refresh_task.get("username"),
            "parent": refresh_task.get("parent"),
            "callback": refresh_task.get("callback"),
            "status": "PROGRESS",
            # NOTE(review): high-priority messages are not counted in the
            # two counters below -- confirm this is intended.
            "unprocess": len(messages),
            'check_unprocess': len(messages),
            "created_time": created_time,
            "remote_addr": refresh_task.get('remote_addr', ''),
            "serial_num": refresh_task.get('serial_num', ''),
            'executed_end_time_timestamp': executed_end_time_timestamp,
            'executed_end_time': executed_end_time,
            'remain_time_return_timestamp': remain_time_return_timestamp,
            'remain_time_failed_timestamp': remain_time_failed_timestamp
        })

        queue.put_json2('url_queue', messages)
        if messages_high:
            queue.put_json2('url_high_priority_queue', messages_high)
        if refresh_task.get('callback'):
            noticeEmail(refresh_task)
    except Exception as e:
        # Bind the exception so it can actually be handed to retry().
        logger.warning('submit error! do retry. error:%s'
                       % traceback.format_exc())
        raise submit.retry(exc=e)