def main():
    url_task_queue = Queue.Queue()
    thread_searcher_pool = ThreadPool(500, url_task_queue)
    thread_searcher_pool.start()
    topic_searcher_urls_pool = ThreadPool(200)
    topic_searcher_urls_pool.start()
    topic_threader = TopicSearcherThreader(
        thread_searcher_pool,
        topic_searcher_urls_pool,
        build_url_job,
        HenHenLuTopicUrlExer,
        BASE_URL,
        START_NUM,
        END_NUM,
        PIC_TYPE,
    )
    topic_threader.start()

    # Report the pending task count of each pool every 5 seconds.
    # Note: this loop never breaks, so the join/wait_done calls below it
    # are unreachable as written.
    while True:
        print '#######################################'
        print 'URL has task count: %s' \
            % str(thread_searcher_pool.current_task_count())
        print '#######################################'
        print '#######################################'
        print 'TOPIC has task count: %s' \
            % str(topic_searcher_urls_pool.current_task_count())
        print '#######################################'
        time.sleep(5)

    topic_threader.join()
    topic_searcher_urls_pool.wait_done()
    thread_searcher_pool.wait_done()
def start(self):
    if not self.db_oper.is_enabled():
        return

    repo_list = self.db_oper.get_repo_list()
    if repo_list is None:
        self.db_oper.close_db()
        return

    thread_pool = ThreadPool(self.scan_virus, self.settings.threads)
    thread_pool.start()

    for row in repo_list:
        repo_id, head_commit_id, scan_commit_id = row
        if head_commit_id == scan_commit_id:
            logger.debug('No change occurred for repo %.8s, skip virus scan.',
                         repo_id)
            continue
        thread_pool.put_task(
            ScanTask(repo_id, head_commit_id, scan_commit_id))

    thread_pool.join()
    self.db_oper.close_db()
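# The snippet above assumes a ThreadPool whose constructor takes a worker
# callable and a thread count, and which runs that callable on every queued
# task. A minimal sketch of such a pool follows; the class and method names
# are assumptions for illustration, not the implementation used above.
import threading
import Queue


class SimpleWorkerPool(object):
    def __init__(self, do_work, nworker):
        self.do_work = do_work
        self.nworker = nworker
        self.tasks = Queue.Queue()
        self.threads = []

    def start(self):
        # Spawn daemon worker threads that consume from the shared queue.
        for _ in range(self.nworker):
            t = threading.Thread(target=self._loop)
            t.setDaemon(True)
            t.start()
            self.threads.append(t)

    def put_task(self, task):
        self.tasks.put(task)

    def _loop(self):
        while True:
            task = self.tasks.get()
            try:
                if task is None:  # sentinel: stop this worker
                    break
                self.do_work(task)
            finally:
                self.tasks.task_done()

    def join(self):
        # Wait until every queued task has been processed, then stop workers.
        self.tasks.join()
        for _ in self.threads:
            self.tasks.put(None)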
def main1():
    thread_pool = ThreadPool(20)
    thread_pool.start()

    session = get_session()
    topic_query = \
        session.query(VideoTopic).filter(VideoTopic.video_type == 1)
    for topic in topic_query:
        thread_pool.add_task(job, topic.henhen_id)
    session.close()

    thread_pool.wait_done()
def main():
    pool = ThreadPool(20)
    pool.start()

    session = get_session()
    topic_query = session.query(PicTopic) \
        .filter(PicTopic.pic_type == 'dongmantupian') \
        .order_by(PicTopic.id.desc())
    for pic_topic in topic_query:
        pool.add_task(dump_job, pic_topic)
    session.close()

    pool.wait_done()
def run(self):
    cx, cu = self.db_connection()
    pool = ThreadPool(size=20)
    pool.start()

    file_submission_id = open(FILE_SUBMISSION_ID)
    finished_submissions = [int(item[0]) for item in
                            cu.execute("select submission_id from code")]
    all_submissions = [int(item) for item in file_submission_id.readlines()]

    # Only queue the submissions that are not already in the database.
    for line in list(set(all_submissions).difference(set(finished_submissions))):
        sleep(0.2)
        pool.append_job(s.job, line)

    pool.join()
    pool.stop()
def main():
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    pos = 0
    total = len(store_list)
    for store in store_list:
        pos += 1
        task = SlotStateFetchTask(store, pos=pos, total=total)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to exit!')
    thread_pool.join()
def start_tasks():
    stores = load_stores()
    thread_pool = ThreadPool(size=20)
    total = len(stores)
    pos = 0
    for store in stores:
        pos += 1
        task = UnderLoadSlotZeroTask(store=store, total=total, pos=pos)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to exit!')
    thread_pool.join()
def start_tasks():
    thread_pool = ThreadPool(size=20)
    store_list = load_stores()
    total_count = len(store_list)
    count = 0
    for store in store_list:
        count += 1
        task = FetcherTask(store=store, num=count, total=total_count)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to finish...')
    thread_pool.join()
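# Snippets of this shape (push_task / init_pool / start / join) queue task
# objects rather than bare callables, so the pool presumably invokes a method
# such as run() on each task from a worker thread. A hypothetical task of that
# shape; the class name and the run() convention are assumptions for
# illustration only:
class EchoStoreTask(object):
    def __init__(self, store, num, total):
        self.store = store
        self.num = num
        self.total = total

    def run(self):
        # Worker-side body: report progress and process one store.
        print('[%d/%d] processing store %s' % (self.num, self.total, self.store))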
def main():
    thread_pool = ThreadPool(50)
    thread_pool.start()

    video_type = '7'
    base_url = 'http://www.toutoulu.com/vodlist/%s_%s.html'

    # init task
    for page_num in range(1, page_info[video_type] + 1):
        url = base_url % (video_type, page_num)
        print 'add task %s' % url
        thread_pool.add_task(thread_pool_job, url, video_type)

    thread_pool.wait_done()
def main():
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    index = 0
    total = len(store_list)
    for store in store_list:
        index += 1
        task = CompensationDisableTask(store=store, index=index, total=total)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    print('Starting tasks...')
    thread_pool.start()
    print('Waiting for tasks to exit!')
    thread_pool.join()
class Listener():
    def __init__(self, redis_conn, channels):
        self.redis_conn = redis_conn
        self.pubsub = self.redis_conn.pubsub()
        self.pubsub.subscribe(channels)
        self.thread_pool = ThreadPool(size=10)

    def work(self, item):
        # Replace this with your own message-handling logic.
        print item["channel"], item["data"]

    def run(self):
        self.thread_pool.start()
        for item in self.pubsub.listen():
            self.thread_pool.append_job(self.work, item)
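# A hedged usage sketch for the Listener above; the host, port, and channel
# names are made up for illustration:
import redis

if __name__ == '__main__':
    conn = redis.StrictRedis(host='localhost', port=6379, db=0)
    listener = Listener(conn, ['news', 'chat'])
    listener.run()  # blocks, dispatching each published message to work()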
def scrap_connections():
    areas = set()
    airports = []
    with open('csv_files/top_100_airports.csv', 'rb') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            for airport in row:
                airports.append(airport)

    origins = dests = airports
    dates = []
    for i in range(100):
        new_date = datetime.date(2014, 11, 2) + timedelta(days=i)
        dates.append(new_date)

    pool = ThreadPool(20)
    pool.start()
    for dest in dests:
        for origin in origins:
            if dest != origin:
                date = dates[randint(0, len(dates) - 1)]
                pool.add_task(get_connections, origin, dest, date)
                areas.add(get_area(origin, dest))
    pool.wait_completion()

    # Arrange all connections in a single set per area instead of a list.
    areas_conn = dict()
    for area in areas:
        conn_list = flight_resp_dal.get_connections_in_area(area)
        connections = set()
        for conn in conn_list:
            connections.update(set(conn))
        areas_conn[area] = connections

    flight_resp_dal.clean_areas_to_connections_table()
    for area in areas:
        flight_resp_dal.add_connections_to_area(area, areas_conn[area])
"accepted": self.re_search("<td>Accepted</td><td align=center>(\d+)</td>", html)} except Exception as e: logging.error(e) return {"website": "hduoj", "rank": 0, "problems_submitted": 0, "problems_solved": 0, "submissions": 0, "accepted": 0} d = DBHandler() s = Spider() user_list = d.get_user_list() pool = ThreadPool(size=10) pool.start() def add_username(func, username, oj_username): data = func(oj_username) data["username"] = username return data for user in user_list: pool.append_job(add_username, s.bestcoder, user[0], user[1]) pool.append_job(add_username, s.codefoces, user[0], user[2]) pool.append_job(add_username, s.hduoj, user[0], user[3]) pool.join() pool.stop()
        for i in range(0, len(item)):
            c = item[i].decode("gb2312")
            if i == 0:
                l.append(c)
            else:
                if c[0] == "&":
                    l.append(0)
                else:
                    l.append(1)
        rooms.append(l)

    with open("data/" + campus + "." + building + "." + week + "." +
              week_day + ".json", "w") as f:
        f.write(json.dumps(rooms))
    print "finish: week:" + week + " week_day:" + week_day
    return "success"


if __name__ == "__main__":
    s = Spider()
    s.cookies = {"JSESSIONID": "8B7DA565F71772D37B04170241A757A8.TAB2;"}
    pool = ThreadPool(size=20)
    pool.start()
    for week in range(1, 21):
        for week_day in range(1, 8):
            print "start week:" + str(week) + " week_day:" + str(week_day)
            # Make sure the campus id and building id in info.py are correct,
            # then adjust the ids passed below according to the data in info.py.
            pool.append_job(s.craw, "1709", "1783", str(week), str(week_day))
    pool.join()
class VayantConnector(object):
    def __init__(self):
        self.flights_resp_dal = FlightsRespDAL()
        self.pool = ThreadPool(30)
        self.pool.start()

    def get_flight_price_async(self, trip):
        response = AsyncResponse(self.do_after_done)
        self.pool.add_task(self.calculate_flight_info, trip, response)
        return response

    def get_flight_from_cache(self, key):
        cached_resp = self.flights_resp_dal.get(key)
        # A key that exists with a None value means another worker is still
        # fetching it, so poll until the value shows up.
        while self.flights_resp_dal.has_key(key) and cached_resp is None:
            time.sleep(5)
            cached_resp = self.flights_resp_dal.get(key)
        return cached_resp

    def calculate_flight_info(self, trip, response):
        resp = self.get_flights_info(trip)
        response.set_response_value(resp)

    def get_flights_info(self, trip):
        resp = None
        key = self._create_cache_key_from_trip(trip)
        cached_resp = self.get_flight_from_cache(key)
        if cached_resp:
            return cached_resp
        try:
            self.flights_resp_dal.set(key, None)
            request_json = self.build_trip_request(trip)
            header = {"Content-Type": "application/JSON ",
                      "Accept-encoding": "gzip"}
            req = urllib2.Request("http://fs-json.demo.vayant.com:7080/",
                                  data=json.dumps(request_json),
                                  headers=header)
            response = urllib2.urlopen(req)
            resp = self._decompress_and_extract_json(response)
            trip = self._get_flights_from_vayant_response(response)
            if not trip:
                # No usable flights: drop the placeholder and give up.
                self.flights_resp_dal.remove(key)
                return None
            self.flights_resp_dal.set(key, trip)
        finally:
            if not resp:
                self.flights_resp_dal.remove(key)
        return resp

    def _create_cache_key_from_trip(self, trip):
        key = ""
        for single_trip in trip:
            key += single_trip["Origin"][0] + "-"
            key += single_trip["Destination"][0] + "-"
            for date in single_trip["DepartureDates"]:
                key += date["Date"] + "-"
            key += ":"
        return key

    def _decompress_and_extract_json(self, response):
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        json_resp = ""
        while True:
            data = response.read(8192)
            if not data:
                break
            if response.info().get('Content-Encoding') == 'gzip':
                data = decompressor.decompress(data)
            json_resp += data
        return json.loads(json_resp)

    # def print_single_flight(self, flight):
    #     response = "\n"
    #     response += flight["Fares"][0]["Origin"] + " -> " + flight["Fares"][0]["Destination"] + ":\n"
    #     response += "total price = {}".format(flight['Price']['Total']['Amount']) + "\n"
    #     response += "flights details: " + "\n"
    #     for leg in flight["Flights"]:
    #         response += "\t" + leg["Origin"] + " -> " + leg["Destination"] + ":" + "\n"
    #         response += "\t departure: " + leg["Departure"] + "\n"
    #         response += "\t arrival: " + leg["Arrival"] + "\n"
    #         if self.flights_resp_dal.get_airline(leg["OperatingCarrier"]) is not None:
    #             response += "\t carrier: " + self.flights_resp_dal.get_airline(leg["OperatingCarrier"]) + "\n"
    #         else:
    #             response += "\t carrier: " + leg["OperatingCarrier"] + "\n"
    #     return response

    def get_departure_flight_date(self, trip_response):
        return trip_response['Flights'][0]['Departure'][0:10]

    def get_return_flight_date(self, trip_response):
        return trip_response['Flights'][-1]['Departure'][0:10]

    def extract_cheapest_price(self, resp):
        sorted_response = sorted(resp['Journeys'],
                                 key=lambda trip: trip[0]['Price']['Total']['Amount'])
        try:
            return sorted_response[0][0]['Price']['Total']['Amount']
        except:
            print "ERROR getting the price", resp, sorted_response
            return 0

    def get_connections_list(self, trip):
        connections = set()
        for single in trip['Journeys']:
            if len(single[0]["Flights"]) == 2:
                if single[0]["Flights"][0]["Destination"] == single[0]["Flights"][1]["Origin"]:
                    connections.add(single[0]["Flights"][0]["Destination"])
        return connections

    def get_flight(self, trip, index):
        try:
            x = trip['Journeys'][index][0]
            return x
        except:
            return None

    def get_price(self, flight):
        if flight:
            return flight['Price']['Total']['Amount']
        return 99999

    def get_dest_flights_in_two_way(self, trip, connection):
        for i in xrange(len(trip['Flights']) - 1):
            if trip['Flights'][i]['Destination'] == connection:
                return trip['Flights'][i], trip['Flights'][i + 1]
        return None, None

    def get_price_round_trip(self, origin, dest, depart_dates, arrive_dates,
                             get_full_response=False):
        first_trip = self.build_trip(origin, dest, depart_dates, 1)
        second_trip = self.build_trip(dest, origin, arrive_dates, 2)
        return self.get_flight_price_async([first_trip, second_trip])

    def get_price_one_way(self, origin, dest, depart_dates):
        first_trip = self.build_trip(origin, dest, depart_dates, 1)
        return self.get_flight_price_async([first_trip])

    def do_after_done(self, resp):
        trip_data = resp
        if trip_data and trip_data.has_key("Journeys") and trip_data['Journeys'] \
                and len(trip_data['Journeys']) > 0:
            return self.extract_cheapest_price(trip_data), trip_data
        return (None, None)
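# For reference, _create_cache_key_from_trip above joins each leg's origin,
# destination, and departure dates with "-" and terminates the leg with ":".
# A made-up round trip TLV -> JFK (out 2014-11-02, back 2014-11-09) would be
# keyed as follows:
sample_trip = [
    {"Origin": ["TLV"], "Destination": ["JFK"],
     "DepartureDates": [{"Date": "2014-11-02"}]},
    {"Origin": ["JFK"], "Destination": ["TLV"],
     "DepartureDates": [{"Date": "2014-11-09"}]},
]
# VayantConnector()._create_cache_key_from_trip(sample_trip)
# -> "TLV-JFK-2014-11-02-:JFK-TLV-2014-11-09-:"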
class ContentScan(object):
    def __init__(self):
        self.thread_pool = ThreadPool(self.diff_and_scan_content,
                                      appconfig.thread_num)
        self.thread_pool.start()

    def start(self):
        try:
            self.do_scan_task()
        except Exception as e:
            logging.warning('Error: %s', e)

    def do_scan_task(self):
        logging.info("Start scan task..")
        time_start = time.time()
        dt = datetime.utcnow()
        dt_str = dt.strftime('%Y-%m-%d %H:%M:%S')
        self.dt = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S')
        edb_session = appconfig.session_cls()
        seafdb_session = appconfig.seaf_session_cls()

        # Get repo list from seafile-db
        Branch = SeafBase.classes.Branch
        VirtualRepo = SeafBase.classes.VirtualRepo
        q = seafdb_session.query(Branch.repo_id, Branch.commit_id)
        q = q.outerjoin(VirtualRepo, Branch.repo_id == VirtualRepo.repo_id)
        q = q.filter(VirtualRepo.repo_id == None)
        results = q.all()

        for row in results:
            repo_id = row.repo_id
            new_commit_id = row.commit_id
            last_commit_id = None
            q = edb_session.query(ContentScanRecord.commit_id)
            q = q.filter(ContentScanRecord.repo_id == repo_id)
            result = q.first()
            if result:
                last_commit_id = result[0]
            self.put_task(repo_id, last_commit_id, new_commit_id)

        # Remove deleted repos' records after all threads have finished.
        self.thread_pool.join()
        q = edb_session.query(ContentScanRecord)
        q = q.filter(ContentScanRecord.timestamp != self.dt)
        q.delete()
        q = edb_session.query(ContentScanResult)
        subquery = edb_session.query(ContentScanRecord.repo_id)
        q = q.filter(ContentScanResult.repo_id.notin_(subquery))
        # The subquery must be fetched for this delete.
        q.delete(synchronize_session='fetch')
        edb_session.commit()
        edb_session.close()
        seafdb_session.close()
        logging.info('Finish scan task, total time: %s seconds\n',
                     str(time.time() - time_start))

        self.thread_pool.join(stop=True)

    def diff_and_scan_content(self, task, client):
        repo_id = task.repo_id
        last_commit_id = task.last_commit_id
        new_commit_id = task.new_commit_id
        edb_session = appconfig.session_cls()

        # Repo not changed, just update the timestamp.
        if last_commit_id == new_commit_id:
            q = edb_session.query(ContentScanRecord)
            q = q.filter(ContentScanRecord.repo_id == repo_id,
                         ContentScanRecord.commit_id == last_commit_id)
            q.update({"timestamp": self.dt})
            edb_session.commit()
            edb_session.close()
            return

        # diff
        version = 1
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if new_commit is None:
            version = 0
            new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if not new_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
            edb_session.close()
            return
        last_commit = None
        if last_commit_id:
            last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
            if not last_commit:
                logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
                edb_session.close()
                return
        new_root_id = new_commit.root_id
        last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

        differ = CommitDiffer(repo_id, version, last_root_id, new_root_id,
                              True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
            renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

        # Handle renamed, moved and deleted files.
        q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id == repo_id)
        results = q.all()
        if results:
            path_pairs_to_rename = []
            paths_to_delete = []
            # renamed dirs
            for r_dir in renamed_dirs:
                r_path = r_dir.path + '/'
                l = len(r_path)
                for row in results:
                    if r_path == row.path[:l]:
                        new_path = r_dir.new_path + '/' + row.path[l:]
                        path_pairs_to_rename.append((row.path, new_path))
            # moved dirs
            for m_dir in moved_dirs:
                m_path = m_dir.path + '/'
                l = len(m_path)
                for row in results:
                    if m_path == row.path[:l]:
                        new_path = m_dir.new_path + '/' + row.path[l:]
                        path_pairs_to_rename.append((row.path, new_path))
            # renamed files
            for r_file in renamed_files:
                r_path = r_file.path
                for row in results:
                    if r_path == row.path:
                        new_path = r_file.new_path
                        path_pairs_to_rename.append((row.path, new_path))
            # moved files
            for m_file in moved_files:
                m_path = m_file.path
                for row in results:
                    if m_path == row.path:
                        new_path = m_file.new_path
                        path_pairs_to_rename.append((row.path, new_path))

            for old_path, new_path in path_pairs_to_rename:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id == repo_id,
                             ContentScanResult.path == old_path)
                q = q.update({"path": new_path})

            # deleted files
            for d_file in deleted_files:
                d_path = d_file.path
                for row in results:
                    if d_path == row.path:
                        paths_to_delete.append(row.path)
            # We will scan modified_files and re-record them later,
            # so delete the previous records now.
            for m_file in modified_files:
                m_path = m_file.path
                for row in results:
                    if m_path == row.path:
                        paths_to_delete.append(row.path)

            for path in paths_to_delete:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id == repo_id,
                             ContentScanResult.path == path)
                q.delete()

            edb_session.commit()

        # Scan added_files and modified_files with the third-party API.
        files_to_scan = []
        files_to_scan.extend(added_files)
        files_to_scan.extend(modified_files)
        a_count = 0
        scan_results = []
        for f in files_to_scan:
            if not self.should_scan_file(f.path, f.size):
                continue
            seafile_obj = fs_mgr.load_seafile(repo_id, 1, f.obj_id)
            content = seafile_obj.get_content()
            if not content:
                continue
            result = client.scan(content)
            if result and isinstance(result, dict):
                item = {"path": f.path, "detail": result}
                scan_results.append(item)
            else:
                logging.warning('Failed to scan %s:%s', repo_id, f.path)

        for item in scan_results:
            detail = json.dumps(item["detail"])
            new_record = ContentScanResult(repo_id, item["path"],
                                           appconfig.platform, detail)
            edb_session.add(new_record)
            a_count += 1
        if a_count >= 1:
            logging.info('Found %d new illegal files.', a_count)

        # Update ContentScanRecord
        if last_commit_id:
            q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id == repo_id)
            q.update({"commit_id": new_commit_id, "timestamp": self.dt})
        else:
            new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
            edb_session.add(new_record)

        edb_session.commit()
        edb_session.close()

    def put_task(self, repo_id, last_commit_id, new_commit_id):
        task = ScanTask(repo_id, last_commit_id, new_commit_id)
        self.thread_pool.put_task(task)

    def should_scan_file(self, fpath, fsize):
        if fsize > appconfig.size_limit:
            return False
        filename, suffix = splitext(fpath)
        if suffix[1:] not in appconfig.suffix_list:
            return False
        return True
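# Illustrative values for the directory-rename rewrite in diff_and_scan_content
# above: if "/docs" was renamed to "/archive" and a previous result row sits at
# "/docs/a.txt", then r_path == "/docs/", the prefix matches, and the row's
# path is rewritten to "/archive" + "/" + "a.txt" == "/archive/a.txt".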
class MessageBus(object):
    """ Message bus.
        Sends messages and bridges the bot and its commands:
        - delivers received messages to the group members
        - parses message commands and dispatches them to the matching handler
        - lets command handlers return or broadcast their results
    """
    def __init__(self, bot_jid, stream):
        self.bot_jid = bot_jid
        self._stream = stream
        self.cmd_handler = CommandHandler(message_bus = self)
        self.admin_cmd_handler = AdminCMDHandler(message_bus = self)
        self._thread_pool = ThreadPool(5)
        self._thread_pool.start()    # start the thread pool
        self.logger = get_logger()
        return

    def make_message(self, to, typ, body):
        """ Build a message.
            `to` - recipient JID
            `typ` - message type
            `body` - message body
        """
        if typ not in ['normal', 'chat', 'groupchat', 'headline']:
            typ = 'normal'
        m = Message(from_jid = self.bot_jid, to_jid = to, stanza_type = typ,
                    body = body)
        return m

    def send_to_admin(self, stanza, body):
        """ Send a message to the administrators. """
        [self.send_message(stanza, admin, body, True) for admin in ADMINS]

    def send_private_msg(self, stanza, to, body):
        """ Send a private message. """
        frm = stanza.from_jid
        nick = get_nick(frm)
        body = "[%s 悄悄对你说] %s" % (nick, body)
        self.send_message(stanza, to, body, True)

    def send_message(self, stanza, to, body, log = False):
        """ Send a message.
            `stanza` - the message stanza
            `to` - recipient; an offline message is stored if the recipient
                   is not online
            `body` - message body
            `log` - record the message in the history
        """
        if log:
            add_history(stanza.from_jid, to, body)
        if is_online(to):
            mode = get_info('mode', to)
            if mode == 'talk' or not mode:
                if isinstance(to, (str, unicode)):
                    to = JID(to)
                self.logger.debug("send '{0}' to {1!r}".format(body, to))
                typ = stanza.stanza_type
                self._stream.send(self.make_message(to, typ, body))
        else:
            body = NOW() + ' ' + body
            self.logger.debug("store offline message '{0}' for {1!r}"
                              .format(body, to))
            offline_message = get_info('offline_message', to, '')
            offline_message += '\n' + body
            add_info('offline_message', offline_message, to)

    def send_offline_message(self, stanza):
        """ Deliver messages stored while the user was offline. """
        show = stanza.show
        frm = stanza.from_jid
        offline_message = get_info('offline_message', frm)
        if offline_message:
            offline_message = "离线期间的消息:\n" + offline_message
            m = self.make_message(frm, 'normal', offline_message)
            self._stream.send(m)
        set_online(frm, show)
        add_info('offline_message', '', frm)

    def send_all_msg(self, stanza, body):
        """ Send a message to every member except the sender. """
        if cityid(body.strip()):
            return self.send_command(stanza, '-_tq ' + body.strip())
        if body.strip() == 'help':
            return self.send_command(stanza, '-help')
        if body.strip() == 'ping':
            return self.send_command(stanza, '-_ping')
        mode = get_info('mode', stanza.from_jid)
        if mode == 'quiet':
            body = u'你处于{0},请使用-cd命令切换到 {1} '\
                   u'后发言'.format(MODES[mode], MODES['talk'])
            return self.send_back_msg(stanza, body)
        add_history(stanza.from_jid, 'all', body)
        members = get_members(stanza.from_jid)
        current = get_info('channel', stanza.from_jid, 'main')
        members = [m for m in members
                   if get_info('channel', m, 'main') == current]
        self.logger.info("{0} send message {1} to {2!r}"
                         .format(stanza.from_jid, body, members))
        nick = get_nick(stanza.from_jid)
        body = "[{0}] {1}".format(nick, body)
        [self.send_message(stanza, m, body) for m in members]

    def send_back_msg(self, stanza, body):
        """ Send a reply back to the sender. """
        to = stanza.from_jid.bare().as_string()
        typ = stanza.stanza_type
        self._stream.send(self.make_message(to, typ, body))

    def send_sys_msg(self, stanza, body):
        """ Send a system message to all members. """
        members = get_members()
        [self.send_message(stanza, m, body) for m in members]

    def send_command(self, stanza, body):
        """ Handle a command.
            Commands run on the thread pool so they do not block the bot.
        """
        email = get_email(stanza.from_jid)
        self.logger.info("{0} run command {1}".format(stanza.from_jid, body))
        if email in ADMINS:
            target = self.admin_cmd_handler._run_cmd
        else:
            target = self.cmd_handler._run_cmd
        self._thread_pool.add_job(target, stanza, body)

    def send_status(self, statustext, to = None):
        if to:
            to_jid = JID(to)
            p = Presence(status = statustext, to_jid = to_jid)
        else:
            p = Presence(status = statustext)
        self._stream.send(p)

    def send_subscribe(self, jid):
        """ Send a subscription request. """
        p1 = Presence(from_jid = self.bot_jid, to_jid = jid,
                      stanza_type = 'subscribe')
        p = Presence(from_jid = self.bot_jid, to_jid = jid,
                     stanza_type = 'subscribed')
        self._stream.send(p)
        self._stream.send(p1)

    def send_unsubscribe(self, jid):
        p1 = Presence(from_jid = self.bot_jid, to_jid = jid,
                      stanza_type = 'unsubscribe')
        p = Presence(from_jid = self.bot_jid, to_jid = jid,
                     stanza_type = 'unsubscribed')
        self._stream.send(p)
        self._stream.send(p1)