class DouBanPipeline(object):
    pool = ThreadPool(1)
    mongo_client = MongoDBApi()

    @classmethod
    def save_item(cls, item):
        cls.pool.callInThread(cls.__save_item, item)

    @classmethod
    def __save_item(cls, item):
        try:
            comments = []
            if 'comments' in item:
                comments = item.pop('comments')
            insert_id = cls.mongo_client.insert_one(item)
            if insert_id:
                insert_id = ObjectId(insert_id)
                for index, comment in enumerate(comments):
                    comment['movie_id'] = insert_id
                    comments[index] = comment
                if comments:
                    insert_ids = cls.mongo_client.insert_many(
                        comments, 'movie_comments')
                logging.warn(u'======== saved one record =======\n')
            else:
                logging.warn(u'======== failed to save one record =======\n')
        except Exception as err:
            logging.error(traceback.format_exc())
def jp2_to_jpeg(_threads, _app, _source, _destination, _broken, _jpegs, _verbose):
    testApp(_app)
    t = ThreadPool(_threads)
    for (root, dirs, files) in os.walk(_destination):
        subpath = root.replace(_destination, '').lstrip('/')
        if _broken not in subpath:
            if any(".jp2" in s for s in files):
                print >> emaillog, 'Converting contents of ' + subpath + ' from JP2 to JPEG'
                for (output_file, size) in _jpegs:
                    for file in files:
                        if file.endswith('.jp2'):
                            jp2 = os.path.join(root, file)
                            newfile = os.path.join(root, os.path.splitext(file)[0]) + '_' + output_file
                            command = _app + ' -size ' + size + ' ' + jp2 + ' -resize ' + size + ' ' + newfile
                            if _verbose == True:
                                print 'Creating ' + newfile
                            t.add_task(executeConversion, command, None, jp2, _source,
                                       _broken, file, newfile)
    t.await_completion()
def __init__(self, thread_count: int, host: str, port: str, db_name: str,
             user: str, channel_name: str) -> None:
    """
    Class constructor.

    Initializes:
    - the number of threads
    - the database connection
    - a single thread pool

    :param thread_count: number of threads in the thread pool
    :param host: hostname on which the database is deployed
    :param port: port used to connect to the database
    :param db_name: name of the database
    :param user: role used to connect to the database
    :param channel_name: name of the channel that receives messages from the database
    """
    self._host = host
    self._port = port
    self._db_name = db_name
    self._user = user
    self._thread_count = thread_count
    self._channel_name = channel_name
    self._e = self.connect()
    self.pool_task = ThreadPool(self._thread_count)
def test():
    print 'start testing'
    wm = ThreadPool(10)
    for i in range(1):
        wm.add_job(test_job, i, i * 0.001)
    wm.wait_for_complete()
    print 'end testing'
def main(event, lambdacontext):
    starttime = time.time()
    queue_url = event.get(c.KEY_SQS_QUEUE_URL, None)
    print "Started consumer with queue url '{}'".format(queue_url)
    context = event.get("context", {})
    context[c.KEY_SQS_QUEUE_URL] = queue_url
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    context[c.KEY_IS_LAMBDA_ENV] = context[c.KEY_REQUEST_ID] is not None
    prefix = util.get_stack_name_from_arn(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    context[c.KEY_STACK_PREFIX] = prefix
    context[c.KEY_SQS] = Sqs(context, "{0}_".format(prefix))
    context[c.KEY_SQS_AMOEBA] = Sqs(context, "{0}{1}_".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX))
    context[c.KEY_SQS_AMOEBA].set_queue_url(lowest_load_queue=True)
    context[c.KEY_LAMBDA] = Lambda(context)
    context[c.KEY_CLOUDWATCH] = CloudWatch(context)
    context[c.KEY_THREAD_POOL] = ThreadPool(context, 8)
    context[c.KEY_METRIC_BUCKET] = os.environ[c.RES_S3_STORAGE]
    context[c.KEY_START_TIME] = starttime
    context[c.CW_ATTR_SAVE_DURATION] = context[c.KEY_CLOUDWATCH].avg_save_duration(util.get_cloudwatch_namespace(os.environ[c.ENV_DEPLOYMENT_STACK_ARN]))
    context[c.CW_ATTR_DELETE_DURATION] = context[c.KEY_CLOUDWATCH].avg_delete_duration(util.get_cloudwatch_namespace(os.environ[c.ENV_DEPLOYMENT_STACK_ARN]))
    context[c.KEY_SUCCEEDED_MSG_IDS] = []
    process(context)
    del context
    gc.collect()
    return {'StatusCode': 200}
def pool_time(thread_num):
    start = time.clock()
    tp = ThreadPool(thread_num)
    for i in range(5):
        tp.add_task(time.sleep, i)
    tp.wait_completion()
    return time.clock() - start
def download():
    lines = ["Topics"]
    thread_pool = ThreadPool()
    d = False
    if enable_proxie[0]:
        refresh_proxie()
    filename = datetime.datetime.now().strftime("%d-%m-%Y %H-%M-%S") + '.txt'
    for i, enable in enumerate(download_enables):
        if enable:
            thread_pool.give_task(download_concrete_page, args=(download_hrefs[i], lines))
            d = True
    thread_pool.join()
    if d:
        with open(filename, 'w') as file:
            file.write('\n'.join(lines))
        print(f'All chosen topics are saved to {filename}')
        to_main_menu()
    else:
        print("Nothing is chosen")
        input("Press <Enter> to continue")
    return True
def tif_to_jp2(_threads, _app, _source, _destination, _broken, _options, _verbose):
    testApp(_app)
    t = ThreadPool(_threads)
    for (root, dirs, files) in os.walk(_source):
        subpath = root.replace(_source, '').lstrip('/')
        if _broken not in subpath:
            jp2Path = os.path.join(_destination, subpath)
            makeDir(jp2Path)
            if any(".tif" in s for s in files):
                print >> emaillog, 'Converting contents of ' + subpath + ' from TIF to JP2'
                for file in files:
                    if file.endswith('.tif'):
                        tiff = os.path.join(root, file)
                        jp2 = os.path.join(_destination, subpath, os.path.splitext(file)[0] + '.jp2')
                        tiffcopy = os.path.join(_destination, subpath, file)
                        command = _app + ' -i ' + tiff + ' -o ' + jp2 + ' ' + _options
                        command_post = 'shutil.move(\'' + tiff + '\',\'' + tiffcopy + '\')'
                        if _verbose == True:
                            print 'Creating ' + jp2
                        t.add_task(executeConversion, command, command_post, tiff,
                                   _destination, _broken, file, jp2)
    t.await_completion()
def start(self):
    if not self.db_oper.is_enabled():
        return
    repo_list = self.db_oper.get_repo_list()
    if repo_list is None:
        self.db_oper.close_db()
        return
    thread_pool = ThreadPool(self.scan_virus, self.settings.threads)
    thread_pool.start()
    for row in repo_list:
        repo_id, head_commit_id, scan_commit_id = row
        if head_commit_id == scan_commit_id:
            logger.debug('No change occurred for repo %.8s, skipping virus scan.', repo_id)
            continue
        thread_pool.put_task(ScanTask(repo_id, head_commit_id, scan_commit_id))
    thread_pool.join()
    self.db_oper.close_db()
def testcase_ThreadPool_init_thread_pool_success(self):
    """Test case 2: the constructor initializes the thread pool."""
    jobs = [str(i) for i in xrange(2)]
    pool = ThreadPool(3, test_function, jobs, 0)
    thread_count = len(pool.threads)
    self.assertEqual(3, thread_count)
    pool.wait_allcomplete()
def iterate(_source, _ignore, _patron, _patron_zip, _threads):
    print 'Descend into ' + _source
    t = ThreadPool(_threads)
    for (root, dirs, files) in os.walk(_source):
        t.add_task(patron_bundle, _patron, _patron_zip, root)
    t.await_completion()
def __init__(self, config_name="config.json"):
    self.__config = ServerConfig(config_name)
    logging.basicConfig(filename=self.__config.log_file,
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s')
    self.thread_pool = ThreadPool()
    cache_dir = Path.cwd() / self.__config.cache_dir
    self.cache = CacheStorage(cache_dir)
    self.request_handler = RequestHandler(self.cache)
def test_results(self):
    def my_add(a, b):
        return a + b

    tp = ThreadPool(5)
    for i in range(5):
        tp.add_task(my_add, i, i)
    d = tp.wait_completion()
    vals = d.values()
    vals.sort()
    assert vals == [0, 2, 4, 6, 8]
def threadstart(self):
    self.tp = ThreadPool(10)
    self.tpool = Thread(target=self.startThreadPool, args=())
    self.tpool.daemon = True
    self.tpool.start()
    self.tdetect = Thread(target=self.detectConnect, args=())
    self.tdetect.daemon = True
    self.tdetect.start()
    self.ttimeout = Thread(target=self.detectConnectTimeOut, args=())
    self.ttimeout.daemon = True
    self.ttimeout.start()
def main():
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    pos = 0
    total = len(store_list)
    for store in store_list:
        pos += 1
        task = SlotStateFetchTask(store, pos=pos, total=total)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to exit!!!')
    thread_pool.join()
def start_tasks():
    stores = load_stores()
    thread_pool = ThreadPool(size=20)
    total = len(stores)
    pos = 0
    for store in stores:
        pos += 1
        task = UnderLoadSlotZeroTask(store=store, total=total, pos=pos)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to exit!')
    thread_pool.join()
def testcase_ThreadPool_set_work_queue_success(self):
    """Test case 1: the constructor sets up the work queue successfully."""
    jobs = [str(i) for i in xrange(2)]
    pool = ThreadPool(2, test_function, jobs, 0)
    while True:
        try:
            func, param = pool.work_queue.get(block=False)
            res = func(param)
            self.assertEqual(str(0), res)
        except Queue.Empty as e:
            self.logging.info(e)
            break
    pool.wait_allcomplete()
def start_tasks():
    thread_pool = ThreadPool(size=20)
    store_list = load_stores()
    total_count = len(store_list)
    count = 0
    for store in store_list:
        count += 1
        task = FetcherTask(store=store, num=count, total=total_count)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks to finish......')
    thread_pool.join()
def main():
    logging.basicConfig(filename='debug.log', filemode='w')
    with Imap(url) as imap:
        imap.login(address, password)
        logging.info('Logged in')
        folders = imap.get_folders()
        # print('Found folders:', len(folders))
        # all_uids = get_uids_and_count(imap, folders)
        ui = UI()
        # 15 - imap simultaneous connections limit
        tasks = [(process_messages, (folder, ui)) for folder in folders]
        pool = ThreadPool(max=15)
        pool.run(tasks, delay=1)
def main():
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    index = 0
    total = len(store_list)
    for store in store_list:
        index += 1
        task = CompensationDisableTask(store=store, index=index, total=total)
        thread_pool.push_task(task)
    thread_pool.init_pool()
    print('Starting tasks...')
    thread_pool.start()
    print('Waiting for tasks to exit!')
    thread_pool.join()
def testcase_ThreadPool_get_result_success(self):
    """Test case 3: get_result; after all tasks complete, the results sum to 1."""
    jobs = [i for i in xrange(2)]
    pool = ThreadPool(3, test_function, jobs, 0)
    pool.wait_allcomplete()
    sum = 0
    while True:
        try:
            res = pool.get_result()
            arr_res = json.loads(res)
            sum += int(arr_res['url'])
        except Queue.Empty as e:
            self.logging.info(e)
            break
    self.assertEqual(1, sum)
def test_thread_pool():
    """Thread pool should be able to handle task processing."""
    thread_pool = ThreadPool()
    result = []

    def populate_result_task():
        result.extend([i for i in range(0, 10)])
        return

    thread_pool.add_task(populate_result_task)
    thread_pool.tasks.join()
    thread_pool.terminate_all_workers()
    assert result == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
def generate_threads(functionid, threads_count, iterations_per_thread, events_per_iteration,
                     sleep_duration, use_lambda, event_type, sensitivity_type, compression_mode):
    start = time.time()
    context = {}
    threadpool = ThreadPool(context, threads_count)
    context = dict({})
    db = DynamoDb(context)
    print "Sleep durations: ", sleep_duration
    print "Number of threads: ", threads_count
    print "Number of iterations per thread: ", iterations_per_thread
    print "Number of events per iteration: ", events_per_iteration
    print "Using event type: ", event_type
    print "Using sensitivity type: ", sensitivity_type
    print "Using compression mode: ", compression_mode
    for i in range(0, threads_count):
        threadpool.add(thread_job, functionid, iterations_per_thread, events_per_iteration,
                       use_lambda, context, sleep_duration, event_type, sensitivity_type,
                       compression_mode)
    threadpool.wait()
    print "A total of {} metrics have been sent to the FIFO queues.".format(
        (iterations_per_thread * events_per_iteration) * threads_count)
    print "The overall process took {} seconds.".format(time.time() - start)
def main(event, request):
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(
        request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(
        request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()
    found = False
    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print "FOUND new events=>", prefix
                break
            dt += timedelta(hours=1)
        if found:
            break
    if found:
        thread_pool.add(crawl, context, crawler_name,
                        context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()
    return custom_resource_response.success_response({}, "*")
def launch(event, lambdacontext):
    print "Start"
    hours_delta = 36
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    global threadpool
    global is_lambda
    threadpool = ThreadPool(context, 8)
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    available_amoeba_lambdas = []
    available_amoeba_lambdas.append(c.ENV_AMOEBA_1)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_2)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_3)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_4)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_5)
    db = DynamoDb(context)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    # TODO: adjust the amoeba tree depth so that we fully utilize all available amoebas;
    # len(available_amoeba_lambdas) * 1000. Since the number of leaf nodes for the metric
    # partitions can quickly get very large, we use a 5-lambda pool to ensure we don't hit
    # the 1000 invocation limit.
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_delta)
    now = datetime.datetime.utcnow()
    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            threadpool.add(crawler.crawl, prefix, available_amoeba_lambdas, invoke_lambda)
            dt += timedelta(hours=1)
    threadpool.wait()
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
def test_thread_pool_with_exception():
    """Thread pool should be able to handle task processing even if there were exceptions in some tasks."""
    thread_pool = ThreadPool()
    result = []

    def throw_ex_task():
        raise Exception()

    def populate_result_task():
        result.extend([i for i in range(0, 10)])
        return

    thread_pool.add_task(throw_ex_task)
    thread_pool.add_task(populate_result_task)
    thread_pool.tasks.join()
    thread_pool.terminate_all_workers()
    assert result == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
for i in range(0, len(item)):
    c = item[i].decode("gb2312")
    if i == 0:
        l.append(c)
    else:
        if c[0] == "&":
            l.append(0)
        else:
            l.append(1)
rooms.append(l)
with open("data/" + campus + "." + building + "." + week + "." + week_day + ".json", "w") as f:
    f.write(json.dumps(rooms))
print "finish: week:" + week + " week_day:" + week_day
return "success"


if __name__ == "__main__":
    s = Spider()
    s.cookies = {"JSESSIONID": "8B7DA565F71772D37B04170241A757A8.TAB2;"}
    pool = ThreadPool(size=20)
    pool.start()
    for week in range(1, 21):
        for week_day in range(1, 8):
            print "start week:" + str(week) + " week_day:" + str(week_day)
            # Make sure the campus id and building id in info.py are correct,
            # then set the campus and building ids here according to the data in info.py.
            pool.append_job(s.craw, "1709", "1783", str(week), str(week_day))
    pool.join()
# In range, start, end (plus 1 to include end) and steps
pool_size = [x for x in range(min_thread, max_thread, thread_step)]
# Create dict with thread sizes to keep track of time
for thread_count in pool_size:
    times[thread_count] = []
for i in pool_size:
    if ovrld.overloaded:
        i = ovrld.opt_work_threads
    if need_count and i == pool_size[-1] \
            or need_count and ovrld.overloaded and i == ovrld.opt_work_threads:
        clients = i * calculate_needed()
    else:
        clients = i * count
    pool = ThreadPool(i)
    # Clients is the final goal; it'll run the thread count for "count" iterations
    # (count is from config).
    sched_clients += clients
    while clients:
        # Change to your desired function...
        pool.add_task(time_event, xmlrpc_call, i)
        clients -= 1
        total_clients += 1
        if errors.error_count > errors_threshold:
            quit()
    pool.wait_completion()
    avg_time = sum(times[i]) / len(times[i])
    ovrld.calc_time(avg_time, i)
#!/usr/bin/env python
# coding:utf-8
from thread_pool import ThreadPool
import hackhttp
import re
import os

hh = hackhttp.hackhttp(hackhttp.httpconpool(500))
tp = ThreadPool(500)
package = "wooyun"
if not os.path.exists(package):
    os.mkdir(package)


def vlun(wid):
    print "[+]%s" % wid
    if os.path.isfile(wid + ".html"):
        return
    _, _, html, _, _ = hh.http(url="http://wooyun.org/bugs/%s" % wid, cookcookie=False)
    open(package + "/" + wid + '.html', 'wb').write(html)


def catalog(page):
    _, _, html, _, _ = hh.http(
        url="http://wooyun.org/bugs/new_public/page/%d" % page, cookcookie=False)
    for wid in re.findall(r'href="/bugs/(wooyun-\d+-\d+)">', html):
        tp.add_task(vlun, wid)
    if page > 0:
def start_crawler(event, context):
    glue = Glue()
    crawler_id_1 = glue.get_crawler_name(event)
    thread_pool = ThreadPool()
    thread_pool.add(glue.start_crawler, crawler_id_1)
    thread_pool.wait()