def get_taotu_pages(category_url): # 找到某个分类下全部的分页URL print('process category: {0}'.format(category_url)) soup = commons.soup(category_url, encoding='utf8') print('process index: {0}'.format(soup.title)) last_no = get_last_page_no(soup) urls = ['{0}/list_{1}.html'.format(category_url, i) for i in range(2, last_no + 1)] # for url in urls: # download_by_page(url) retry = 0 while True: pool = ThreadPool(4) try: pool.map(download_by_page, urls) pool.close() pool.join() print('all images downloaded completely.') break except KeyboardInterrupt, e: print('download terminated by user, quit now.', e) pool.terminate() pool.join() break except Exception, e: pool.terminate() pool.join() retry += 1 traceback.print_exc() try: print('download error: {0}, {1} retry in {2}s'.format( e, retry, retry * 20 % 120)) except Exception: pass time.sleep(retry * 20 % 120)
def download_urls_to_zip(zf, urls):
    # De-duplicate URLs before downloading.
    urls = set(urls)
    pool = ThreadPool(10)
    # Note: ZipFile writes are not thread-safe, so download_url_to_zip
    # should serialize its writes (e.g. with a lock).
    download_to_zip_func = lambda url: download_url_to_zip(zf, url)
    pool.map(download_to_zip_func, urls)
    pool.close()
    pool.join()
def __init__(self, records, outfile):
    # cat_xml = "../itma.cat.soutron_20160216.xml"
    print 'Parsing XML data...'
    # records = etree.parse(cat_xml)
    self.counter = 0
    self.roles = ['performer']
    self.locations = []
    cfields = []
    refnos = []
    self.records = records
    # Collect Creator/Contributors fields and geographical locations from every record.
    for x in records:
        cfields.extend(x.xpath('*[self::Creator or self::Contributors]'))
        self.locations.extend(x.xpath('GeographicalLocation/text()'))
    map(self.parse_roles, cfields)
    self.roles = list(set([x.replace('\n', '').strip()
                           for x in filter(lambda x: len(x) > 1, self.roles)]))
    self.locations = list(set(self.locations))
    self.role_list = etree.Element("NamedRoles")
    self.cache = {}
    print 'extracting roles...'
    pool = Pool(processes=4)
    pool.map(self.process_recordlist, records)
    etree.ElementTree(self.role_list).write(outfile, pretty_print=True)
def test_threading(self):
    pool = ThreadPool(4)
    results = pool.map(self.parser.parse, Dictionary().version.terms)
    self.assertSetEqual({str(t) for t in results},
                        {'[{0}]'.format(str(t)) for t in Dictionary().version.terms})
    results = pool.map(script, Dictionary().version.terms)
    self.assertSetEqual({str(t) for t in results}, set(Dictionary().version.terms))
def get_offline_user_data():
    if DEBUG_MODE:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'get_offline_user_data')
    if r_session.exists('api_error_info'):
        return
    if datetime.now().minute < 50:
        return
    offline_users = []
    for b_user in r_session.mget(*['user:%s' % name.decode('utf-8')
                                   for name in r_session.sdiff('users', *r_session.smembers('global:online.users'))]):
        user_info = json.loads(b_user.decode('utf-8'))
        username = user_info.get('username')
        if not user_info.get('active'):
            continue
        every_hour_key = 'user:%s:cron_queued' % username
        if r_session.exists(every_hour_key):
            continue
        offline_users.append(username)
    pool = ThreadPool(processes=5)
    pool.map(get_data, offline_users)
    pool.close()
    pool.join()
def build_words_weight(): st = time.time() bigvs = BigVs.objects.all() def _build(b): data = ArticlePostedResults.active_objects.filter(bigv__v_id=b.v_id, is_correct__in=(0, 1)).values('is_correct').annotate(count=Count('is_correct')).order_by('is_correct') sum_c , w, c = 0, 0, 0 for d in data: if d['is_correct'] == 1: c = d['count'] sum_c += d['count'] if sum_c: w = c * 1.0 / sum_c c = w * 200 sum_c = 200 data = Judgement.objects.filter(article__bigv=b, judge__isnull=False).values('judge').annotate(count=Count('judge')).order_by('judge') for d in data: if d['judge'] == 'right': c += d['count'] sum_c += d['count'] if sum_c: w = int(round(c * 1.0 / sum_c * 100)) b.words_weight = w b.save() print b.name, c, sum_c, w pool = Pool(8) pool.map(_build, bigvs) pool.close() pool.join() ed = time.time() debug('build_words_weight', ed - st)
def load_rowdata_to_mongo_zh(is_incremental): print("start loading row data(zh) from JSON file to MongoDB...") all_start = timeit.default_timer() static = Static() bydim_dir = static.output_folder + static.dataset_bydim_folder client = MongoClient(static.mongo_url, static.mongo_port) db = client[static.database_name] dataset_col = db[static.dataset_col_name] if not is_incremental: dataset_col.drop() file_path_array = [] for idx, file in enumerate(os.listdir(bydim_dir)): file_path = os.path.join(bydim_dir, file) if os.path.isfile(file_path): file_path_array.append(file_path) print(str(len(file_path_array)) + " files are loaded") counter = [] mapfunc = partial(insert_by_dim, counter=counter, dataset_col=dataset_col, all_start=all_start) pool = ThreadPool(12) pool.map(mapfunc, file_path_array) pool.close() pool.join() print("All the threads are completed. Total number is " + str(len(counter)) + "\n") print("total time cost: " + str(round(timeit.default_timer() - all_start)) + 's')
def collect_crystal():
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'collect_crystal')
    pool = ThreadPool(processes=5)
    pool.map(check_collect,
             (json.loads(c.decode('utf-8')) for c in r_session.smembers('global:auto.collect.cookies')))
    pool.close()
    pool.join()
def test_multi_threading(): import time import random from multiprocessing.dummy import Pool def op_a(a, b): time.sleep(random.random()*.02) return a+b def op_b(c, b): time.sleep(random.random()*.02) return c+b def op_c(a, b): time.sleep(random.random()*.02) return a*b pipeline = compose(name="pipeline", merge=True)( operation(name="op_a", needs=['a', 'b'], provides='c')(op_a), operation(name="op_b", needs=['c', 'b'], provides='d')(op_b), operation(name="op_c", needs=['a', 'b'], provides='e')(op_c), ) def infer(i): # data = open("616039-bradpitt.jpg").read() outputs = ["c", "d", "e"] results = pipeline({"a": 1, "b":2}, outputs) assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), (outputs, results) return results N = 100 for i in range(20, 200): pool = Pool(i) pool.map(infer, range(N)) pool.close()
def main(): parser = argparse.ArgumentParser(description='Checks a LegalOne application for broken links') parser.add_argument('-d', '--domain', help='URL to check for broken links. Ex. http://colucci.release.dco.novajus.com.br', required=True) parser.add_argument("-e", '--escritorio', help='Account to check for broken links, Ex. xxxx, where xxx.release.dco.novajus.com.br', required=True) parser.add_argument("-l", '--loginpage', help='URL to login on the application. Ex. http://release.dco.novajus.com.br/conta/login', required=True) parser.add_argument("-t", '--threads', type=int, help='How many threads sarching for broken links at the same time. Default is 10', required=False, default=10) args = parser.parse_args() loginpage = args.loginpage escritorio = args.escritorio domain = args.domain threads = args.threads pages_to_test = queue.Queue(maxsize=0) cookie_login = login(domain, escritorio, loginpage) pages_to_test.put(domain + "/contatos/contatos/search") test_url(cookie_login, pages_to_test, domain, pages_to_test.get()) while not pages_to_test.empty(): pool = ThreadPool(threads) links_to_check = [] for x in range(0, threads): links_to_check.append(pages_to_test.get()) partialtest_url = partial(test_url, cookie_login, pages_to_test, domain) pool.map(partialtest_url, links_to_check) pool.close() pool.join()
def simTrans(hosts, prm): fname = str(prm.n) + 'nodes.' + str(prm.data_size) + 'MB.' + str(prm.pipes) + 'pipes.out' for h in hosts: full_name = "results/%04d/%s"%(int(h.name.split('h')[1]), fname) os.system("rm %s" % full_name) status[h.name] = [0 for i in range(prm.pipes)] ip[h.name] = h.IP() h.cmdPrint('iperf -s -f M >> %s &'%full_name) '''for h1 in hosts: for h2 in hosts: if h1 == h2: continue print "Testing %s and %s after running server" % (h1.name, h2.name) net.iperf( (h1, h2) ) ''' print neib status['h1'] = [2 for i in range(prm.pipes)] #start node print status k = [] for h in hosts: k.append((h, prm)) pool = ThreadPool(50) pool.map(perNodeProc, k) pool.close() pool.join() for h in hosts: h.cmdPrint('kill %iperf')
def main(): parser = argparse.ArgumentParser(usage='%(prog)s [options] SERVER_URL', description=__doc__) parser.add_argument( '-t', '--threads', help='Number of threads (simultaneous connections)', dest='threads', default=1, type=int) parser.add_argument('server', help='URL of server') args = parser.parse_args() server = args.server if not server.startswith('http://'): server = 'http://{}'.format(server) icons = [] for font_id, font in fonts.items(): for char in font['characters']: url = os.path.join(server, 'icon', font_id, '000', char) icons.append((font_id, char, url)) icons.sort() print('{} icons to test on {} ...'.format(len(icons), args.server)) if MAX_ICONS: icons = icons[:MAX_ICONS] pool = Pool(args.threads) pool.map(check_icon, icons) pool.close() pool.join()
def get_proxy(self):
    self._parse_proxy()
    pool = ThreadPool(8)
    pool.map(self._check_proxy, self.proxies)
    pool.close()
    pool.join()
    return self.checked_proxies
def test_upload_chunk__expired_url(): upload_parts = [{'uploadPresignedUrl': 'https://www.fake.url/fake/news', 'partNumber': 420}, {'uploadPresignedUrl': 'https://www.google.com', 'partNumber': 421}, {'uploadPresignedUrl': 'https://rito.pls/', 'partNumber': 422}, {'uploadPresignedUrl': 'https://never.lucky.gg', 'partNumber': 423} ] value_doesnt_matter = None expired = Value(c_bool, False) mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4]) with patch.object(multipart_upload, "_put_chunk", side_effect=SynapseHTTPError("useless message", response=MagicMock(status_code=403))) as mocked_put_chunk, \ patch.object(warnings, "warn") as mocked_warn: def chunk_upload(part): return _upload_chunk(part, completed=value_doesnt_matter, status=value_doesnt_matter, syn=syn, filename=value_doesnt_matter, get_chunk_function=mocked_get_chunk_function, fileSize=value_doesnt_matter, partSize=value_doesnt_matter, t0=value_doesnt_matter, expired=expired, bytes_already_uploaded=value_doesnt_matter) # 2 threads both with urls that have expired mp = Pool(4) mp.map(chunk_upload, upload_parts) assert_true(expired.value) # assert warnings.warn was only called once mocked_warn.assert_called_once_with("The pre-signed upload URL has expired. Restarting upload...\n") # assert _put_chunk was called at least once assert_greater_equal(len(mocked_put_chunk.call_args_list), 1)
def BurstDz(host, path, user, passfile):
    hostuser = host.split('.')
    hostuser = hostuser[len(hostuser) - 2]
    hostdir = [hostuser, hostuser + hostuser, 'admin' + hostuser, hostuser + '123',
               'manage' + hostuser, hostuser + '123456', hostuser + 'admin', '123' + hostuser]
    opts_list = []
    f = open(passfile, 'r')
    password = f.read().split()
    f.close()
    dic = password + hostdir
    pool = ThreadPool(10)
    host1 = host + path
    for mima in dic:
        opts = {
            'host': host1,
            'user': user,
            'password': mima
        }
        opts_list.append(opts)
    # print hostr
    # print result
    pool.map(LoginDisCuz, opts_list)
    pool.close()
    pool.join()
    print 'All PassWord Run Over'
def format_data(dealer_data): start_time = time.time() pool = Pool(1) dealers=[] today = datetime.now() for data in dealer_data: temp = {} temp['id'] = data[0] temp['service_advisor_id'] = data[1] temp['name'] = data[2] temp['phone_number'] = data[3] temp['order'] = data[4] temp['password'] = data[1]+'@123' temp['last_login'] = today temp['is_superuser'] = 0 temp['username'] = data[1] temp['first_name'] = ' ' temp['last_name'] = ' ' temp['email'] = '' temp['is_staff'] = 0 temp['is_active'] = 1 temp['date_joined'] = today dealers.append(temp) pool.map(process_query, dealers) end_time = time.time() print "..........Total TIME TAKEN.........", end_time-start_time
def e_cal(l, cores): global LOOPS ''' e calculator this function will recive digits of float and calculate and print status during working. This function will return value of e. ''' p = Pool() getcontext().prec = l e = Decimal(0) i = 0 temp = 0 c = 0 while True: fact = p.map(math.factorial, range(i, i+cores)) #parallel process factorial e += sum(p.map(one_div, fact)) #processed factorial will total in here i += cores c += 1 LOOPS += 1 sys.stdout.write("\r%i loops passed." % (c) ) #Print Loop status sys.stdout.flush() #print i, "loops passed." if e == temp: break temp = e sys.stdout.write("\r%i loops passed.\n" % (c) ) print i p.close() p.join() return e
def _main():
    proxies = p.get_all_proxies()
    random.shuffle(proxies)
    pool = ThreadPool(2)
    pool.map(download, proxies[:100])
    pool.close()
    pool.join()
def validate_proxies(proxies, test_url=None):
    """ Return tuple. First element is available proxies list.
    Second element is unavailable proxies list

    Arguments:
    - `proxies`:
    """
    availabes, unavailables = [], []

    # for proxy in proxies:
    #     if is_proxy_available(proxy, test_url):
    #         availabes.append(proxy)
    #     else:
    #         unavailables.append(proxy)
    # return (availabes, unavailables)

    def worker(proxy):
        if is_proxy_available(proxy, test_url):
            availabes.append(proxy)
        else:
            unavailables.append(proxy)

    pool = ThreadPool(10)
    pool.map(worker, proxies)
    pool.close()
    pool.join()
    return (availabes, unavailables)
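# Minimal usage sketch for validate_proxies above (not part of the original snippet).
# The proxy addresses and test URL are placeholders; is_proxy_available is assumed
# to be defined elsewhere in the same module.
if __name__ == '__main__':
    candidate_proxies = ['1.2.3.4:8080', '5.6.7.8:3128']
    good, bad = validate_proxies(candidate_proxies, test_url='http://example.com')
    print('available: %s' % good)
    print('unavailable: %s' % bad)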
def thread_patch(self):
    urls = self.get_urls()
    pool = ThreadPool(8)
    pool.map(self.patch_news, [(uri, _id) for uri, _id in urls])
    pool.close()
    pool.join()
def main(): mht_list = get_mht_list() if not mht_list: print u'请确保目录下有mht文件\n' return print u'共有%s个mht文件中的图片需要备份\n'%len(mht_list) print u'请输入你的QQ号码(6-10位纯数字):' qq = raw_input() print u'正在搜索mht文件中待备份图片,请稍后....' get_mht_pic(mht_list) if not mht_pic_md5: print u'mht文件中未包含可备份图片\n' return print u'共找到%s张待备份图片'%len(mht_pic_md5) # QQ图片文件夹 documents_path = os.getenv('USERPROFILE') + os.sep + '\Documents' img_path = documents_path + os.sep + 'Tencent Files/' + qq + '/Image' print u'正在统计QQ聊天记录图片, 请稍后....' pic_list = get_pic_list(img_path) if not pic_list: print u'未找到QQ聊天记录图片文件夹,请确保输入了正确的QQ号码\n' main() pool = ThreadPool(thread_num) print u'正在备份....' pool.map(backup, pic_list) print u'备份完成\n图片保存在当前路径的bak文件夹下\n'
def update_all_posts_thread(self):
    pool = ThreadPool(4)
    posts = list(self.all_posts.values())
    pool.map(self.update_post_detail, posts)
    pool.close()
    pool.join()
    self.save_history()
def test_ip_speed(self): # t0l = time.time() # pooll = Pool(processes=20) # for item in self.ip_items: # pooll.apply(self.ping_one_ip, args=(item.ip,)) # #pooll.map(self.ping_one_ip, self.ip_items) # pooll.close() # pooll.join t0 = time.time() #多线程 pool = ThreadPool(processes=20) pool.map(self.ping_one_ip, range(len(self.ip_items))) pool.close() pool.join() t1 = time.time() #print "Total time running multi: %s seconds" % ( str(t0-t0l)) print "Total time running multi: %s seconds" % ( str(t1-t0)) #单线程 # for index in range(len(self.ip_items)): # self.ping_one_ip(index) # t2 = time.time() # print "Total time running multi: %s seconds" % ( str(t1-t0)) # print "Total time running single: %s seconds" % ( str(t2-t1)) print len(self.ip_items) self.ip_items = [item for item in self.ip_items if item.speed >=0 and item.speed < 1500.0] # 超时1.5s以内 print len(self.ip_items)
def get_web_dict_multithreading(self): if POOL_NUM > 1 and POOL_NUM < 50: pool = Pool(POOL_NUM) logging.info("You are using multiple threads to get info from web:" " [ %d ]\n" % POOL_NUM) else: pool = Pool() if os.path.isfile(LOCAL_JSON_CACHE_FILE): with open(LOCAL_JSON_CACHE_FILE, 'rb') as f: local_web_cache_dict = json.loads(f.read()) species_in_local_json_cache = local_web_cache_dict.keys() logging.info( '[ CACHE ] Get cache from local JSON file:\n |- %s' % '\n |- '.join(species_in_local_json_cache)) else: species_in_local_json_cache = [] local_web_cache_dict = {} species_not_in_cache = list( set(self.non_repeatitive_species_name_list) .difference(set(species_in_local_json_cache))) pool.map(self._single_query, species_not_in_cache) _web_data_cache_dict.update(local_web_cache_dict) with open(LOCAL_JSON_CACHE_FILE, 'wb') as f: json.dump(_web_data_cache_dict, f, indent=4, separators=(',', ': ')) logging.info( '[ CACHE ] Write all cache to local JSON file:\n |- %s' % '\n |- '.join(_web_data_cache_dict.keys())) pool.close() pool.join()
def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-i',help='comma separated input map files from HMP', required=True) parser.add_argument('-s',help='comma separated list of body sites',required=True) parser.add_argument('-j',help='threads',type=int,default=1) args = parser.parse_args() logname = 'import-%s' % strftime("%Y%m%d%H%M") logger = make_log(logname) map_files = args.i.split(',') locs_list = args.s.split(',') mapper = {} map_to_write = [] for filename in map_files: with open(filename, 'rU') as f: r = csv.reader(f,delimiter='\t') header = r.next() for row in r: this_mapper = dict(zip(header,row)) rid = this_mapper['RunID'] bs = this_mapper['HMPBodySubsiteHMPBodySite'] sid = this_mapper['SRS_SampleID'] if bs not in locs_list: continue map_to_write.append(make_map_to_write_row(this_mapper)) if rid not in mapper: mapper[rid] = this_mapper else: logger.warning('found two entries for %s saved just one fastq file but both %s and %s will be added to map' % (rid,sid,mapper[rid]['SRS_SampleID'])) pool = ThreadPool(args.j) holder = [[mapper_item, logger] for nom,mapper_item in mapper.iteritems()] pool.map(dummy,holder) pool.close() pool.join() write_map(map_to_write, logger) logger.info('done')
def spider(self):
    pages = self.__total_pages()
    pool = ThreadPool(16)
    pool.map(self.make_market_data, range(pages))
    pool.close()
    pool.join()
def test_multithread(self): logger = HitLogger(300) def single_thread_process(logger): time.time = mock.Mock(return_value=0) logger.log_hit() time.sleep(1) time.time = mock.Mock(return_value=10) logger.get_hits() logger.log_hit() logger.get_hits() logger.log_hit() time.sleep(1) time.time = mock.Mock(return_value=11) logger.get_hits() time.sleep(1) time.time = mock.Mock(return_value=100) logger.log_hit() time.sleep(1) time.time = mock.Mock(return_value=200) logger.log_hit() logger.get_hits() time.sleep(1) time.time = mock.Mock(return_value=300) logger.log_hit() logger.get_hits() time.sleep(1) time.time = mock.Mock(return_value=310) logger.get_hits() pool = Pool(5) pool.map(single_thread_process, [ logger ] * 5) input('Press any key to exit...')
def start_download(page_url):
    links = get_pattern_group(image_pattern, get_url_content(page_url))
    pool = ThreadPool(3)
    pool.map(download, links)
    pool.close()
    pool.join()
def test_generate_in_progress_resizer_option_true(
    redis_cache, resizetarget_opts, image1_data, image1_name, tmpdir
):
    config = Config(
        root=str(tmpdir),
        url='/',
        redis_host=redis_cache.redis,
        raise_on_generate_in_progress=True
    )
    resizer = flask_resize.make_resizer(config)

    # Save original file
    resizer.storage_backend.save(image1_name, image1_data)

    def run(x):
        return resizer(image1_name)

    pool = Pool(2)
    with pytest.raises(flask_resize.exc.GenerateInProgress):
        pool.map(run, [None] * 2)
def get_reviews(widget, nb_reviews_limit): print "fetching users' reviews..." soup = BeautifulSoup(widget.text, 'lxml') url_basic = soup.body.find('iframe', id = 'the_iframe')['src'] reviews = [] start = 1 while True: nb_pages = int(ceil((nb_reviews_limit - len(reviews)) / 7.0)) urls = [re.sub('min_rating=&', 'min_rating=&page=' + str(i) + '&', url_basic) for i in range(start, nb_pages + start)] start += nb_pages pool_reviews = ThreadPool(4) reviews_url = list(chain.from_iterable(pool_reviews.map(get_reviews_url, urls))) pool_reviews.close() pool_reviews.join() reviews_url = set(reviews_url) pool_reviews = ThreadPool(4) reviews_this = pool_reviews.map(get_review_single, reviews_url) pool_reviews.close() pool_reviews.join() if reviews_this is None: break reviews_this_non_empty = filter(lambda x: x != {}, reviews_this) if len(reviews_this_non_empty) == 0: break reviews.extend(reviews_this_non_empty) if len(reviews) >= nb_reviews_limit: break return reviews
page = 1 urls = [] for i in xrange(5): urls.append(url + str(i + 1)) comments = [] def getsource(url): html = requests.get(url) selector = etree.HTML(html.text) content = selector.xpath( '//cc/div[starts-with(@id,"post_content_")]/text()') for i in content: i = i.replace('\n', '').replace('\t', '').strip() comments.extend(content) print url print len(content) print len(comments) if __name__ == '__main__': # content = getsource(urls[0]) pool.map(getsource, urls) for i in comments: print i ''' //*[@id="post_content_79742208605"] '''
def run(self):
    pool = ThreadPool(4)
    pool.map(self.retrieve_pic, self.process_multiple())
    pool.close()
    pool.join()
def start_clawer(user_id): time.sleep(2) logger.info('开始爬取第%d个用户信息 ' % user_id) user_base_information = get_user_base_information(user_id) follow_fans = get_user_follow_fans(user_id) if user_base_information is not None: if follow_fans is not None: user_base_information.update(follow_fans) logger.debug(user_base_information) print(user_base_information.keys()) put_into_database(user_base_information, user_id) else: logger.warning('用户%d的信息未爬取' % user_id) if __name__ == '__main__': logger.info('爬虫开始抓取用户信息') pool = ThreadPool(processes=4) i = 1 j = 10000 for k in range(37020): pool.map(start_clawer, [t for t in range(i + k * 10000, j + k * 10000 + 1)]) """ for i in range(1,370200000): pool.apply_async(start_clawer, (i,)) pool.close() pool.join() """
def get_current_match_details(s, region, champion_id): matchlist = cass.get_match_history(summoner=s, champions=[champion_id], begin_index=0, end_index=10) len(matchlist) # to fill matchlist try: pool = Pool(10) pool.map(load_match, matchlist) pool.close() pool.join() except: pool.close() return HttpResponse(status=500) response = {} q = {} leagues = cass.get_league_positions(summoner=s, region=region) for league in leagues: q[league.queue.value] = { 'tier': league.tier.value, 'division': league.division.value, 'points': league.league_points } if league.promos is not None: q[league.queue.value]['promos'] = league.promos.progress q[league.queue.value]['notPlayed'] = league.promos.not_played # summoner stats for past 20 matches on a champion stats = { "kills": 0, "deaths": 0, "assists": 0, "totalCs": 0, "cs10": 0, "cs20": 0, "cs30": 0, "gold10": 0, "gold20": 0, "gold30": 0, "wins": 0, "losses": 0 } match_history = [] games10 = 0 games20 = 0 games30 = 0 cs10 = 0 cs20 = 0 cs30 = 0 gold10 = 0 gold20 = 0 gold30 = 0 match_count = 0 for match in matchlist: if (match.region.value == region): match_count += 1 participants = match.participants for participant in participants: if participant.summoner.id == s.id: user = participant break stats["kills"] += user.stats.kills stats["deaths"] += user.stats.deaths stats["assists"] += user.stats.assists stats["totalCs"] += user.stats.total_minions_killed if user.stats.win: stats["wins"] += 1 match_history.append(1) else: stats["losses"] += 1 match_history.append(0) dur = match.duration.seconds try: if dur > 10 * 60: gold10 += user.timeline.gold_per_min_deltas['0-10'] * 10 cs10 += user.timeline.creeps_per_min_deltas['0-10'] * 10 games10 += 1 if dur > 20 * 60: gold20 += (user.timeline.gold_per_min_deltas['0-10'] + user.timeline.gold_per_min_deltas['10-20']) * 10 cs20 += (user.timeline.creeps_per_min_deltas['0-10'] + user.timeline.creeps_per_min_deltas['10-20']) * 10 games20 += 1 if dur > 30 * 60: gold30 += (user.timeline.gold_per_min_deltas['0-10'] + user.timeline.gold_per_min_deltas['10-20'] + user.timeline.gold_per_min_deltas['20-30']) * 10 cs30 += (user.timeline.creeps_per_min_deltas['0-10'] + user.timeline.creeps_per_min_deltas['10-20'] + user.timeline.creeps_per_min_deltas['20-30']) * 10 games30 += 1 except: log.warn('user timeline data does not exist', match.id) stats["kills"] /= 10 stats["deaths"] /= 10 stats["assists"] /= 10 stats["totalCs"] /= 10 try: stats["cs10"] = round(cs10 / games10, 2) stats["cs20"] = round(cs20 / games20, 2) stats["cs30"] = round(cs30 / games30, 2) stats["gold10"] = round(gold10 / games10, 2) stats["gold20"] = round(gold20 / games20, 2) stats["gold30"] = round(gold30 / games30, 2) except: # divide by 0 pass build = {} boots = {} core = {} situational = {} all_items = {} # get recommended build if match_count > 0: try: champ_items = ChampionItems.objects.get(champ_id=user.champion.id) items_blob = ujson.loads(champ_items.item_blob) blob_items = items_blob.items() for item, occurence in blob_items: if int(item) in Items.boots: boots[item] = occurence elif int(item) in Items.full_items: all_items[item] = occurence sorted_all = sorted(all_items, key=all_items.get, reverse=True) core_arr = sorted_all[:3] situational_arr = sorted_all[3:8] for item in core_arr: core[item] = all_items[item] for item in situational_arr: situational[item] = all_items[item] except: pass build['boots'] = boots build['core'] = core build['situational'] = situational response['stats'] = stats response['build'] = build response['leagues'] = q return response
url = 'https://www.pearvideo.com/category_5' page_text = requests.get(url=url).text tree = etree.HTML(page_text) li_list = tree.xpath('//ul[@id="listvideoListUl"]/li') url_list = [] for i in li_list: detail_url = "https://www.pearvideo.com/" + i.xpath('./div/a/@href')[0] name = i.xpath('./div/a/div[2]/text()')[0] + '.mp4' detail_page = requests.get(url=detail_url).text ex = 'srcUrl="(.*?)",vdoUrl' video_url = re.findall(ex, detail_page)[0] dic = {'name': name, 'url': video_url} url_list.append(dic) def get_video_data(d): url = d['url'] data = requests.get(url=url).content print(d['name'], "正在下载。。。") with open(d['name'], 'wb') as f: f.write(data) print(d['name'], "下载成功。。。") pool = Pool(4) pool.map(get_video_data, url_list) pool.close() pool.join()
def ping(self, targets=list(), filename=str(), status=str()): """ Attempt to ping a list of hosts or networks (can be a single host) :param targets: List - Name(s) or IP(s) of the host(s). :param filename: String - name of the file containing hosts to ping :param status: String - if one of ['alive', 'dead', 'noip'] then only return results that have that status. If this is not specified, then all results will be returned. :return: Type and results depends on whether status is specified: if status == '': return dict: {targets: results} if status != '': return list: targets if targets == status """ if targets and filename: raise SyntaxError("You must specify only one of either targets=[] " "or filename=''.") elif not targets and not filename: raise SyntaxError("You must specify either a list of targets or " "filename='', but not both.") elif filename: targets = self.read_file(filename) my_targets = {'hosts': [], 'nets': []} addresses = [] # Check for valid networks and add hosts and nets to my_targets for target in targets: # Targets may include networks in the format "network mask", or, # a file could contain multiple hosts or IP's on a single line. if len(target.split()) > 1: target_items = target.split() for item in target_items: try: ip = IPAddress(item) # If it is an IPv4 address or mask put in in addresses if ip.version == 4: addresses.append(str(ip)) except AddrFormatError: # IP Address not detected, so assume it's a host name my_targets['hosts'].append(item) except ValueError: # CIDR network detected net = IPNetwork(item) # Make sure it is a CIDR address acceptable to fping if net.ip.is_unicast() and net.version == 4 and \ net.netmask.netmask_bits() in range(8, 31): my_targets['nets'].append(target_items[0]) else: msg = str(str(net) + ':Only IPv4 unicast addresses' ' with bit masks\n ' ' from 8 to 30 are supported.') raise AttributeError(msg) # Iterate over the IP strings in addresses while len(addresses) > 1: ip = IPAddress(addresses[0]) mask = IPAddress(addresses[1]) # Test to see if IP is unicast, and mask is an actual mask if ip.is_unicast() and mask.is_netmask(): net = IPNetwork(str(ip) + '/' + str( mask.netmask_bits())) # Convert ip and mask to CIDR and remove from addresses my_targets['nets'].append(str(net.cidr)) addresses.pop(0) addresses.pop(0) elif ip.is_unicast() and not ip.is_netmask(): # mask was not a mask so only remove IP and start over my_targets['hosts'].append(str(ip)) addresses.pop(0) # There could be one more item in addresses, so check it if addresses: ip = IPAddress(addresses[0]) if ip.is_unicast() and not ip.is_netmask(): my_targets['hosts'].append(addresses[0]) addresses.pop() # target has only one item, so check it else: try: ip = IPAddress(target) if ip.version == 4 and ip.is_unicast() and \ not ip.is_netmask(): my_targets['hosts'].append(target) else: msg = str(target + 'Only IPv4 unicast addresses are ' 'supported.') raise AttributeError(msg) except AddrFormatError: # IP Address not detected, so assume it's a host name my_targets['hosts'].append(target) except ValueError: # CIDR network detected net = IPNetwork(target) if net.ip.is_unicast() and net.version == 4 and \ net.netmask.netmask_bits() in range(8, 31): my_targets['nets'].append(target) else: msg = str(str(net) + ':Only IPv4 unicast addresses' ' with bit masks\n ' ' from 8 to 30 are supported.') raise AttributeError(msg) """ Build the list of commands to run. 
""" commands = [] if len(my_targets['hosts']) != 0: for target in range(len(my_targets['hosts'])): commands.append([self.fping, '-nV', my_targets['hosts'][ target]]) if len(my_targets['nets']) != 0: for target in range(len(my_targets['nets'])): commands.append([self.fping, '-ngV', my_targets['nets'][ target]]) """ Start pinging each item in my_targets and return the requested results when done. """ pool = ThreadPool(self.num_pools) raw_results = pool.map(self.get_results, commands) pool.close() pool.join() self.results = {host: result for host, result in csv.reader( ''.join(raw_results).splitlines())} if not status: return self.results elif status == 'alive': return self.alive elif status == 'dead': return self.dead elif status == 'noip': return self.noip else: raise SyntaxError("Valid status options are 'alive', 'dead' or " "'noip'")
# from multiprocessing import Pool
# import multiprocessing as mp
from multiprocessing.dummy import Pool  # thread-based "dummy" pool
import time
import os


def longTimeTask(i):
    print('task: {}, PID: {}'.format(i, os.getpid()))
    time.sleep(2)
    result = 10**30
    return result


if __name__ == '__main__':
    start_time = time.time()
    print('parent process PID', os.getpid())  # observe the PID
    p = Pool(4)
    # data is a list collecting the return value of each call
    data = p.map(longTimeTask, iterable=[2, 4, 6, 8])  # iterable=range(4) or another list also works
    p.close()
    p.join()
    print(data)
    end_time = time.time()
    print('took {} seconds'.format(end_time - start_time))
########## ####### ####### ######## ### ###
''', 'red')

print banner

try:
    target = [i.strip() for i in open(sys.argv[1], mode='r').readlines()]
except IndexError:
    exit('python3 script.py urilist.txt 50')

usragt = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}


def check(uri):
    try:
        if not uri.endswith('/'):
            uri = uri + '/'
        if not uri.startswith('http'):
            uri = 'http://' + uri
        r = requests.get(uri, headers=usragt, timeout=10)  # was `url`, an undefined name
        if r.status_code == 200:
            print (uri + ' OK\n')
            with open('200.txt', mode='a') as d:
                d.write(uri + '/\n')
    except:
        pass


mp = Pool(int(sys.argv[2]))  # the thread count must be an int, not a string
mp.map(check, target)
mp.close()
mp.join()
def trigger_tasks(tasks, thread_count):
    pool = ThreadPool(int(thread_count))
    pool.map(Task.run_tests, tasks)
    pool.close()
    pool.join()
self.urls.append(make_url(self.domain, url)) urls = [] def test(url): print("URLS : ", len(urls)) ax = Spider(url) urls.extend(ax.urls) return url if __name__ == '__main__': content = "" url = input('Enter the url : ') urls.append(url) spider = Spider(url) print("Will visit :" + str(len(spider.urls)) + " urls") pool = ThreadPool(10) results = pool.map(test, spider.urls) """for i in spider.urls: print("URLS : ", len(urls)) ax = Spider(i) urls.extend(ax.urls) print(len(urls))""" #spider.find_urls(content) #print(content)
def initiate_threads():
    # Presumably nested inside a method, since it closes over `self`.
    _pool = Pool(5)
    _pool.map(traverse_directory, self.valid_directories)
    _pool.close()
    _pool.join()
remote_server_ip = socket.gethostbyname(remote_server)
ports = []

print('Please wait, scanning remote host ', remote_server_ip)
socket.setdefaulttimeout(5)


def scan_port(port):
    try:
        s = socket.socket(2, 1)  # AF_INET, SOCK_STREAM
        res = s.connect_ex((remote_server_ip, port))
        if res == 0:
            print('Port {}: OPEN'.format(port))
        s.close()
    except Exception as e:
        print(str(e))  # `e.message` is not available in Python 3


# Ports 1-65535 (the original range(1, 65535) stopped at 65534).
for i in range(1, 65536):
    ports.append(i)

t1 = datetime.now()

pool = ThreadPool(32)
results = pool.map(scan_port, ports)
pool.close()
pool.join()

print('Multiprocess Scanning Completed in ', datetime.now() - t1)
def paths_to_bids(path_to_dataset, path_to_csv, bids_dir, modality): """ This method converts all the T1 images found in the AIBL dataset downloaded in BIDS :param path_to_dataset: path_to_dataset :param path_to_csv: path to the csv file containing clinical data :param bids_dir: path to save the AIBL-T1-dataset converted in a BIDS format :param modality: string 't1', 'av45', 'flute' or 'pib' :return: list of all the images that are potentially converted in a BIDS format and saved in the bids_dir. This does not guarantee existence """ from os.path import join, exists from numpy import nan import pandas as pds from clinica.utils.stream import cprint from multiprocessing.dummy import Pool from multiprocessing import cpu_count, Value import glob if modality.lower() not in ['t1', 'av45', 'flute', 'pib']: # This should never be reached raise RuntimeError(modality.lower() + ' is not supported for conversion') counter = None def init(args): """ store the counter for later use """ global counter counter = args def create_file(image): global counter subject = image.Subjects_ID session = image.Session_ID name_of_path = { 't1': 'Path_to_T1', 'av45': 'Path_to_pet', 'flute': 'Path_to_pet', 'pib': 'Path_to_pet' } # depending on the dataframe, there is different way of accessing # the iage object image_path = image[name_of_path[modality]] with counter.get_lock(): counter.value += 1 if image_path is nan: cprint('No path specified for ' + subject + ' in session ' + session) return nan cprint('[' + modality.upper() + '] Processing subject ' + str(subject) + ' - session ' + session + ', ' + str(counter.value) + ' / ' + str(total)) session = viscode_to_session(session) # creation of the path if modality == 't1': output_path = join(bids_dir, 'sub-AIBL' + subject, 'ses-' + session, 'anat') output_filename = 'sub-AIBL' + subject + '_ses-' + session + '_T1w' elif modality in ['flute', 'pib', 'av45']: output_path = join(bids_dir, 'sub-AIBL' + subject, 'ses-' + session, 'pet') output_filename = 'sub-AIBL' + subject + '_ses-' + session \ + '_task-rest_acq-' + modality + '_pet' # image is saved following BIDS specifications if exists(join(output_path, output_filename + '.nii.gz')): cprint('Subject ' + str(subject) + ' - session ' + session + ' already processed.') output_image = join(output_path, output_filename + '.nii.gz') else: output_image = dicom_to_nii(subject, output_path, output_filename, image_path) return output_image # it reads the dataframe where subject_ID, session_ID and path are saved if modality == 't1': images = find_path_to_T1(path_to_dataset, path_to_csv) else: path_to_csv_pet_modality = glob.glob( join(path_to_csv, 'aibl_' + modality + 'meta_*.csv'))[0] if not exists(path_to_csv_pet_modality): raise FileNotFoundError(path_to_csv_pet_modality + ' file not found in clinical data folder') # Latest version of Flutemetamol CSV file (aibl_flutemeta_01-Jun-2018.csv) # has an extra column for some rows. However, each CSV file (regarding PET tracers) # contains the same columns. The usecols fixes this issue. 
df_pet = pds.read_csv(path_to_csv_pet_modality, sep=',|;', usecols=list(range(0, 36))) images = find_path_to_pet_modality(path_to_dataset, df_pet) images.to_csv(join(bids_dir, modality + '_paths_aibl.tsv'), index=False, sep='\t', encoding='utf-8') counter = Value('i', 0) total = images.shape[0] # Reshape inputs to give it as a list to the workers images_list = [] for i in range(total): images_list.append(images.iloc[i]) # intializer are used with the counter variable to keep track of how many # files have been processed poolrunner = Pool(cpu_count(), initializer=init, initargs=(counter, )) output_file_treated = poolrunner.map(create_file, images_list) del counter return output_file_treated
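# Minimal usage sketch for paths_to_bids above (not part of the original snippet);
# all directory paths are placeholders.
if __name__ == '__main__':
    converted = paths_to_bids('/data/AIBL',           # raw AIBL dataset
                              '/data/AIBL/clinical',  # folder containing the clinical CSV files
                              '/data/AIBL_BIDS',      # output BIDS directory
                              't1')
    print('%d images processed' % len(converted))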
def main(): expected_arg = "[A valid PAGE_URL or IMAGE_URL]" num_args = len(sys.argv) if num_args < 2 or num_args > 2: print("\n* INVALID RUN COMMAND! * Usage:") print("classify %s\n" % expected_arg) elif num_args == 2: url = sys.argv[1] valid_url = web_core.is_valid_url(url) if not valid_url: file_path = url if isfile(file_path): best_guess = image_base.classify_local_image(file_path) elif isdir(file_path): best_guess = image_base.classify_folder_images( file_path, return_dict=True) else: raise Exception("Error: %s is not a valid image path!" % url) print("\n*** Best match classification: ***") print(best_guess) print("") return content_type = web_core.get_content_type(url) if content_type == 'other': raise Exception( "Error: %s does not evaluate to %s" % (url, expected_arg)) elif content_type == 'image': best_guess = image_base.classify_image_url(url) print("\n*** Best match classification: ***") print(best_guess) print("") elif content_type == 'html': global images_classified image_list = image_base.get_all_images_on_page(url) if 'linux2' not in sys.platform and settings.MAX_THREADS > 1: # Multi-threading the work when not using Docker Linux pool = ThreadPool(settings.MAX_THREADS) pool.map(download_and_classify_image, image_list) pool.close() pool.join() else: # Single-threading the image classification work min_w_h = settings.MIN_W_H # Minimum size for classification for image in image_list: web_core.save_file_as(image, "temp_image.png") image_base.convert_image_file_to_jpg( "downloads_folder/temp_image.png") width, height = image_base.get_image_file_dimensions( "downloads_folder/temp_image.jpg") if width >= min_w_h and height >= min_w_h: best_guess = classify_image.external_run( "downloads_folder/temp_image.jpg") if images_classified == 0: print("\n*** " "Best match classifications for page images:" " ***") images_classified += 1 print(best_guess) if images_classified >= settings.MAX_IMAGES_PER_PAGE: break if images_classified >= settings.MAX_IMAGES_PER_PAGE: print("\n(NOTE: Exceeded page classification limit " "of %d images per URL! Stopping early.)" % ( settings.MAX_IMAGES_PER_PAGE)) if images_classified == 0: print("\nCould not find images to classify on the page! " "(Min size = %dx%d pixels)" % ( settings.MIN_W_H, settings.MIN_W_H)) print("") else: raise Exception( "Unexpected content type %s. Fix the code!" % content_type)
valid_pred[:, c] = np.round(bag_pred_f) if not os.path.isfile('dir/tmp_%d_%d.tar.gz' % (c, c0)): os.system('tar -zcf dir/tmp_%d_%d.tar.gz dir/tmp_%d_%d' % (c, c0, c, c0)) os.system('rm -r dir/tmp_%d_%d' % (c, c0)) pool = ThreadPool(nthreads) for c0, svmC in enumerate(clist): runC = [c for c in range(C) if np.sum(trlbl[:, c]) > 0] valid_pred = np.zeros(vlbl.shape) # write the mfiles #for c in range(C): pool.map(train_model_class, runC) vccr[c0] = np.mean((vlbl == valid_pred)**2) print('>>>>', c0, svmC, vccr[c0]) c0 = np.argmax(vccr) bestC = clist[c0] # make prediction on the test set '''test_pred = np.zeros(tlbl.shape) temp_sent = np.zeros(gt_sent2.shape) test_sent_pred = np.zeros(gt_sent.shape) svmC = bestC #for c in range(C): def test_model_class(c): global test_pred global test_sent_pred
import os  # needed for os.path / os.system below; missing in the original
import sys
import argparse
from threading import Thread
from multiprocessing.dummy import Pool as ThreadPool

topdir = os.path.dirname(sys.argv[0]) + '/../../'
topdir = os.path.abspath(topdir)
sys.path.insert(0, topdir)

import iota.harness.infra.utils.parser as parser

cmdargs = argparse.ArgumentParser(description='Cleanup Script')
cmdargs.add_argument('--testbed', dest='testbed_json', default="/warmd.json",
                     help='Testbed JSON file')
GlobalOptions = cmdargs.parse_args()

SSHCMD = "sshpass -p docker ssh -o StrictHostKeyChecking=no"
SCPCMD = "sshpass -p docker scp -o StrictHostKeyChecking=no"


def cleanup_node(node_ip):
    print("Cleaning up %s" % node_ip)
    os.system("%s %s/iota/scripts/cleanup_node.sh root@%s:/tmp/" % (SCPCMD, topdir, node_ip))
    os.system("%s root@%s source /tmp/cleanup_node.sh 2>&1" % (SSHCMD, node_ip))
    return


tbspec = parser.JsonParse(GlobalOptions.testbed_json)
pool = ThreadPool(len(tbspec.Instances))
mgmt_ips = []
for instance in tbspec.Instances:
    mgmt_ips.append(instance.NodeMgmtIP)
results = pool.map(cleanup_node, mgmt_ips)
ip2 = ch[1] taz = str(ip1) + '.' + str(ip2) + '.' i = 0 while i <= 254: i += 1 o = 0 while o <= 255: o += 1 jembot = str(taz) + str(i) + '.' + str(o) jembots = GREEN + str(taz) + CYAN + str(i) + '.' + str(o) + WHITE ngecek = requests.get("https://sonar.omnisint.io/reverse/"+jembot+"", headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'}, verify=False, timeout=20) if 'null' in ngecek.text or '{"error":' in ngecek.text: print WHITE + "Ranging ==> " + jembots + " => NO" else: result = json.loads(ngecek.text) total = len(result) print WHITE + "Ranging ==> " + jembots + " => " + str(total) + " DOMAINS" for domain in result: open('Resultz.txt', 'a').write(domain + "\n") except (requests.exceptions.RequestException, ValueError) as e: print WHITE + "Ranging ==> " + jembots + str(e) site = open(raw_input(Fore.WHITE+'List:~# '),'r').read().replace(' ', '').splitlines() Thread = raw_input('Thread :~# ') pool = ThreadPool(int(Thread)) pool.map(scan, site) pool.close() pool.join()
#!/usr/bin/env python2
# -*- encoding: utf-8 -*-
# hackhttp: https://github.com/BugScanTeam/hackhttp/
# Author: afei

import hackhttp
from multiprocessing.dummy import Pool as ThreadPool


def upload(value):
    h = hackhttp.hackhttp()
    code, head, html, redirect_url, log = h.http(
        "http://afei123.com:8020/upload/x.php")
    print code


pool = ThreadPool(50)
pool.map(upload, range(5000000))
pool.close()
pool.join()
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from scrapers2 import *

scrapers = [DraftKingsScraper(), FTDScraper(), FanduelScraper()]

pool = ThreadPool(3)
pool.map(lambda s: s.scrape(), scrapers)
pool.close()
pool.join()

for s in scrapers:
    for c in s.contests[:5]:
        print c
print(name) #对详情页的url发起请求 detail_page_text = requests.get(url=detail_url, headers=headers).text #从详情页中解析出视频的地址(url) ex = 'srcUrl="(.*?)",vdoUrl' video_url = re.findall(ex, detail_page_text)[0] print(video_url) dic = {'name': name, 'url': video_url} urls.append(dic) print(urls) #对视频链接发起请求获取视频的二进制数据,然后将视频数据进行返回 def get_video_data(dic): url = dic['url'] print(dic['name'], '正在下载......') data = requests.get(url=url, headers=headers).content #持久化存储操作 with open(dic['name'], 'wb') as fp: fp.write(data) print(dic['name'], '下载成功!') #使用线程池对视频数据进行请求(较为耗时的阻塞操作) pool = Pool(4) pool.map(get_video_data, urls) pool.close() pool.join()
# Load audio file with sr.AudioFile(name) as source: print("Audio Loaded") audio = r.record(source) # Transcribe audio file print("Reaching transcribing") text = r.recognize_google_cloud( audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS, language="hi-IN") print(name + " done") # sampleout=sampleout+" "+text return {"idx": idx, "text": text} all_text = pool.map(transcribe, enumerate(files)) pool.close() pool.join() transcript = "" print(transcript + " is empty") # print(all_text+" is all text") for t in sorted(all_text, key=lambda x: x['idx']): #sampleout=sampleout+t['text'] #total_seconds = t['idx'] * 30 # Cool shortcut from: # https://stackoverflow.com/questions/775049/python-time-seconds-to-hms # to get hours, minutes and seconds #m, s = divmod(total_seconds, 60) #h, m = divmod(m, 60)
def run_map_on_cluster(self) -> List:
    pool = Pool(mp.cpu_count() * 3)
    map_result = pool.map(self.trigger_map, self.slaves)
    return map_result
TP = (assignment_matrix >= IoU_thresh).sum() FP = (assignment_matrix < IoU_thresh).sum() FN = len(trains.keys()) - TP #stats.append([TP, FP, FN]) return [TP, FP, FN] # MAIN PART # compute the IoUs for dir_name in glob.glob(test_dir+'*'): print dir_name # threading things stats = [] list_of_files = glob.glob(dir_name+'/'+'*.npy') pool = ThreadPool(len(list_of_files)) stats = pool.map(get_results, list_of_files) global_stats.append(stats) #global_IoU_vals.append(assignment_matrix) # assemble stats stats = np.asarray(stats) print stats.shape avg_TP = float(stats[:,0].sum()) / float(stats.shape[0]) avg_FP = float(stats[:,1].sum()) / float(stats.shape[0]) avg_FN = float(stats[:,2].sum()) / float(stats.shape[0]) # compute P, R precision = float(avg_TP) / float(avg_TP + avg_FP) recall = float(avg_TP) / float(avg_TP + avg_FN)
def calculateParallel(lines, threads=2):
    pool = ThreadPool(threads)
    rslt = pool.map(run_one, lines)
    pool.close()
    pool.join()
    return rslt
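# Minimal usage sketch for calculateParallel above (not part of the original snippet);
# `run_one` is assumed to be a one-argument function defined elsewhere in this module,
# and the sample inputs are placeholders.
if __name__ == '__main__':
    sample_lines = ['first input', 'second input', 'third input']
    results = calculateParallel(sample_lines, threads=4)
    for line, result in zip(sample_lines, results):
        print('{!r} -> {!r}'.format(line, result))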
def _selection(self, samples_x, samples_y_aggregation, samples_y, x_bounds, x_types, max_resampling_per_x=3, threshold_samplessize_exploitation=12, threshold_samplessize_resampling=50, no_candidates=False, minimize_starting_points=None, minimize_constraints_fun=None): next_candidate = None candidates = [] samples_size_all = sum([len(i) for i in samples_y]) samples_size_unique = len(samples_y) # ===== STEP 1: Compute the current optimum ===== gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation) lm_current = gp_selection.selection( "lm", samples_y_aggregation, x_bounds, x_types, gp_model['model'], minimize_starting_points, minimize_constraints_fun=minimize_constraints_fun) if not lm_current: return None if no_candidates is False: candidates.append({'hyperparameter': lm_current['hyperparameter'], 'expected_mu': lm_current['expected_mu'], 'expected_sigma': lm_current['expected_sigma'], 'reason': "exploitation_gp"}) # ===== STEP 2: Get recommended configurations for exploration ===== results_exploration = gp_selection.selection( "lc", samples_y_aggregation, x_bounds, x_types, gp_model['model'], minimize_starting_points, minimize_constraints_fun=minimize_constraints_fun) if results_exploration is not None: if _num_past_samples(results_exploration['hyperparameter'], samples_x, samples_y) == 0: candidates.append({'hyperparameter': results_exploration['hyperparameter'], 'expected_mu': results_exploration['expected_mu'], 'expected_sigma': results_exploration['expected_sigma'], 'reason': "exploration"}) logger.info("DEBUG: 1 exploration candidate selected\n") else: logger.info("DEBUG: No suitable exploration candidates were") # ===== STEP 3: Get recommended configurations for exploitation ===== if samples_size_all >= threshold_samplessize_exploitation: print("Getting candidates for exploitation...\n") try: gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation) results_exploitation = gmm_selection.selection( x_bounds, x_types, gmm['clusteringmodel_good'], gmm['clusteringmodel_bad'], minimize_starting_points, minimize_constraints_fun=minimize_constraints_fun) if results_exploitation is not None: if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0: candidates.append({'hyperparameter': results_exploitation['hyperparameter'],\ 'expected_mu': results_exploitation['expected_mu'],\ 'expected_sigma': results_exploitation['expected_sigma'],\ 'reason': "exploitation_gmm"}) logger.info("DEBUG: 1 exploitation_gmm candidate selected\n") else: logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n") except ValueError as exception: # The exception: ValueError: Fitting the mixture model failed # because some components have ill-defined empirical covariance # (for instance caused by singleton or collapsed samples). # Try to decrease the number of components, or increase reg_covar. 
logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.") logger.info(exception) # ===== STEP 4: Get a list of outliers ===== if (threshold_samplessize_resampling is not None) and \ (samples_size_unique >= threshold_samplessize_resampling): logger.info("Getting candidates for re-sampling...\n") results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation) if results_outliers is not None: for results_outlier in results_outliers: if _num_past_samples(samples_x[results_outlier['samples_idx']], samples_x, samples_y) < max_resampling_per_x: candidates.append({'hyperparameter': samples_x[results_outlier['samples_idx']],\ 'expected_mu': results_outlier['expected_mu'],\ 'expected_sigma': results_outlier['expected_sigma'],\ 'reason': "resampling"}) logger.info("DEBUG: %d re-sampling candidates selected\n") else: logger.info("DEBUG: No suitable resampling candidates were found\n") if candidates: # ===== STEP 5: Compute the information gain of each candidate towards the optimum ===== logger.info("Evaluating information gain of %d candidates...\n") next_improvement = 0 threads_inputs = [[ candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points ] for candidate in candidates] threads_pool = ThreadPool(4) # Evaluate what would happen if we actually sample each candidate threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs) threads_pool.close() threads_pool.join() for threads_result in threads_results: if threads_result['expected_lowest_mu'] < lm_current['expected_mu']: # Information gain temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu'] if next_improvement > temp_improvement: next_improvement = temp_improvement next_candidate = threads_result['candidate'] else: # ===== STEP 6: If we have no candidates, randomly pick one ===== logger.info( "DEBUG: No candidates from exploration, exploitation,\ and resampling. We will random a candidate for next_candidate\n" ) next_candidate = _rand_with_constraints(x_bounds, x_types) \ if minimize_starting_points is None else minimize_starting_points[0] next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types) expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model']) next_candidate = {'hyperparameter': next_candidate, 'reason': "random", 'expected_mu': expected_mu, 'expected_sigma': expected_sigma} # ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step ===== outputs = self._pack_output(lm_current['hyperparameter']) ap = random.uniform(0, 1) if outputs in self.history_parameters or ap<=self.exploration_probability: if next_candidate is not None: outputs = self._pack_output(next_candidate['hyperparameter']) else: random_parameter = _rand_init(x_bounds, x_types, 1)[0] outputs = self._pack_output(random_parameter) self.history_parameters.append(outputs) return outputs
def _send_bulk(emails, uses_multiprocessing=True, log_level=None): # Multiprocessing does not play well with database connection # Fix: Close connections on forking process # https://groups.google.com/forum/#!topic/django-users/eCAIY9DAfG0 if uses_multiprocessing: db_connection.close() if log_level is None: log_level = get_log_level() sent_emails = [] failed_emails = [] # This is a list of two tuples (email, exception) email_count = len(emails) logger.info('Process started, sending %s emails' % email_count) def send(email): try: email.dispatch(log_level=log_level, commit=False, disconnect_after_delivery=False) sent_emails.append(email) logger.debug('Successfully sent email #%d' % email.id) except Exception as e: logger.debug('Failed to send email #%d' % email.id) failed_emails.append((email, e)) # Prepare emails before we send these to threads for sending # So we don't need to access the DB from within threads for email in emails: # Sometimes this can fail, for example when trying to render # email from a faulty Django template try: email.prepare_email_message() except Exception as e: failed_emails.append((email, e)) number_of_threads = min(get_threads_per_process(), email_count) pool = ThreadPool(number_of_threads) pool.map(send, emails) pool.close() pool.join() connections.close() # Update statuses of sent emails email_ids = [email.id for email in sent_emails] Email.objects.filter(id__in=email_ids).update(status=STATUS.sent) # Update statuses and conditionally requeue failed emails num_failed, num_requeued = 0, 0 max_retries = get_max_retries() scheduled_time = timezone.now() + get_retry_timedelta() emails_failed = [email for email, _ in failed_emails] for email in emails_failed: if email.number_of_retries is None: email.number_of_retries = 0 if email.number_of_retries < max_retries: email.number_of_retries += 1 email.status = STATUS.requeued email.scheduled_time = scheduled_time num_requeued += 1 else: email.status = STATUS.failed num_failed += 1 Email.objects.bulk_update(emails_failed, ['status', 'scheduled_time', 'number_of_retries']) # If log level is 0, log nothing, 1 logs only sending failures # and 2 means log both successes and failures if log_level >= 1: logs = [] for (email, exception) in failed_emails: logs.append( Log(email=email, status=STATUS.failed, message=str(exception), exception_type=type(exception).__name__) ) if logs: Log.objects.bulk_create(logs) if log_level == 2: logs = [] for email in sent_emails: logs.append(Log(email=email, status=STATUS.sent)) if logs: Log.objects.bulk_create(logs) logger.info( 'Process finished, %s attempted, %s sent, %s failed, %s requeued', email_count, len(sent_emails), num_failed, num_requeued, ) return len(sent_emails), num_failed, num_requeued
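# Hedged usage sketch for _send_bulk above (not part of the original module): it is
# typically handed a batch of queued Email objects. `Email`, `STATUS` and `logger`
# are the same names already used above; the batch size is illustrative.
def _send_queued_batch(batch_size=500):
    queued = list(Email.objects.filter(status=STATUS.queued)[:batch_size])
    sent, failed, requeued = _send_bulk(queued, uses_multiprocessing=False)
    logger.info('sent=%s, failed=%s, requeued=%s', sent, failed, requeued)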
def multi_thread():
    from multiprocessing.dummy import Pool
    pool = Pool(3)
    pool.map(hello, persons)
    pool.close()
    pool.join()
def extract_features(extract_fn, image_path, ofile, params={}): cwd = os.getcwd() image_path=utils.absolute_path(image_path) # Read filenames files=[] if os.path.isdir(image_path): for dirpath,_,filenames in os.walk(image_path): for f in filenames: path=os.path.abspath(os.path.join(dirpath, f)) if not utils.is_valid_image(path): print("Warning, please provide a valid image: ", f) else: files.append(path) else: files=[image_path] files.sort(key=utils.natural_sort_key) output_file=utils.absolute_path(ofile) if os.path.isdir(output_file): print("The provided file is a directory:", output_file) sys.exit(0) if os.path.exists(output_file): print("Output file already exists! cotinue ...") with open(output_file+".label", 'r') as f: labels = [os.path.join(image_path, x) for x in f.read().splitlines()] pending_files = [x for x in files if x not in labels] files = pending_files print("Pending files:", len(files)) def extract_and_save(path): try: X = extract_fn(path, **params) except Exception as e: print("Cannot extract feactures from", path) print(str(e)) return X = X.reshape((1, X.shape[0])) lock.acquire() with open(output_file, 'a+') as f_handle: with open(output_file+".label", 'a+') as f_handle_label: numpy.savetxt(f_handle, X) f_handle_label.write(os.path.basename(path)+"\n") lock.release() pool = ThreadPool(cpu_count()) results = pool.map(extract_and_save, files) pool.close() pool.terminate() pool.join() """ for path in files: X = feaext.SRM_extract(path, **params) print X.shape X = X.reshape((1, X.shape[0])) with open(sys.argv[3], 'a+') as f_handle: numpy.savetxt(f_handle, X) """ os.chdir(cwd)
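# Hedged usage sketch for extract_features above (not part of the original module).
# It assumes `feaext` (providing SRM_extract, as referenced in the commented-out block
# above) and the module-level `lock` used inside extract_and_save exist elsewhere;
# the paths are placeholders.
if __name__ == '__main__':
    extract_features(feaext.SRM_extract, 'sample_images/', 'features_srm.txt', params={})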
RuntimeContext.register_slot('operation_id', '<empty>')
_console_lock = threading.Lock()


def println(msg):
    with _console_lock:
        print(msg)


def work(name):
    println('Entering worker[{}]: {}'.format(name, RuntimeContext))
    RuntimeContext.operation_id = name
    time.sleep(0.01)
    println('Exiting worker[{}]: {}'.format(name, RuntimeContext))


if __name__ == "__main__":
    println('Main thread: {}'.format(RuntimeContext))
    RuntimeContext.operation_id = 'main'
    pool = ThreadPool(2)  # create a thread pool with 2 threads
    pool.map(RuntimeContext.with_current_context(work), [
        'bear',
        'cat',
        'dog',
        'horse',
        'rabbit',
    ])
    pool.close()
    pool.join()
    println('Main thread: {}'.format(RuntimeContext))
async def periodic(sleep_for): # NOQA [C901] while True: await asyncio.sleep(sleep_for) """ Проверка доступности сайтов """ urls = Urls() urls = urls.get_all_urls() pool = ThreadPool(len(urls)) old_urls_443 = pool.map(getcode200.check_443, urls) old_urls_80 = pool.map(getcode200.check_80, urls) pool.close() pool.join() new_urls_80 = [] new_urls_443 = [] for url_80, url_443, url in zip(old_urls_80, old_urls_443, urls): if 'Error' in url_443 or 'False' in url_443: new_urls_443.append(url + ' - ' + url_443) db.set_alarm_url(url) if 'Error' in url_80 or 'False' in url_80: new_urls_80.append(url + ' - ' + url_80) db.set_alarm_url(url) users = ChatUsers() names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub() for chat_id in chat_ids: if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))): if new_urls_80 != []: answer_80 = 'ALARM!\nНе открываются на порту 80:\n' answer_80 += '\n'.join(new_urls_80) bot.send_message(chat_id, answer_80, disable_web_page_preview=True) if new_urls_443 != []: answer_443 = 'ALARM!\nНе открываются на порту 443:\n' answer_443 += '\n'.join(new_urls_443) bot.send_message(chat_id, answer_443, disable_web_page_preview=True) """ Проверяем ssl """ urls = Urls() urls = urls.get_all_urls() await asyncio.sleep(1) data_3 = check_ssl_cirt(urls) valid_list = list(map(list, zip(urls, data_3))) await asyncio.sleep(1) reply = '' for j in valid_list: if 'invalid' in j[1]: db.set_alarm_url(j[0]) reply += '{}'.format(j[0] + ', ssl: ' + j[1] + '\nPlease, update this url\n') users = ChatUsers() names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub() for chat_id in chat_ids: if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))): await bot.send_message(chat_id, reply, disable_notification=True) """ Проверяем сервера """ servers = Servers() servers = servers.get_all_servers() hosts = [] usernames = [] for r in servers: n = (r.split('@')) host = n[1] usname = n[0] hosts.append(host) usernames.append(usname) keys = Keys() keys = keys.get_all_keys() for i in range(len(hosts)): ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) host_temp = hosts[i] username_temp = usernames[i] if '-----BEGIN' in keys[i]: pr_key_temp = keys[i] pr_key_temp = pr_key_temp[1:-1] pr_key_temp = StringIO(pr_key_temp) privkey = paramiko.RSAKey.from_private_key(pr_key_temp) else: pr_key_temp = "src/bot/db/" + keys[i] try: privkey = paramiko.RSAKey.from_private_key_file(pr_key_temp) except SSHException: privkey = paramiko.DSSKey.from_private_key_file(pr_key_temp) try: ssh.connect(hostname=host_temp, username=username_temp, pkey=privkey) logging.debug("Connect Server") finally: logging.debug("Finally Connect Server") stdin, stdout, stderr = ssh.exec_command('df --output=pcent /home') pcent = [] for line in stdout: pcent.append(line.strip('\n')) stdin, stdout, stderr = ssh.exec_command('df --output=avail --block-size=G /home') mem_avail_gb = [] for line in stdout: mem_avail_gb.append(line.strip('\n')) ssh.close() pcent_num = pcent[1].replace('%', '') pcent_num = int(pcent_num.replace(' ', '')) mem_avail_gb_num = mem_avail_gb[1].replace('G', '') mem_avail_gb_num = int(mem_avail_gb_num.replace(' ', '')) if ((100 - pcent_num) <= memorythreshold_pc) or (mem_avail_gb_num <= memorythreshold_gb): db.set_alarm_server(host_temp) reply = """{}@{}\nCRITICAL! 
LOW MEMORY!\n{}% Used\n{}Gb Avaible""".format( username_temp, host_temp, pcent_num, mem_avail_gb_num) users = ChatUsers() names, surnames, usernames, chat_ids = users.get_all_chat_users_with_sub() for chat_id in chat_ids: if (db.access_check_collegue(str(chat_id)) or db.access_check_admin(str(chat_id))): await bot.send_message(chat_id, reply, disable_notification=True)