def collect_samples(n, m, x):
    global post_a, post_b
    sample_list = []
    derivative_list = []
    arg_list = []
    for i in range(n):
        theta = np.random.gamma(post_a, 1 / post_b)
        arg_list.append((m, theta, x))
    pool = ThreadPool()
    results = pool.starmap(collect_inner_samples, arg_list)
    pool.close()
    pool.join()
    for res in results:
        sample_list.append(res[0])
        derivative_list.append(res[1])
    return sample_list, derivative_list
def main():
    pool = ThreadPool(int(brojDretvi))
    # execute only if run as a script
    with open(inputUsporedbe, "r") as csvUsporedbe, open(inputZavisnosti, "r") as csvZavisnosti:
        usporedbeReader = csv.reader(csvUsporedbe, delimiter=';')
        zavisnostiReader = csv.reader(csvZavisnosti, delimiter=';')
        listaZavisnosti = list(zavisnostiReader)
        listaUsporedbi = list(usporedbeReader)
        for usporedba in listaUsporedbi:
            doPartOfSimulation = partial(doClusterTest, usporedba[0])
            results.update(
                pool.imap_unordered(doPartOfSimulation,
                                    [redak[0] for redak in listaZavisnosti],
                                    chunksize=400))
def find_all_distances_in_matrix(matrix):
    index_array = [i for i in range(0, len(matrix))]
    pool = ThreadPool(mp.cpu_count() - 2)
    func = partial(find_all_dist_with_target, matrix, index_array)
    all_dp_distances = list(pool.imap(func, index_array[:-1]))
    all_dp_distances = dict(ChainMap(*all_dp_distances))
    pool.close()
    pool.join()
    return all_dp_distances
def collect_samples(n, m, x):
    global post_a, post_b
    sample_list = np.zeros(n)
    derivative_list = np.zeros(n)
    arg_list = np.zeros((n, 3))
    for i in range(n):
        theta = np.random.gamma(post_a, 1 / post_b)
        arg_list[i] = (m, theta, x)
    pool = ThreadPool()
    results = pool.starmap(collect_inner_samples, arg_list.tolist())
    pool.close()
    pool.join()
    for i in range(n):
        sample_list[i] = results[i][0]
        derivative_list[i] = results[i][1]
    return np.array(sample_list), np.array(derivative_list)
def replays2traces(dir, nbac, nbsec, tw, out):
    global params
    params["dir"] = dir
    params["nbac"] = nbac
    params["nbsec"] = nbsec
    params["tw"] = tw
    params["out"] = out
    replays_to_parse = sc2reader.utils.get_files(dir)
    pool = ThreadPool(4)
    results = pool.map(parse_replay, replays_to_parse)
    pool.close()
    pool.join()
    with open(out, "w") as text_file:
        for r in results:
            if len(r) > 0:
                text_file.write(r)
def __init__(self, number_of_classes, memory_size=100, number_of_executors=100, image_size=(20, 20)):
    super(OMNIGLOTClassifier, self).__init__()
    self.number_of_classes = number_of_classes
    self.image_size = image_size
    self.memory_size = memory_size
    self.image_names = np.array([])
    self.image_loads = {}
    self.image_labels = {}
    self.different_labels = {}
    self.representing_image_cache = {}
    self.image_points_cache = {}
    self.executionPool = ThreadPool(min(number_of_executors, memory_size))
def learn_matrix_parallel(self):
    # print(len(self.directories))
    pool = ThreadPool(len(self.directories))
    # pool.map already returns the list of per-day matrices; wrapping it in
    # another list would make the aggregation loop index the wrong level
    results = pool.map(self.fetch_matrix_for_date, self.directories)
    print("Aggregating results...")
    matrix = [[0.0 for x in range(self.transition_matrix_size)]
              for y in range(self.transition_matrix_size)]
    for daily_matrix in results:
        # print(len(daily_matrix), len(daily_matrix[0]))
        for i in range(len(self.transition_matrix)):
            for j in range(len(self.transition_matrix[0])):
                matrix[i][j] += daily_matrix[i][j]
    self.transition_matrix = matrix
def get_tieba_detail():
    vip_headers = get_vip_headers(host='tieba.baidu.com', referer='https://tieba.baidu.com/')
    youku_movie_url_dict = {
        u'头像吧': 'http://tieba.baidu.com/f?ie=utf-8&kw=%E5%A4%B4%E5%83%8F',
        u'唯美图片吧': 'https://tieba.baidu.com/f?ie=utf-8&kw=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87',
        u'动漫头像吧': 'https://tieba.baidu.com/f?ie=utf-8&kw=%E5%8A%A8%E6%BC%AB%E5%A4%B4%E5%83%8F',
        u'动漫图片': 'https://tieba.baidu.com/f?ie=utf-8&kw=%E5%8A%A8%E6%BC%AB%E5%9B%BE%E7%89%87',
        u'欧美图片': 'https://tieba.baidu.com/f?ie=utf-8&kw=%E6%AC%A7%E7%BE%8E%E5%9B%BE%E7%89%87',
        u'二次元图片': 'https://tieba.baidu.com/f?ie=utf-8&kw=%E4%BA%8C%E6%AC%A1%E5%85%83%E5%9B%BE%E7%89%87',
    }
    name_url_fee_list = []
    for fee, url in youku_movie_url_dict.iteritems():
        page_num = 1
        while url:
            video_url_list = []
            soup = get_url_bs4soup(url, vip_headers)
            for li in soup.find_all('div', class_='threadlist_title'):
                video_name = li.a.get('title')
                video_url = li.a.get('href')
                if not video_url.startswith('https://tieba.baidu.com'):
                    video_url = 'https://tieba.baidu.com' + video_url
                if video_name and video_url:
                    video_url_list.append((fee, video_url))
            pool = ThreadPool(4)
            pool.map(get_page_detail_img_save, video_url_list)
            pool.close()
            pool.join()
            url = None
            # '下一页>' is the "next page" link text on the page
            if soup.find('a', text='下一页>'):
                url = soup.find('a', text='下一页>').get('href')
            print '%s-->第%s页已完成' % (fee, page_num)  # "%s --> page %s finished"
            page_num += 1
    print 'Get name complete...' + '总计%d个' % len(name_url_fee_list)  # "%d in total"
def _get_data_generator(midi_paths, note_representation, encoding, window_size,
                        batch_size, shuffle, num_threads, glove_dimension):
    if num_threads > 1:
        pool = ThreadPool(num_threads)
    load_index = 0
    max_files_in_ram = 10
    while True:
        load_files = midi_paths[load_index:load_index + max_files_in_ram]
        load_index = (load_index + max_files_in_ram) % len(midi_paths)
        # print('loading large batch: {}'.format(max_files_in_ram))
        # print('Parsing midi files...')
        # start_time = time.time()
        if num_threads > 1:
            parsed = pool.map(parse_midi, load_files)
        else:
            parsed = list(map(parse_midi, load_files))
        # print('Finished in {:.2f} seconds'.format(time.time() - start_time))
        # print('parsed, now extracting data')
        data = _windows_from_monophonic_instruments(parsed, window_size,
                                                    note_representation, encoding,
                                                    glove_dimension)
        # if shuffle:
        #     # shuffle in unison
        #     tmp = list(zip(data[0], data[1]))
        #     random.shuffle(tmp)
        #     tmp = zip(*tmp)
        #     data[0] = np.asarray(tmp[0])
        #     # THIS ERRORS SOMETIMES w/:
        #     # IndexError: list index out of range
        #     data[1] = np.asarray(tmp[1])
        batch_index = 0
        while batch_index + batch_size < len(data[0]):
            # print('yielding small batch: {}'.format(batch_size))
            res = (data[0][batch_index:batch_index + batch_size],
                   data[1][batch_index:batch_index + batch_size])
            yield res
            batch_index = batch_index + batch_size
        # probably unneeded but why not
        del parsed  # free the mem
        del data  # free the mem
def filebyfileHandle(fileSavedPath='./news/', rejectOfDocSize=400, multiprocess=4, number_doc=-1):
    x = 0
    global filesPath
    filesPath = fileSavedPath
    list = os.listdir(fileSavedPath)
    if number_doc == -1 or number_doc > len(list):
        number_doc = len(list)
    # list = sorted(list[:number_doc], key=lambda x: (int(re.sub('\D','',x)),x))
    pool = ThreadPool(multiprocess)
    dictionary = pool.map(dealwith_mulitpocess, list)
    pool.close()
    pool.join()
def collect_samples(n, m, x):
    global theta_samples
    sample_list = []
    derivative_list = []
    arg_list = []
    for i in range(n):
        index = int(np.random.rand() * 100000)
        theta = theta_samples[index]
        arg_list.append((m, theta, x))
    pool = ThreadPool()
    results = pool.starmap(collect_inner_samples, arg_list)
    pool.close()
    pool.join()
    for res in results:
        sample_list.append(res[0])
        derivative_list.append(res[1])
    return sample_list, derivative_list
def filter_data(data_dict, valid_ids):
    from multiprocessing import Pool as ThreadPool, Manager
    from functools import partial
    pool = ThreadPool(8)
    func = partial(filter_df, verbose=True, data_dict=data_dict, valid_ids=valid_ids)
    ls = list(pool.imap(func, list(data_dict.keys())))
    ls.sort(key=lambda x: x[1], reverse=True)
    print(f"There are {len(valid_ids)} unique students considered.\n")
    print_table(
        ["Filename", "Contains", "Percentage", "Valid Cols", "Removed cols"],
        ls, max_width=30)
def find_all_distances_in_matrix(matrix, index_array, total_indexes, feature_weights):
    pool = ThreadPool(mp.cpu_count() - 2)
    func = partial(find_all_dist_with_target, matrix, index_array, total_indexes, feature_weights)
    all_dp_distances = list(pool.imap_unordered(func, index_array[:-1]))
    # Store dps relationships and the distances
    all_dp_dist_list = [np.array([])] * matrix.shape[0]
    # Convert map to list
    for dp_dict in all_dp_distances:
        all_dp_dist_list[list(dp_dict.keys())[0]] = list(dp_dict.values())[0]
    return all_dp_dist_list
def fill(recipes, collection, queue, threads):
    """
    Collects the json files for each id and writes them into the database.

    It iterates over the ids in recipes and pushes them into `collection`,
    while also adding them to `queue` to write them to a file.

    Params:
    -------
    recipes: list
        the list holding the recipe ids to fetch
    collection: mongoDB collection
        A collection object in which the recipes should be pushed
    queue: Synced Queue
        The queue to push the json objects in to save to file
    threads: int
        How many threads to use concurrently

    Returns:
    --------
    recipe_objects: list
        a list of json objects. Returned for convenience
    """
    pool = ThreadPool(threads)
    count = 0
    recipe_obj = []
    for obj in pool.imap(collect_recipes, recipes):
        if type(obj) in [dict, SON, RawBSONDocument, MutableMapping]:
            recipe_obj.append(obj)
            # one object per line
            queue.put((json_util.dumps(obj) + ","))
            collection.insert_one(obj)
        count += 1
        print("{:.2f}% ({}/{})".format((count * 100 / len(recipes)), count, len(recipes)),
              end='\r', flush=True)
    print("{:.2f}% ({}/{})".format((count * 100 / len(recipes)), count, len(recipes)), flush=True)
    pool.close()
    pool.join()
    return recipe_obj
def main():
    dirname = sys.argv[1]
    image_number = int(sys.argv[2])
    (model_dir, img_dir, width, height, num_images, ensemble_size, max_levels,
     sampled, assignments, average_img) = config.load_cfg(dirname)
    init(model_dir, img_dir)
    print("image_number = %s, num_images = %s"
          % (str(image_number), str(num_images)))
    test_data, network_data = kmeans2d.assignment_data_to_test_data(
        assignments, image_number, num_images, average_img)
    input_map = np.zeros((height, width, 3), dtype=np.float64)
    image_out = np.zeros((height, width, 3), dtype=np.float64)
    thread_data = [(data, test_data, model_dir) for data in network_data]
    start_time = time.clock()
    pool = ThreadPool(8)
    results = pool.map(predict_thread, thread_data)
    pool.close()
    pool.join()
    for result in results:
        # predictions
        (level, network_id, start, count, predictions, test_out) = result
        kmeans2d.predictions_to_image(image_out, test_out, predictions)
        # debug data
        prediction_out = np.zeros((height, width, 3), dtype=np.float64)
        update_input_map(level, network_id, width, height, test_data,
                         [level, network_id, start, count], input_map)
        kmeans2d.predictions_to_image(prediction_out, test_out, predictions)
        prediction_file_name = "render_images/prediction_%04d_%04d.png" % \
            (level, network_id)
        misc.imsave(prediction_file_name, prediction_out)
    image_out = np.divide(image_out, float(ensemble_size))
    image_out = np.clip(255.0 * image_out, 0.0, 255.0)
    end_time = time.clock()
    image_file_name = "render_images/" + str(sys.argv[2]) + '.png'
    misc.imsave("render_images/render_input_map.png", input_map)
    misc.imsave(image_file_name, image_out)
    print("time = %5.5f" % (end_time - start_time))
    print("saved %s" % (image_file_name))
def run(self, args):
    if args.tool_args:
        args.tool_args = ' '.join(args.tool_args)
    else:
        args.tool_args = ""
    if args.profile1:
        args.tool_args += " " + args.profile1_data
    elif args.profile2:
        args.tool_args += " " + args.profile2_data
    elif args.profile3:
        args.tool_args += " " + args.profile3_data
    elif args.profile4:
        args.tool_args += " " + args.profile4_data
    if not args.binary:
        self.binary = which.run(self.binary_name)
    else:
        self.binary = which.run(args.binary)
    if not self.binary:
        print(
            "%s binary not found. Please explicitly provide path with --binary"
            % self.name)
    else:
        if args.timeout:
            timeout = int(args.timeout)
        else:
            timeout = None
        targets = self.get_targets(args)
        if not args.no_binary and targets:
            cmd = self.build_cmd(args)
            cmds = [shlex.split(cmd.format(**t)) + [timeout] for t in targets]
            self.pre_run(args)
            pool = ThreadPool(int(args.threads))
            pool.map(run_cmd, cmds)
            self.post_run(args)
        if targets:
            self.process_output(targets)
def get_data_generator(midi_paths, window_size=20, batch_size=32,
                       num_threads=8, max_files_in_ram=170):
    print(num_threads)
    if num_threads > 1:
        # load midi data
        pool = ThreadPool(num_threads)
    load_index = 0
    while True:
        # print("gdg")
        load_files = midi_paths[load_index:load_index + max_files_in_ram]
        # print('length of load files: {}'.format(len(load_files)))
        load_index = (load_index + max_files_in_ram) % len(midi_paths)
        # print('loading large batch: {}'.format(max_files_in_ram))
        # print('Parsing midi files...')
        # start_time = time.time()
        if num_threads > 1:
            parsed = pool.map(parse_midi, load_files)
        else:
            parsed = map(parse_midi, load_files)
        # print('Finished in {:.2f} seconds'.format(time.time() - start_time))
        # print('parsed, now extracting data')
        data = _windows_from_monophonic_instruments(parsed, window_size)
        batch_index = 0
        # print(data[0])
        while batch_index + batch_size < len(data[0]):
            # print('getting data...')
            # print('yielding small batch: {}'.format(batch_size))
            res = (data[0][batch_index:batch_index + batch_size],
                   data[1][batch_index:batch_index + batch_size])
            # print(type(res[0]))
            # print(res[0])
            # print(np.sum(res[1], axis=1))
            yield res
            batch_index = batch_index + batch_size
        # probably unneeded but why not
        del parsed  # free the mem
        del data  # free the mem
def Pkey_quad(candslist, method, direction):
    res_all = []
    simmatrix = get_sim_matrix(candslist, method, direction)
    pool = ThreadPool(25)
    partial_evalkey = partial(Pevalkey, candslist=candslist, simmatrix=simmatrix)
    I = [[j] * len(candslist[j]) for j in range(len(candslist))]
    res_all = pool.map(partial_evalkey,
                       zip(itertools.chain(*candslist), itertools.chain(*I)))
    pool.close()
    pool.join()
    res, score = max(res_all, key=lambda x: x[1])
    titles = ids2title(res)
    return res, titles
def draw(self):
    data = np.zeros((self.camera.height, self.camera.width, 3), dtype=np.uint8)
    pool = ThreadPool(self.poolsize)
    print("Starting pool")
    for result in pool.imap_unordered(march_task, self.camera.rays(self), 64):
        (x, y, color) = result
        data[y, x] = color
        # print(color)
    print("pool finished")
    img = toimage(data)
    img.save('out.png')
def generate_mesh(filename, output_file, normalized=True):
    """
    input: normalized feature && output file
    output: output mesh
    """
    global_para(ref_mesh_filename)
    t0 = time.time()
    prepare_T(filename, normalized)
    pool = ThreadPool()
    b_ = list(pool.map(compute_temp, range(V)))
    pool.close()
    P = scipy.sparse.linalg.spsolve(A.tocsc(), scipy.sparse.csc_matrix(b_, dtype=float))
    V_new = p2e(-P.tocsr().todense())
    igl.writeOBJ(output_file, V_new, F)
    print("write mesh DONE")
def bench_put_nonaligned():
    """Load data in parallel and non-block aligned on branch (very slow)."""
    from multiprocessing import Pool as ThreadPool
    pool = ThreadPool(16)
    # create pieces
    zsize, ysize, xsize = data.shape
    shift_queue = []
    for z in range(0, zsize, 32):
        for y in range(0, ysize, 32):
            for x in range(0, xsize, 32):
                shift_queue.append((z, y, x, uuid3, data, name))
    pool.map(load_data, shift_queue)
    pool.close()
    pool.join()
    return True, ""
def createNgrams(licenseList, ngramJsonLoc, threads=os.cpu_count(), verbose=0):
    '''
    Creates an Ngram_keywords.json at the location specified by the user that
    contains unique n-grams for each license cluster.

    :param licenseList: Processed License List (CSV)
    :param ngramJsonLoc: Specify N-Gram Json File location
    :param threads: Number of CPUs to use for creating n-grams (to speed up the process)
    :param verbose: Specify if verbose mode is on or not (Default is Off/None)
    :return: Returns
        - n-gram json file location,
        - Array - matched_output (licenses that have non-zero unique n-gram identifiers)
        - Array - no_keyword_matched (licenses with zero unique n-gram identifiers)
    '''
    uniqueNGrams, cluster_arr, licenses = load_database(licenseList, verbose)
    no_keyword_matched = []
    matched_output = []
    ngram_keywords = []
    cpuCount = os.cpu_count()
    threads = cpuCount * 2 if threads > cpuCount * 2 else threads
    pool = ThreadPool(threads)
    for idx, row in enumerate(
            tqdm(pool.imap_unordered(unique_ngrams, uniqueNGrams),
                 desc="Licenses processed", total=len(cluster_arr), unit="license")):
        matched_output.append([str(uniqueNGrams[idx]['shortname']), len(row)])
        if len(row) == 0:
            no_keyword_matched.append(uniqueNGrams[idx]['shortname'])
        ngram_keywords.append({
            'shortname': uniqueNGrams[idx]['shortname'],
            'ngrams': row
        })
    ngramJsonLoc = os.path.abspath(ngramJsonLoc)
    folder = os.path.dirname(ngramJsonLoc)
    Path(folder).mkdir(exist_ok=True)
    with open(ngramJsonLoc, 'w') as myfile:
        myfile.write(json.dumps(ngram_keywords))
    return ngramJsonLoc, matched_output, no_keyword_matched
def multiProc(sshbfTarget, sshbfUname, sshpwFile, threadCount):
    pooL = ThreadPool(threadCount)
    total = sum(1 for line in open(sshpwFile))
    passworD = []
    awshet = [sshbfTarget, sshbfUname, total, threadCount]
    with open(sshpwFile) as BANG:
        for line in BANG:
            passworD.append(line)
    sshbfResponse = pooL.map(SSHbf4, zip(passworD, itertools.repeat(awshet)))
    # print(sshbfResponse)
    # for responsE, sshbfpD in sshbfResponse:
    #     if responsE is None:
    #         print(("\nTarget: %s User: %s Password: %s SUCCESSFUL!!" % (sshbfTarget, sshbfUname, sshbfpD)))
    #         sys.exit(0)
    #     else:
    #         pass
    pooL.close()
    pooL.join()
def solve(self):
    self._processes = []
    pool = ThreadPool(self._nthreads)
    print("Starting calculation")
    start_time = datetime.now()
    self._solutions = pool.map(self.nqueens, range(self._nthreads))
    script_time = datetime.now() - start_time
    pool.close()
    pool.join()
    print("\nElapsed time: %d.%d" % (script_time.seconds, script_time.microseconds))
    print("\nThere are %d solutions for %d queens." % (sum(self._solutions), self._nq))
    return script_time.seconds, script_time.microseconds, sum(self._solutions)
def predict_parallel(df_l, df_r, model, G1, G2, name_sim_threshold, threads=4):
    load_cache()
    pool = ThreadPool(threads)
    df_l = df_l[['uid', 'name_vk']].dropna().values
    df_r = df_r[['uid', 'name_inst']].dropna().values
    data_list = prepare_data_for_threads(df_l, df_r, model, G1, G2, threads,
                                         name_sim_threshold=name_sim_threshold)
    results = pool.map(find_sim_and_predict, data_list)
    pool.close()
    pool.join()
    for i, res in enumerate(results):
        print(i, 'true count', len(res))
    clear_cache()
    return results
def crack(self, min_pass_len=MIN_PASSWD_LEN, max_pass_len=MAX_PASSWD_LEN,
          possible_chars=POSSIBLE_CHARS):
    """Attacks the hash with all possible combinations, increasing in length

    Returns:
        The password if a matching password was found; None otherwise
    """
    with ThreadPool() as p:
        passwd = first(
            p.imap_unordered(
                self._test_passwd,
                possible_passwds(min_pass_len, max_pass_len, possible_chars)))
        if passwd is not None:
            return passwd
    return None
def base_on_usage_for_TF(version, mini=False, col='package', thres_hold=0):
    rootdir = './output/start_close/'
    list = os.listdir(rootdir)  # list all directories and files under the folder
    path_list = sorted(list, reverse=True)
    path_list = [os.path.join(rootdir, item) for item in path_list if item.endswith('csv')]
    from multiprocessing import Pool as ThreadPool
    from functools import partial
    pool = ThreadPool(processes=8)
    process_file = partial(cal_tf_for_individual_file, col=col, thres_hold=thres_hold)
    results = pool.map(process_file, path_list)
    df = pd.concat(results)
    df.fillna(0, inplace=True)
    print(f'Shape of device package usage is: {df.shape}')
    return df.sort_index()
def update_stocks(filename=TICKER_FILENAME, threads=16):
    ### Start Timer
    logging.basicConfig(filename=LOG_FILENAME, filemode="a", level=logging.INFO)
    logger = logging.getLogger("rstocks.update_stocks")
    start = data.start_logger(logger)
    ### Generate a list of tickers
    with open(filename) as inFile:
        symbols = inFile.read().splitlines()
    ### Use a Thread Pool to asynchronously make updates to the database
    with ThreadPool(threads) as pool:
        pool.map(store_stock, symbols)
    ### End Timer
    data.end_logger(start, logger)
def multithread_process_file(l_file, n_threads):
    # start multithreading
    pool = ThreadPool(n_threads)
    tmp_res = pool.map_async(process_file, l_file, chunksize=1)
    results_2 = tmp_res.get()
    pool.close()
    pool.join()
    # load species dict
    dict_species = utils.get_pickle(Path('dir_step1') / 'species_index.pic')
    # create log file
    log_file = open(out_dir / 'log_step2.txt', 'w+')
    log_file.write('#species_file nb_phylo nb_NO_phylo nb_empty_ali_ali nb_pbm_tree\n')
    # save log
    for l in results_2:
        log_file.write(dict_species[l[0]] + ' ' + ' '.join(l[1:]) + '\n')
    log_file.close()
def check_urls(urls, cache=None):
    if cache is None:
        cache = {}
    result = {}
    urls_no_cache = []
    for url in urls:
        if url in cache:
            if DEBUG:
                print('url in cache ' + url)
            result.update({url: cache[url]})
        else:
            urls_no_cache.append(url)
    pool = ThreadPool(4)
    result.update({key: val for key, val in zip(urls_no_cache, pool.map(load_url, urls_no_cache))})
    pool.close()
    pool.join()
    cache.update(result)
    return {key: True if val != "" else False for key, val in result.items()}