def run_all_intersections(write_anomaly, incomplete, intersections, multi_model, smooth):
    print "Running all on", os.getpid()
    start_time = time.time()
    if incomplete:
        key = '_id'
        query = [
            {'$match': {'anomaly': {'$exists': False}}},
            {'$group': {'_id': '$site_no'}}
        ]
        if intersections != '':
            query[0]['$match']['site_no'] = {'$in': intersections.split(',')}
        locations = list(readings_collection.aggregate(query))
    else:
        key = 'intersection_number'
        if intersections != '':
            query = {key: {'$in': intersections.split(',')}}
        else:
            query = {key: {'$regex': '3\d\d\d'}}
        locations = list(locations_collection.find(query))
    gen = [(str(l[key]), write_anomaly, incomplete, False, multi_model, smooth) for l in locations]
    pool = Pool(8, maxtasksperchild=1)
    pool.map(run_single_intersection, gen)
    print("TOTAL TIME: --- %s seconds ---" % (time.time() - start_time))
def main():
    parser = argparse.ArgumentParser(
        description="A simple tool to backup your Bitbucket repositories",
    )
    parser.add_argument('username', type=str, help='Username')
    parser.add_argument('password', type=str, help='Password')
    parser.add_argument('backupdir', type=str, help='The target backup directory')
    args = parser.parse_args()

    bitbucket = Bitbucket(args.username, args.password)
    repos = list(bitbucket.get_repositories())
    random.shuffle(repos)

    pool = multiprocessing.pool.ThreadPool(20)
    pool.map(lambda x: x.backup(args.backupdir), repos)

    failed = 0
    for repo in repos:
        if repo.failed is None:
            continue
        failed += 1
        print 'WARNING: the following repositories failed to update:'
        print repo.name
        print repo.output
        print repo.failed
    if failed:
        sys.exit(2)
def _push(self, src, dst):
    """ Push src to dst on the remote. """
    force = False
    if src.startswith('+'):
        src = src[1:]
        force = True
    present = [self._refs[name][1] for name in self._refs]
    present.extend(self._pushed.values())
    # before updating the ref, write all objects that are referenced
    objects = git_list_objects(src, present)
    try:
        # upload objects in parallel
        pool = multiprocessing.pool.ThreadPool(processes=self._processes)
        pool.map(Binder(self, '_put_object'), objects)
    except Exception:
        self._fatal('exception while writing objects')
    sha = git_ref_value(src)
    error = self._write_ref(sha, dst, force)
    if error is None:
        self._write('ok %s' % dst)
        self._pushed[dst] = sha
    else:
        self._write('error %s %s' % (dst, error))
def build_packages(self):
    """Build all the Spinnaker packages."""
    all_subsystems = []
    all_subsystems.extend(SUBSYSTEM_LIST)
    all_subsystems.extend(ADDITIONAL_SUBSYSTEMS)

    if self.__options.build:
        # Build in parallel using half available cores
        # to keep load in check.
        weighted_processes = self.__options.cpu_ratio * multiprocessing.cpu_count()
        pool = multiprocessing.pool.ThreadPool(
            processes=int(max(1, weighted_processes)))
        pool.map(self.__do_build, all_subsystems)

    if self.__build_failures:
        if set(self.__build_failures).intersection(set(SUBSYSTEM_LIST)):
            raise RuntimeError('Builds failed for {0!r}'.format(
                self.__build_failures))
        else:
            print 'Ignoring errors on optional subsystems {0!r}'.format(
                self.__build_failures)

    if self.__options.nebula:
        return

    wait_on = set(all_subsystems).difference(set(self.__build_failures))
    pool = multiprocessing.pool.ThreadPool(processes=len(wait_on))
    print 'Copying packages...'
    pool.map(self.__do_copy, wait_on)
    return
def parallel_compile(self, sources, output_dir=None, macros=None,
                     include_dirs=None, debug=0, extra_preargs=None,
                     extra_postargs=None, depends=None):
    """New compile function that we monkey patch into the existing compiler instance.
    """
    import multiprocessing.pool

    # Copied from the regular compile function
    macros, objects, extra_postargs, pp_opts, build = \
        self._setup_compile(output_dir, macros, include_dirs, sources,
                            depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    def _single_compile(obj):
        try:
            src, ext = build[obj]
        except KeyError:
            return
        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

    # Set by fix_compiler
    global glob_use_njobs
    if glob_use_njobs == 1:
        # This is equivalent to the regular compile function
        for obj in objects:
            _single_compile(obj)
    else:
        # Use ThreadPool, rather than Pool, since the objects are not picklable.
        pool = multiprocessing.pool.ThreadPool(glob_use_njobs)
        pool.map(_single_compile, objects)
        pool.close()
        pool.join()

    # Return *all* object filenames, not just the ones we just built.
    return objects
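# A minimal sketch of how a parallel_compile like the one above could be
# monkey patched onto a distutils compiler instance. The fix_compiler name and
# the glob_use_njobs global come from the comments above; the wiring shown
# here is an assumption for illustration, not the original project's code.
import types

glob_use_njobs = 1

def fix_compiler(compiler, njobs):
    # Remember the requested parallelism and swap in the parallel compile
    # method as a bound instance attribute, shadowing the class method.
    global glob_use_njobs
    glob_use_njobs = njobs
    compiler.compile = types.MethodType(parallel_compile, compiler)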
def get_item_by_url(urls):
    pool = mul.Pool()
    t1 = clock()
    pool.map(get_item_info, urls)
    t2 = clock()
    print 'time\t' + str(t2 - t1)
    print 'total count\t' + str(count)
def test_threads(self):
    n, f = 10000, 10
    i = AnnoyIndex(f, 'euclidean')
    for j in xrange(n):
        i.add_item(j, numpy.random.normal(size=f))
    i.build(10)

    pool = multiprocessing.pool.ThreadPool()

    def query_f(j):
        i.get_nns_by_item(1, 1000)

    pool.map(query_f, range(n))
def run_master(self):
    logging.info('Creating a pool of ' + str(self.num_processes) + ' subprocess workers.')
    # create a pool of processes.
    pool = Pool(processes=self.num_processes)
    # apply map on the chunks in parallel.
    regions = pool.map(self.apply_map, range(0, self.num_processes))
    # do the intermediate grouping step on each chunks in parallel.
    inters = pool.map(self.apply_intermediate, range(0, self.num_processes))
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [webp_binary, png_path] + _PNG_TO_WEBP_ARGS + [webp_path]
        subprocess.check_call(args)
        os.remove(png_path)

    # Android requires pngs for 9-patch images.
    pool.map(convert_image, [f for f in png_files if not f.endswith('.9.png')])
    pool.close()
    pool.join()
def build_jars(self):
    """Build the Spinnaker packages as jars."""
    subsystems = ['halyard']

    if self.__options.do_jar_build:
        weighted_processes = self.__options.cpu_ratio * multiprocessing.cpu_count()
        pool = multiprocessing.pool.ThreadPool(
            processes=int(max(1, weighted_processes)))
        pool.map(self.__do_jar_build, subsystems)

    self.__check_build_failures(subsystems)
def scan_all():
    items = [(name, addr)
             for name, endpoints in config['endpoints'].items()
             for addr in endpoints]

    pool.map(scan_one, items)

    info()

    if 'verbose' in sys.argv:
        import pprint
        pprint.pprint(dict(active))
        pprint.pprint(dict(inactive))

    header = "".join([
        "name".center(29),
        "active".rjust(8),
        "inactive".rjust(9),
        "percent".rjust(9),
        "reason".center(32),
    ])
    info()
    info(header + "\n")
    info("-" * len(header) + "\n")

    active_n_total, inactive_n_total = 0, 0
    for name in sorted(config['endpoints']):
        active_n = len(active[name])
        inactive_n = len(inactive[name])
        active_n_total += active_n
        inactive_n_total += inactive_n
        total = active_n + inactive_n
        percent = ""
        if total:
            percent = "%%%0.1f" % (100 * float(active_n) / total)
        reasons = set([reason for _, reason in inactive[name]])
        info(name.rjust(29))
        info(str(active_n).rjust(8))
        info(str(inactive_n).rjust(9))
        info(percent.rjust(9))
        info(", ".join(reasons).rjust(32) + "\n")

    info("-" * len(header) + "\n")
    info(" total active: %i\n" % active_n_total)
    info("total inactive: %i\n" % inactive_n_total)
    value = 100 * float(active_n_total) / (active_n_total + inactive_n_total)
    info("percent active: %%%0.1f\n" % value)
    return value
def build_container_images(self):
    """Build the Spinnaker packages as container images."""
    subsystems = [comp for comp in SUBSYSTEM_LIST if comp != 'spinnaker']
    subsystems.append('spinnaker-monitoring')

    if self.__options.container_builder:
        weighted_processes = self.__options.cpu_ratio * multiprocessing.cpu_count()
        pool = multiprocessing.pool.ThreadPool(
            processes=int(max(1, weighted_processes)))
        pool.map(self.__do_container_build, subsystems)

    self.__check_build_failures(subsystems)
def run_program(self):
    logging.info('Running the framework...')
    # Fixing the start time.
    start_time = time.time()

    """ Create a pool of processes. The number of processes is equal to the
        number of files. One process takes care of one file. """
    pool = MyMRPool(len(self.files))
    logging.info('The initial number of running processes is ' + str(len(self.files)) + '.')

    """ Apply call_map_reduce on all files in parallel. All files will be
        partitioned/mapped/shuffled individually. """
    apply_map_reduces = pool.map(self.call_map_reduce, self.files)

    self.shuffle()

    """ At this point we have bunch of inter-shuffled files.
        We can reduce them in parallel. """
    reduces = pool.map(self.apply_reduce, range(0, self.num_processes))

    """ At this point we have bunch of reduced files so we can merge
        all reduce files into one final file. """
    self.merge_reduce_results()

    """ Finilizing the framework execution. """
    self.__finalize_program()

    logging.info('The program is successfully finished.')

    """ Fixing the end time. We use this for calculating the total
        execution time of the framework. """
    end_time = time.time()
    logging.info('The total execution time is: ' + str(end_time - start_time))
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
                '-lossless', '-o', webp_path]
        subprocess.check_call(args)
        os.remove(png_path)

    pool.map(convert_image,
             [f for f in png_files if not _PNG_WEBP_BLACKLIST_PATTERN.match(f)])
    pool.close()
    pool.join()
def executeOperatorSequence(operator, kwargsUpdated, parallel):
    outputPathPattern = ''
    inputPathPattern = ''
    for key, value in kwargsUpdated.iteritems():
        arg = str(value)
        if '*' in arg:
            if operator.get_targets().count(key) == 1:
                outputPathPattern = arg
                outputKey = key
            else:
                # get files matching arg with Unix style pathname pattern expansion
                fileList = glob.glob(arg)
                if not fileList:
                    log.error("%s: Could not find any files for input pattern %s in Slot %s"
                              % (operator.name, arg, key))
                inputFileList = fileList
                inputPathPattern = arg
                inputKey = key

    if outputPathPattern == '' or inputPathPattern == '':
        log.error("If two file patterns (paths with '*') are used, one must be an argument "
                  "for a non-target parameter and one for a target parameter")

    pre, post = inputPathPattern.split('*')
    outputFileList = []
    for fil in inputFileList:
        tmp = str(fil).replace(pre, '')
        tmp = str(tmp).replace(post, '')
        outputFileList.append(outputPathPattern.replace('*', tmp))

    args_list = []
    for j in range(len(inputFileList)):
        kwargs_new = OrderedDict(kwargsUpdated)
        kwargs_new[inputKey] = inputFileList[j]
        kwargs_new[outputKey] = outputFileList[j]
        args_new = list(kwargs_new.values())
        args_new.append(operator)
        args_list.append(args_new)

    if parallel:
        # multiprocessing
        num_of_workers = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(num_of_workers - 1)
        # blocks until finished
        pool.map(callWrapper, args_list)
    else:
        for args in args_list:
            callWrapper(args)

    return outputPathPattern
def process_images():
    """Process all images in parallel.

    Like app.process_images, use a process pool for convenience. The last
    three steps of the problem (cropping and saving) are also parallelized.

    This cannot be done using multiprocessing.Pool because it daemonizes its
    children processes, and they in turn cannot have children of their own.
    Use custom Pool and Process subclasses that ensure the children are not
    daemonized.
    """
    pool = NoDaemonPool()  # use cpu_count() processes
    pool.map(process_image, image_paths())
    pool.close()
    pool.join()
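# A minimal sketch of the NoDaemonPool idea described in the docstring above,
# assumed for illustration rather than taken from the author's code.
# multiprocessing.Pool marks its workers as daemons, and daemonic processes
# may not spawn children, so the classic recipe overrides the daemon flag on
# the worker Process class. (Newer Pythons route worker creation through a
# context, so the override may need adjusting there.)
import multiprocessing
import multiprocessing.pool

class NoDaemonProcess(multiprocessing.Process):
    @property
    def daemon(self):
        return False

    @daemon.setter
    def daemon(self, value):
        # Ignore the pool's attempt to daemonize the worker.
        pass

class NoDaemonPool(multiprocessing.pool.Pool):
    Process = NoDaemonProcess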
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
                '-lossless', '-o', webp_path]
        subprocess.check_call(args)
        os.remove(png_path)

    # Android requires pngs for 9-patch images.
    # Daydream (*.dd) requires pngs for icon files.
    pool.map(convert_image,
             [f for f in png_files
              if not (f.endswith('.9.png') or f.endswith('.dd.png'))])
    pool.close()
    pool.join()
def build_packages(self):
    """Build all the Spinnaker packages."""
    all_subsystems = []
    all_subsystems.extend(SUBSYSTEM_LIST)
    all_subsystems.extend(ADDITIONAL_SUBSYSTEMS)

    if self.__options.build:
        # Build in parallel using half available cores
        # to keep load in check.
        weighted_processes = self.__options.cpu_ratio * multiprocessing.cpu_count()
        pool = multiprocessing.pool.ThreadPool(
            processes=int(max(1, weighted_processes)))
        pool.map(self.__do_build, all_subsystems)

    self.__check_build_failures(SUBSYSTEM_LIST)
def _map_parallel(function, args, n_jobs):
    """multiprocessing.Pool(processors=n_jobs).map with some error checking"""
    # Following the error checking found in joblib
    multiprocessing = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
    if multiprocessing:
        try:
            import multiprocessing
            import multiprocessing.pool
        except ImportError:
            multiprocessing = None
    if sys.platform.startswith("win") and PY2:
        msg = "Multiprocessing is not supported on Windows with Python 2.X. Setting n_jobs=1"
        logger.warning(msg)
        n_jobs = 1
    # 2nd stage: validate that locking is available on the system and
    # issue a warning if not
    if multiprocessing:
        try:
            _sem = multiprocessing.Semaphore()
            del _sem  # cleanup
        except (ImportError, OSError) as e:
            multiprocessing = None
            logger.warning('{}. _map_parallel will operate in serial mode'.format(e))
    if multiprocessing and int(n_jobs) not in (0, 1):
        if n_jobs == -1:
            n_jobs = None
        try:
            pool = multiprocessing.Pool(processes=n_jobs)
            map_result = pool.map(function, args)
        finally:
            pool.close()
            pool.join()
    else:
        map_result = list(map(function, args))
    return map_result
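# A hypothetical usage sketch for _map_parallel above; _square and its inputs
# are invented for illustration. The callable must be defined at module level
# so that multiprocessing.Pool can pickle it for the worker processes.
def _square(x):
    return x * x

squares = _map_parallel(_square, [1, 2, 3, 4], n_jobs=2)
# squares == [1, 4, 9, 16]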
def _map_parallel(function, args, n_jobs):
    """multiprocessing.Pool(processors=n_jobs).map with some error checking"""
    # Following the error checking found in joblib
    multiprocessing = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
    if multiprocessing:
        try:
            import multiprocessing
            import multiprocessing.pool
        except ImportError:
            multiprocessing = None
    # 2nd stage: validate that locking is available on the system and
    # issue a warning if not
    if multiprocessing:
        try:
            _sem = multiprocessing.Semaphore()
            del _sem  # cleanup
        except (ImportError, OSError) as e:
            multiprocessing = None
            warnings.warn('%s. _map_parallel will operate in serial mode' % (e,))
    if multiprocessing and int(n_jobs) not in (0, 1):
        if n_jobs == -1:
            n_jobs = None
        pool = multiprocessing.Pool(processes=n_jobs)
        map_result = pool.map(function, args)
        pool.close()
        pool.join()
    else:
        map_result = list(map(function, args))
    return map_result
def get_all_commits_stats(project_stats):
    """Extract commits that have not been upstreamed in all projects.

    Args:
        project_stats: A dict of matching upstream and downstream projects
            including stats for projects that matches.

    Returns:
        A dict of commits not upstreamed.
    """
    commit_stats = {}
    downstream_stats = {match['downstream']: match for match in project_stats}
    # Only analyze modified projects
    modified_projects = []
    for name, stats in downstream_stats.iteritems():
        if stats['status'].startswith('Modified'):
            stats['name'] = name
            modified_projects.append(stats)
    pool = multiprocessing.Pool()
    commit_stats = pool.map(get_commit_stats_in_project, modified_projects)
    commit_stats = {stats['name']: stats['stats'] for stats in commit_stats}
    return commit_stats
def run_abstraction_parallel(self):
    # initialization
    self.__get_methods()
    self.__read_config()
    self.__get_dataset()

    # get filename and properties
    filename_properties = []
    for filename, properties in self.files.iteritems():
        filename_properties.append((filename, properties))

    # run experiment in multiprocessing mode
    total_cpu = multiprocessing.cpu_count()
    pool = NoDaemonProcessPool(processes=total_cpu)
    results = pool.map(self, filename_properties)
    pool.close()
    pool.join()

    # open evaluation file
    self.__check_path(self.files['evaluation_directory'])
    f = open(self.files['evaluation_file'], 'wt')
    writer = csv.writer(f)

    # set header for evaluation file
    header = []
    if self.configuration['main']['abstraction'] == '1':
        header = self.configuration['abstraction_evaluation']['evaluation_file_header'].split('\n')
    writer.writerow(tuple(header))

    # write experiment result
    for result in results:
        writer.writerow(result)

    # close evaluation file
    f.close()
def slippy_test(test_options, width=TILE_WIDTH, height=TILE_HEIGHT, tile_factor=TILE_FACTOR):
    # assume each screen is a 10x5 grid of tiles
    # this approximately the OTM map size at full screen at my desk
    z = test_options['z']
    x = test_options['x']
    y = test_options['y']
    url_prefix = test_options['url_prefix']

    tiles_to_request = []
    for x_iter in range(x - width/2, x + width/2 - 1):
        for y_iter in range(y - height/2, y + height/2 - 1):
            tiles_to_request.append(url_prefix + '%d/%d/%d.png' % (z, x_iter, y_iter))

    pool = multiprocessing.Pool(processes=tile_factor)
    start_time = time.time()
    results = pool.map(slippy_test_helper, tiles_to_request)
    end_time = time.time()
    pool.close()
    pool.join()

    sys.stderr.write('.')
    if(False in results):
        return '%d,ERROR,%f' % (-1, float('nan'))
    return '%d,OK,' % z + str(end_time - start_time)
def multiprocess_cost(self):
    ''' A multiprocessing framework to parallelize the cost computations of kde '''
    class_pairs = []
    required_data = []
    for class_i in self.class_indexes.keys():
        for class_j in self.class_indexes.keys():
            if class_i == class_j:
                continue
            if class_j > class_i:
                # Calculating only one pair of the classes
                class_pairs.append((class_i, class_j))
                required_data.append((class_i, class_j, self.ld_training_data,
                                      self.class_indexes, self.ld_sigma_inv))

    pool = Pool(processes=8)  # start 8 worker processes
    # chernoff_distance_matrices = pool.map(multiprocessing_fast_chernoff_distance, required_data)
    chernoff_distance_matrices = pool.map(shared_variable_multiprocessing_fast_chernoff_distance, class_pairs)

    total_chernoff_distance = 0
    for idx, class_pair in enumerate(class_pairs):
        class_i, class_j = class_pair
        self.pre_calculated_chernoff_distance_matrix[class_i, class_j] = chernoff_distance_matrices[idx]
        self.pre_calculated_chernoff_distance_matrix[class_j, class_i] = chernoff_distance_matrices[idx]
        total_chernoff_distance += float(sum(sum(chernoff_distance_matrices[idx], axis=1), axis=0))

    Dataset.pre_calculated_chernoff_distance_matrix = self.pre_calculated_chernoff_distance_matrix
    return 2*sum(total_chernoff_distance)*(1.0/self.training_data.shape[1])
def threshold(X, e, a, b, k, num_workers, metric):
    """
    Get all threshold clusters (algorithm 7, lines 1-6)
    :param X: Data matrix
    :param e: lower bound on fractional size of each cluster
    :param a: lower bound on fractional size of a set inside own cluster for which stability holds
    :param b: lower bound on fractional size of a set outside own cluster for which stability holds
    :param k: Number of clusters
    :param num_workers: Number of workers
    :param metric: metric is in the set {avg, min, max}
    :return: Threshold clusters
    """
    print("Populating list with all threshold clusters with metric:", metric)
    start = time.time()
    n = len(X)
    minsize = int(e * n)
    with Pool(num_workers) as pool:
        func = partial(get_thresholds, X, minsize, num_workers, metric)
        items = pool.map(func, range(n))
        pool.close()
        pool.join()
    threshold_lists = [item[0] for item in items]
    L = [item for sublist in threshold_lists for item in sublist]
    D = dict([(item[1], item[2]) for item in items])
    end = time.time()
    print("Length of L = ", len(L))
    print("time = {0:.2f}s".format(end - start))
    return refine(L, X, D, e, a, b, k, num_workers, metric)
def count_intersect(self, threshold, frequency=True):
    self.counts = OrderedDict()
    self.rlen, self.qlen = {}, {}
    self.nalist = []

    if frequency:
        self.frequency = OrderedDict()

    # if self.mode_count == "bp":
    #     print2(self.parameter, "\n{0}\t{1}\t{2}\t{3}\t{4}".format("Reference", "Length(bp)", "Query", "Length(bp)", "Length of Intersection(bp)"))
    # elif self.mode_count == "count":
    #     print2(self.parameter, "\n{0}\t{1}\t{2}\t{3}\t{4}".format("Reference", "sequence_number", "Query", "sequence_number", "Number of Intersection"))

    for ty in self.groupedreference.keys():
        self.counts[ty] = OrderedDict()
        self.rlen[ty], self.qlen[ty] = OrderedDict(), OrderedDict()
        if frequency:
            self.frequency[ty] = OrderedDict()

        for r in self.groupedreference[ty]:
            if r.total_coverage() == 0 and len(r) > 0:
                self.nalist.append(r.name)
                continue
            else:
                self.counts[ty][r.name] = OrderedDict()
                if self.mode_count == "bp":
                    rlen = r.total_coverage()
                elif self.mode_count == "count":
                    rlen = len(r)
                self.rlen[ty][r.name] = rlen

                mp_input = []
                for q in self.groupedquery[ty]:
                    if r.name == q.name:
                        continue
                    else:
                        mp_input.append([q, self.nalist, self.mode_count, self.qlen, threshold,
                                         self.counts, frequency, self.frequency, ty, r])
                # q, nalist, mode_count, qlen_dict, threshold, counts, frequency, self_frequency, ty, r
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() - 1)
                mp_output = pool.map(mp_count_intersect, mp_input)
                pool.close()
                pool.join()

                # qname, nalist, qlen_dict[ty][q.name], counts[ty][r.name][q.name], self_frequency[ty][q.name].append(c[2])
                for output in mp_output:
                    if output[1]:
                        self.nalist.append(output[1])
                    else:
                        self.qlen[ty][output[0]] = output[2]
                        self.counts[ty][r.name][output[0]] = output[3]
                        # print(r.name)
                        # print(output[0])
                        # print(output[3])
                        try:
                            self.frequency[ty][output[0]][r.name] = output[3][2]
                        except:
                            self.frequency[ty][output[0]] = {}
                            self.frequency[ty][output[0]][r.name] = output[3][2]
def superheadhunter(filelist, keys, defaults=[], **kw):
    # TODO: BENCHMARK! Nchunks, Nfiles
    # TODO: OPTIMIZE?
    '''Headhunter looped over a list of files.'''

    Nchunks = kw.get('Nchunks', 25)
    with_parent = kw.get('with_parent', False)
    return_type = kw.get('return_type', 'list')

    hunt = functools.partial(headhunter,
                             keys=keys,
                             Nchunks=Nchunks,
                             return_type='raw',
                             with_parent=False)

    pool = Pool()
    raw = pool.map(hunt, filelist)
    pool.close()
    # pool.join()

    # Flatten the twice nested list of string matches (this is the fastest way of doing this!!)
    results = []
    for r in raw:
        results.extend(r)

    return merger(results, keys, defaults, return_type)
def refine(L, X, D, e, a, b, k, num_workers, metric):
    """
    Throw out bad points (algorithm 7, lines 7-17)
    :param L: List of subsets
    :param X: Data matrix
    :param D: dictionary
    :param e: lower bound on fractional size of each cluster
    :param a: lower bound on fractional size of a set inside own cluster for which stability holds
    :param b: lower bound on fractional size of a set outside own cluster for which stability holds
    :param k: Number of clusters
    :param num_workers: Number of workers
    :param metric: metric is in {avg, max, min}
    :return: Refined clusters
    """
    print("Getting rid of bad points")
    print("Length of L at start = ", len(L))
    start = time.time()
    n = len(X)
    T = int((e - 2 * a - b * k) * n)
    t = int((e - a) * n)
    with Pool() as pool:
        func = partial(refine_individual, D, T, t)
        L = pool.map(func, L)
        pool.close()
        pool.join()
    end = time.time()
    print("Length of L on end = ", len(L))
    print("time = {0:.2f}s".format(end - start))
    return grow(L, X, a, num_workers, metric)
def _CompileDeps(aapt_path, dep_subdirs, temp_dir):
    partials_dir = os.path.join(temp_dir, 'partials')
    build_utils.MakeDirectory(partials_dir)
    partial_compile_command = [
        aapt_path + '2',
        'compile',
        # TODO(wnwen): Turn this on once aapt2 forces 9-patch to be crunched.
        # '--no-crunch',
    ]
    pool = multiprocessing.pool.ThreadPool(10)

    def compile_partial(directory):
        dirname = os.path.basename(directory)
        partial_path = os.path.join(partials_dir, dirname + '.zip')
        compile_command = (partial_compile_command +
                           ['--dir', directory, '-o', partial_path])
        build_utils.CheckOutput(compile_command)

        # Sorting the files in the partial ensures deterministic output from the
        # aapt2 link step which uses order of files in the partial.
        sorted_partial_path = os.path.join(partials_dir, dirname + '.sorted.zip')
        _SortZip(partial_path, sorted_partial_path)

        return sorted_partial_path

    partials = pool.map(compile_partial, dep_subdirs)
    pool.close()
    pool.join()
    return partials
def test_no_thread_pool():
    pool = xmon_stepper.ThreadlessPool()
    result = pool.map(lambda x: x + 1, range(10))
    assert result == [x + 1 for x in range(10)]
    # No ops.
    pool.terminate()
    pool.join()
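# A minimal sketch of what a ThreadlessPool like the one exercised above might
# look like -- an assumption for illustration, not the library's actual
# implementation: map() is evaluated serially in the calling thread, and
# terminate()/join() are no-ops so the object can stand in for a Pool.
class ThreadlessPool(object):
    def map(self, func, iterable):
        return [func(x) for x in iterable]

    def terminate(self):
        pass

    def join(self):
        pass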
def _ConvertToWebP(webp_binary, png_files, path_info):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path_tuple):
        png_path, original_dir = png_path_tuple
        # No need to add an extension, android can load images fine without them.
        webp_path = os.path.splitext(png_path)[0]
        args = [
            webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
            '-lossless', '-o', webp_path
        ]
        subprocess.check_call(args)
        os.remove(png_path)
        path_info.RegisterRename(
            os.path.relpath(png_path, original_dir),
            os.path.relpath(webp_path, original_dir))

    pool.map(
        convert_image,
        [f for f in png_files if not _PNG_WEBP_BLACKLIST_PATTERN.match(f[0])])
    pool.close()
    pool.join()
def work(num_procs):
    print("Creating %i (daemon) workers and jobs in child." % num_procs)
    pool = multiprocessing.Pool(num_procs)

    result = pool.map(sleepawhile,
                      [randint(1, 5) for x in range(num_procs)])

    # The following is not really needed, since the (daemon) workers of the
    # child's pool are killed when the child is terminated, but it's good
    # practice to cleanup after ourselves anyway.
    pool.close()
    pool.join()
    return result
def build_container_images(self):
    """Build the Spinnaker packages as container images."""
    subsystems = [comp for comp in SUBSYSTEM_LIST if comp != 'spinnaker']
    subsystems.append('spinnaker-monitoring')

    if self.__options.container_builder:
        weighted_processes = self.__options.cpu_ratio * multiprocessing.cpu_count()
        pool = multiprocessing.pool.ThreadPool(
            processes=int(max(1, weighted_processes)))
        pool.map(self.__do_container_build, subsystems)

    if self.__build_failures:
        if set(self.__build_failures).intersection(set(subsystems)):
            raise RuntimeError('Builds failed for {0!r}'.format(
                self.__build_failures))
        else:
            print 'Ignoring errors on optional subsystems {0!r}'.format(
                self.__build_failures)
    return
def sampleRawData(self):
    print("Start sampling market data...")
    args = []
    for date in self._dates:
        args.append((self._symbol, self._colo, date))
    start_time = time.time()
    pool = MyPool(NUM_THREADS)
    self._md_dfs = pool.map(_getRawWrapper, args)
    pool.close()
    pool.join()
    print("Sampling finished!")
    print("Sampling time: " + str(time.time() - start_time) + " s")
def _getBehaviors(self, mat):
    ''' Extracting other people's behavior '''
    print("Extracting other people's behaviors...")
    args = [(mat[i+LOOK_BACK], mat[i+LOOK_BACK+1]) for i in range(len(mat) - self._cutoff)]
    start_time = time.time()
    pool = MyPool(NUM_THREADS)
    targets = pool.map(_getLabelsWrapper, args)
    pool.close()
    pool.join()
    print("Extracting finished!")
    print("Extracting time: " + str(time.time() - start_time) + " s")
    return targets
def _ConvertToWebP(webp_binary, png_files):
    pool = multiprocessing.pool.ThreadPool(10)

    def convert_image(png_path):
        root = os.path.splitext(png_path)[0]
        webp_path = root + '.webp'
        args = [
            webp_binary, png_path, '-mt', '-quiet', '-m', '6', '-q', '100',
            '-lossless', '-o', webp_path
        ]
        subprocess.check_call(args)
        os.remove(png_path)

    # Android requires pngs for 9-patch images.
    # Daydream (*.dd) requires pngs for icon files.
    pool.map(convert_image, [
        f for f in png_files
        if not (f.endswith('.9.png') or f.endswith('.dd.png'))
    ])
    pool.close()
    pool.join()
def main():
    clear()
    println(u'''\
===============================================================
  GoProxy server deployment tool, about to upload the %s app folder
  Linux/Mac users: please run "python uploader.py" to upload the app
===============================================================

Please enter your appid; separate multiple appids with the | character.

Note: the appid must not contain personal information such as an ID or Email!
'''.strip() % GAE_DIR)
    if not os.path.isdir(CACHE_DIR):
        os.mkdir(CACHE_DIR)
    appids = input_appids()
    retry_upload(4, GAE_DIR, appids[0])
    pool = multiprocessing.pool.ThreadPool(processes=50)
    pool.map(functools.partial(retry_upload, 4, GAE_DIR), appids[1:])
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    println(os.linesep + u'Upload finished. Please check the deployed version at http://<appid>.appspot.com, thank you. Press Enter to exit.')
    raw_input()
def build_packages(self):
    """Build all the Spinnaker packages."""
    if self.__options.build:
        # Build in parallel using half available cores
        # to keep load in check.
        pool = multiprocessing.pool.ThreadPool(processes=int(max(
            1, self.__options.cpu_ratio * multiprocessing.cpu_count())))
        pool.map(self.__do_build, SUBSYSTEM_LIST)

    if self.__build_failures:
        raise RuntimeError('Builds failed for {0!r}'.format(
            self.__build_failures))

    # Copy subsystem packages.
    processes = []
    for subsys in SUBSYSTEM_LIST:
        processes.append(self.start_copy_debian_target(subsys))

    print 'Waiting for package copying to finish....'
    for p in processes:
        p.check_wait()
def scan(pool, ip_list):
    """ @return [(ip,mac,rtt),] """
    ping_results = []

    def ping_warp(ip):
        ping_results.append(ping(ip))

    try:
        pool.map(ping_warp, ip_list)
    except Exception:
        logging.exception("Scan Error")
    ping_results = [result for result in ping_results if result[1]]
    arp_table = get_arp_table()
    result = [(ip, arp_table[ip], rtt) for ip, rtt in ping_results if ip in arp_table]
    return result
def activation(self, init_input: np.ndarray, training: bool = False) -> np.ndarray:
    activation_values = init_input
    for layer in self.network:
        pool = multiprocessing.pool.ThreadPool(processes=len(layer))
        activation_values = pool.map(
            lambda s_p: s_p.activation(activation_values, training=training), layer)
        activation_values = np.transpose(np.asarray(activation_values))
    return activation_values
def parse_from(self, handle, threads=None):
    # Load the OBO graph into a syntax tree using fastobo
    doc = fastobo.load_graph(handle).compact_ids()

    # Extract metadata from the OBO header
    with typechecked.disabled():
        self.ont.metadata = self.extract_metadata(doc.header)

    # Resolve imported dependencies
    self.ont.imports.update(
        self.process_imports(
            self.ont.metadata.imports,
            self.ont.import_depth,
            os.path.dirname(self.ont.path or str()),
            self.ont.timeout,
            threads=threads,
        ))

    # Merge lineage cache from imports
    self.import_lineage()

    # Extract frames from the current document.
    with typechecked.disabled():
        try:
            with multiprocessing.pool.ThreadPool(threads) as pool:
                pool.map(self.extract_entity, doc)
        except SyntaxError as err:
            location = self.ont.path, err.lineno, err.offset, err.text
            raise SyntaxError(err.args[0], location) from None

    # OBOJSON can define classes implicitly using only `is_a` properties
    # mapping to unresolved identifiers: in this case, we create the
    # term ourselves
    for lineage in list(self.ont._terms.lineage.values()):
        for superclass in lineage.sup.difference(self.ont._terms.lineage):
            self.ont.create_term(superclass)

    # Update lineage cache
    self.symmetrize_lineage()
def _execute_query(self) -> Any:
    details_chunk_size = self.conf.get_int(AtlasSearchDataExtractor.ATLAS_DETAILS_CHUNK_SIZE_KEY)
    process_pool_size = self.conf.get_int(AtlasSearchDataExtractor.PROCESS_POOL_SIZE_KEY)

    guids = []

    approximate_count = self._get_approximate_count_of_entities()
    LOGGER.info(f'Received count: {approximate_count}')

    if approximate_count > 0:
        offsets = [i * self.search_chunk_size
                   for i in range(int(approximate_count / self.search_chunk_size) + 1)]
    else:
        offsets = []

    with multiprocessing.pool.ThreadPool(processes=process_pool_size) as pool:
        guid_list = pool.map(self._get_entity_guids, offsets, chunksize=1)

    for sub_list in guid_list:
        guids += sub_list

    LOGGER.info(f'Received guids: {len(guids)}')

    if guids:
        guids_chunks = AtlasSearchDataExtractor.split_list_to_chunks(guids, details_chunk_size)

        with multiprocessing.pool.ThreadPool(processes=process_pool_size) as pool:
            return_list = pool.map(self._get_entity_details, guids_chunks)

        for sub_list in return_list:
            for entry in sub_list:
                yield entry
def get_imgurls(keyword, keyword_url, keyword_file):
    logging.info("keyword: %s\n"
                 "keyword_url: %s\n"
                 "keyword_file: %s", keyword, keyword_url, keyword_file)
    pages = int(
        re.search(
            r"page 1 of ([0-9]+)",
            requests.get(
                "https://www.zerochan.net/" + keyword_url,
                headers={"cookie": COOKIES},
            ).text,
        ).group(1))
    logging.info("total %s pages", pages)
    # use manager to communicate between parent and child
    imgurl_mgr = multiprocessing.Manager()
    imgurl_queue = imgurl_mgr.Queue()
    jobs = []
    for i in range(pages):
        jobs.append((i + 1, keyword, keyword_url, keyword_file, imgurl_queue))
    pool = multiprocessing.pool.ThreadPool(os.cpu_count() * THREAD_PER_CPU)
    pool.map(get_imgurl_worker, jobs)
    try:
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('KeyboardInterrrrrrrrrupt!')
        pool.terminate()
        pool.join()
    res = []
    while not imgurl_queue.empty():
        res.append(imgurl_queue.get())
    return list(set(res))
def run_iteration(fRanges, Xs, n_random, n_main_freqs, fixed_mask_freqs, imfUseInds,
                  sample_rate, freqs0, mask_args, lossFunc, nprocesses):
    pool = MyPool(nprocesses)
    args = (Xs, fRanges, freqs0, n_main_freqs, fixed_mask_freqs, mask_args,
            imfUseInds, sample_rate, lossFunc, nprocesses)
    it_outputs = pool.map(run_subIteration, [args for i in range(n_random)])
    pool.close()
    pool.join()
    it_mask_freqs = np.row_stack([it_outputsi[0] for it_outputsi in it_outputs])
    it_mix_scores = np.row_stack([it_outputsi[1] for it_outputsi in it_outputs])
    return it_mask_freqs, it_mix_scores
def test_step(z):
    b = queue.get()
    pool = multiprocessing.Pool(processes=args.processes)
    rowNum = [(i, b) for i in range(len(b))]
    b_next = pool.map(x, rowNum)
    print(f'Time step #{z+1}')
    for i in range(len(b_next)):
        for j in range(len(b_next[i])):
            print(b_next[i][j], end="")
        print(end='\n')
    print(end='\n')
    queue.put(b_next)
    return b_next
def test_process_pool_map():
    pool = multiprocessing.Pool(processes=multiprocessing.pool.cpu_count())
    print('Starting run at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    tot = 0
    iter_cnt = 10000
    proc_cnt = 16
    for j in xrange(iter_cnt):
        dl = [XY(1.1111113 + i, 2.133 + j) for i in xrange(proc_cnt)]
        sqr = pool.map(square, dl)
        tot += sum(sqr)
    print(tot)
    print('Ending run at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
def registerAggregates(aggStores, cert_path, urn_to_urls_map, ip, config):
    """
    Function to register aggregates in the database.
    :param aggStores: a json dictionary object corresponding to the "aggregatestores"
        entry of the opsconfig json.
    :param cert_path: the path to the collector certificate used to retrieve the
        aggregate manager characteristics from the aggregate data store.
    :param urn_to_urls_map: Map of aggregate manager URN to AM API URLs.
    :param ip: instance of the InfoPopulator object used to populate the DB
    """
    ops_agg_schema = ip.tbl_mgr.schema_dict["ops_aggregate"]
    # TODO parameterize these
    agg_schema_str = BASE_SCHEMA_URL + "/aggregate#"

    version_filename = top_path + "/VERSION"
    try:
        version_file = open(version_filename)
        monitoring_version = version_file.readline().strip()
        version_file.close()
    except Exception as e:
        ip.tbl_mgr.logger.warning(
            "Could not read monitoring version from file %s: %s" % (version_filename, str(e)))
        monitoring_version = "unknown"

    extck_measRef = ip.extckStoreBaseUrl + "/data/"

    myLock = multiprocessing.Lock()
    argsList = []
    for aggregate in aggStores:
        amtype = aggregate['amtype']
        urn = aggregate['urn']
        args = (cert_path, urn_to_urls_map, ip, amtype, urn, ops_agg_schema, agg_schema_str,
                monitoring_version, extck_measRef, aggregate, myLock)
        argsList.append(args)

    pool = multiprocessing.pool.ThreadPool(
        processes=int(config.get_populator_pool_size()))
    pool.map(registerOneAggregate, argsList)
def do_benchmarking(opts, data):
    size_mb = len(data) / 1024. / 1024
    mechanism = 'serial evaluation(s)'
    if opts.threads:
        mechanism = "thread(s)"
    elif opts.processes or opts.processes_copy:
        mechanism = "process(es)"

    print("Using python: %s" % ('.'.join(map(str, sys.version_info[:3])),))
    print("Checksuming %.2f [MB] using %d %s" % (size_mb, opts.number_tasks, mechanism))
    print("Checksum methods to be tested:")
    for variant, info in checksums.items():
        print(' * %s: %s' % (variant, info.name))

    pool = _get_pool(opts, data, size_mb)

    print("")
    print("Algo     Chksum   Chksum(int) BufSize Speed [MB/s]         Time [s]       Setup Time [s]")
    print("======== ======== =========== ======= ==================== ============== ==============")
    for variant, bufsize_log2 in itertools.product(checksums, list(range(9, 21)) + [0]):

        # bufsize = 0 causes the whole buffer to be processed in one go
        bufsize = 2**bufsize_log2 if bufsize_log2 > 0 else 0

        # Go, go, go!
        _checksum_data = _get_checksum_function(opts, data, bufsize, variant)
        t0 = time.time()
        res = pool.map(_checksum_data, range(opts.number_tasks))

        crcs = [x[0] for x in res]
        setup_times = [x[1] - t0 for x in res]
        times = [x[2] for x in res]
        speeds = [size_mb / t for t in times]
        if not all(x == crcs[0] for x in crcs):
            raise Exception("Different checksum results obtained for %s: %r" % (variant, crcs))

        crc = crcs[0]
        mean_time, stddev_time = mean_and_stddev(times)
        mean_setup, stddev_setup = mean_and_stddev(setup_times)
        mean_speed, stddev_speed = mean_and_stddev(speeds)
        args = (variant, crc & 0xffffffff, crc, bufsize,
                mean_speed, stddev_speed, mean_time, stddev_time, mean_setup, stddev_setup)
        print(u"%-8s %08x %11d %-7d %9.3f \u00b1 %8.3f %6.3f \u00b1 %5.3f %6.3f \u00b1 %5.3f" % args)
def __init__(self, directory, image_data_generator,
             batch_size=32, classes=None, follow_links=False):
    self.directory = directory
    self.image_data_generator = image_data_generator
    self.classes = classes

    white_list_formats = {'png', 'jpg', 'jpeg', 'bmp'}

    self.samples = 0

    if not classes:
        classes = []
        for subdir in sorted(os.listdir(directory)):
            if os.path.isdir(os.path.join(directory, subdir)):
                classes.append(subdir)
    self.num_classes = len(classes)
    self.class_indices = dict(zip(classes, range(len(classes))))

    pool = multiprocessing.pool.ThreadPool()
    function_partial = partial(_count_valid_files_in_directory,
                               white_list_formats=white_list_formats,
                               follow_links=follow_links)
    self.samples = sum(
        pool.map(function_partial,
                 (os.path.join(directory, subdir) for subdir in classes)))

    print('Found {} images belonging to {} classes'.format(
        self.samples, self.num_classes))

    results = []
    self.filenames = []
    self.classes = [2 % 2 for i in range(self.samples)]
    i = 0
    for dirpath in (os.path.join(directory, subdir) for subdir in classes):
        results.append(
            pool.apply_async(_list_valid_filenames_in_directory,
                             (dirpath, white_list_formats,
                              self.class_indices, follow_links)))
    for res in results:
        classes, filenames = res.get()
        self.classes[i:i + len(classes)] = classes
        self.filenames += filenames
        i += len(classes)
    pool.close()
    pool.join()
    super(DirectoryIterator, self).__init__(self.samples, batch_size)
def post_multi(senders, reqs_per_sender=100):
    print("Running POST multi on {}...".format(url))
    test_form = {
        'id': '12345678',
        'name': 'Tester Testman',
        'to_address': 'test@test',
        'email_type': 'submitted',
        'from_class': '01',
        'to_class': '02',
        'unit': 'TST1234',
        'class_type': 'Laboratory',
    }
    pool = multiprocessing.pool.ThreadPool(senders)
    run = True
    time_queue = queue.PriorityQueue()

    def todo(calls):
        max_time = 0
        while run and calls > 0:
            init_time = time.time()
            try:
                response = requests.post(url, test_form)
                if response.status_code != 200:
                    print("?", end='', flush=True)
                elif calls % 10 == 0:
                    print(".", end='', flush=True)
            except ConnectionError:
                print("!", end='', flush=True)
            finally:
                end_time = time.time() - init_time
                if end_time > max_time:
                    max_time = end_time
                calls -= 1
        print("#", end='', flush=True)
        time_queue.put((-max_time, max_time))

    pool.map(todo, [reqs_per_sender] * senders)
    print("\nMax Time: {}ms".format(round(time_queue.get()[1] * 1000)))
def generate_multithread_evtime(self, time_width, path):
    print("inside display synced frame")
    path_p = os.path.join(path, 'pos')
    path_n = os.path.join(path, 'neg')
    path_ef = os.path.join(path, 'events')
    # try:
    #     os.makedirs(path_ef)
    # except OSError:
    #     if not os.path.isdir(path_ef):
    #         raise
    path_image = os.path.join(path, 'images.txt')
    # img_file = open(path_image,'r')

    def evtime_image(line):
        junk, frame_id = line.split('/')
        cnt = 0
        cnt = int(frame_id[6:-4])
        inner_list = [elt.strip() for elt in line.split(' ')]
        # print(inner_list[0])
        frame_start = float(inner_list[0])
        frame_end = float(inner_list[0]) + time_width
        # print("frame start, frame_end", frame_start, frame_end)
        frame_data = self.data[(self.data.ts >= frame_start) & (self.data.ts < frame_end)]
        frame_data.ts = (frame_data.ts - frame_data.ts[0])
        img_p = self.get_frame_positive(frame_data)
        # cv2.imwrite(os.path.join(path_p, 'pos_'+str(cnt)+'.jpg'), img_p)
        img_n = self.get_frame_negative(frame_data)
        # cv2.imwrite(os.path.join(path_n, 'neg_'+str(cnt)+'.jpg'), img_n)
        img_t = self.get_time_frame(frame_data, time_width)
        event_time_frame = cv2.merge((img_p, img_n, img_t))
        cv2.imwrite(os.path.join(path_ef, 'event_'+str(cnt)+'.png'), event_time_frame)

    imagesList = [line.rstrip('\n') for line in open(path_image)]
    pool = ThreadPool(16)
    pool.map(evtime_image, imagesList)
def get_all_datapoints(metrics, service):
    if service == c.SERVICE_TYPE_EC2:
        function = ec2pool
    elif service == c.SERVICE_TYPE_ELB:
        function = elbpool
    elif service == c.SERVICE_TYPE_RDS:
        function = rdspool
    else:
        return
    account_name = sys.argv[1]
    pool = multiprocessing.Pool(c.POOL_DICTIONARY.get(account_name))
    datalist = pool.map(function, metrics)
    filtered_list = filter(func.exists, datalist)
    return filtered_list
def trans_para(src_lines, src_lang, dst_lang, flag=True):
    threads = 35
    if len(src_lines) < 35:
        threads = len(src_lines)
    pool = ThreadPool(threads)
    to_lines = pool.map(handle, src_lines)
    pool.close()
    pool.join()
    if flag:
        new_pool = ThreadPool(threads)
        to_lines = new_pool.map(handle_dot, to_lines)
        new_pool.close()
        new_pool.join()
    return to_lines
def __call__(self, net_lists):
    evaluations = np.zeros(len(net_lists))
    for i in np.arange(0, len(net_lists), self.gpu_num):
        process_num = np.min((i + self.gpu_num, len(net_lists))) - i
        pool = NoDaemonProcessPool(process_num)
        arg_data = [(cnn_eval, net_lists[i + j], j, self.epoch_num, self.batchsize,
                     self.dataset, self.verbose, self.imgSize) for j in range(process_num)]
        evaluations[i:i + process_num] = pool.map(arg_wrapper_mp, arg_data)
        pool.terminate()
    return evaluations
def directory_processor(basedir, targetdir, software, protocol, processors, padding, additional):
    if processors is None:
        processors = 1
        print("Could not detect number of CPU cores. Only 1 processor will be used.")

    if additional is not None:
        additionalFormated = ' '.join(f'-{additional[i]} {additional[i+1]}'
                                      for i in range(0, len(additional), 2))
    else:
        additionalFormated = ''

    subdirs = [subdir for subdir in os.listdir(targetdir)]
    lendir = len(subdirs)

    # pick number of workers....
    worker_count = min(processors, lendir)

    with multiprocessing.Pool(worker_count) as pool:
        pool.map(functools.partial(_directory_worker, basedir, targetdir, software,
                                   protocol, padding, additionalFormated),
                 subdirs, chunksize=int(lendir / worker_count))
def start_multiprocess_obj(func_name, params, debug=False, verbose=False, nb_cpus=None):
    """
    # TODO: broken for n_cpus > 1: `TypeError: can't pickle _thread.RLock objects`

    Parameters
    ----------
    func_name : str
    params : List[List]
        each element in params must be object with attribute func_name
        (+ optional: kwargs)
    debug : boolean
    verbose : bool
    nb_cpus : int

    Returns
    -------
    result: List
        list of function returns
    """
    if nb_cpus is None:
        nb_cpus = cpu_count()

    if debug:
        nb_cpus = 1

    nb_cpus = min(nb_cpus, len(params), cpu_count())

    if nb_cpus > 1:
        log_mp.warning('`start_multiprocess_imap` is broken for n_cpus > 1:'
                       ' `TypeError: cant pickle _thread.RLock objects`')
        nb_cpus = 1

    if verbose:
        log_mp.debug("Computing %d parameters with %d cpus." % (len(params), nb_cpus))

    for el in params:
        el.insert(0, func_name)

    start = time.time()
    if nb_cpus > 1:
        pool = MyPool(nb_cpus)
        result = pool.map(multi_helper_obj, params)
        pool.close()
        pool.join()
    else:
        result = list(map(multi_helper_obj, params))

    if verbose:
        log_mp.debug("Time to compute: {:.1f} min".format((time.time() - start) / 60.))

    return result
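# A hypothetical usage sketch for start_multiprocess_obj above. The Counter
# class is invented for illustration, and the expected results assume that
# multi_helper_obj calls getattr(obj, func_name)(**kwargs) on each entry,
# which is not shown in the snippet.
class Counter(object):
    def __init__(self, start):
        self.start = start

    def increment(self):
        return self.start + 1

params = [[Counter(i)] for i in range(4)]
results = start_multiprocess_obj('increment', params, nb_cpus=1)
# results == [1, 2, 3, 4]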
def script():
    max_pool_tasks = 3

    print("Creating 5 (non-daemon) workers and jobs in main process.")

    max_tasks = len(lst_parameters_change)
    if max_pool_tasks > max_tasks:
        max_pool_tasks = max_tasks

    for i in np.arange(0, 10, max_pool_tasks):
        range_ini_task = i
        range_end_task = i + max_pool_tasks
        if range_end_task >= max_tasks:
            range_end_task = max_tasks

        pool = NoDaemonPool(max_pool_tasks)
        pool.map(work, range(range_ini_task, range_end_task))
        pool.close()
        pool.join()

        # Finish
        if range_end_task >= max_tasks:
            break
def Execute(self, thread_count):
    """Runs the migration procedure.

    Args:
      thread_count: A number of threads to execute the migration with.

    Raises:
      AssertionError: If not all clients have been migrated.
      ValueError: If the relational database backend is not available.
    """
    if not data_store.RelationalDBWriteEnabled():
        raise ValueError("No relational database available.")

    sys.stdout.write("Collecting clients...\n")
    client_urns = _GetClientUrns()

    sys.stdout.write("Clients to migrate: {}\n".format(len(client_urns)))
    sys.stdout.write("Threads to use: {}\n".format(thread_count))

    self._total_count = len(client_urns)
    self._migrated_count = 0

    batches = utils.Grouper(client_urns, _CLIENT_BATCH_SIZE)

    self._Progress()
    pool = multiprocessing.pool.ThreadPool(processes=thread_count)
    pool.map(self._MigrateBatch, list(batches))
    self._Progress()

    if self._migrated_count == self._total_count:
        message = "\nMigration has been finished (migrated {} clients).\n".format(
            self._migrated_count)
        sys.stdout.write(message)
    else:
        message = "Not all clients have been migrated ({}/{})".format(
            self._migrated_count, self._total_count)
        raise AssertionError(message)
def mix_multi(senders, reqs_per_sender=100):
    print("Running MIX QA on {}...".format(url))
    pool = multiprocessing.pool.ThreadPool(senders)
    run = True
    time_queue = queue.PriorityQueue()
    avg_queue = queue.Queue()
    args = None

    def todo(calls):
        max_time = 0
        while run and calls > 0:
            init_time = time.time()
            try:
                response = requests.get(url, args)
                if response.status_code != 200:
                    print(response.json())
                    print("?", end='', flush=True)
                elif calls % 10 == 0:
                    print(".", end='', flush=True)
            except ConnectionError:
                print("!", end='', flush=True)
            finally:
                end_time = time.time() - init_time
                if end_time > max_time:
                    max_time = end_time
                calls -= 1
                avg_queue.put(end_time)
        print("#", end='', flush=True)
        time_queue.put((-max_time, max_time))

    pool.map(todo, [reqs_per_sender] * senders)

    avg_sum = 0
    avg_len = avg_queue.qsize()
    while not avg_queue.empty():
        avg_sum += avg_queue.get()

    print("\nMax Time: {}ms\nAvg Time: {}ms".format(
        round(time_queue.get()[1] * 1000),
        round((avg_sum / avg_len) * 1000)))
def __call__(self, chunksize):
    chunksize = int(chunksize)

    cur = self.con.cursor()

    # Delete unreferenced LibraryFileContent entries.
    cur.execute(
        """
        DELETE FROM LibraryFileContent
        USING (
            SELECT content FROM UnreferencedLibraryFileContent
            WHERE id BETWEEN %s AND %s) AS UnreferencedLibraryFileContent
        WHERE
            LibraryFileContent.id = UnreferencedLibraryFileContent.content
        """, (self.index, self.index + chunksize - 1))
    rows_deleted = cur.rowcount
    self.total_deleted += rows_deleted
    self.con.commit()

    # Remove files from disk. We do this outside the transaction,
    # as the garbage collector happily deals with files that exist
    # on disk but not in the DB.
    cur.execute(
        """
        SELECT content FROM UnreferencedLibraryFileContent
        WHERE id BETWEEN %s AND %s
        """, (self.index, self.index + chunksize - 1))
    pool = multiprocessing.pool.ThreadPool(10)
    try:
        pool.map(self.remove_content, (row[0] for row in cur.fetchall()))
    finally:
        pool.close()
        pool.join()
    self.con.rollback()

    self.index += chunksize