def _get_results_by_threading(self, func, params):
    """Query the GitHub API with multithreading.

    Returns a list containing all results.
    """
    num_workers = self.num_workers
    if func.__name__ not in ["multi_pulls", "multi_commits", "multi_watchers"]:
        num_workers = 1
    if self.debug_counts:
        p = ThPool(num_workers)
        pool_args = params[:self.debug_counts]
        return p.map(func, pool_args)
    else:
        stats = []
        start = time.time()
        for i in range(int(params.totalCount / self.batch_size) + 1):
            # Back off periodically to stay under the API rate limit.
            if self.num_workers != 1 and i != 0 and (i + 1) * self.batch_size % 800 == 0:
                print("Sleep 30 sec")
                sleep(30)
            p = ThPool(num_workers)
            temp = p.map(func, params[i * self.batch_size:(i + 1) * self.batch_size])
            stats += temp
        print(f"{self.repo_name}, {func.__name__} takes: {round(time.time() - start, 3)} secs")
        return stats
def run(self, *args):
    """Kick off the program."""
    self.add_args()
    if len(args) > 0:
        program_args = self.parser.parse_args(args)
    else:
        program_args = self.parser.parse_args()

    # set up the mongoDB connection
    mongo_connection = GritsMongoConnection(program_args)

    # confirm the user wants to apply the indexes
    confirm = True
    if not program_args.force:
        confirm = self.query_yes_no("This will lock the database. Are you sure?", "no")

    if confirm:
        # ensure that the indexes are applied to the collections
        pool = ThreadPool(nodes=1)
        results = pool.amap(mongo_connection.ensure_indexes, [None])
        while not results.ready():
            # command-line spinner
            for cursor in '|/-\\':
                sys.stdout.write('\b%s' % cursor)
                sys.stdout.flush()
                time.sleep(.25)
        sys.stdout.write('\b')
        sys.stdout.flush()
        # async poll is done, get the results
        result = results.get()
        logging.info(result)
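# A minimal, self-contained sketch of the amap()/ready() spinner pattern
# above, assuming only pathos is installed; slow_task stands in for
# ensure_indexes.
import sys
import time

from pathos.threading import ThreadPool

def slow_task(_):
    time.sleep(2)
    return "done"

pool = ThreadPool(nodes=1)
results = pool.amap(slow_task, [None])   # non-blocking: returns immediately
while not results.ready():               # poll while the worker runs
    for cursor in '|/-\\':
        sys.stdout.write('\b%s' % cursor)
        sys.stdout.flush()
        time.sleep(.25)
print('\b' + results.get()[0])           # get() fetches the finished result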
def GMM_Ineq_parall(Theta0, DATA_STRUCT, d_struct):
    Theta = {
        "comm_mu": Theta0[0],
        "priv_mu": Theta0[1],
        "epsilon_mu": Theta0[2],
        "comm_var": Theta0[3],
        "priv_var": Theta0[4],
        "epsilon_var": Theta0[5],
    }

    rng = np.random.RandomState(d_struct['rng_seed'])

    start = time.time()
    print('--------------------------------------------------------')
    print('current parameter set is:')
    print(Theta)

    '''
    parallel programming with two levels:
    separate the data, then run the estimation on each part
    '''
    data_n = len(DATA_STRUCT)
    work_pool = ThreadPool(nodes=data_n)

    cpu_num = multiprocessing.cpu_count()
    cpu_num_node = int((cpu_num - 1) / data_n)

    # use amap so that all data parts run together
    results = work_pool.amap(
        partial(para_data_allo_1, Theta, cpu_num_node, rng, d_struct),
        iter(DATA_STRUCT))
    work_pool.close()
    while not results.ready():
        time.sleep(5)
        print(".")
    # work_pool.join()
    auction_result = np.nanmean(list(results.get()))

    end = time.time()
    print("object value : " + str(auction_result))
    print("time spent in this loop: ")
    print(end - start)
    print('--------------------------------------------------------\n')

    ## save the parameters and objective value
    with open('para.txt', 'a+') as f:
        for item in Theta0:
            f.write("%f\t" % item)
        f.write("%f\t" % auction_result)
        # fixed: parenthesize the division before the string formatting
        f.write("%f\n" % ((end - start) / 60))

    return auction_result
def _get_results_by_threading(self, func, params):
    """Query the GitHub API with multithreading.

    Returns a list containing all results.
    """
    num_workers = self.num_workers
    if func.__name__ not in ["multi_pulls", "multi_commits", "multi_watchers"]:
        num_workers = 1
    stats = []
    start = time.time()
    # +1 so the final partial page is not silently dropped
    for i in range(len(params) // NUM_PER_PAGE + 1):
        # pdb.set_trace()
        # Sleep a random interval periodically to avoid the API rate limit.
        if self.num_workers != 1 and (i == 0 or (i + 1) * NUM_PER_PAGE % 400 == 0):
            sec = random.choice(range(10, 60))
            print("Sleep {} sec".format(sec))
            sleep(sec)
        p = ThPool(num_workers)
        temp = p.map(func, params[i * NUM_PER_PAGE:(i + 1) * NUM_PER_PAGE])
        stats += temp
    print(f"{self.repo_name}, {func.__name__} takes: {round(time.time() - start, 3)} secs")
    return stats
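# A minimal, self-contained sketch of the batched thread-pool map above,
# assuming pathos (the ThPool alias is an assumption); fetch_page stands in
# for the GitHub API call and PAGE for NUM_PER_PAGE.
import time

from pathos.pools import ThreadPool as ThPool

PAGE = 4

def fetch_page(item):
    time.sleep(0.1)  # simulate a network call
    return item * 2

params = list(range(10))
stats = []
for i in range(len(params) // PAGE + 1):
    batch = params[i * PAGE:(i + 1) * PAGE]
    if not batch:
        break
    stats += ThPool(2).map(fetch_page, batch)
print(stats)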
def begin_processing(self):
    pool = ThreadPool(nodes=Helper.config('threads'))
    for course in self.course_data:
        pool.map(self.download_lesson, course['lessons'])
        print('--- Course "{course_title}" has been downloaded, with a total of "{lessons_amount}" lessons.'
              .format(course_title=course['title'],
                      lessons_amount=len(course['lessons'])))
        time.sleep(Helper.config('sleep'))
def _split_variable(self):
    """Split by variable."""
    outputfiles = [
        self._define_outputfilename(var, self.years)
        for var in self.variables
    ]
    years = len(outputfiles) * [self.years]
    if not self.threads:
        pool = Pool()
    else:
        pool = Pool(nodes=self.threads)
    pool.map(self._getdata, self.variables, years, outputfiles)
def build(
    charm_list,
    layer_list,
    layer_index,
    charm_branch,
    layer_branch,
    resource_spec,
    filter_by_tag,
    to_channel,
    rebuild_cache,
):
    build_env = BuildEnv(build_type=BuildType.CHARM)
    build_env.db["build_args"] = {
        "artifact_list": charm_list,
        "layer_list": layer_list,
        "layer_index": layer_index,
        "charm_branch": charm_branch,
        "layer_branch": layer_branch,
        "resource_spec": resource_spec,
        "filter_by_tag": list(filter_by_tag),
        "to_channel": to_channel,
        "rebuild_cache": rebuild_cache,
    }
    build_env.pull_layers()

    entities = []
    for charm_map in build_env.artifacts:
        for charm_name, charm_opts in charm_map.items():
            if not any(match in filter_by_tag for match in charm_opts["tags"]):
                continue
            charm_entity = f"cs:~{charm_opts['namespace']}/{charm_name}"
            entities.append(
                BuildEntity(build_env, charm_name, charm_opts, charm_entity))
            click.echo(f"Queued {charm_entity} for building")

    def _run_build(build_entity):
        build_entity.setup()
        if not build_entity.has_changed:
            return
        build_entity.proof_build()
        build_entity.push()
        build_entity.attach_resource("unpublished")
        build_entity.promote(to_channel=to_channel)

    pool = ThreadPool()
    pool.map(_run_build, entities)
    build_env.save()
def _split_variable_yr(self):
    """Fetch variable split by variable and year."""
    outputfiles = []
    variables = []
    years = []
    for var in self.variables:
        # accumulate (rather than overwrite) one entry per (variable, year)
        # pair, so the three argument lists passed to map stay aligned
        outputfiles += [
            self._define_outputfilename(var, [yr]) for yr in self.years
        ]
        variables += len(self.years) * [var]
        years += [[yr] for yr in self.years]
    if not self.threads:
        pool = Pool()
    else:
        pool = Pool(nodes=self.threads)
    pool.map(self._getdata, variables, years, outputfiles)
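# A minimal sketch of the multi-iterable map used above: pathos pools zip
# several argument sequences into one worker call each (a ThreadPool is used
# here only to keep the sketch self-contained; getdata is illustrative).
from pathos.pools import ThreadPool as Pool

def getdata(variable, years, outputfile):
    return "%s: %s over %s" % (outputfile, variable, years)

variables = ["t2m", "tp"]
years = [[2000, 2001], [2000, 2001]]
outputfiles = ["t2m.nc", "tp.nc"]

pool = Pool(nodes=2)
print(pool.map(getdata, variables, years, outputfiles))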
def candle_df(candles, candleamount):
    print("candle_df")
    # iterate over rows with iterrows()
    cpool = ThreadPool()
    #for index, data in candles.tail(candleamount).iterrows():
    #    candle_df_thread(index, data)
    indices = candles.tail(candleamount).index.values.tolist()
    data = candles.tail(candleamount).values.tolist()
    results = cpool.uimap(candle_df_thread, indices, data)
    print("Computing candlestick dataframe for given params with candles multithreaded...")
    result = list(results)  # consuming the iterator blocks until all threads finish
    print(result)
    return result
def pull_layers(self):
    """Clone all downstream layers to be processed locally when doing charm builds."""
    layers_to_pull = []
    for layer_map in self.layers:
        layer_name = list(layer_map.keys())[0]
        if layer_name == "layer:index":
            continue
        layers_to_pull.append(layer_name)
    pool = ThreadPool()
    results = pool.map(self.download, layers_to_pull)
    self.db["pull_layer_manifest"] = list(results)
def __call__(
    self, in_paths: List[str], out_paths: List[str], mfcc=False, n_workers=2
) -> Tuple[List[str], List[str]]:
    def write_out(inp, out):
        try:
            if os.path.exists(out):
                print("Skipping:", out)
                return (out, True)
            tensors = self.model.from_path(
                inp, return_two=self.return_two, return_mfcc=mfcc
            )
            dir = os.path.dirname(out)
            if not os.path.exists(dir):
                os.mkdir(dir)
            torch.save(tensors, out)
            print("Success:", out)
            return (out, True)
        except Exception as e:
            print("Failure:", e, out)
            return (out, False)

    with ThreadPool(nodes=n_workers) as P:
        # materialize once: uimap returns a one-shot iterator, so iterating
        # it twice would leave the second comprehension empty
        results = list(P.uimap(write_out, in_paths, out_paths))
    successes = [path for path, res in results if res]
    failures = [path for path, res in results if not res]
    return successes, failures
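# A minimal sketch of the (path, success) bookkeeping pattern above, assuming
# only pathos; do_work stands in for write_out.
from pathos.threading import ThreadPool

def do_work(name):
    try:
        return (name, not name.startswith("bad"))
    except Exception:
        return (name, False)

with ThreadPool(nodes=2) as P:
    # materialize once so both comprehensions below see every result
    results = list(P.uimap(do_work, ["a", "bad_b", "c"]))

print([n for n, ok in results if ok])       # successes
print([n for n, ok in results if not ok])   # failures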
def make_science_image(self, rstate=None):
    if rstate is not None:
        np.random.seed(rstate)
    science_image = np.zeros(self.image_shape_pix)
    if self.parallel:
        pool = ThreadPool(8)
        coadds = list(
            tqdm(pool.imap(self._science_image_loop, range(self.ncoadds)),
                 total=self.ncoadds))
        return np.array(coadds).sum(axis=0)
    else:
        coadds = np.array(
            list(map(self._science_image_loop, trange(self.ncoadds))))
        return science_image + coadds.sum(axis=0)
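# A minimal sketch of the imap-with-tqdm progress pattern above, assuming
# pathos and tqdm; work() stands in for self._science_image_loop.
from pathos.threading import ThreadPool
from tqdm import tqdm

def work(i):
    return i ** 2

pool = ThreadPool(8)
# imap yields results lazily, so tqdm can show progress as workers finish
results = list(tqdm(pool.imap(work, range(100)), total=100))
print(sum(results))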
def pull_layers(self):
    """Clone all downstream layers to be processed locally when doing charm builds."""
    if self.rebuild_cache:
        click.echo("- rebuild cache triggered, cleaning out cache.")
        shutil.rmtree(str(self.layers_dir))
        shutil.rmtree(str(self.interfaces_dir))
        os.mkdir(str(self.layers_dir))
        os.mkdir(str(self.interfaces_dir))

    layers_to_pull = []
    for layer_map in self.layers:
        layer_name = list(layer_map.keys())[0]
        if layer_name == "layer:index":
            continue
        layers_to_pull.append(layer_name)

    pool = ThreadPool()
    pool.map(self.download, layers_to_pull)

    self.db["pull_layer_manifest"] = []
    _paths_to_process = {
        "layer": glob("{}/*".format(str(self.layers_dir))),
        "interface": glob("{}/*".format(str(self.interfaces_dir))),
    }
    for prefix, paths in _paths_to_process.items():
        for _path in paths:
            build_path = _path
            if not build_path:
                raise BuildException(
                    f"Could not determine build path for {_path}")
            git.checkout(self.layer_branch, _cwd=build_path)
            layer_manifest = {
                "rev": git("rev-parse", "HEAD",
                           _cwd=build_path).stdout.decode().strip(),
                "url": f"{prefix}:{Path(build_path).stem}",
            }
            self.db["pull_layer_manifest"].append(layer_manifest)
            click.echo(
                f"- {layer_manifest['url']} at commit: {layer_manifest['rev']}")
def process(self, mongo_connection):
    """Process a chunk of rows in the file."""
    reader = UnicodeReader(self.program_arguments.infile,
                           dialect=self.provider_type.dialect)
    self.find_header(reader)

    for chunk in GritsFileReader.gen_chunks(reader, mongo_connection):
        # collections of valid and invalid records to be batch upsert / insert many
        valid_records = []
        invalid_records = []
        # Is threading enabled?  This may increase performance when mongoDB
        # is not running on localhost, due to the busy wait on finding an
        # airport in the case of FlightGlobalType.
        if settings._THREADING_ENABLED:
            pool = ThreadPool(nodes=settings._NODES)
            results = pool.amap(self.process_row, chunk)
            while not results.ready():
                # command-line spinner
                for cursor in '|/-\\':
                    sys.stdout.write('\b%s' % cursor)
                    sys.stdout.flush()
                    time.sleep(.25)
            sys.stdout.write('\b')
            sys.stdout.flush()
            # async poll is done, get the results
            result = results.get()
            valid_records = [x[0] for x in result if x[0] is not None]
            invalid_records = [x[1] for x in result if x[1] is not None]
        else:
            # single-threaded synchronous processing
            for data in chunk:
                valid, invalid = self.process_row(data)
                if valid is not None:
                    valid_records.append(valid)
                if invalid is not None:
                    invalid_records.append(invalid)

        # bulk upsert / insert many of the records
        valid_result = mongo_connection.bulk_upsert(
            self.provider_type.collection_name, valid_records)
        invalid_result = mongo_connection.insert_many(
            settings._INVALID_RECORD_COLLECTION_NAME, invalid_records)
        logging.debug('valid_result: %r', valid_result)
        logging.debug('invalid_result: %r', invalid_result)
def data_func(measurement):
    if not use_threads:
        data = numpy.full(sources.shape + geobox.shape,
                          measurement['nodata'],
                          dtype=measurement['dtype'])
        for index, datasets in numpy.ndenumerate(sources.values):
            _fuse_measurement(
                data[index], datasets, geobox, measurement,
                fuse_func=fuse_func,
                skip_broken_datasets=skip_broken_datasets,
                driver_manager=driver_manager)
    else:
        def work_load_data(array_name, index, datasets):
            data = sa.attach(array_name)
            _fuse_measurement(
                data[index], datasets, geobox, measurement,
                fuse_func=fuse_func,
                skip_broken_datasets=skip_broken_datasets,
                driver_manager=driver_manager)

        array_name = '_'.join(['DCCORE', str(uuid.uuid4()), str(os.getpid())])
        sa.create(array_name, shape=sources.shape + geobox.shape,
                  dtype=measurement['dtype'])
        data = sa.attach(array_name)
        data[:] = measurement['nodata']

        pool = ThreadPool(32)
        pool.map(work_load_data, repeat(array_name),
                 *zip(*numpy.ndenumerate(sources.values)))
        sa.delete(array_name)
    return data
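# A minimal sketch of the fill-a-preallocated-array pattern above, using a
# plain numpy array rather than SharedArray: threads share one address space,
# so each worker can write its slice of the buffer in place.
import numpy as np
from pathos.threading import ThreadPool

data = np.full((4, 3), -1.0)  # preallocated and filled with a nodata value

def work_load_data(index, value):
    data[index] = value  # each worker fills its own row

pool = ThreadPool(4)
pool.map(work_load_data, range(4), [10.0, 20.0, 30.0, 40.0])
print(data)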
def saveEngulfingSignals(candles, candleamount, params=[], symbol='XBTUSD'):
    global t_e_candles
    global t_symbol
    global t_candleamount
    t_e_candles = ind.candle_df(candles, candleamount)
    t_symbol = symbol
    t_candleamount = candleamount
    epool = ThreadPool()
    results = epool.uimap(saveEngulf_thread, params)
    print("Computing engulfing signals for all params multithreaded...")
    # uimap is lazy: converting its iterator to a list is what forces every
    # thread to finish before we return
    result = list(results)
    print(result)
    return result

#Examples
#saveKeltnerBands(100, [10,1], [True, False])
#saveATR(100, [1,20,30])
def tuneHyperParameters(simsettingsFileName, hyperSettings=None,
                        saved_fd_model_path=None):
    """
    For some set of parameters, this function will sample a number of them
    in order to find a more optimal configuration.
    """
    import os
    result_data = {}

    file = open(simsettingsFileName)
    settings = json.load(file)
    print("Settings: " + str(json.dumps(settings, indent=4)))
    file.close()

    file = open(hyperSettings)
    hyper_settings = json.load(file)
    # fixed: this used to print the sim settings a second time
    print("Hyper settings: " + str(json.dumps(hyper_settings, indent=4)))
    file.close()

    num_sim_samples = hyper_settings['meta_sim_samples']

    ## Check to see if there exists a saved fd model; if so, save the path in
    ## the hyper settings
    if saved_fd_model_path is not None:
        directory = getDataDirectory(settings)
        # file_name_dynamics = directory + "forward_dynamics_" + "_Best_pretrain.pkl"
        if not os.path.exists(directory):
            hyper_settings['saved_fd_model_path'] = saved_fd_model_path

    param_settings = get_param_values(hyper_settings)
    result_data['hyper_param_settings_files'] = []
    sim_data = []
    data_name = settings['data_folder']
    for params in param_settings:  ## Loop over each setting of parameters
        data_name_tmp = ""
        ## Assemble the vector of parameters and the data folder name
        for par in range(len(params)):
            param_of_interest = hyper_settings['param_to_tune'][par]
            data_name_tmp = (data_name_tmp + "/_" + param_of_interest + "_"
                             + str(params[par]) + "/")
            settings[param_of_interest] = params[par]
        settings['data_folder'] = data_name + data_name_tmp
        directory = getBaseDataDirectory(settings)
        if not os.path.exists(directory):
            os.makedirs(directory)
        out_file_name = directory + os.path.basename(simsettingsFileName)
        result_data['hyper_param_settings_files'].append(out_file_name)
        print("Saving settings file with data to: ", out_file_name)
        print("settings['data_folder']: ", settings['data_folder'])
        out_file = open(out_file_name, 'w')
        out_file.write(json.dumps(settings, indent=4))
        out_file.close()
        sim_data.append((simsettingsFileName, num_sim_samples,
                         copy.deepcopy(settings),
                         hyper_settings['meta_sim_threads'],
                         copy.deepcopy(hyper_settings)))

    # p = ProcessingPool(2)
    p = ThreadPool(hyper_settings['tuning_threads'])
    t0 = time.time()
    result = p.map(_trainMetaModel, sim_data)
    t1 = time.time()
    print("Hyper parameter tuning complete in "
          + str(datetime.timedelta(seconds=(t1 - t0))) + " seconds")
    result_data['sim_time'] = ("Meta model training complete in "
                               + str(datetime.timedelta(seconds=(t1 - t0)))
                               + " seconds")
    result_data['meta_sim_result'] = result
    result_data['raw_sim_time_in_seconds'] = t1 - t0
    result_data['Number_of_simulations_sampled'] = len(param_settings)
    result_data['Number_of_threads_used'] = hyper_settings['tuning_threads']
    print(result)
    return result_data
def filter_results(self, im_array, results, image_times, model,
                   psf_sigma=1.0, batch_size=32, chunk_size=10000):
    """
    Use a keras neural network model to detect real objects based upon
    the coadded postage stamps of those objects. Filter and keep only
    actual objects going forward.

    Parameters
    ----------
    im_array: numpy array, required
        The masked original images. See loadMaskedImages in searchImage.py.
    results: numpy recarray, required
        The results output from findObjects in searchImage.
    image_times: numpy array, required
        An array containing the image times in DAYS with the first image
        at time 0.
        Note: This is different than other methods, so the units of this
        may change. Watch this documentation.
    model: keras model, required
        A previously trained model loaded from an hdf5 file.
    psf_sigma: float
        The sigma of the point-spread function, in pixels.
    batch_size: int
        Batch size for keras predict.
    chunk_size: int
        Number of result rows processed per chunk.

    Returns
    -------
    filtered_results: numpy array
        An edited version of results with only the rows where true
        objects were classified.
    """
    keep_objects = np.array([])
    total_chunks = np.ceil(len(results) / float(chunk_size))
    chunk_num = 1
    circle_vals = []
    enumerated_results = list(enumerate(results))
    self.im_array = im_array
    self.image_times = image_times
    self.psf_sigma = psf_sigma

    # for chunk_start in range(0, len(results), chunk_size):
    #     test_class = []
    #     p_stamp_arr = []
    #     #circle_chunk = []
    #     for imNum in range(chunk_start, chunk_start+chunk_size):
    #         try:
    #             p_stamp = self.createPostageStamp(im_array,
    #                                               list(results[['t0_x', 't0_y']][imNum]),
    #                                               np.array(list(results[['v_x', 'v_y']][imNum])),
    #                                               image_times, [25., 25.])[0]
    #             p_stamp = np.array(p_stamp)
    #             p_stamp[np.isnan(p_stamp)] = 0.
    #             p_stamp[np.isinf(p_stamp)] = 0.
    #             #p_stamp -= np.min(p_stamp)
    #             #p_stamp /= np.max(p_stamp)
    #             image_thresh = np.max(p_stamp)*0.5
    #             image = (p_stamp > image_thresh)*1.
    #             #pre_image = p_stamp > image_thresh
    #             #image = np.array(pre_image*1.)
    #             mom = measure.moments(image)
    #             cr = mom[0,1]/mom[0,0]
    #             cc = mom[1,0]/mom[0,0]
    #             #moments = measure.moments(image, order=3)
    #             #cr = moments[0,1]/moments[0,0]
    #             #cc = moments[1,0]/moments[0,0]
    #             cent_mom = measure.moments_central(image, cr, cc, order=4)
    #             norm_mom = measure.moments_normalized(cent_mom)
    #             hu_mom = measure.moments_hu(norm_mom)
    #             #p_stamp_arr.append(hu_mom)
    #             #circularity = (4*np.pi*moments[0,0])/(measure.perimeter(image)**2.)
    #             #circularity = (cent_mom[0,0]**2.)/(2.*np.pi*(cent_mom[2,0] + cent_mom[0,2]))
    #             circularity = (1/(2.*np.pi))*(1/hu_mom[0])
    #             gaussian_fwhm = psf_sigma*2.35
    #             fwhm_area = np.pi*(gaussian_fwhm/2.)**2.
    #             # Use 200% error margin on psf_sigma for now
    #             if ((circularity > 0.6) & (cr > 10.) & (cr < 14.) &
    #                     (cc > 10.) & (cc < 14.) &
    #                     (cent_mom[0,0] < (9.0*fwhm_area)) & (cent_mom[0,0] > 3.0)):
    #                 test_class.append(1.)
    #             else:
    #                 test_class.append(0.)
    #             circle_vals.append([circularity, cr, cc, cent_mom[0,0], image_thresh])
    #         except:
    #             #p_stamp_arr.append(np.ones((25, 25)))
    #             p_stamp_arr.append(np.zeros(7))
    #             test_class.append(0.)
    #             circle_vals.append([0., 0., 0., 0., 0.])
    #             continue
    #     p_stamp_arr = np.array(p_stamp_arr)  #.reshape(chunk_size, 625)
    #     #test_class = model.predict_classes(p_stamp_arr, batch_size=batch_size,
    #     #                                   verbose=1)

    pool = Pool(nodes=8)
    test_classes = pool.map(self.circularity_test, enumerated_results)
    test_classes = np.array(test_classes).T
    keep_idx = test_classes[0][np.where(np.array(test_classes[1]) > .5)]  # + chunk_start
    print(keep_idx)
    #print(np.where(np.array(test_class) > .5))
    print(test_classes[0][np.where(np.array(test_classes[1]) > .5)])
    keep_objects = keep_idx  #np.append(keep_objects, keep_idx)
    #circle_vals[keep_idx] = np.array(circle_chunk)
    print("Finished chunk %i of %i" % (chunk_num, total_chunks))
    chunk_num += 1

    # keep_objects = np.arange(len(results))
    filtered_results = results[np.array(keep_objects, dtype=int)]
    #circle_vals = np.array(circle_vals)
    #circle_vals_keep = circle_vals[np.array(keep_objects, dtype=int)]

    return filtered_results  #, circle_vals_keep
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE
"""
minimal interface to python's multiprocessing module
"""

from pathos.multiprocessing import ProcessPool, __STATE
from pathos.threading import ThreadPool  #XXX: thread __STATE not imported
from pathos.helpers import cpu_count
mp = ProcessPool()
tp = ThreadPool()


# backward compatibility
#FIXME: deprecated... and buggy! (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

Inputs:
    function  -- target function
    sequence  -- sequence to process in parallel

Additional Inputs:
    nproc     -- number of 'local' cpus to use  [default = 'autodetect']
    type      -- processing type ['blocking', 'non-blocking', 'unordered']
    threads   -- if True, use threading instead of multiprocessing
    '''
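# A hypothetical call to the mp_map wrapper documented above, assuming it
# behaves as its docstring describes (the body is elided here); square() is
# illustrative only.
def square(x):
    return x * x

# blocking multiprocessing map on four 'local' cpus
print(mp_map(square, range(10), nproc=4, type='blocking'))

# the same map, backed by threads instead of processes
print(mp_map(square, range(10), threads=True))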
def mt_decompile_apks(apk_fpaths, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_dirs = p.map(decompile_one_apk, apk_fpaths,
                         [out_dir] * len(apk_fpaths))
    # apk_dirs = [i for i in apk_dirs if i is not None]
    return apk_dirs
def mt_download_apk(urls, out_dir, nproc):
    with ThreadPool(nproc) as p:
        apk_fns = p.map(download_apk, urls, [out_dir] * len(urls))
    return apk_fns
def create_storage(coords, geobox, measurements, data_func=None,
                   use_threads=False):
    """
    Create a :class:`xarray.Dataset` and (optionally) fill it with data.

    This function makes the in-memory storage structure to hold datacube
    data, loading data from datasets that have been grouped appropriately
    by :meth:`group_datasets`.

    :param dict coords:
        OrderedDict holding `DataArray` objects defining the dimensions
        not specified by `geobox`

    :param GeoBox geobox:
        A GeoBox defining the output spatial projection and resolution

    :param measurements:
        list of :class:`datacube.model.Measurement`

    :param data_func:
        function to fill the storage with data. It is called once for each
        measurement, with the measurement as an argument. It should return
        an appropriately shaped numpy array. If not provided, an empty
        :class:`xarray.Dataset` is returned.

    :param bool use_threads:
        Optional. If this is set to True, IO will be multi-threaded.
        May not work for all drivers due to locking/GIL.
        Default is False.

    :rtype: :class:`xarray.Dataset`

    .. seealso:: :meth:`find_datasets` :meth:`group_datasets`
    """
    def empty_func(measurement_):
        coord_shape = tuple(coord_.size for coord_ in coords.values())
        return numpy.full(coord_shape + geobox.shape, measurement_.nodata,
                          dtype=measurement_.dtype)

    data_func = data_func or empty_func

    result = xarray.Dataset(attrs={'crs': geobox.crs})
    for name, coord in coords.items():
        result[name] = coord
    for name, coord in geobox.coordinates.items():
        result[name] = (name, coord.values, {'units': coord.units})

    def work_measurements(measurement, data_func):
        return data_func(measurement)

    use_threads = use_threads and THREADING_REQS_AVAILABLE

    if use_threads:
        pool = ThreadPool(32)
        results = pool.map(work_measurements, measurements, repeat(data_func))
    else:
        results = [data_func(a) for a in measurements]

    for measurement in measurements:
        data = results.pop(0)
        attrs = measurement.dataarray_attrs()
        attrs['crs'] = geobox.crs
        dims = tuple(coords.keys()) + tuple(geobox.dimensions)
        result[measurement.name] = (dims, data, attrs)

    return result
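# A minimal sketch of the map-plus-repeat() idiom used above to broadcast a
# constant argument to every worker; work() is illustrative.
from itertools import repeat

from pathos.threading import ThreadPool

def work(item, shared):
    return "%s:%s" % (shared, item)

pool = ThreadPool(4)
# repeat() is zipped against the finite sequence, so the map terminates
print(pool.map(work, ["a", "b", "c"], repeat("cfg")))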
    doc_embed_dict[docid] = mean_vec


parser = argparse.ArgumentParser(
    description="Generate ELMo embeddings for docs")
parser.add_argument("-d", "--data_dict", required=True,
                    help="Path to bbc data dict file")
parser.add_argument("-tn", "--thread_count", type=int, required=True,
                    help="No of threads in Thread pool")
parser.add_argument("-o", "--out", required=True,
                    help="Path to output file")
args = vars(parser.parse_args())
bbc_data_dict_file = args["data_dict"]
thread_count = args["thread_count"]
outfile = args["out"]

with open(bbc_data_dict_file, 'r') as dd:
    bbc_data_dict = json.load(dd)
preproc_doctext_dict = preprocessed_paratext(bbc_data_dict)
doc_embed_dict = dict()
print("Data loaded")
doclist = list(preproc_doctext_dict.keys())
with ThreadPool(nodes=thread_count) as pool:
    pool.map(get_mean_elmo_embeddings, doclist)
np.save(outfile, doc_embed_dict)
def ThreadPool(self, *args, **kwargs):
    thread_pool = ThreadPool(*args, **kwargs)
    self.register_atexit(thread_pool)
    return thread_pool
def GlobalThreadPool(self, *args, **kwargs):
    if self.thread_pool is None:
        self.thread_pool = ThreadPool(*args, **kwargs)
    return self.thread_pool
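# A minimal, self-contained sketch of the lazily created, atexit-registered
# global pool that the two methods above suggest; the Pools class here is
# hypothetical, and atexit.register stands in for register_atexit.
import atexit

from pathos.threading import ThreadPool

class Pools:
    def __init__(self):
        self.thread_pool = None

    def GlobalThreadPool(self, *args, **kwargs):
        if self.thread_pool is None:
            self.thread_pool = ThreadPool(*args, **kwargs)
            # tear the worker threads down at interpreter exit
            atexit.register(self.thread_pool.close)
        return self.thread_pool

pools = Pools()
pool = pools.GlobalThreadPool(nodes=4)
assert pool is pools.GlobalThreadPool()  # the same pool is reused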
saver = tf.train.Saver(max_to_keep=4)

# =============================================================================
# Initialize the variables (i.e. assign their default value)
# =============================================================================
init = tf.global_variables_initializer()

# =============================================================================
# Start Training
# =============================================================================
# Start a new TF session
conf = tf.ConfigProto()
conf.gpu_options.allow_growth = True
conf.log_device_placement = False  #@myself: use this for debugging
conf.allow_soft_placement = True

P = Pool()

with tf.Session(config=conf) as sess:
    # Run the initializer
    sess.run(init)
    sess.run(normalize_rel_op)

    # Training
    NOW_DISPLAY = False
    epoch = 1
    step = 1
    temp_Type2Data = deepcopy(Type2Data)
    mean_losses = np.zeros([5])
    mean_delta = 0

    while epoch < NUM_EPOCHS:
        if sum(map(len, temp_Type2Data.values())) < 0.1 * TOT_RELATIONS:
            epoch += 1
def generate(self, instruments: List[int], batch_size: int, n_threads=4,
             max_chunks_per_music=-1, chunk_reuse=1, chunk_pool_size=1000):
    """
    Creates a generator that iterates over the dataset to generate chunks.
    The generator first starts by filling a pool of chunks.

    :param instruments: the ids of the instruments to keep when generating chunks
    :param batch_size: the size of the batches yielded
    :param n_threads: the number of threads to synthesize waveforms in parallel
    :param max_chunks_per_music: the maximum number of chunks to extract from a
    single music
    :param chunk_reuse: the number of times a single chunk will be used per epoch
    :param chunk_pool_size: the minimum number of chunks the pool must contain
    before starting to yield batches
    :return: the generator
    """
    assert chunk_pool_size >= batch_size, \
        "The chunk pool size should be greater or equal to the batch size."

    # Reset all generation statistics
    self.epochs = 0
    self.epoch_progress = 0.
    self.musics_sampled = 0
    self.chunks_generated = 0

    # Create a generator that loops infinitely over the songs in a random order
    def midi_fpath_generator():
        midi_fpaths = list(self._get_files_by_instruments(instruments, at_least=2))
        midi_fpaths = shuffle(midi_fpaths)
        while True:
            for i, midi_fpath in enumerate(midi_fpaths, 1):
                yield midi_fpath
                self.debug_midi_fpaths.append(midi_fpath)
                if len(self.debug_midi_fpaths) > n_threads * 2:
                    del self.debug_midi_fpaths[0]
                self.epoch_progress = i / len(midi_fpaths)
            self.epochs += 1
    midi_fpath_generator = midi_fpath_generator()

    # Define a function to fill a buffer
    def begin_next_buffer():
        # Estimate how many musics to sample from to generate a full batch
        avg_n_chunks = self.chunks_generated / self.musics_sampled if self.musics_sampled else 0
        n_musics = int(np.ceil(batch_size / avg_n_chunks) if avg_n_chunks else 0) + n_threads
        self.musics_sampled += n_musics

        # Begin filling the buffer with threads from the threadpool
        func = lambda fpath: self.extract_chunks(fpath, instruments, max_chunks_per_music)
        midi_fpaths = [next(midi_fpath_generator) for _ in range(n_musics)]
        return thread_pool.uimap(func, midi_fpaths)

    # Define a function to fill the chunk pool
    def refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer):
        # Do nothing if the pool is already full
        if len(chunk_pool) >= chunk_pool_size:
            return chunk_pool, chunk_pool_uses, buffer

        while len(chunk_pool) < chunk_pool_size:
            # Retrieve the elements from the next buffer that were generated in the
            # background. If it is not done generating, block until so with a call to list().
            start = timer()
            buffer = list(buffer)

            # Flatten the buffer to retrieve a list of chunks, and append all the contents of
            # the buffer to the chunk pool
            n_musics = len(buffer)
            buffer = [chunk for chunks in buffer for chunk in chunks]
            chunk_pool.extend(buffer)
            chunk_pool_uses.extend([chunk_reuse] * len(buffer))
            delta = timer() - start
            print("Blocked %dms to generate %d chunks from %d musics." %
                  (int(delta * 1000), len(buffer), n_musics))

            # Register statistics about the number of generated chunks to better estimate how
            # many jobs will be needed to fill the pool the next time
            self.chunks_generated += len(buffer)

            # Begin a new buffer in the background
            buffer = begin_next_buffer()

        # Shuffle the chunk pool so as to mix different musics in a same batch
        chunk_pool, chunk_pool_uses = shuffle(chunk_pool, chunk_pool_uses)
        return chunk_pool, chunk_pool_uses, buffer

    # Create the threadpool, the chunk pool and initialize the buffers
    thread_pool = ThreadPool(n_threads)
    chunk_pool = []
    chunk_pool_uses = []
    buffer = begin_next_buffer()

    # We wrap the generator inside an explicit generator function. We could simply make this
    # function (MidiDataset.generate()) the generator itself, but splitting the initialization
    # code and the actual generator allows us to execute the initialization when
    # MidiDataset.generate() is called for the first time, rather than when we start iterating
    # from the dataset.
    def generator(chunk_pool, chunk_pool_uses, buffer):
        while True:
            # Make sure the chunk pool is full
            chunk_pool, chunk_pool_uses, buffer = \
                refill_chunk_pool(chunk_pool, chunk_pool_uses, buffer)

            # Consume elements from the chunk pool to generate a batch
            chunks = chunk_pool[:batch_size]
            chunks_uses = chunk_pool_uses[:batch_size]
            del chunk_pool[:batch_size]
            del chunk_pool_uses[:batch_size]
            for chunk, chunk_uses in zip(chunks, chunks_uses):
                if chunk_uses == 1:
                    continue
                chunk_pool.append(chunk)
                chunk_pool_uses.append(chunk_uses - 1)

            # Yield the chunks as a batch
            yield self.collate(chunks, instruments)

    return generator(chunk_pool, chunk_pool_uses, buffer)
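# A minimal, self-contained sketch of the background-buffer idea above: kick
# off uimap work, keep computing, and block only when consuming the iterator.
# synthesize() stands in for extract_chunks().
import time

from pathos.threading import ThreadPool

def synthesize(i):
    time.sleep(0.2)              # simulate expensive waveform synthesis
    return [i * 10, i * 10 + 1]  # a few "chunks" per item

pool = ThreadPool(4)
buffer = pool.uimap(synthesize, range(8))  # returns immediately

# ... other work can happen here while the threads run in the background ...

chunks = [c for cs in buffer for c in cs]  # consuming the iterator blocks
print(chunks)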
def process(self):
    """Process rules."""
    pool = ThreadPool()
    pool.map(self.__process, self.files_to_process)
import os
import time
from pathlib import Path

import numpy as np
from PIL import Image
import hub
from pathos.threading import ThreadPool

pool = ThreadPool(nodes=20)

#val_path = list(Path('./ILSVRC/Data/CLS-LOC/val').glob('*.JPEG'))
val_path = list(Path('./ILSVRC/Data/CLS-LOC/train').glob('**/*.JPEG'))

shape = (len(val_path), 500, 375, 3)
x = hub.array(shape, name='imagenet/test:latest', dtype='uint8')
print(x.shape)

index = 1


def upload_val(index):
    t1 = time.time()
    # Preprocess the image
    img = Image.open(val_path[index])
    img = img.resize((500, 375), Image.ANTIALIAS)
    img = np.asarray(img)
    if len(img.shape) == 2:
        img = np.expand_dims(img, -1)
    if img.shape[-1] == 4:
        img = img[..., :3]
    img = np.transpose(img, axes=(1, 0, 2))
    # Upload the image
def backtest_mt(params):
    global capital
    su = None
    start = time.time()  # moved up so the final timing print works on both branches
    saveIndicators(candleamount=candleamount)  #fix later
    candleSplice = candleData.tail(candleamount)
    atrseries = pd.Series(dtype=np.uint16)
    keltner_signals = pd.Series(dtype=object)
    engulf_signals = pd.Series(dtype=object)
    signals = pd.DataFrame(columns=['S'])
    atrperiod = params['atrperiod']
    #candleSplice = candleSplice.reset_index(drop=True)
    if params['keltner'] and params['engulf']:
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        keltner_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Keltner//' +
            "SIGNALS_kp" + str(params['kperiod']) + '_sma' +
            str(params['ksma']) + '.csv',
            sep=',')
        signals = pd.concat([engulf_signals, keltner_signals], axis=1)
        signals.columns = ["E", "K"]
        signals['S'] = np.where((signals['E'] == signals['K']), Signal(0),
                                signals['E'])
    elif params['keltner']:
        keltner_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Keltner//' +
            "SIGNALS_kp" + str(params['kperiod']) + '_sma' +
            str(params['ksma']) + '.csv',
            sep=',')
        signals['S'] = np.array(keltner_signals).reshape(
            1, len(keltner_signals))[0]
    elif params['engulf']:
        engulf_signals = pd.read_csv(
            'IndicatorData//' + params['symbol'] + '//Engulfing//' +
            "SIGNALS_t" + str(params['engulfthreshold']) + '_ignoredoji' +
            str(params['ignoredoji']) + '.csv',
            sep=',')
        signals['S'] = np.array(engulf_signals).reshape(
            1, len(engulf_signals))[0]
    print(signals['S'])
    #signals.to_csv('BacktestData//Signals//' + currentTime + '.csv')

    atrseries = pd.read_csv(
        'IndicatorData//' + params['symbol'] + "//ATR//" + "p" +
        str(atrperiod) + '.csv',
        sep=',')

    copyIndex = candleSplice.index
    candleSplice = candleSplice.reset_index(drop=True)
    #candleSplice.merge(atrseries, left_index=True)
    #candleSplice.merge(signals['S'], right_on='S', left_index=True)
    candleSplice = pd.DataFrame.join(candleSplice, atrseries)
    # combine signals and candle data
    candleSplice = pd.DataFrame.join(candleSplice, signals['S'])
    candleSplice.index = copyIndex
    candleSplice['timestamp'] = pd.to_datetime(candleSplice.timestamp)

    finalCapitalData = None
    currentTime = datetime.now().strftime("%Y%m%d-%H%M")
    backtestDir = (params['symbol'] + '//' + "len" + str(candleamount) +
                   "_k" + str(params['keltner']) +
                   "_e" + str(params['engulf']) +
                   "_id" + str(params['ignoredoji']) +
                   "_eThrs" + str(params['engulfthreshold']) +
                   "_ATR" + str(params['atrperiod']) +
                   "_kP" + str(params['kperiod']) +
                   "_kSMA" + str(params['ksma']) +
                   "_pm" + str(params['posmult']) +
                   "_ST" + params['stoptype'] +
                   "_sm" + str(params['stopmult']) +
                   "_tm" + str(params['tmult']) +
                   "_TR" + params['trade'])
    bt_profit = 0
    if percision != 1:
        isafe = []
        candleSplit = []
        initialLength = len(candleSplice)
        firstStart = candleSplice.index[0]
        lastDistanceSafe = None
        if params['symbol'] == 'XBTUSD':
            su = xbtusd_su
        elif params['symbol'] == 'ETHUSD':
            su = ethusd_su
        for i in range(percision - 1):
            # abs() is a temporary fix for running the backtest on short intervals
            isafe.append((i + 1) *
                         ((abs(initialLength - percision * su)) / percision) +
                         i * su)
        #candleSplit = list(np.array_split(candleSplice, percision))
        #candleSplit = list(candleSplit)
        for i in isafe:
            ia = int(i)
            if isafe.index(i) != 0:
                candleSplit.append(
                    candleSplice.iloc[int(isafe[isafe.index(i) - 1]):ia + 1])
                lastDistanceSafe = ia
                #print("lds", lastDistanceSafe)
            # else:
            #     candleSplit.append(candleSplice.iloc[:ia+1])
            #print("lds", lastDistanceSafe)
        #if(len(isafe) > 1):
        candleSplit.append(candleSplice.iloc[lastDistanceSafe:])
        #print(candleSplit)
        #time.sleep(100)

        # generate parameters for multithreading
        safe_length = len(candleSplit)
        safe_candleamount = np.repeat(candleamount, safe_length).tolist()
        safe_capital = np.repeat(capital, safe_length).tolist()
        safe_params = np.repeat(params, safe_length).tolist()
        withSafe = np.repeat(True, safe_length).tolist()
        print("safe thread amount:", safe_length)

        # create multithread pool
        start = time.time()
        #print(candleSplit)
        #time.sleep(1000)
        pool = ThreadPool(safe_length)
        # run initial chunks multithreaded to find safe points
        safe_results = pool.uimap(backtest_strategy, safe_candleamount,
                                  safe_capital, safe_params, candleSplit,
                                  withSafe)
        pool.close()

        # compute anything we need to while the threads are running
        candleSafe = []
        final_length = safe_length + 2
        withoutSafe = np.repeat(False, final_length).tolist()
        final_candleamount = np.repeat(candleamount, final_length).tolist()
        final_capital = np.repeat(capital, final_length).tolist()
        final_params = np.repeat(params, final_length).tolist()
        static_capital = capital

        safePoints = list(safe_results)
        ######################################
        #time.sleep(1000)
        pool.join()

        for i in safePoints:
            if i == -1:
                backtest_mt.q.put(
                    'Not all safe points found for the given precision. '
                    'Reduce precision, or increase the timeframe.')
                return

        safePoints = sorted(safePoints)
        if find_su:
            su = []
            for i, point in enumerate(safePoints):
                su.append(point - candleSplit[i].index[0])
            suAvg = mean(su)
            # only works on evenly spliced chunks
            chunkLength = len(candleSplit[0])
            backtest_mt.q.put(["su average:", suAvg, ' / ', chunkLength])
            return su

        print("safe points:", safePoints)
        idx = 0
        for i in safePoints:
            ia = i - firstStart
            idx = safePoints.index(i)
            if safePoints.index(i) != 0:
                candleSafe.append(
                    candleSplice.iloc[lastDistanceSafe - idx:ia + 1])
                lastDistanceSafe = ia + 1
            else:
                candleSafe.append(candleSplice.iloc[:ia + 1])
                lastDistanceSafe = ia + 1
        candleSafe.append(candleSplice.iloc[lastDistanceSafe - idx:])

        print("final thread amount:", final_length)
        #print(candleSafe)
        #time.sleep(10000)
        # run chunks spliced by safe points multithreaded to retrieve fully
        # accurate results
        fpool = ThreadPool(final_length)
        final_results = fpool.uimap(backtest_strategy, final_candleamount,
                                    final_capital, final_params, candleSafe,
                                    withoutSafe)
        fpool.close()
        final_result = list(final_results)
        fpool.join()

        ordered_result = sorted(final_result, key=lambda x: x[0])
        for i in range(len(ordered_result)):
            #print(final_result.index)
            if i != 0:
                # for a non-static position size:
                ##capital += capital*((i[1]-static_capital)/static_capital)
                ordered_result[i][1]['capital'] += bt_profit
                bt_profit = (ordered_result[i][1].iloc[-1]['capital']
                             - static_capital)
                finalCapitalData = pd.concat(
                    [finalCapitalData, ordered_result[i][1]],
                    ignore_index=True)
            else:
                bt_profit = (ordered_result[i][1].iloc[-1]['capital']
                             - static_capital)
                finalCapitalData = pd.DataFrame(ordered_result[i][1])
        capital = finalCapitalData['capital'].iloc[-1]
    else:
        # run the whole backtest in a single pass
        final_results = backtest_strategy(candleamount, capital, params,
                                          candleSplice, False)
        final_result = list(final_results)
        capital = str(final_result[1]['capital'].iloc[-1])
        finalCapitalData = final_result[1]

    print(finalCapitalData)
    #time.sleep(1000)
    visualize_trades(finalCapitalData, backtestDir)
    saveBacktest(capital, params, backtestDir)
    backtest_mt.q.put(capital)
    end = time.time()
    print("Thread time: ", end - start)
    return 'done'