def compute(self, list_of_start_vectors, parallel):
    sol_func = self.sol_func

    def function_to_dill(start_vector):
        return sol_func(start_vector)

    if parallel:
        pool = Pool(processes=len(list_of_start_vectors))
        list_of_results = pool.map(function_to_dill, list_of_start_vectors)
    else:
        list_of_results = [function_to_dill(sv) for sv in list_of_start_vectors]
    return np.array(list_of_results)
def forecast_returns(self, **kw):
    """Get the returns for individual forecasts for each instrument,
    useful for bootstrapping forecast Sharpe ratios."""
    with closing(Pool()) as pool:
        d = dict(
            pool.map(lambda x: (x.name, x.forecast_returns(**kw).dropna()),
                     self.valid_instruments().values()))
    return d
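# Note: the stdlib multiprocessing.Pool pickles the mapped callable, so mapping
# a lambda (as above) or a locally defined closure (as in function_to_dill
# earlier) normally fails with "Can't pickle <function <lambda>>". Snippets in
# this collection therefore rely on a dill-based pool; which dill-backed
# package is installed (multiprocessing_on_dill, multiprocess, pathos) is an
# assumption. A minimal sketch using the `multiprocess` fork:
from multiprocess import Pool  # dill-backed drop-in for multiprocessing (assumption)

if __name__ == "__main__":
    with Pool(4) as pool:
        print(pool.map(lambda x: x * x, range(8)))  # works because dill serialises the lambda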
def get_dict_features_from_df_parallel(self, df, nworkers=8):
    print("extracting features...")
    df_split = np.array_split(df, nworkers)
    pool = Pool(nworkers)
    res_dicts = pool.map(self.get_dict_features_from_df, df_split)
    pool.close()  # no new tasks will be submitted to the pool
    pool.join()   # wait for all workers to finish before proceeding
    big_dic = defaultdict(lambda: defaultdict(int))
    # merge the feature dictionaries created for the data frame splits into one big dictionary
    for dic in res_dicts:
        for k, v in dic.items():
            big_dic[k] = v
    return pd.concat([
        pd.get_dummies(df[df.columns.difference(["event", "venue"])],
                       prefix="@", columns=["month", "weekday"]),
        pd.DataFrame.from_dict(big_dic, orient='index')
    ], axis=1, join_axes=[df.index]).fillna(0.)
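# Note: the join_axes keyword was removed from pd.concat in pandas 1.0. On
# current pandas the same alignment can be obtained by concatenating and then
# reindexing to df.index, roughly as in this sketch (reusing df and big_dic
# from the function above):
combined = pd.concat([
    pd.get_dummies(df[df.columns.difference(["event", "venue"])],
                   prefix="@", columns=["month", "weekday"]),
    pd.DataFrame.from_dict(big_dic, orient='index')
], axis=1).reindex(df.index).fillna(0.)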
def inst_calc(self):
    """Calculate the base positions for every instrument, before applying
    portfolio-wide weighting and volatility scaling."""
    with closing(Pool()) as pool:
        d = dict(
            pool.map(lambda x: (x.name, x.calculate()),
                     self.valid_instruments().values()))
    return d
def weighted_forecasts(self, **kw):
    """Returns a dict of weighted forecasts for every Instrument in the Portfolio."""
    with closing(Pool()) as pool:
        d = dict(
            pool.map(lambda x: (x.name, x.weighted_forecast(**kw)),
                     self.valid_instruments().values()))
    return d
def instrument_stats(self):
    """Returns individual metrics for every Instrument in the Portfolio.
    Not used for trading, just for research."""
    with closing(Pool()) as pool:
        df = pd.DataFrame(
            dict(
                pool.map(lambda x: (x.name, x.curve().stats_list()),
                         self.valid_instruments().values()))).transpose()
    return df
def parallelize_dataframe(self, df, func):
    # Split the frame into chunks, map func over them in worker processes,
    # and stitch the results back together. The split count and pool size
    # are currently hard-coded to 1, so this runs on a single worker.
    df_split = np.array_split(df, 1)
    pool = Pool(1)
    rr = pool.map(func, df_split)
    df = pd.concat(rr)
    pool.close()
    pool.join()
    return df
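# A minimal standalone sketch of the same split/map/concat pattern, assuming a
# top-level chunk function (required so the stdlib Pool can pickle it) and an
# nworkers count instead of the hard-coded 1. The _double_values function and
# the example frame are illustrative, not part of the original code:
import numpy as np
import pandas as pd
from multiprocessing import Pool

def _double_values(chunk):
    # illustrative chunk-level transformation
    return chunk * 2

def parallelize_dataframe(df, func, nworkers=4):
    chunks = np.array_split(df, nworkers)
    with Pool(nworkers) as pool:
        parts = pool.map(func, chunks)
    return pd.concat(parts)

if __name__ == "__main__":
    frame = pd.DataFrame({"x": range(10)})
    print(parallelize_dataframe(frame, _double_values))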
def inst_calc(self):
    """Calculate all the things we need on all the instruments and cache it."""
    try:
        return self.memo_inst_calc
    except AttributeError:
        if len(self.portfolio) > 1 and self.multiproc:
            with closing(Pool()) as pool:
                self.memo_inst_calc = dict(
                    pool.map(lambda x: (x.name, x.calculate()), self.portfolio))
        else:
            self.memo_inst_calc = dict(
                map(lambda x: (x.name, x.calculate()), self.portfolio))
        return self.memo_inst_calc
def parallel_launcher(data_dir, data, worker, pool_size, files_num):
    files = modified_get_files(data_dir)
    batches = [(files[i:i + files_num], data, j)
               for j, i in enumerate(range(0, len(files), files_num))]
    pool = Pool(pool_size)
    output = pool.starmap(worker, batches)
    pool.close()
    pool.join()
    return output
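# Each batch tuple above unpacks as (file_batch, data, batch_index), so a
# worker passed to parallel_launcher needs roughly this shape. The body here
# is a hypothetical placeholder, not the original worker:
def example_worker(file_batch, data, batch_index):
    results = []
    for path in file_batch:
        results.append((batch_index, path))  # stand-in for real per-file processing
    return results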
def execute(self):
    """Run the NLP pipeline over the database records in batches."""
    print("Starting NLP pipeline...")
    myPipe = pipeUtils.MyPipe(self.verbose, self.mode, self.stok,
                              self._tmfile if path.isfile(self._tmfile) else self.tfile,
                              self.mfile)
    self._db_creator()
    if self.edir:
        print("Warning! PHI will be written to disk as an eHOST project.")
    class_set = set()
    attr_set = None
    if self.mode == 'separate':
        attr_set = set()
    # batching
    batch_start = self.start
    while True:
        batch_end = batch_start + self.batch - 1
        if batch_end > self.end:
            batch_end = self.end
        batch_records = self._db_reader(batch_start, batch_end)
        # write batch notes to disk if edir is set
        if self.edir:
            makedirs(path.join(self.edir, 'corpus'), exist_ok=True)
            for record in batch_records:
                with open(path.join(self.edir, 'corpus', str(record[0]) + '.txt'), 'w') as f:
                    f.write(record[1])
        with Pool(self.processes) as pool:
            batch_results = [x for x in pool.map(myPipe.process, batch_records) if x]
        if batch_results:
            self._db_writer(batch_results)
            if self.edir:
                eht.knowtator_writer(self.mode, self.edir, batch_results)
            class_set.update([y[3] for x in batch_results for y in x])
            if self.mode == 'separate':
                attr_set.update([y[4] for x in batch_results for y in x])
            batch_results.clear()
        print("Records " + str(batch_start) + " through " + str(batch_end) + " processed")
        batch_start += self.batch
        if batch_start > self.end:
            break
    if self.edir:
        eht.create_config_file(self.mode,
                               path.join(self.edir, 'config', 'projectschema.xml'),
                               class_set, attr_set)
    print("done")
def find_misspellings(self):
    """ Optional misspelling finder """
    if self.roc != 1.0:
        print("Finding misspellings...")
        self._represent_ordered_dicts()
        with open(self.tfile, 'r') as stream:
            targets = list(safe_load_all(stream))
        vocabulary = set()
        batch_start = self.start
        while True:
            batch_end = batch_start + self.batch - 1
            if batch_end > self.end:
                batch_end = self.end
            for doc in self._db_reader(batch_start, batch_end):
                if doc[1]:
                    vocabulary.update([word.lower() for word in
                                       re.split(r'-(?!\w)|(?<!\w)-|[^\w-]', doc[1])
                                       if len(word) >= 4])
            print("Scanned records " + str(batch_start) + " through " + str(batch_end))
            batch_start += self.batch
            if batch_start > self.end:
                break
        print("Vocabulary:", len(vocabulary), "words >= 4 chars")
        print("Targets:", len(targets), "terms (Lex)")
        print("Splitting vocabulary into", self.processes, "groups for multiprocessing...")
        gsplitvocab = self._gsplit(list(vocabulary), self.processes)
        print("Finding words in vocabulary similar to targets (" + str(self.roc) + ")...")
        _is_similar_p = partial(self._is_similar, targets=targets)
        with Pool(self.processes) as pool2:
            similar_results = pool2.map(_is_similar_p, gsplitvocab)
        new_targets = [x for x in set.union(*similar_results)
                       if x[2].lower() not in [t['Lex'].lower() for t in targets]]
        if len(new_targets) > 0:
            for new_target in new_targets:
                print("*" + new_target[0] + ":", new_target[2], "(" + new_target[3] + ")")
            print("New similar terms: ", len(new_targets))
            print("Writing to", self._tmfile + '...')
            copyfile(self.tfile, self._tmfile)
            with open(self._tmfile, 'a') as out:
                for new_target in new_targets:
                    out.write('---\n')
                    dump(OrderedDict([('Comments', new_target[0]),
                                      ('Direction', new_target[1]),
                                      ('Lex', new_target[2]),
                                      ('Regex', ''),
                                      ('Type', new_target[3]),
                                      ('Code', new_target[4])]),
                         default_flow_style=False, stream=out)
            print("If this list contains errors or you want to verify before execution, "
                  "edit the new targets+misspellings.yml file now and continue, or exit, "
                  "edit it, and re-run without the \"-roc\" argument.")
def l_minima(self):
    """
    Find the local minima using the chosen local minimisation method
    with the minimisers as starting points.
    """
    # Sort to start with the lowest minimiser
    Min_ind = self.minimizers(self.K_opt)
    Min_fun = self.F[Min_ind]
    fun_min_ind = numpy.argsort(Min_fun)
    Min_ind = Min_ind[fun_min_ind]
    Min_fun = Min_fun[fun_min_ind]

    # Init storages
    self.x_vals = []
    self.Func_min = numpy.zeros_like(Min_ind, dtype=float)

    if self.maxfev is not None:  # Update number of sampling points
        self.maxfev -= self.n

    # Pool processes if multiprocessing
    if self.multiproc:
        p = Pool()
        lres_list = p.map(self.process_pool, Min_ind)

    for i, ind in zip(range(len(Min_ind)), Min_ind):
        if not self.multiproc:
            if self.callback is not None:
                print('Callback for '
                      'minimizer starting at {}:'.format(self.C[ind, :]))
            if self.disp:
                print('Starting local '
                      'minimization at {}...'.format(self.C[ind, :]))
            # Find minimum x vals
            lres = scipy.optimize.minimize(self.func, self.C[ind, :],
                                           **self.minimizer_kwargs)
        elif self.multiproc:
            lres = lres_list[i]

        self.x_vals.append(lres.x)
        self.Func_min[i] = lres.fun

        # Local function evals for all minimisers
        self.res.nlfev += lres.nfev
        if self.maxfev is not None:
            self.maxfev -= lres.nfev
            self.minimizer_kwargs['options']['maxfev'] = self.maxfev
            if self.maxfev <= 0:
                self.res.message = ('Maximum number of function'
                                    ' evaluations exceeded')
                self.res.success = False
                self.break_routine = True
                if self.disp:
                    print('Maximum number of function evaluations exceeded; '
                          'breaking minimizations at {}...'.format(self.C[ind, :]))
                if not self.multiproc:
                    for j in range(i + 1, len(Min_ind)):
                        self.x_vals.append(self.C[Min_ind[j], :])
                        self.Func_min[j] = self.F[Min_ind[j]]
                    break

    self.x_vals = numpy.array(self.x_vals)

    # Sort and save
    ind_sorted = numpy.argsort(self.Func_min)  # Sorted indexes in Func_min

    # Save ordered list of minima
    self.res.xl = self.x_vals[ind_sorted]      # Ordered x vals
    self.res.funl = self.Func_min[ind_sorted]  # Ordered fun values

    # Find global minimum of all minimisers
    self.res.x = self.x_vals[ind_sorted[0]]    # Save global minimum x
    x_global_min = self.x_vals[ind_sorted[0]][0]
    self.res.fun = self.Func_min[ind_sorted[0]]  # Save global fun value
    return x_global_min
def connect(self, turbine_coordinates):
    from multiprocessing_on_dill import Pool
    from site_conditions.terrain.terrain_models import depth
    from farm_energy.wake_model_mean_new.wake_1angle import energy_one_angle
    from farm_energy.wake_model_mean_new.wake_1angle_turbulence import max_turbulence_one_angle
    # from costs.investment_costs.BOS_cost.cable_cost.Hybrid import draw_cables
    from farm_description import cable_list, central_platform

    # === PREPARING WIND CONDITIONS ===
    self.windrose = self.inflow_model()
    self.wind_directions = self.windrose.direction
    self.direction_probabilities = self.windrose.dir_probability

    if self.inflow_model == MeanWind:
        self.wind_speeds = self.windrose.speed
        self.freestream_turbulence = [0.11]
        self.wind_speeds_probabilities = [[100.0] for _ in range(len(self.wind_directions))]
        # self.wind_speeds = [8.5 for _ in self.wind_speeds]
    elif self.inflow_model == WeibullWind:
        self.wind_speeds = [range(25) for _ in range(len(self.wind_directions))]
        self.freestream_turbulence = [0.11 for _ in range(len(self.wind_speeds[0]))]
        self.wind_speeds_probabilities = self.windrose.speed_probabilities(self.wind_speeds[0])

    # === CALCULATING WATER DEPTH ===
    self.water_depths = depth(turbine_coordinates, self.depth_model)

    # === OPTIMISING INFIELD CABLE TOPOLOGY (COST) ===
    # draw_cables(turbine_coordinates, central_platform, cable_list)
    self.cable_topology_costs, self.cable_topology = self.cable_topology_model(turbine_coordinates)

    self.energies_per_angle = []
    self.turbulences_per_angle = []
    self.cable_efficiencies_per_angle = []
    self.array_efficiencies = []

    # === CALCULATING ENERGY, TURBULENCE PER WIND DIRECTION ===
    def angle_loop(i):
        self.aero_energy_one_angle, self.powers_one_angle = energy_one_angle(
            turbine_coordinates, self.wind_speeds[i],
            self.wind_speeds_probabilities[i], self.wind_directions[i],
            self.freestream_turbulence, self.wake_mean_model, self.power_model,
            self.thrust_coefficient_model, self.wake_merging_model)
        self.turbulences = max_turbulence_one_angle(
            turbine_coordinates, self.wind_speeds[i], self.windrose.direction[i],
            self.freestream_turbulence, Jensen, self.thrust_coefficient_model,
            self.wake_turbulence_model)
        self.cable_topology_efficiency = self.cable_efficiency_model(
            self.cable_topology, turbine_coordinates, self.powers_one_angle)
        self.energy_one_angle_weighted = (self.aero_energy_one_angle *
                                          self.direction_probabilities[i] / 100.0)
        self.array_efficiency = (self.aero_energy_one_angle /
                                 (float(len(turbine_coordinates)) *
                                  max(self.powers_one_angle) * 8760.0))
        self.array_efficiencies_weighted = (self.array_efficiency *
                                            self.direction_probabilities[i] / 100.0)
        self.array_efficiencies.append(self.array_efficiencies_weighted)
        self.energies_per_angle.append(self.energy_one_angle_weighted)
        self.turbulences_per_angle.append(self.turbulences)
        self.cable_efficiencies_per_angle.append(self.cable_topology_efficiency)

    p = Pool(8)
    p.map(angle_loop, range(12))

    # --- Array efficiency ---
    self.array_efficiency = sum(self.array_efficiencies)
    # --- Farm annual energy without losses ---
    self.farm_annual_energy = sum(self.energies_per_angle)
    # --- Infield cable system efficiency ---
    self.cable_efficiency = (sum(self.cable_efficiencies_per_angle) /
                             len(self.cable_efficiencies_per_angle))
    # --- Maximum wind turbulence intensity ---
    self.turbulence = max(self.turbulences_per_angle)
    # --- Support structure costs ---
    self.support_costs = self.support_design_model(self.water_depths, self.turbulence)
    self.aeroloads = 0.0
    self.hydroloads = 0.0
    # --- O&M costs ---
    self.om_costs, self.availability = self.OandM_model(
        self.farm_annual_energy, self.aeroloads, self.hydroloads, turbine_coordinates)
    # --- AEP ---
    self.aep = self.aep_model(self.farm_annual_energy, self.availability,
                              self.cable_topology_efficiency) * 20.0
    # --- Total costs ---
    self.total_costs = self.costs_model(self.cable_topology_costs, self.support_costs,
                                        self.om_costs * 20.0)
    # --- COE ---
    self.finance = self.finance_model(self.total_costs * 100.0, self.aep / 1000.0)
    print(str(self.finance) + " cents/kWh\n")
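# Caveat on connect() above: Pool.map runs angle_loop in separate worker
# processes, so the values it appends to the self.* lists stay in the child
# processes and are not visible to the parent. The usual remedy is to have the
# worker return its per-angle values and collect them from the map result. A
# minimal, self-contained sketch of that collect-by-return pattern (the
# per_item function and the 8-element range are illustrative assumptions, not
# the original code):
from multiprocessing import Pool

def per_item(i):
    # return the per-item values instead of appending to shared state
    return i, i * i

if __name__ == "__main__":
    with Pool(4) as pool:
        pairs = pool.map(per_item, range(8))
    indices, squares = map(list, zip(*pairs))
    print(squares)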
    # (tail of create_graph(line): hypothesis, options, coref, correct_answer,
    # option_graphs and hypothesis_graphs are built earlier in the function
    # from the parsed JSONL line)
    for option in hypothesis:
        option_graphs[option] = utils.stanford_ie_v2(options[option], coref)
        hypothesis_graphs[option] = utils.stanford_ie_v2(hypothesis[option], coref)
    q_dict = {
        'correct_answer': correct_answer,
        'hypothesis_graphs': hypothesis_graphs,
        'option_graphs': option_graphs
    }
    return line['id'], q_dict


with open("../../data/ARC-V1-Feb2018-2/ARC-Challenge/ARC-Challenge-Test.jsonl", "r") as in_file:
    lines = in_file.readlines()

with Pool(processes=32) as pool:
    max_ = len(lines)
    q_dicts = list(tqdm(pool.imap_unordered(create_graph, lines), ascii=True, total=max_))

for a, b in tqdm(q_dicts, ascii=True):
    q_graphs[a] = b

print(len(q_graphs))
pickle.dump(q_graphs, open(out, "wb"))
def mp_optimize_weights(samples, instrument, **kw):
    """Calls the optimize function on different CPU cores."""
    with closing(Pool()) as pool:
        return pool.map(partial(optimize_weights, instrument), samples)
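# The partial above fixes the first argument, so each worker call is
# effectively optimize_weights(instrument, sample); note that **kw is not
# forwarded by pool.map. A signature compatible with that call would look
# roughly like the following (a hypothetical sketch, not the original function):
#
# def optimize_weights(instrument, sample):
#     ...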