def measure_local_accuracy(model, number_of_core_samples, step_size, name, output_path):
    """
    Computes the local ranking accuracy of the mixed-derivative (finite differences) estimates.

    :param model: The imported model module
    :param number_of_core_samples: Number of core samples that were perturbed
    :param step_size: The dx step taken between perturbations
    :param name: Name tag used in the saved .npy/.pickle files
    :param output_path: Directory containing the sampled data and results
    :returns: average precision score of the local ranking against the ground truth
    """
    feature_vectors = pd.DataFrame(
        np.load('{}/feature_vectors_{}_{}_{}.npy'.format(output_path, number_of_core_samples, step_size, name)),
        index=np.arange(number_of_core_samples),
        columns=pd.MultiIndex.from_product([model.perturbation_status_columns, model.feature_names],
                                           names=['perturbation_status', 'features']))
    outputs = pd.DataFrame(
        np.load('{}/outputs_{}_{}_{}.npy'.format(output_path, number_of_core_samples, step_size, name)),
        index=np.arange(number_of_core_samples),
        columns=pd.MultiIndex.from_product([model.output_names, model.perturbation_status_columns_output],
                                           names=['outputs', 'perturbation_status']))
    hessian = calculate_hessian(model, outputs, step_size)
    centers, magnitudes, dimensions = model.get_local_ground_truth(output_path, number_of_core_samples, step_size, name)
    core_feature_vectors = feature_vectors.loc[:, 'core']
    output_name = model.output_names[0]
    interaction_maps = list(futures.map(functools.partial(create_interaction_map, model, hessian,
                                                          core_feature_vectors, output_name, 'nearest'),
                                        model.feature_pairs))
    local_ranking = list(futures.map(functools.partial(rank_samples_in_pair, model, centers, magnitudes, dimensions),
                                     zip(interaction_maps, model.feature_pairs)))
    ranking = np.concatenate(np.array(local_ranking), axis=1)
    accuracies = average_precision_score(ranking[1, :], np.abs(ranking[0, :]))
    ROCs = np.array(precision_recall_curve(ranking[1, :], np.abs(ranking[0, :])))
    pickle.dump(obj=accuracies,
                file=open('{}/local_accuracies_{}_{}_{}.pickle'.format(output_path, number_of_core_samples, step_size, name), 'wb'))
    pickle.dump(obj=ROCs,
                file=open('{}/local_ROCs_{}_{}_{}.pickle'.format(output_path, number_of_core_samples, step_size, name), 'wb'))
    return accuracies
def big_cluster_completeness(inf3, grab, inkey, cluster, cluster_map, tanimoto, c_list, j): mx = pd.read_csv( inf3, sep=',', header=0, usecols=grab, engine='c' ) # loads in only the columns from the grab list, i.e. all cols for a unique cluster mx.index = inkey # reindexes the df with the orf labels after importing specific columns with usecols # how many orfs in the full cluster j_orfs = len(cluster_map[cluster]) args_list = [ mx, j_orfs, cluster_map ] # organizes all the arguments that the parallelized function needs into a list if __name__ == '__main__': if tanimoto: results = list( futures.map(partial(parallel_tanimoto, args_list=args_list), c_list)) else: results = list( futures.map(partial(parallel_minicluster, args_list=args_list), c_list)) bigmat = pd.concat( results, axis=0 ) # stack all the results into a single column in a dataframe # print(bigmat.shape[0]) bigmat.index = c_list # now the index is just the clusters, not the orfs # DEBUG - will print the progress every 50 clusters (across columns--the slower dimension). if j % 5: pass elif j == 0: print('Processed first cluster... moving on!') else: print('Processed %d clusters' % j) del mx return bigmat
def main(): parser = make_arg_parser() args = parser.parse_args() # Parse command line tanimoto = args.tanimoto with open(args.mpfa, 'r') as inf: # Generates dictionary with each unique 'refseq_cluster' as keys, ORFs as values cluster_map = build_cluster_map(inf, bread=args.bread) with open(args.input, 'r') as inf2: inkey = generate_index_list(inf2) print('\nOk, processing input file...\n') with open(args.input, 'r') as in_csv2: headers = generate_chunk_list(in_csv2) c_list = list(cluster_map.keys()) grabbed_clusters = [] data_to_pool = [] # print(c_list) for cluster in c_list: grab = pick_a_cluster(headers, cluster) # uses the name of the cluster to get a list of all orfs for a particular unique cluster # print(grab) if not grab: pass else: # print(grab) grabbed_clusters.extend([cluster]) with open(args.input, 'r') as inf3: mx = pd.read_csv(inf3, sep=',', header=0, usecols=grab, engine='c') # loads in only the columns from the grab list, i.e. all cols for a unique cluster mx.index = inkey # reindexes the df with the orf labels after importing specific columns with usecols data_to_pool.append(mx) dlen = len(data_to_pool) print('Built the data list of %s clusters' % dlen) args_list = [cluster_map, c_list] # organizes all the arguments that the parallelized function needs into a list print('\nSending data to Workers... work, Workers, work!\n') if args.tanimoto: if __name__ == '__main__': results = list(futures.map(partial(parallel_tanimoto, args_list=args_list), data_to_pool)) outdf = pd.concat(results, axis=1) if not args.tanimoto: if __name__ == '__main__': results = list(futures.map(partial(parallel_minicluster, args_list=args_list), data_to_pool)) outdf = pd.concat(results, axis=1) # bigmat = pd.concat(results, axis=0) # stack all the results into a single column in a dataframe # print(bigmat.shape[0]) # bigmat.index = c_list # now the index is just the clusters, not the orfs # print(bigmat) print('File processing complete; writing output file...\n') del data_to_pool with open(args.output, 'w') if args.output != '-' else sys.stdout as outf: # outdf = pd.concat(results, axis=1) outdf.columns = grabbed_clusters # names the columns (and index, next line) according to clusters in the order they were processed outdf.index = c_list outdf.sort_index(axis=0, inplace=True) outdf.sort_index(axis=1, inplace=True) outdf = outdf.round(decimals=3) outdf.to_csv(outf)
def set_params(self, **params):
    K.clear_session()
    # Note: in Python 3, map() is lazy, so the line below never actually invokes clear_session;
    # the explicit K.clear_session() call above is what resets the backend state.
    map(clear_session, [10] * 10)
    self.check_data_params()
    data_params = self.data_params
    data_params['lags'] = params['input_size']
    self.set_data_params(**data_params)
    params['num_inputs'] = len(self.data_params['vars'][0])
    params['num_outputs'] = len(self.data_params['vars'][1])
    # params['input_size'] = self.data_params['lags']
    super(ForecastRegressor, self).set_params(**params)
def run_coarse_grained_ga(population_size, deme_size, chromosome_size, number_of_generations, neighbourhood_size, server_ip_addr, server_user, server_password, num_of_migrants, fitness): ins = CoarseGrainedBase(population_size=population_size, deme_size=deme_size, chromosome_size=chromosome_size, number_of_generations=number_of_generations, neighbourhood_size=neighbourhood_size, server_ip_addr=server_ip_addr, server_user=server_user, server_password=server_password, num_of_migrants=num_of_migrants, fitness=fitness) populations = ins.initialize_population(deme_size) print(str(populations)) channels = ins.initialize_topology() results = list(futures.map(ins, populations, channels)) dct = {} for data in results: best_chromosome = data.pop(0) fitness_val = best_chromosome.fit vector = best_chromosome.chromosome dct[fitness_val] = vector logger.info("END RESULT" + str(sorted(dct.items()).pop()))
def _send_individuals_reproduce(self): """ Select individuals for reproduction with probability based on fitness value. Weak individuals are removed and replaced with newly generated ones. """ # retrieve best fitness of population results = list(futures.map(self._fitness, self._population)) neighbours = self._Individuals() for i in range(0, self._population_size): fit_val = results.pop(0) chromosome = self._population[i] neighbours.append_object(self._Individual(fit_val, chromosome)) chosen_individuals = self._choose_individuals_based_on_fitness( neighbours) chromosomes_reproducing = chosen_individuals.sort_objects() best_individual = chosen_individuals.best_individual # it is sure that this is the right result # but the algorithm needs to continue because of other demes if best_individual is not None: while len(self._population) <= self._population_size: self._population.append(best_individual.chromosome) return best_individual = chromosomes_reproducing.pop(0) # remove old population del self._population[:] logger.info("Number of individuals chosen for reproduction is " + str(len(chromosomes_reproducing))+ " while best individuals has fitness "+ str(best_individual.fit)) # Reproducing requires two individuals. # If number of selected individuals is even # put the best individual to the new population. # Otherwise, put him to individuals dedicated # for reproduction if len(chromosomes_reproducing) % 2 == 0: self._population.append(best_individual.chromosome) else: # put the best individual to max index in order to not rewrite existing chromosomes_reproducing.append(best_individual) # randomly choose pairs for crossover # then mutate new individuals and put them to new population while len(chromosomes_reproducing) >= 2: father = chromosomes_reproducing.pop(random.randrange(len( chromosomes_reproducing))).chromosome mother = chromosomes_reproducing.pop(random.randrange(len( chromosomes_reproducing))).chromosome self._crossover(father, mother) # mutate self._mutation(father) self._mutation(mother) self._population.append(father) self._population.append(mother) # Generate new individuals in order to make new population the same size while len(self._population) != self._population_size: self._population.append(self._gen_individual())
def multiple_runs_mean(nb_runs):
    generations = None
    all_fit_mins, all_fit_avg, all_duration_mins, all_duration_maxs = [], [], [], []
    # Sequential alternative: comment out the parallel map below and loop instead:
    # for i in range(1, nb_runs + 1):
    #     gen, fit_mins, fit_avg, duration_mins, duration_maxs = single_run(i)
    # Parallel runs via SCOOP (comment out when running sequentially):
    runs_results = futures.map(single_run, range(1, nb_runs + 1))
    for gen, fit_mins, fit_avg, duration_mins, duration_maxs in runs_results:
        if generations is None:
            generations = gen
        all_fit_mins.append(fit_mins)
        all_fit_avg.append(fit_avg)
        all_duration_mins.append(duration_mins)
        all_duration_maxs.append(duration_maxs)

    def mean_values(all_values):
        return [sum(x) / nb_runs for x in zip(*all_values)]

    mean_fit_mins = mean_values(all_fit_mins)
    mean_fit_avg = mean_values(all_fit_avg)
    mean_duration_mins = mean_values(all_duration_mins)
    mean_duration_maxs = mean_values(all_duration_maxs)
    return nb_runs, generations, mean_fit_mins, mean_fit_avg, mean_duration_mins, mean_duration_maxs
def aimFunc(self, pop):  # objective function
    Vars = pop.Phen  # get the decision-variable matrix
    args = list(zip(list(range(pop.sizes)), [Vars] * pop.sizes,
                    [self.data] * pop.sizes, [self.dataTarget] * pop.sizes))
    # Use SCOOP's map for distributed evaluation and build the objective-value matrix ObjV
    # for all individuals of the population.
    pop.ObjV = np.array(list(futures.map(subAimFunc, args)))
def find_optimum(data_l, FEATS, method, n_neighbors=8): from functools import partial rmse_l = [] r2_l = [] res_l = [] for _i in range(1, len(FEATS) + 1): sel_feat = FEATS[:_i] gexp = data_l[-1].loc[:, sel_feat] DATA_l = [data_l[0], data_l[1], gexp] sm = partial(select_model, data_l=DATA_l, FEATS=sel_feat, method=method, n_neighbors=n_neighbors) df_l = list(futures.map(sm, range(20))) Act = df_l[0].Actual meanPred = np.mean(np.vstack( [df.Predicted.loc[Act.index].values for df in df_l]), axis=0) Pred = pa.Series(meanPred, df_l[0].Actual.index) rmse, r2, __, __ = do_ols(Act, Pred) r2_l.append(r2) rmse_l.append(rmse) res_l.append(pa.DataFrame({'Actual': Act, 'Predicted': Pred})) ii = np.argmax(r2_l) print method, ii, r2_l[ii] return r2_l, rmse_l, res_l
def evaluate_parallel(invalid_pops):
    """Evaluate model by SCOOP or map, and set fitness of individuals
    according to calibration step."""
    popnum = len(invalid_pops)
    labels = list()
    try:  # parallel on multi-processors or clusters using SCOOP
        from scoop import futures
        invalid_pops = list(futures.map(toolbox.evaluate, [cali_obj] * popnum, invalid_pops))
    except (ImportError, ImportWarning):  # Python built-in map (serial)
        invalid_pops = list(toolbox.map(toolbox.evaluate, [cali_obj] * popnum, invalid_pops))
    for tmpind in invalid_pops:
        if step == 'Q':  # Step 1 Calibrating discharge
            tmpind.fitness.values, labels = tmpind.cali.efficiency_values('Q', object_names)
        elif step == 'SED':  # Step 2 Calibrating sediment
            sedobjvs, labels = tmpind.cali.efficiency_values('SED', object_names)
            qobjvs, qobjlabels = tmpind.cali.efficiency_values('Q', object_names)
            labels += [qobjlabels[0]]
            sedobjvs += [qobjvs[0]]
            tmpind.fitness.values = sedobjvs[:]
        elif step == 'NUTRIENT':  # Step 3 Calibrating NUTRIENT, TN, TP
            tnobjvs, tnobjlabels = tmpind.cali.efficiency_values('CH_TN', object_names)
            tpobjvs, tpobjlabels = tmpind.cali.efficiency_values('CH_TP', object_names)
            qobjvs, qobjlabels = tmpind.cali.efficiency_values('Q', object_names)
            sedobjvs, sedobjlabels = tmpind.cali.efficiency_values('SED', object_names)
            objvs = [tnobjvs[0], tpobjvs[0], qobjvs[0], sedobjvs[0]]
            labels = [tnobjlabels[0], tpobjlabels[0], qobjlabels[0], sedobjlabels[0]]
            tmpind.fitness.values = objvs[:]
    # NSE > 0 is the preliminary condition to be a valid solution!
    if filter_NSE:
        invalid_pops = [tmpind for tmpind in invalid_pops if tmpind.fitness.values[0] > 0]
        if len(invalid_pops) < 2:
            print('The initial population size should be greater than or equal to 2. '
                  'Please check the parameter ranges or change the sampling strategy!')
            exit(0)
    return invalid_pops, labels  # Currently, `invalid_pops` contains evaluated individuals
def recursiveFunc(level): if level == 0: return 1 else: args = [level-1] * 2 s = sum(futures.map(recursiveFunc, args)) return s
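# A minimal launch sketch (not part of the original source) for a recursive SCOOP function such as
# recursiveFunc above: the script must be started with `python -m scoop`, and the root call belongs
# under a __main__ guard so that workers importing the module do not resubmit the whole computation.
if __name__ == '__main__':
    total = recursiveFunc(4)  # spawns 2**4 leaf futures, each returning 1
    print(total)              # expected: 16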
def evaluate_losses(self, num_runs): variables = self.variables pred_keys = ['train pred', 'val pred', 'test pred'] fcast_keys = ['train fcast', 'val fcast', 'test fcast'] sets = ['train', 'val', 'test'] result = OrderedDict() for pred_key, fcast_key, set in zip(pred_keys, fcast_keys, sets): pred_fcast = list(map(self._evaluate_losses, [set] * num_runs)) pred_res = [i[0] for i in pred_fcast] fcast_res = [i[1] for i in pred_fcast] # pred_res = list(map(self.evaluate_prediction, [set] * num_runs, [True] * num_runs)) result[pred_key] = np.mean(np.squeeze(pred_res), axis=0) # fcast_res = list(map(self.evaluate_forecast, [set] * num_runs, [True] * num_runs)) result[fcast_key] = np.mean(np.squeeze(fcast_res), axis=0) # K.clear_session() if self.is_multioutput: result = pd.DataFrame(result, index=['total'] + variables) else: result = pd.DataFrame(result, index=variables) return result
def best_feat(L, ii, name='', p=0.1): print "Finding best feature." eff_d = {} lstsq_d = {} dummy = 100 SC_GROWTH = np.log(2) / 1.5 DS_BINS = np.linspace(0.98, 1.02, 5) * SC_GROWTH for feat, df, bins, eff in L: if 'noise' in name: col = 'Predicted' elif name.startswith('slavov-holstege'): col = 'Downsampled' else: col = 'Noiseless' norm = partial(compare_columns, df=df, dnsamp_bins=DS_BINS, column=col) D_l = list(futures.map(norm, range(dummy))) Dmu = np.mean(D_l) #score = D+eff*p eff_d[feat] = eff lstsq_d[feat] = Dmu eff_ser = pa.Series(eff_d) eff_ser.to_pickle('eff_%d_%s.pkl' % (ii + 1, name)) lstsq_ser = pa.Series(lstsq_d) lstsq_ser.to_pickle('lstsq_%d_%s.pkl' % (ii + 1, name)) score_ser = lstsq_ser + p * eff_ser sel = score_ser.idxmin() print "The minimum feature at", ii + 1, "features is:", sel, lstsq_ser.loc[ sel], lstsq_ser.idxmin(), eff_ser.loc[sel] ll = filter(lambda l: l[0] == sel, L) return ll[0] #L[sel]
def main(number): random.seed(4) N_ISLES = number FREQ = 5 pob = int(500 / number) islands = [toolbox.population(n=pob) for i in range(N_ISLES)] toolbox.unregister("indices") toolbox.unregister("individual") toolbox.unregister("population") toolbox.register("alg_scoop", algorithms.eaSimple, toolbox=toolbox, cxpb=0.8, mutpb=0.2, ngen=5, verbose=False) start_time = time.time() for i in range(0, 400, FREQ): results = futures.map(toolbox.alg_scoop, islands) islands = [pop for pop, logbook in results] tools.migRing(islands, 15, tools.selBest) print("--- %s seconds ---" % (time.time() - start_time)) return "finished"
def maxTreeDepthDivide(rootValue, currentDepth=0, parallelLevel=2): """Finds a tree node that represents rootValue and computes the max depth of this tree branch. This function will emit new futures until currentDepth=parallelLevel""" thisRoot = shared.getConst('myTree').search(rootValue) if currentDepth >= parallelLevel: return thisRoot.maxDepth(currentDepth) else: # Base case if not any([thisRoot.left, thisRoot.right]): return currentDepth if not all([thisRoot.left, thisRoot.right]): return thisRoot.maxDepth(currentDepth) # Parallel recursion return max( futures.map( maxTreeDepthDivide, [ thisRoot.left.payload, thisRoot.right.payload, ], cycle([currentDepth + 1]), cycle([parallelLevel]), ) )
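# Hedged setup sketch (not in the original source): the tree retrieved through shared.getConst('myTree')
# above must be registered once from the root process with scoop.shared.setConst before the first call;
# build_tree below is a hypothetical helper returning the root node of such a tree.
from scoop import shared

if __name__ == '__main__':
    tree = build_tree(range(1024))   # hypothetical constructor for the shared tree
    shared.setConst(myTree=tree)     # a SCOOP shared constant can only be assigned once per name
    print(maxTreeDepthDivide(tree.payload, parallelLevel=2))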
def evaluate_parallel(invalid_pops):
    """Evaluate model by SCOOP or map, and get fitness of individuals."""
    popnum = len(invalid_pops)
    try:  # parallel on multiprocessors or clusters using SCOOP
        from scoop import futures
        invalid_pops = list(futures.map(toolbox.evaluate, [sceobj.cfg] * popnum, invalid_pops))
    except (ImportError, ImportWarning):  # serial
        invalid_pops = list(toolbox.map(toolbox.evaluate, [sceobj.cfg] * popnum, invalid_pops))
    # Filter for valid solutions
    if filter_ind:
        invalid_pops = [tmpind for tmpind in invalid_pops
                        if check_validation(tmpind.fitness.values)]
        if len(invalid_pops) < 2:
            print('The initial population size should be greater than or equal to 2. '
                  'Please check the parameter ranges or change the sampling strategy!')
            exit(2)
    return invalid_pops  # Currently, `invalid_pops` contains evaluated individuals
def main_pso(): pop = toolbox.population(n=1000) stats = tools.Statistics(lambda ind: ind.fitness.values) stats.register("avg", numpy.mean) stats.register("std", numpy.std) stats.register("min", numpy.min) stats.register("max", numpy.max) logbook = tools.Logbook() logbook.header = ["gen", "evals"] + stats.fields GEN = 10 best = None for g in range(GEN): fitnesses = list(futures.map(toolbox.evaluate, pop)) for ind, fit in zip(pop, fitnesses): ind.fitness.values = fit for part in pop: if not part.best or part.best.fitness < part.fitness: part.best = creator.Particle(part) part.best.fitness.values = part.fitness.values if not best or best.fitness < part.fitness: best = creator.Particle(part) best.fitness.values = part.fitness.values for part in pop: toolbox.update(part, best) #Gather all the fitnesses in one list and print the stats logbook.record(gen=g, evals=len(pop), **stats.compile(pop)) print("generation: %i" % g) #print(logbook.stream) # return pop, logbook, best
def rank_global(model, number_of_core_samples, step_size, name, output_path, top_k_to_plot):
    """
    Ranks feature pairs globally by the mean magnitude of their mixed derivatives (finite differences).

    :param model: The imported model module
    :param number_of_core_samples: Number of core samples that were perturbed
    :param step_size: The dx step taken between perturbations
    :param name: Name tag used in the saved .npy/.pickle files
    :param output_path: Directory containing the sampled data and results
    :param top_k_to_plot: If truthy, plot interaction maps for the top-k ranked feature pairs
    :returns: list of (output_name, ranked feature pairs, ranking values) tuples
    """
    outputs = pd.DataFrame(
        np.load('{}/outputs_{}_{}_{}.npy'.format(output_path, number_of_core_samples, step_size, name)),
        index=np.arange(number_of_core_samples),
        columns=pd.MultiIndex.from_product([model.output_names, model.perturbation_status_columns],
                                           names=['outputs', 'perturbation_status']))
    outputs = normalize_outputs(model, outputs)
    hessian = calculate_hessian(model, outputs, step_size)
    hessian = denoise_hessian(hessian)
    ranked_hessian = hessian.abs().mean(axis=0)
    ranking = []
    for output_name in model.output_names:
        sorted_pairs = ranked_hessian.loc[output_name].loc[model.normalization_feature_pairs].sort_values()[::-1]
        ranking.append((output_name, list(sorted_pairs.index), sorted_pairs.values))
    if top_k_to_plot:
        feature_vectors = pd.DataFrame(
            np.load('{}/feature_vectors_{}_{}_{}.npy'.format(output_path, number_of_core_samples, step_size, name)),
            index=np.arange(number_of_core_samples),
            columns=pd.MultiIndex.from_product([model.perturbation_status_columns, model.feature_names],
                                               names=['perturbation_status', 'features']))
        core_feature_vectors = feature_vectors.loc[:, 'core'].copy()
        core_feature_vectors = normalize_inputs(model, core_feature_vectors)
        interaction_maps = list(futures.map(functools.partial(create_interaction_map, model, hessian,
                                                              core_feature_vectors, output_name, 'linear'),
                                            model.feature_pairs))
        ranked_feature_pairs = np.array(ranking)[:, 1][0][:top_k_to_plot]
        for pair_name in ranked_feature_pairs:
            ind = model.feature_pairs.index(pair_name)
            first_variable, second_variable = model.feature_pairs[ind].split(' and ')
            most_nonlinear_sample = hessian[output_name][model.feature_pairs[ind]].abs().idxmax()
            y_coord = 100 * (feature_vectors.loc[most_nonlinear_sample, 'core'][first_variable]
                             - model.feature_limits[first_variable][0]) / (model.feature_limits[first_variable][1]
                                                                           - model.feature_limits[first_variable][0])
            x_coord = 100 * (feature_vectors.loc[most_nonlinear_sample, 'core'][second_variable]
                             - model.feature_limits[second_variable][0]) / (model.feature_limits[second_variable][1]
                                                                            - model.feature_limits[second_variable][0])
            plot_interaction_map(model, name, interaction_maps[ind], output_name,
                                 first_variable, second_variable, x_coord, y_coord, output_path)
    pickle.dump(obj=ranking,
                file=open('{}/global_ranking_{}_{}_{}.pickle'.format(output_path, number_of_core_samples, step_size, name), 'wb'))
    return ranking
def run_fine_grained_ga(population_size, chromosome_size, number_of_generations, neighbourhood_size, server_ip_addr, fitness, server_user, server_password, mate_best_neighbouring_individual=True): ins = FineGrainedBase( population_size=population_size, chromosome_size=chromosome_size, number_of_generations=number_of_generations, neighbourhood_size=neighbourhood_size, server_ip_addr=server_ip_addr, fitness=fitness, mate_best_neighbouring_individual=mate_best_neighbouring_individual, server_user=server_user, server_password=server_password) populations = ins.initialize_population(None) channels = ins.initialize_topology() result = list(futures.map(ins, populations, channels)) dct = {} while len(result): fitness_val, vector = result.pop(0) dct[fitness_val] = vector logger.info("END RESULT " + str(sorted(dct.items()).pop()))
def funcLambdaSubfuncNotGlobal(n): """Tests a lambda function containing a call to a function that is not in the globals().""" my_mul = operator.mul lambda_func = lambda x : my_mul(x, x) result = list(futures.map(lambda_func, [i+1 for i in range(n)])) return sum(result)
def sel_pairs_predictions(feat_l, bins_l, kind='feat', recalc=False, err=.1, useNoise=True, noiseFolds=4, ext='', filt_thr=1e-6, testAll=True, n_neighbors=7): l = evecs.columns.tolist() #pa.read_pickle('sacCer_%s_l.pkl' % kind) L = list(set(l) - set(feat_l)) if not testAll: L = filter(lambda x: float(x) > filt_thr, L) test_genes = sorted(L, key=lambda l: float(l), reverse=True) LOL = [ feat_l + [g] for g in sorted(test_genes, key=lambda x: float(x), reverse=True) ] NN = True if kind.startswith('feat') else False p_cvp = partial(cv_mc, corr=NN, all_bins=bins_l, recalc=recalc, gn_err=err, useNoise=True, noiseFolds=4, ext=ext, n_neighbors=n_neighbors) L = list(futures.map(p_cvp, LOL)) return L
def write_patient_data(pdata): output_dir = os.path.join( utils.output_directory, 'p{0}{1}'.format(pdata['patient_id'] + 1, pdata['dtype'])) create_dir_if_not_exist(output_dir) raw_data = list(futures.map(create_spectrogram_images, pdata['mat_file'])) raw_spectrograms = [] std_spectrograms = [] for el in raw_data: raw_spectrograms.append(el[:, 0, :, :]) std_spectrograms.append(el[:, 1, :, :]) print np.array(raw_spectrograms).shape mat_file_names = map(basename, pdata['mat_file']) input_data = { 'raw_spectrograms': np.array(raw_spectrograms), 'std_spectrograms': np.array(std_spectrograms), 'file_name': np.array(mat_file_names), 'segment': pdata['segment'] } if pdata['dtype'] == 'train': input_data['target'] = pdata['target'] np.save(os.path.join(output_dir, 'data.npy'), input_data)
def average_results(lookahead, N): results = futures.map(do_individual_experiment, [(lookahead, seed) for seed in range(N)]) results = list(results) avg_steps = sum(steps for steps, _ in results) / N avg_elapsed = sum(elapsed for _, elapsed in results) / N return avg_steps, avg_elapsed
def do_noise_simulation(self, variable_name, range_, steps, sigma, set_variables=list()): v = range_[0] + (np.arange( 0, steps, 1)) / (steps - 1.0) * (range_[1] - range_[0]) self.duration = 100.0 self.doPreRun = False fqs = list() fl_phase_durs = list() ex_phase_durs = list() phases = list() gaits = list() paras = futures.map( functools.partial(self.do_noise_iteration, variable_name=variable_name, sigma=sigma, set_variables=set_variables), v) for i, (fq, fl, ex, ph, g) in enumerate(paras): fqs.append(fq) fl_phase_durs.append(fl) ex_phase_durs.append(ex) phases.append(ph) gaits.append(g) return (v, fqs, fl_phase_durs, ex_phase_durs, phases, gaits)
def evalVars(self, Vars):  # objective function
    N = Vars.shape[0]
    args = list(zip(list(range(N)), [Vars] * N, [self.data] * N, [self.dataTarget] * N))
    # Use SCOOP's map for distributed evaluation and build the objective-value matrix ObjV.
    ObjV = np.array(list(futures.map(subEvalVars, args)))
    return ObjV
def calcPi(workers, tries): bt = time() expr = futures.map(test, [tries] * workers) piValue = 4. * sum(expr) / float(workers * tries) totalTime = time() - bt print("pi = " + str(piValue)) print("total time: " + str(totalTime)) return piValue
def main2(n): # This call results in a generator function result = futures.map(func4, [i+1 for i in range(n)]) print result # The results are evaluated here when they are accessed. d = sum(result) print d return d
def calcPi(workers, tries): bt = time() expr = futures.map(test, [tries] * workers) piValue = 4. * sum(expr) / float(workers * tries) totalTime = time() - bt print("pi = " + str(piValue)) print("total time: " + str(totalTime)) return (piValue, totalTime)
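# The `test` worker used by both calcPi variants above is not shown in these snippets; a plausible
# (hypothetical) implementation, assuming the usual Monte Carlo estimate of pi, counts how many
# random points fall inside the unit quarter-circle:
import random

def test(tries):
    # number of random points out of `tries` that land inside the unit circle quadrant
    return sum(1 for _ in range(tries)
               if random.random() ** 2 + random.random() ** 2 <= 1.0)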
def get_max_filters(matrix, num_filters = 100, threshold = 3): matrix_size = matrix.shape[0] filter_sizes = np.linspace(5, matrix_size, num_filters).astype(int) filter_results = list(futures.map(functools.partial(max_filter_activation, np.abs(matrix)), filter_sizes)) if len(np.where(np.array(filter_results) >= threshold)[0]) == 0: return -1 else: return np.where(np.array(filter_results) < threshold)[0][0] - 1
def func1(n): try: # The map alone doesn't throw the exception. The exception is raised # in the sum which calls the map generator. result = sum(futures.map(func2, [i+1 for i in range(n)])) except Exception as err: # We could do some stuff here raise Exception("This exception is normal") return result
def main(): # Create object instances myInstances = [myClass() for _ in range(20)] # Modify them parallely myAnswers = list(futures.map(modifyClass, myInstances)) # Each result is a new object with the modifications applied print(myAnswers) print([a.myVar for a in myAnswers])
def updateParticle(part, best, phi1, phi2): u1 = (random.uniform(0, phi1) for _ in range(len(part))) u2 = (random.uniform(0, phi2) for _ in range(len(part))) v_u1 = futures.map(operator.mul, u1, map(operator.sub, part.best, part)) v_u2 = futures.map(operator.mul, u2, map(operator.sub, best, part)) part.speed = list( futures.map(operator.add, part.speed, map(operator.add, v_u1, v_u2))) for i, speed in enumerate(part.speed): if speed < part.smin: part.speed[i] = part.smin elif speed > part.smax: part.speed[i] = part.smax part[:] = list(map(operator.add, part, part.speed)) for (ind, v) in enumerate(part[:]): if v < part.pmin: part[ind] = part.pmin if v > part.pmax: part[ind] = part.pmax
def eval_fun(pop, *args): # call each particle in parallel bdat = args[1] pnames = args[2] likes = list( futures.map(eval_mod, [indiv for indiv in pop], [pnames] * len(pop), [bdat] * len(pop))) return np.array(likes)
def func3(n): result = [] try: result = list(futures.map(func4, [i+1 for i in range(n)])) except Exception as e: # We return what we can return e.args[0] + sum(result) # No exception was generated return sum(result)
def _predict(self,xte,mode,svmList,labels): ypred2 = list( fu.map(self._predict2, svmList,[xte]*len(svmList)) ) ypred3 = [] # ypred merged from all classifiers for i in range(len(xte)):# for each member/sample of the vector xte ypred3i = [ypred2[j][i] for j in range(len(svmList))]# of each sample from all classifiers ypred3i = self._merge(ypred3i,mode,labels) ypred3.append(ypred3i) return ypred3
def get_universe(self): # Download historical data for our universe key_value_pairs = list(futures.map(self.get_historical, self.symbols)) #key_value_pairs = map(self.get_historical, self.symbols) #key_value_pairs.remove((None, None)) # remove any failed items if (None, None) in key_value_pairs: key_value_pairs.remove((None, None)) # remove any failed items self.hist = dict(key_value_pairs) if None in self.hist: del self.hist[None]
def fit(self,ixtr,iytr): xyTrList = cutil.divideSamples(ixtr,iytr,self._maxTrainingSamplesPerBatch) if self._maxNumberOfTrainingBatches != 0: xyTrList = xyTrList[0:self._maxNumberOfTrainingBatches] self._svmList = list( fu.map(self._fit, [xytr[0] for xytr in xyTrList], [xytr[1] for xytr in xyTrList]) ) assert len(self._svmList)!=0,'empty _svmList in fit()' self._labels = self._svmList[0][0].classes_.tolist() for svm in self._svmList: assert svm[0].classes_.tolist()==self._labels
def evolveparallel(self):
    from scoop import futures
    toolbox.register("genind", self.mkeind, self.indsize)
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.genind)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual, n=self.popsize)
    toolbox.register("evaluate", self.evalr2)
    toolbox.register("mate", tools.cxOnePoint)  # Uniform, indpb=0.5)
    toolbox.register("mutate", self.mutaRan)  # , indpb=self.mut)
    toolbox.register("select", tools.selBest)
    toolbox.register("map", pool.map)  # `pool` is assumed to be defined elsewhere (e.g. a multiprocessing Pool)

    population = toolbox.population()
    # print population
    fits = toolbox.map(toolbox.evaluate, population)
    for fit, ind in zip(fits, population):
        ind.fitness.values = fit

    # offspring = algorithms.varOr(population, toolbox, lambda_=100, cxpb=.5, mutpb=.05)
    # print toolbox.map(toolbox.evaluate, offspring)
    avgfitnesses = []
    for gen in range(self.ngen):
        offspring = algorithms.varOr(population, toolbox, lambda_=self.popsize, cxpb=self.cx, mutpb=self.mut)
        # print "offspring", offspring
        fits = futures.map(toolbox.evaluate, offspring)
        # assign the freshly computed fitnesses to the offspring that were evaluated
        for fit, ind in zip(fits, offspring):
            ind.fitness.values = fit
        population = toolbox.select([k for k, v in itert.groupby(sorted(offspring + population))], k=100)
        # force evaluation of the lazy futures.map generator before averaging
        popfits = list(futures.map(toolbox.evaluate, population))
        avgfitnesses.append(np.mean(popfits))
def predict(self,ixte): assert len(self._svmList)!=0,'empty _svmList in predict()' xyTeList = cutil.divideSamples(ixte,None,self._maxTestingSamplesPerBatch) xTeList = [i[0] for i in xyTeList]; n = len(xTeList) ypredList = list( fu.map(self._predict, xTeList,[self._mode]*n,[self._svmList]*n,[self._labels]*n) ) ypredMerged = []; yscoreMerged = []; for i in ypredList: ypredMerged += [j[0] for j in i] yscoreMerged += [j[1] for j in i] assert len(ypredMerged)==len(ixte),str(len(ypredMerged))+'!='+str(len(ixte)) return (ypredMerged,yscoreMerged)
def main(argv): if len(argv)!=3: print 'USAGE: python devel.py [dataMode] [valMode]' return dataMode = argv[1] valMode = argv[2] # load development dataset, containing com-pro connectivity connMat,comList,proList = yam.loadComProConnMat(dataMode) kernel = yam.loadKernel(dataMode) ## dataX = [] dataY = [] for i,ii in enumerate(comList): for j,jj in enumerate(proList): dataX.append( (ii,jj) ) dataY.append( connMat[i][j] ) nData = len(dataY) ## nFolds = None kfList = None if valMode=='loocv': nFolds = nData kfList = KFold(nData, n_folds=nFolds, shuffle=True) elif valMode=='kfcv': nFolds = 10 kfList = StratifiedKFold(dataY, n_folds=nFolds, shuffle=True) else: assert(False) kronrls = KronRLS(connMat,comList,proList,kernel) ## prep for parallel xTestList = [] yTestList = [] for trIdxList, testIdxList in kfList: xTest = [dataX[i] for i in testIdxList] yTest = [dataY[i] for i in testIdxList] xTestList.append(xTest) yTestList.append(yTest) ## yPredList = fu.map(evalPerFold,xTestList,yTestList,[kronrls]*nFolds, [connMat]*nFolds,[comList]*nFolds,[proList]*nFolds,[kernel]*nFolds)
def get_data(path): file_counter = 0 tasks = [] results = [] data_collector = dict() for file_name in os.listdir(path): if file_name.startswith("secure"): file_counter += 1 logfile = open("%s/%s" % (path, file_name)) tasks.append(logfile.read()) logfile.close() results.append(list(futures.map(parse_log, tasks))) for result in results: for data in result: data_collector = receive_result(data_collector,data) return data_collector
def main(argv):
    assert len(argv) == 3
    xprmtDir = cfg.xprmtDir + '/' + argv[1]
    print xprmtDir
    assert os.path.isdir(xprmtDir)
    nTop = int(argv[2])
    metrics = defaultdict(list)
    #
    X_train = np.genfromtxt(xprmtDir + '/data/X_train.csv', delimiter=',')
    X_test = np.genfromtxt(xprmtDir + '/data/X_test.csv', delimiter=',')
    y_train = np.genfromtxt(xprmtDir + '/data/y_train.csv', delimiter=',')
    y_test = np.genfromtxt(xprmtDir + '/data/y_test.csv', delimiter=',')
    #
    param = dict()
    with open(xprmtDir + '/log2.json') as f:
        param = yaml.load(f)
    hofFilepath = xprmtDir + '/gen-' + str(param['nGen'] - 1) + '/hofIndividual.csv'
    funcStrList = []
    with open(hofFilepath, 'r') as f:
        funcStrList = f.readlines()
    funcStrList = [f for f in funcStrList if len(f) != 0]
    if nTop > len(funcStrList):
        nTop = len(funcStrList)
    funcStrList = funcStrList[0:nTop]  # take only the nTop best func/individual
    funcStrList.append(util.tanimotoStr())
    funcStrList = [s.rstrip() for s in funcStrList]
    funcStrList = [util.expandFuncStr(s) for s in funcStrList]
    metrics['funcStr'] = funcStrList
    nIndividual = len(funcStrList)
    perfList = fu.map(tuneTrainTest, funcStrList,
                      [X_train] * nIndividual, [y_train] * nIndividual,
                      [X_test] * nIndividual, [y_test] * nIndividual)
    for p in perfList:
        metrics['accuracy'].append(p[0])
        metrics['precision'].append(p[1])
        metrics['recall'].append(p[2])
        metrics['fscore'].append(p[3])
        metrics['support'].append(p[4])
    with open(xprmtDir + "/data/perf_metrics.json", 'wb') as f:
        json.dump(metrics, f, indent=2, sort_keys=True)
def test_concurrent_scoop(self): # test several restarts old_iter = self.ITERATIONS for jrun in range(3): self.ITERATIONS = 11 url = get_random_port_url() filename = make_temp_dir('locker_test/scoop.txt') self.create_file(filename) self.start_server(url) lock = LockerClient(url) lock.start() iterator = [(irun, lock, filename) for irun in range(self.ITERATIONS)] list(futures.map(the_job, iterator)) lock.send_done() self.check_file(filename) self.lock_process.join() # errwrite(str(irun)) self.ITERATIONS = old_iter
def selectSVD(atlas, minDiv=1e-2): atlas.sort(lambda x, y: cmp(x.shift, y.shift)) U, s, V = evalSVD(atlas) for i, ind in enumerate(atlas): ind.label = i def selParetoRank(sValue, vValues): if sValue < 1e-5 : return [] def calcFitness(uVal, ind): shift, robust = ind.shift, ind.robustness norm = np.abs(uVal) if norm < minDiv: ind.fitness.values = 0., np.inf, 0., len(ind) elif not np.isfinite(shift): ind.fitness.values = 0., np.inf, 0., len(ind) else: ind.fitness.values = shift*norm, robust/norm, norm, len(ind) ind.svd = norm ind.size = len(ind) map(calcFitness, vValues, atlas) nRet = 5 if len(atlas) < nRet: nRet = len(atlas) return tools.selSPEA2(atlas, 5) #return returnParetoFront(atlas) selected = sum(futures.map(selParetoRank, s, V), []) #Discard Duplicates removed = [] for ind in selected: addInd = True for ind2 in removed: if ind2.label == ind.label: addInd = False if addInd: removed.append(ind) return removed
def pi_calculus_with_Montecarlo_Method(workers, attempts):
    print("number of workers %i - number of attempts %i" % (workers, attempts))
    bt = time()
    # Here we call scoop.futures.map: evaluate_points_in_circle is executed
    # asynchronously, and several of these calls can run concurrently.
    evaluate_task = futures.map(evaluate_points_in_circle, [attempts] * workers)
    Taskresult = sum(evaluate_task)
    print("%i points fell inside the unit circle after %i attempts"
          % (Taskresult, workers * attempts))
    piValue = (4. * Taskresult / float(workers * attempts))
    computationalTime = time() - bt
    print("value of pi = " + str(piValue))
    print("error percentage = " + str((abs(piValue - math.pi) * 100) / math.pi))
    print("total time: " + str(computationalTime))
def mergesort(lst, current_depth=0, parallel_depth=0): if len(lst) <= 1: return lst middle = int(len(lst) / 2) if current_depth < parallel_depth: results = list( futures.map( mergesort, [ lst[:middle], lst[middle:], ], repeat(current_depth+1), repeat(parallel_depth), ) ) else: results = [] results.append(mergesort(lst[:middle])) results.append(mergesort(lst[middle:])) return merge(*results)
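# A minimal launch sketch (assumed, not part of the original) for the parallel mergesort above.
# It relies on `repeat` from itertools (used by mergesort) and on a two-way `merge` helper, whose
# original definition is not shown here; the script is started with `python -m scoop`.
import random

def merge(left, right):
    # standard linear merge of two sorted lists
    out, i, j = [], 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            out.append(left[i]); i += 1
        else:
            out.append(right[j]); j += 1
    return out + left[i:] + right[j:]

if __name__ == '__main__':
    data = [random.randint(0, 1000) for _ in range(10000)]
    print(mergesort(data, parallel_depth=2) == sorted(data))  # expected: True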
def execute(dbHost, dbPort, dbName, start, stop, cases, numPlayers, initialCash, numTurns, ops, fops): """ Executes a general experiment. @param dbHost: the database host @type dbHost: L{str} @param dbPort: the database port @type dbPort: L{int} @param dbName: the database collection name @type dbName: L{str} @param start: the starting seed @type start: L{int} @param stop: the stopping seed @type stop: L{int} @param cases: the list of designs to execute @type cases: L{list} @param numPlayers: the number of players @type numPlayers: L{int} @param initialCash: the initial cash @type initialCash: L{int} @param numTurns: the number of turns @type numTurns: L{int} @param ops: the operations definition @type ops: L{str} @param fops: the federation operations definition @type fops: L{str} """ executions = [(dbHost, dbPort, dbName, [e for e in elements.split(' ') if e != ''], numPlayers, initialCash, numTurns, seed, ops, fops) for (seed, elements) in itertools.product(range(start, stop), cases)] numComplete = 0.0 logging.info('Executing {} cases with seeds from {} to {} for {} total executions.' .format(len(cases), start, stop, len(executions))) for results in futures.map(queryCase, executions): print results
def func1(n): # To force an immediate evaluation, you can wrap your map in a list such as: result = list(futures.map(func2, [i+1 for i in range(n)])) return sum(result)
def func3(n): # This call results in a generator function result = futures.map(func4, [i+1 for i in range(n)]) # The results are evaluated here when they are accessed. return sum(result)
def main(): if len(sys.argv)!=4: print 'USAGE:' print 'python -m scoop devel.py [cloneID] [clusterDir] [outputDir]' print 'see devel_config.py' return cloneID = sys.argv[1] clusterDir = sys.argv[2]; assert clusterDir[-1]=='/',"should be ended with '/'" baseOutDir = sys.argv[3]; assert baseOutDir[-1]!='/',"should NOT be ended with '/'" clfParam = None method = cfg['method'] if method=='esvm': from esvm_config import config as clfParam elif method=='psvm': from psvm_config import config as clfParam else: print 'FATAL: unknown method' return outDir = os.path.join(baseOutDir,'devel-'+os.path.basename(baseOutDir)) if not(os.path.isdir(baseOutDir)): os.makedirs(baseOutDir) if not(os.path.isdir(outDir)): os.makedirs(outDir) ## Load data ################################################################################### dataLog = {}; dataLogFpath = os.path.join(outDir,'data_log_'+os.path.basename(baseOutDir)+'.json') dataset = clusterDir.split('/')[-2].split('-')[-1]; dataLog['dataset'] = dataset datasetParams = dataset.split('#') assert datasetParams[0]=='yamanishi' xyDevFpath = os.path.join(baseOutDir,'_'.join(['xdev','ydev','xrel','yrel']+datasetParams)+'.h5') if os.path.exists(xyDevFpath): print 'loading data from PREVIOUS...' with h5py.File(xyDevFpath,'r') as f: xdev = f['xdev'][:] ydev = f['ydev'][:] xrel = f['xrel'][:] yrel = f['yrel'][:] xrelraw = f['xrelraw'][:] with open(dataLogFpath,'r') as f: dataLog = yaml.load(f) else: print 'loading data FRESHLY...' print 'loading cluster result...' nUnlabels = [] statFnames = [i for i in os.listdir(clusterDir) if 'labels_stat.json' in i] for i in statFnames: with open(os.path.join(clusterDir,i),'r') as f: stat = yaml.load(f) nUnlabels.append(stat['0']) # use the cluster with minimum numbers of unlabeled samples metric = '_'.join(statFnames[ nUnlabels.index(min(nUnlabels)) ].split('_')[0:2]) dataLog['metric'] = metric connFpath = os.path.join(clusterDir,metric+'_labels.pkl') with open(connFpath,'r') as f: data = pickle.load(f) ## print 'getting devel and release data...' xraw = []; yraw = [] for k,v in data.iteritems(): for vv in v: xraw.append(vv) yraw.append(k) devIdx = [i for i in range(len(xraw)) if yraw[i]!=0] xdev = [xraw[i] for i in devIdx] ydev = [yraw[i] for i in devIdx] relIdx = [i for i in range(len(xraw)) if yraw[i]==0] xrel = [xraw[i] for i in relIdx] yrel = [yraw[i] for i in relIdx] dataLog['nDevel'] = len(devIdx); dataLog['nData'] = len(yraw) dataLog['rDevel:Data'] = dataLog['nDevel']/float(dataLog['nData']) dataLog['nDevel(+)'] = len( [i for i in ydev if i==1] ); assert dataLog['nDevel(+)']!=0 dataLog['nDevel(-)'] = len( [i for i in ydev if i==-1] ); assert dataLog['nDevel(-)']!=0 dataLog['rDevel(+):Devel'] = float(dataLog['nDevel(+)'])/dataLog['nDevel'] dataLog['rDevel(-):Devel'] = float(dataLog['nDevel(-)'])/dataLog['nDevel'] dataLog['rDevel(+):(-)'] = float(dataLog['nDevel(+)'])/float(dataLog['nDevel(-)']) dataLog['nRelease'] = len(relIdx); dataLog['rRelease:Data'] = dataLog['nRelease']/float(dataLog['nData']) ## print 'loading com, pro feature...' 
krFpath = os.path.join(cfg['datasetDir'],datasetParams[0],'feature', 'klekotaroth','klekotaroth-'+datasetParams[1]+'.h5') aacFpath = os.path.join(cfg['datasetDir'],datasetParams[0],'feature', 'amino-acid-composition','amino-acid-composition-'+datasetParams[1]+'.h5') krDict = {}; aacDict = {} with h5py.File(krFpath, 'r') as f: for com in [str(i) for i in f.keys()]: krDict[com] = f[com][:] with h5py.File(aacFpath, 'r') as f: for pro in [str(i) for i in f.keys()]: aacDict[pro] = f[pro][:] # aacDict[pro] = list( fu.map(lambda x: float('%.2f'%(x)),f[pro][:]) ) # rounding comFeaLenOri = len(krDict.values()[0]) proFeaLenOri = len(aacDict.values()[0]) ## msg = 'extract (com,pro) feature... dims: '+str(comFeaLenOri)+','+str(proFeaLenOri) msg += ' of '+str(len(ydev))+' and '+str(len(yrel)) print msg sh.setConst(krDict=krDict) sh.setConst(aacDict=aacDict) xdevf = list( fu.map(cutil.extractComProFea,xdev) ) xrelf = list( fu.map(cutil.extractComProFea,xrel) ) ## xyDevList = cutil.divideSamples(xdevf,ydev,cfg['smoteBatchSize']) if cfg['maxNumberOfSmoteBatch'] != 0: xyDevList = xyDevList[0:cfg['maxNumberOfSmoteBatch']] smoteSeed = util.seed(); dataLog['smoteSeed'] = smoteSeed sh.setConst(smoteSeed=smoteSeed) print 'resampling via Smote FRESHLY... '+str(len(xyDevList))+' smote(s)'+' on '+str(len(ydev)) smoteTic = time.time() xdevfr = []; ydevr = [] xydevfrList = list( fu.map(ensembleSmote,xyDevList) ) for xdevfri,ydevri in xydevfrList: for x in xdevfri: xdevfr.append(x.tolist()) for y in ydevri: ydevr.append(y) assert len(xdevfr)==len(ydevr),'len(xdevfr)!=len(ydevr)' dataLog['nSmote'] = len(xyDevList) dataLog['nDevelResampled'] = len(ydevr) dataLog['rDevelResampled:Data'] = dataLog['nDevelResampled']/float(dataLog['nData']) dataLog['nDevelResampled(+)'] = len( [i for i in ydevr if i==1] ) dataLog['nDevelResampled(-)'] = len( [i for i in ydevr if i==-1] ) dataLog['rDevelResampled(+):DevelResampled'] = dataLog['nDevelResampled(+)']/float(dataLog['nDevelResampled']) dataLog['rDevelResampled(-):DevelResampled'] = dataLog['nDevelResampled(-)']/float(dataLog['nDevelResampled']) dataLog['rDevelResampled(+):(-)'] = dataLog['nDevelResampled(+)']/float(dataLog['nDevelResampled(-)']) dataLog['timeSMOTE'] = str(time.time()-smoteTic) ## print 'update xdev,ydev,xrel... '+str(np.asarray(xdevfr).shape) xrelraw = xrel[:] # raw: feature is NOT extracted xrel = xrelf[:] xdev = xdevfr[:] ydev = ydevr[:] print 'writing updated xdev,ydev and xrel,yrel...' with h5py.File(xyDevFpath,'w') as f: f.create_dataset('xdev',data=xdev,dtype=np.float32) f.create_dataset('ydev',data=ydev,dtype=np.int8) f.create_dataset('xrel',data=xrel,dtype=np.float32) f.create_dataset('yrel',data=yrel,dtype=np.int8) f.create_dataset('xrelraw',data=xrelraw) print 'writing dataLog...' 
dataLog['nCom'] = len(krDict) dataLog['nPro'] = len(aacDict) with open(dataLogFpath,'w') as f: json.dump(dataLog,f,indent=2,sort_keys=True) ## TUNE+TRAIN+TEST ############################################################################# devLog = {} devSeed = util.seed(); dataLog['devSeed'] = devSeed tag = '_'.join([method+'#'+cloneID,dataset,util.tag()]) ## split devel dataset msg = ' '.join( ['devel',dataset,cloneID]) xtr,xte,ytr,yte = tts(xdev,ydev,test_size=cfg['testSize'], random_state=devSeed,stratify=ydev) if cfg['maxTestingSamples']>0: chosenIdx = np.random.randint(len(xte),size=cfg['maxTestingSamples']) xte = [xte[i] for i in chosenIdx]; yte = [yte[i] for i in chosenIdx] devLog['nTraining'] = len(xtr) devLog['nTraining(+)'] = len([i for i in ytr if i==1]) devLog['nTraining(-)'] = len([i for i in ytr if i==-1]) devLog['rTraining(+):(-)'] = devLog['nTraining(+)']/float(devLog['nTraining(-)']) devLog['rTraining:Devel'] = devLog['nTraining']/float(dataLog['nDevelResampled']) devLog['nTesting'] = len(xte) devLog['nTesting(+)'] = len([i for i in yte if i==1]) devLog['nTesting(-)'] = len([i for i in yte if i==-1]) devLog['rTesting(+):(-)'] = devLog['nTesting(+)']/float(devLog['nTesting(-)']) devLog['rTesting:Devel'] = devLog['nTesting']/float(dataLog['nDevelResampled']) ## tuning clf = None if method=='esvm': clf = eSVM(simMat=None) elif method=='psvm': clf = svm.SVC(kernel=clfParam['kernel'],probability=True) ## training print msg+': fitting nTr= '+str(len(ytr)) trTic = time.time() if method=='esvm': clf.fit(xtr,ytr) devLog['labels'] = clf.labels() devLog['nSVM'] = clf.nSVM() devLog['xtrDimAllBatches'] = clf.xtrDimAllBatches() elif method=='psvm': if cfg['method']['kernel']=='precomputed': assert False # simMatTr = cutil.makeComProKernelMatFromSimMat(xtr,xtr,simMat) # clf.fit(simMatTr,ytr) else: clf.fit(xtr,ytr) devLog['labels'] = clf.classes_.tolist() devLog['timeTraining'] = str(time.time()-trTic) ## testing print msg+': predicting nTe= '+str(len(yte)) teTic = time.time() if method=='esvm': ypred,yscore = clf.predict(xte) elif method=='psvm': if cfg['method']['kernel']=='precomputed': assert False # simMatTe = cutil.makeComProKernelMatFromSimMat(xte,xtr,simMat) # ypred = clf.predict(simMatTe) # yscore = clf.predict_proba(simMatTe) else: ypred = clf.predict(xte) yscore = clf.predict_proba(xte) yscore = [max(i.tolist()) for i in yscore] devLog['timeTesting'] = str(time.time()-teTic) ## TEST RELEASE ################################################################################ print msg+': predicting RELEASE n= '+str(len(yrel)) relTic = time.time() if method=='esvm': yrel,yrelscore = clf.predict(xrel) elif method=='psvm': if cfg['method']['kernel']=='precomputed': assert False # simMatTe = cutil.makeComProKernelMatFromSimMat(xrel,xtr,simMat) # yrel = clf.predict(simMatTe) # yrelscore = clf.predict_proba(simMatTe) else: yrel = clf.predict(xrel) yrelscore = clf.predict_proba(xrel) yrelscore = [max(i.tolist()) for i in yrelscore] devLog['timeRelease'] = str(time.time()-relTic) ## WRITE RESULT ################################################################################ result = {'yte':yte,'ypred':ypred,'yscore':yscore, 'xrelraw':xrelraw,'yrel':yrel,'yrelscore':yrelscore} print 'writing prediction...' with h5py.File(os.path.join(outDir,'result_'+tag+'.h5'),'w') as f: for k,v in result.iteritems(): if 'raw' in k: f.create_dataset(k,data=v) else: dt = np.int8 if 'score' in k: dt = np.float32 f.create_dataset(k,data=v,dtype=dt) ## print 'writing devLog...' 
devLog['clfParam'] = clfParam devLog['devParam'] = cfg with open(os.path.join(outDir,'devLog_'+tag+'.json'),'w') as f: json.dump(devLog,f,indent=2,sort_keys=True)
from scoop import futures import nlp import glob import sys UPLOAD_PATH = './uploads/' def analyze_emotion(filename): with open(filename, 'rU') as f: return {filename:dict(nlp.emotion_analysis(f.read()))} if __name__ == "__main__": files = glob.glob(UPLOAD_PATH+'*.txt') + glob.glob(UPLOAD_PATH+'*.data') ret = futures.map(analyze_emotion, files) print([x for x in ret if isinstance(x,dict)])
def my_map(*args, **kwargs): return list(futures.map(*args, **kwargs))
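# Note (an assumption, not stated in the source): a tiny wrapper like my_map above is typically
# registered where a library expects an eager, list-returning map, e.g. with DEAP:
#   toolbox.register("map", my_map)
# so that all toolbox.map() calls are dispatched through SCOOP.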
def map_(*args, **kwargs): return map(WorkerWrapper(args[0]), *args[1:], **kwargs)
def worker_run_simple(counter): """Execute the cmd to be called with """ cmd_sanity = ["%s" % x for x in parse_worker_args()] ## ready to join set_scoop_env('counter', counter) ec, out = run_simple(' '.join(cmd_sanity), disable_log=True) return ec, out ## return 1 item if __name__ == '__main__': _log = make_worker_log(NAME, debug=_DEBUG) worker_func = worker_run_simple res = None start, stop, step = parse_worker_args(False) try: _log.debug("main_run: going to start map") res_generator = futures.map(worker_func, xrange(start, stop, step)) _log.debug("main_run: finished map") res = [x for x in res_generator] _log.debug("main_run: finished res from generator") except: _log.exception("main_run: main failed with main_func %s with start %s stop %s" % (worker_func, start, stop)) print res
# Benchmark fragment: x, y, z and the flop class are assumed to be defined earlier in the script
# (e.g. x a scalar or array, y a sequence, z an array suitable for broadcasting).
start = time.time()
for q in y:
    x*q
print 'for loop took %f seconds' % (time.time()-start)

start = time.time()
[x*q for q in y]
print 'list comp took %f seconds' % (time.time()-start)

start = time.time()
x*z
print 'broadcasting took %f seconds' % (time.time()-start)

start = time.time()
map(lambda q: x*q, y)
print 'serial map took %f seconds' % (time.time()-start)

# futures.map returns a lazy generator; wrap it in list() so the timing includes the actual work.
start = time.time()
list(futures.map(lambda q: x*q, y))
print 'parallel map took %f seconds' % (time.time()-start)

flong = flop(x)
start = time.time()
list(futures.map(flong.compute, y))
print 'parallel map with method took %f seconds' % (time.time()-start)