def lorenz():
    sigma = 10
    rho = 28
    beta = 8.0 / 3
    theta = 3 * np.pi / 4

    def lor(xyz, t):
        x, y, z = xyz
        x_dot = sigma * (y - x)
        y_dot = x * rho - x * z - y
        z_dot = x * y - beta * z
        return [x_dot, y_dot, z_dot]

    initial = (-10, -7, 35)
    t = np.arange(0, 100, 0.006)

    solution = odeint(lor, initial, t)

    x = solution[:, 0]
    y = solution[:, 1]
    z = solution[:, 2]
    xprime = np.cos(theta) * x - np.sin(theta) * y

    colors = ["#C6DBEF", "#9ECAE1", "#6BAED6", "#4292C6",
              "#2171B5", "#08519C", "#08306B"]

    p = figure(title="Lorenz example", tools='', toolbar_location=None,
               responsive='box')
    p.title_location = 'right'
    p.multi_line(np.array_split(xprime, 7), np.array_split(z, 7),
                 line_color=colors, line_alpha=0.8, line_width=1.5)
    return p
def reshape_soln_y(ug, nx, ny, p, px, py):
    # evenly split ug into a list of p parts
    soln = np.array_split(ug, p)
    # reshape each part
    soln = np.hstack([a.reshape(ny, nx) for a in soln])
    soln = np.vstack([arr.transpose() for arr in np.array_split(soln.transpose(), p)])
    return soln
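# A minimal sketch of the split-reshape-stack idiom used above, assuming a
# flat vector gathered from p ranks that each held an (ny, nx) block; the
# shapes here (p=2, nx=3, ny=2) are illustrative, not from the original code.
import numpy as np

ug = np.arange(12)                       # gathered global vector
parts = np.array_split(ug, 2)            # one chunk per rank
blocks = [a.reshape(2, 3) for a in parts]
print(np.hstack(blocks).shape)           # (2, 6): blocks laid side by side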
def create_task_list(D, calc_second_order, n_processors):
    # Create list with one entry (key, parameter 1, parameter 2) per Sobol
    # index (+conf.). This is used to supply parallel tasks to
    # multiprocessing.Pool.
    tasks_first_order = [[d, j, None] for j in range(D)
                         for d in ('S1', 'S1_conf', 'ST', 'ST_conf')]

    # Add second order (+conf.) to tasks
    tasks_second_order = []
    if calc_second_order:
        tasks_second_order = [[d, j, k] for j in range(D)
                              for k in range(j + 1, D)
                              for d in ('S2', 'S2_conf')]

    if n_processors is None:
        n_processors = min(cpu_count(),
                           len(tasks_first_order) + len(tasks_second_order))

    if not calc_second_order:
        tasks = np.array_split(tasks_first_order, n_processors)
    else:
        # merge both lists, alternating their elements, and split the
        # resulting list into n_processors sublists
        tasks = np.array_split([v for v in sum(
            zip_longest(tasks_first_order[::-1], tasks_second_order), ())
            if v is not None], n_processors)

    return tasks, n_processors
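# A small illustration (toy task names, not SALib's) of the interleave idiom
# above: zip_longest pairs the two task lists, sum(..., ()) flattens the
# pairs into one tuple, and the None padding is filtered out before
# np.array_split balances the work across processors.
from itertools import zip_longest
import numpy as np

first = ['S1-0', 'ST-0', 'S1-1', 'ST-1']
second = ['S2-01']
merged = [v for v in sum(zip_longest(first[::-1], second), ()) if v is not None]
print(merged)                       # ['ST-1', 'S2-01', 'S1-1', 'ST-0', 'S1-0']
print(np.array_split(merged, 2))    # two roughly equal chunks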
def solar_position_numba(unixtime, lat, lon, elev, pressure, temp, delta_t,
                         atmos_refract, numthreads, sst=False):
    """Calculate the solar position using the numba compiled functions
    and multiple threads. Very slow if functions are not numba compiled.
    """
    loc_args = np.array([lat, lon, elev, pressure, temp, delta_t,
                         atmos_refract, sst])
    ulength = unixtime.shape[0]
    result = np.empty((6, ulength), dtype=np.float64)
    if unixtime.dtype != np.float64:
        unixtime = unixtime.astype(np.float64)

    if ulength < numthreads:
        pvl_logger.warning('The number of threads is more than the length of'
                           ' the time array. Only using %s threads.', ulength)
        numthreads = ulength

    if numthreads <= 1:
        pvl_logger.debug('Only using one thread for calculation')
        solar_position_loop(unixtime, loc_args, result)
        return result

    split0 = np.array_split(unixtime, numthreads)
    split2 = np.array_split(result, numthreads, axis=1)
    chunks = [[a0, loc_args, split2[i]] for i, a0 in enumerate(split0)]
    # Spawn one thread per chunk
    threads = [threading.Thread(target=solar_position_loop, args=chunk)
               for chunk in chunks]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    return result
def distribute_nodes(self, path_index):
    path = self.paths[path_index]
    if path.type == 'linear':
        digits = int(np.ceil(np.log10(path.ne)))
        base = path.index * 10 ** digits
        energies = np.linspace(path.begin, path.end, path.ne)
        weights = path.weights2 + [1] * (path.ne - 6) + path.weights3
        weights = np.array(weights) * path.int_step
        nids = np.arange(path.ne) + base + 1
    elif path.type == 'poles':
        base = path.index * 100
        nids0 = base + 10 + np.arange(path.poles_num) + 1
        nids1 = base + 20 + np.arange(path.poles_num) + 1
        nids = np.append(nids0, nids1)
        energies0 = path.begin + (np.arange(path.poles_num) * 2 - 1) * np.pi * 1.j
        energies1 = path.end + (np.arange(path.poles_num) * 2 - 1) * np.pi * 1.j
        # combine both pole branches, mirroring nids and weights; without
        # this, `energies` is undefined below
        energies = np.append(energies0, energies1)
        weights0 = [-1] * path.poles_num
        weights1 = [1] * path.poles_num
        weights = np.append(weights0, weights1)
    loc_nids = np.array_split(nids, self.comm.size)[self.comm.rank]
    loc_energies = np.array_split(energies, self.comm.size)[self.comm.rank]
    loc_weights = np.array_split(weights, self.comm.size)[self.comm.rank]
    return loc_nids, loc_energies, loc_weights
def add_data(self, X, T):
    """Feed new training data (X, T) to ELM model in batches; does not
    solve ELM itself.

    Helper method that updates intermediate solution parameters HH and HT,
    which are used for solving ELM later. Updates accumulate, so this method
    can be called multiple times with different parts of training data.
    To reset accumulated training data, use `ELM.nnet.reset()`.
    For training an ELM use `ELM.train()` instead.

    Args:
        X (matrix): input training data
        T (matrix): output training data
    """
    # compute the number of batches
    nb = int(np.ceil(float(X.shape[0]) / self.batch))
    wc_vector = None

    # find automatic weights if none are given
    if self.classification == "wc" and self.wc is None:
        ns = T.sum(axis=0).astype(self.precision)  # number of samples in classes
        self.wc = ns.sum() / ns  # weights of classes

    for X0, T0 in zip(np.array_split(X, nb, axis=0),
                      np.array_split(T, nb, axis=0)):
        if self.classification == "wc":
            wc_vector = self.wc[np.where(T0 == 1)[1]]  # weights for samples in the batch
        self.nnet.add_batch(X0, T0, wc_vector)
def vocode(self, segment_voice, segment_gen):
    """This is the vocoder. It multiplies the amplitudes of two separate
    signals to produce a single output signal."""
    temp_final = []
    for j in range(self.num_channels):
        saw_spec = segment_gen[j].make_spectrum()
        input_spec = segment_voice[j].make_spectrum()
        input_hs = input_spec.hs
        saw_hs = saw_spec.hs
        saw_bands = np.array_split(saw_hs, self.num_bands)
        input_bands = np.array_split(input_hs, self.num_bands)
        final_bands = np.empty_like(saw_bands)
        for i in range(self.num_bands):
            amp_multi = np.abs(saw_bands[i]) * np.abs(input_bands[i])
            phase_multi = np.angle(saw_bands[i])
            final_bands[i] = amp_multi * (np.cos(phase_multi) + (np.sin(phase_multi) * 1j))
        temp_final.append(np.ma.concatenate(final_bands).data)

    final_wave = []
    for i in range(len(temp_final)):
        final_wave.append(thinkdsp.Spectrum(hs=temp_final[i],
                                            framerate=self.framerate).make_wave())
    output = final_wave[0]
    for i in range(1, len(final_wave)):
        output |= final_wave[i]
    return output
def generate_indices(mode='r', iterations=1, train_size=300):
    if mode == 'd':
        # deterministic
        def get_indices():
            ind = numpy.arange(1000)
            pos_train_ind = ind[:train_size]
            pos_test_ind = ind[train_size:]
            neg_train_ind = ind[:train_size]
            neg_test_ind = ind[train_size:]
            for i in range(iterations):
                yield (pos_train_ind, pos_test_ind, neg_train_ind, neg_test_ind)
    elif mode == 'r':
        # random
        def get_indices():
            for i in range(iterations):
                pos_ind = shuffle_ind()
                pos_train_ind = pos_ind[:train_size]
                pos_test_ind = pos_ind[train_size:]
                neg_ind = shuffle_ind()
                neg_train_ind = neg_ind[:train_size]
                neg_test_ind = neg_ind[train_size:]
                yield (pos_train_ind, pos_test_ind, neg_train_ind, neg_test_ind)
    elif mode == 'k':
        # k-fold cross-validation
        # here, iterations = number of folds
        pos_ind = shuffle_ind()
        neg_ind = shuffle_ind()
        pos_folds = numpy.array_split(pos_ind, iterations)
        neg_folds = numpy.array_split(neg_ind, iterations)

        def get_indices():
            for i in range(iterations):
                pos_train_ind = numpy.hstack(pos_folds[:i] + pos_folds[i + 1:]).tolist()
                pos_test_ind = pos_folds[i].tolist()
                neg_train_ind = numpy.hstack(neg_folds[:i] + neg_folds[i + 1:]).tolist()
                neg_test_ind = neg_folds[i].tolist()
                yield (pos_train_ind, pos_test_ind, neg_train_ind, neg_test_ind)
    return get_indices()
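# A standalone sketch of the 'k' branch above: np.array_split tolerates fold
# counts that do not divide the data evenly, which is why it is preferred to
# np.split for cross-validation. shuffle_ind() is assumed to return a
# permuted index array; numpy's permutation stands in for it here.
import numpy as np

ind = np.random.permutation(10)
folds = np.array_split(ind, 3)           # sizes 4, 3, 3
i = 1                                     # held-out fold
train = np.hstack(folds[:i] + folds[i + 1:])
test = folds[i]
print(len(train), len(test))              # 7 3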
def process(self, data, output, processes, process):
    """
    """
    print "in the process function"
    if data.center_of_rotation is None:
        centre_of_rotation = np.ones(data.get_number_of_sinograms())
        centre_of_rotation = centre_of_rotation * self.parameters["center_of_rotation"]
    else:
        centre_of_rotation = data.center_of_rotation[:]
        if centre_of_rotation is None:
            centre_of_rotation = np.ones(data.get_number_of_sinograms())
            centre_of_rotation = centre_of_rotation * self.parameters["center_of_rotation"]

    sinogram_frames = np.arange(data.get_number_of_sinograms())

    frames = np.array_split(sinogram_frames, len(processes))[process]
    centre_of_rotations = np.array_split(centre_of_rotation, len(processes))[process]

    angles = data.rotation_angle.data[:]

    for i in range(len(frames)):
        frame_centre_of_rotation = centre_of_rotations[i]
        sinogram = data.data[:, frames[i], :]
        reconstruction = self.reconstruct(
            sinogram, frame_centre_of_rotation, angles,
            (output.data.shape[0], output.data.shape[2]),
            (output.data.shape[0] / 2, output.data.shape[2] / 2))
        output.data[:, frames[i], :] = reconstruction
        self.count += 1
        print self.count
def create_experiment_20151020():
    # Using stratified sampling, select n assets, one from each
    # stratum, for n = 5, 10, 25, 100 for monthly and n = 5, 10, 25,
    # 100, 250, 500 for daily.
    monthly_returns = read_monthly_returns()
    monthly_indices = monthly_returns.tail(12).mean().sort_values().index
    monthly_asset_stratums = {
        i: np.array_split(monthly_indices, i) for i in [5, 10, 25, 100]}

    daily_returns = read_daily_returns()
    daily_indices = daily_returns.tail(90).mean().sort_values().index
    daily_asset_stratums = {
        i: np.array_split(daily_indices, i)
        for i in [5, 10, 25, 100, 250, 500]}

    def select_assets(stratums):
        return [np.random.choice(i) for i in stratums]

    # Write monthly return data.
    for (num_assets, stratum) in monthly_asset_stratums.items():
        filename = (
            '../data/experiments/pu_bounds_uncertainty_20151020' +
            '/monthly_scenario_' + str(num_assets) + '.csv')
        monthly_returns.loc[:, select_assets(stratum)].to_csv(filename)

    # Write daily return data.
    for (num_assets, stratum) in daily_asset_stratums.items():
        filename = (
            '../data/experiments/pu_bounds_uncertainty_20151020' +
            '/daily_scenario_' + str(num_assets) + '.csv')
        daily_returns.loc[:, select_assets(stratum)].to_csv(filename)
def score(self, X, y):
    """Returns the score obtained for each estimator/data slice couple.

    Parameters
    ----------
    X : array, shape (n_samples, n_features, n_estimators)
        The input samples. For each data slice, the corresponding
        estimator scores the prediction, e.g.:
        [estimators[ii].score(X[..., ii], y) for ii in range(n_estimators)]
    y : array, shape (n_samples,) | (n_samples, n_targets)
        The target values.

    Returns
    -------
    score : array, shape (n_samples, n_estimators)
        Score for each estimator / data slice couple.
    """
    self._check_Xy(X)
    if X.shape[-1] != len(self.estimators_):
        raise ValueError('The number of estimators does not match '
                         'X.shape[2]')
    # For predictions/transforms the parallelization is across the data
    # and not across the estimators to avoid memory load.
    parallel, p_func, n_jobs = parallel_func(_sl_score, self.n_jobs)
    X_splits = np.array_split(X, n_jobs, axis=-1)
    est_splits = np.array_split(self.estimators_, n_jobs)
    score = parallel(p_func(est, x, y)
                     for (est, x) in zip(est_splits, X_splits))

    if n_jobs > 1:
        score = np.concatenate(score, axis=0)
    else:
        score = score[0]
    return score
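# A minimal sketch (toy shapes, not MNE's actual API) of splitting the
# estimator axis for parallel scoring: both the data and the fitted
# estimators are split with np.array_split so chunk i of X lines up with
# chunk i of the estimators.
import numpy as np

X = np.zeros((8, 4, 5))                  # (n_samples, n_features, n_estimators)
estimators = np.array(range(5))          # stand-ins for fitted estimators
n_jobs = 2
X_splits = np.array_split(X, n_jobs, axis=-1)
est_splits = np.array_split(estimators, n_jobs)
for est, x in zip(est_splits, X_splits):
    print(len(est), x.shape)             # 3 (8, 4, 3) then 2 (8, 4, 2)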
def _setup_grids_(mf, dm):
    mol = mf.mol
    grids = mf.grids
    if rank == 0:
        grids.build(with_non0tab=False)
        grids.coords = numpy.array_split(grids.coords, mpi.pool.size)
        grids.weights = numpy.array_split(grids.weights, mpi.pool.size)
    grids.coords = mpi.scatter(grids.coords)
    grids.weights = mpi.scatter(grids.weights)

    ground_state = (isinstance(dm, numpy.ndarray) and dm.ndim == 2)
    if mf.small_rho_cutoff > 1e-20 and ground_state:
        rho = mf._numint.get_rho(mol, dm, grids, mf.max_memory)
        n = comm.allreduce(numpy.dot(rho, grids.weights))
        if abs(n - mol.nelectron) < rks.NELEC_ERROR_TOL * n:
            rw = mpi.gather(rho * grids.weights)
            idx = abs(rw) > mf.small_rho_cutoff / grids.weights.size
            logger.alldebug1(mf, 'Drop grids %d',
                             grids.weights.size - numpy.count_nonzero(idx))
            grids.coords = numpy.asarray(grids.coords[idx], order='C')
            grids.weights = numpy.asarray(grids.weights[idx], order='C')
            grids.non0tab = grids.make_mask(mol, grids.coords)
    return grids
def transform(self, pts, verbose=None):
    """Apply the warp.

    Parameters
    ----------
    pts : shape (n_transform, 3)
        Source points to warp to the destination.

    Returns
    -------
    dest : shape (n_transform, 3)
        The transformed points.
    """
    logger.info('Transforming %s points' % (len(pts),))
    from scipy.spatial.distance import cdist
    assert pts.shape[1] == 3
    # for memory reasons, we should do this in ~100 MB chunks
    out = np.zeros_like(pts)
    n_splits = max(int((pts.shape[0] * self._destination.shape[0]) /
                       (100e6 / 8.)), 1)
    for this_out, this_pts in zip(np.array_split(out, n_splits),
                                  np.array_split(pts, n_splits)):
        dists = _tps(cdist(this_pts, self._destination, 'sqeuclidean'))
        L = np.hstack((dists, np.ones((dists.shape[0], 1)), this_pts))
        this_out[:] = np.dot(L, self._weights)
    assert not (out == 0).any()
    return out
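# A small demonstration (assumed sizes) of the in-place pattern above:
# np.array_split along axis 0 returns views into the parent array, so
# assigning to each chunk with `this_out[:] = ...` fills the preallocated
# `out` without any final concatenation.
import numpy as np

out = np.zeros((5, 3))
for i, chunk in enumerate(np.array_split(out, 2)):
    chunk[:] = i + 1                     # writes through the view
print(out[:, 0])                         # [1. 1. 1. 2. 2.]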
def lorenz_example():
    sigma = 10
    rho = 28
    beta = 8.0 / 3
    theta = 3 * np.pi / 4

    def lorenz(xyz, t):
        x, y, z = xyz
        x_dot = sigma * (y - x)
        y_dot = x * rho - x * z - y
        z_dot = x * y - beta * z
        return [x_dot, y_dot, z_dot]

    initial = (-10, -7, 35)
    t = np.arange(0, 100, 0.006)

    solution = odeint(lorenz, initial, t)

    x = solution[:, 0]
    y = solution[:, 1]
    z = solution[:, 2]
    xprime = np.cos(theta) * x - np.sin(theta) * y

    colors = ["#C6DBEF", "#9ECAE1", "#6BAED6", "#4292C6",
              "#2171B5", "#08519C", "#08306B"]

    output_file("lorenz.html", title="lorenz.py example")

    multi_line(np.array_split(xprime, 7), np.array_split(z, 7),
               line_color=colors, line_alpha=0.8, line_width=1.5,
               tools="pan,zoom,resize", title="lorenz example",
               name="lorenz_example")
    return curplot()
def filter_params(self, p_sets, p_fmins, nkeep=5, method='best'):
    # rank inits by costfx error low-to-high
    fmin_series = pd.Series(p_fmins)
    rankorder = fmin_series.sort_values()
    # eliminate extremely bad parameter sets
    rankorder = rankorder[rankorder <= 5.0]
    if method == 'random':
        # return nkeep from randomly sampled inits
        inits = p_sets[:nkeep]
        inits_err = p_fmins[:nkeep]
    elif method == 'best':
        # return nkeep from inits with lowest err
        inits = [p_sets[i] for i in rankorder.index[:nkeep]]
        inits_err = rankorder.values[:nkeep]
    elif method == 'lmh':
        # split index for low, med, and high err inits
        # if nkeep is odd, will sample more low than high
        if nkeep < 3:
            nkeep = 3
        ix = rankorder.index.values
        nl, nm, nh = [arr.size for arr in np.array_split(np.arange(nkeep), 3)]
        # extract indices for roughly equal numbers of parameter sets with
        # low, med, and high err
        keep_ix = np.hstack([ix[:nl], np.array_split(ix, 2)[0][-nm:], ix[-nh:]])
        inits = [p_sets[i] for i in keep_ix]
        inits_err = [fmin_series[i] for i in keep_ix]
    return inits, np.min(inits_err)
def cross_validate_k(S, X, t, l, min_feature=100, max_feature=250, disp=True):
    """Performs cross-validation to optimize the number of features that
    gives the best error rate."""
    X_groups = np.array_split(X, S)
    Y_groups = np.array_split(t, S)
    min_err_hp = (0, float("inf"))
    for k in range(min_feature, max_feature + 1):
        if disp:
            print("Starting S-fold cross-validation for k =", k)
            print("Training run:", end=' ')
        # i represents the held-out group
        error_rates = np.ndarray(S)
        for i in range(S):
            if disp:
                print(i + 1, end=' ')
            X_others = [X_groups[x] for x in range(S) if x != i]
            Y_others = [Y_groups[x] for x in range(S) if x != i]
            X_training = np.concatenate(tuple(X_others), axis=0)
            Y_training = np.concatenate(tuple(Y_others), axis=0)
            # Feature selection has to be done for each partition to
            # generalize the fitting. It leads to over-fitting to the
            # validation data if the scoring & selection is done once on
            # the entire dataset and then masked locally in a partition.
            mask = univariate_fs(X_training, Y_training, k=k)
            X_training_subset = X_training[:, mask]
            X_val_subset = X_groups[i][:, mask]
            w_star = train(X_training_subset, Y_training, l, disp=0)
            # Prediction on the held-out partition
            error_rates[i] = prediction_performance(X_val_subset, Y_groups[i],
                                                    w_star, report=False)
        error_rate = error_rates.mean()
        if disp:
            print("; Error rate:", error_rate)
        if error_rate < min_err_hp[1]:
            min_err_hp = (k, error_rate)
    return min_err_hp
def ModelSelectionTest01():
    from sklearn import datasets, svm
    import numpy as np
    digits = datasets.load_digits()
    X_digits = digits.data
    Y_digits = digits.target
    svc = svm.SVC(C=1, kernel='linear')
    score = svc.fit(X_digits[:-100], Y_digits[:-100]).score(X_digits[-100:], Y_digits[-100:])
    #print score

    X_folds = np.array_split(X_digits, 3)
    Y_folds = np.array_split(Y_digits, 3)
    #print len(X_folds[0])
    scores = list()
    for k in range(3):
        X_train = list(X_folds)  # X_folds is a list with 3 elements
        X_test = X_train.pop(k)  # the test set is the k-th element of the train list
        X_train = np.concatenate(X_train)  # this removes X_test from X_train
        #print len(X_train)
        Y_train = list(Y_folds)
        Y_test = Y_train.pop(k)
        Y_train = np.concatenate(Y_train)
        scores.append(svc.fit(X_train, Y_train).score(X_test, Y_test))
    #print scores

    from sklearn import cross_validation
    k_fold = cross_validation.KFold(n=6, n_folds=3)
    for train_indices, test_indices in k_fold:
        print train_indices, test_indices

    k_fold = cross_validation.KFold(len(X_digits), n_folds=3)
    scores = [svc.fit(X_digits[train], Y_digits[train]).score(X_digits[test], Y_digits[test])
              for train, test in k_fold]
    #print scores

    scores = cross_validation.cross_val_score(svc, X_digits, Y_digits,
                                              cv=k_fold, n_jobs=1)
    #print scores

    from sklearn.grid_search import GridSearchCV
    gammas = np.logspace(-6, -1, 10)
    clf = GridSearchCV(estimator=svc, param_grid=dict(gamma=gammas), n_jobs=1)
    clf.fit(X_digits[:1000], Y_digits[:1000])
    print clf.best_score_
    print clf.best_estimator_.gamma

    from sklearn import linear_model, datasets
    lasso = linear_model.LassoCV()  # what is the difference between LassoCV and Lasso here?
    diabetes = datasets.load_diabetes()
    X_diabetes = diabetes.data
    Y_diabetes = diabetes.target
    lasso.fit(X_diabetes, Y_diabetes)
    print lasso.alpha_
def ellis_bpm(fname, start_bpm, hpss=True, hop_length=512, tightness=100.0,
              plot=False, sound=False):
    y, sr = librosa.load(fname, sr=None)
    log.debug(u'Estimating tempo: {}'.format(TERM.cyan(fname)))

    if hpss:
        log.debug(TERM.magenta("Getting percussive elements"))
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        chunks = np.array_split(y_percussive, PLOT_SPLIT)
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                                   start_bpm=start_bpm,
                                                   hop_length=hop_length,
                                                   tightness=tightness)
    else:
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y, sr=sr,
                                                   start_bpm=start_bpm,
                                                   hop_length=hop_length,
                                                   tightness=tightness)
        chunks = np.array_split(y, PLOT_SPLIT)

    log.debug(u'Tempo: {:6.2f} bpm'.format(bpm))

    if plot:
        plt.figure(figsize=(16, 10))
        curr_frame = 0
        for i in range(PLOT_SPLIT):
            plt.subplot(PLOT_SPLIT * 100 + 11 + i)
            plt.plot(curr_frame + np.arange(len(chunks[i])), chunks[i], 'g')
            for b in beat_frames:
                plt.axvline(x=b * hop_length, color='k')
            plt.xlim([curr_frame, len(chunks[i]) + curr_frame])
            curr_frame += len(chunks[i])
        plt.show(block=False)

    if sound:
        beat_times = librosa.frames_to_time(beat_frames, sr=sr,
                                            hop_length=hop_length)
        clicks = mir_eval.sonify.clicks(beat_times, sr, length=len(y))
        sd.play(y + clicks, sr)
        input('Press Return key to stop sound')
        sd.stop()

    return bpm
def cross_validate(S, X, t, disp=True):
    """Return the optimal value for the regularization hyper-parameter by
    performing S-fold cross-validation for all allowed values of λ."""
    X_groups = np.array_split(X, S)
    Y_groups = np.array_split(t, S)
    min_err_hp = (0, float("inf"))
    for p in range(-3, 2):
        l = 10 ** p
        if disp:
            print("Starting S-fold cross-validation for λ =", l)
            print("Training run:", end=' ')
        # i represents the held-out group
        error_rates = np.ndarray(S)
        for i in range(S):
            if disp:
                print(i + 1, end=' ')
            X_others = [X_groups[x] for x in range(S) if x != i]
            Y_others = [Y_groups[x] for x in range(S) if x != i]
            X_training = np.concatenate(tuple(X_others), axis=0)
            Y_training = np.concatenate(tuple(Y_others), axis=0)
            w_star = train(X_training, Y_training, l, disp=0)
            # Prediction on the held-out group
            error_rates[i] = prediction_performance(X_groups[i], Y_groups[i],
                                                    w_star, report=False)
        if disp:
            print("")
        error_rate = error_rates.mean()
        if error_rate < min_err_hp[1]:
            min_err_hp = (l, error_rate)
    return min_err_hp
def RSM(self, avgl, rossete=4, loopdist='gaussian'):
    x = np.arange(1, self.N)
    pickpdist = pdist()
    if loopdist == 'gaussian':
        cdf = np.cumsum(pickpdist.gaussian(avgl)(x))
    while True:
        self.looplst = x[np.searchsorted(cdf, np.random.random(self.M))]
        if np.sum(self.looplst) < self.N - 1:
            break
    rest = self.N - 1 - np.sum(self.looplst)
    temp = np.array_split(np.arange(rest), 3 * rossete)
    anchor = []
    for i in range(rossete):
        temp3 = []
        temp3.append(np.random.choice(temp[i * 3 + 1], 1)[0])
        for j in np.arange([len(np.array_split(np.arange(self.M), rossete)[k])
                            for k in range(rossete)][i] - 1):
            temp3.append(temp3[-1] + np.random.randint(1, 5))
        anchor.append(temp3)
    anchor = np.array(anchor)
    anchor = anchor.flatten()
    temp1 = anchor + np.cumsum(self.looplst)
    temp2 = temp1 - self.looplst
    self.pair = np.array(zip(temp2, temp1))
    return self.looplst, self.pair
def __setitem__(self, attr, val):
    if self.read_only:
        raise ValueError('Cannot set item in read-only mode.')
    is_np = type(val).__module__ == np.__name__

    if isinstance(self.data, dict):
        if isinstance(attr, bytes):
            attr = attr.decode('utf-8')
        if is_np:
            self.data[attr] = pickle.dumps(val)
            # We have to remember to unpickle in __getitem__
            self.data['_{}_pickled'.format(attr)] = True
        else:
            self.data[attr] = val
        return

    if isinstance(self.data, h5py.Group) and attr in self.data:
        raise KeyError('Cannot set attribute. '
                       'Group with name "{}" exists.'.format(attr))

    if is_np:
        dataset = self.data.create_dataset(attr, val.shape, dtype=val.dtype)
        if not val.shape:
            # scalar
            dataset[()] = val
        else:
            dataset[:] = val
    elif isinstance(val, (list, tuple)):
        # Check that no item in `val` is larger than `HDF5_OBJECT_HEADER_LIMIT`
        # because in that case even chunking the array would not make the
        # saving possible.
        bad_attributes = [x for x in val if len(x) > HDF5_OBJECT_HEADER_LIMIT]

        # Expecting this to never be true.
        if bad_attributes:
            raise RuntimeError('The following attributes cannot be saved to '
                               'HDF5 file because they are larger than '
                               '%d bytes: %s' % (HDF5_OBJECT_HEADER_LIMIT,
                                                 ', '.join(bad_attributes)))

        if (val and sys.version_info[0] == 3 and
                isinstance(val[0], six.string_types)):
            # convert to bytes
            val = [x.encode('utf-8') for x in val]

        data_npy = np.asarray(val)
        num_chunks = 1
        chunked_data = np.array_split(data_npy, num_chunks)

        # This will never loop forever thanks to the test above.
        is_too_big = lambda x: x.nbytes > HDF5_OBJECT_HEADER_LIMIT
        while any(map(is_too_big, chunked_data)):
            num_chunks += 1
            chunked_data = np.array_split(data_npy, num_chunks)

        if num_chunks > 1:
            for chunk_id, chunk_data in enumerate(chunked_data):
                self.data.attrs['%s%d' % (attr, chunk_id)] = chunk_data
        else:
            self.data.attrs[attr] = val
    else:
        self.data.attrs[attr] = val
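# A distilled sketch of the chunk-until-it-fits loop above, with a made-up
# 64-byte limit standing in for HDF5_OBJECT_HEADER_LIMIT: keep increasing
# the number of np.array_split chunks until every chunk is under the cap.
import numpy as np

LIMIT = 64                                  # hypothetical byte limit
data_npy = np.arange(100, dtype=np.int64)   # 800 bytes total
num_chunks = 1
chunked = np.array_split(data_npy, num_chunks)
while any(c.nbytes > LIMIT for c in chunked):
    num_chunks += 1
    chunked = np.array_split(data_npy, num_chunks)
print(num_chunks, [c.nbytes for c in chunked])  # 13 chunks, each <= 64 bytes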
def schedule_generator_maintenance_loop(load, pmaxs, annual_maintenance_rates,
                                        dispatch_periods, scheduling_order):
    # if nothing else, better to schedule the large generators first
    scheduling_order = np.argsort(-pmaxs) if scheduling_order is None else scheduling_order

    # annual maintenance rates must be between zero and one
    annual_maintenance_rates = np.clip(annual_maintenance_rates, 0, 1)

    # gives the index for the change between dispatch_periods
    group_cuts = list(np.where(np.diff(dispatch_periods) != 0)[0] + 1) if dispatch_periods is not None else None
    group_lengths = np.array([group_cuts[0]] + list(np.diff(group_cuts)) +
                             [len(load) - group_cuts[-1]])
    num_groups = len(group_cuts) + 1

    # necessary to scale load in some cases for the optimization to work.
    # Basically, load shouldn't be > gen
    load_scaled = scale_load_to_system(load, pmaxs)
    load_scaled = np.concatenate([[np.max(ls)] * gl for gl, ls in
                                  zip(group_lengths,
                                      np.array_split(load_scaled, np.array(group_cuts)))])

    pmaxs_clipped = copy.deepcopy(pmaxs)
    pmaxs_clipped = np.clip(pmaxs_clipped, 1e-1, None)

    maintenance_energy = annual_maintenance_rates * pmaxs_clipped * len(load)
    scheduled_maintenance = np.zeros((num_groups, len(pmaxs)))

    # loop through and schedule maintenance for each generator one at a time.
    # Update the net load after each one.
    for i in scheduling_order:
        energy_allocation = dispatch_budget.dispatch_to_energy_budget(
            load_scaled, -maintenance_energy[i],
            pmins=0, pmaxs=pmaxs_clipped[i])
        scheduled_maintenance[:, i] = np.clip(
            np.array([np.mean(ls) for ls in
                      np.array_split(energy_allocation, np.array(group_cuts))]) / pmaxs_clipped[i],
            0, 1)
        load_scaled += np.concatenate([[sm * pmaxs[i]] * gl for gl, sm in
                                       zip(group_lengths, scheduled_maintenance[:, i])])

    if not all(np.isclose(annual_maintenance_rates,
                          (scheduled_maintenance.T * group_lengths).sum(axis=1) / len(load))):
        logging.warning("scheduled maintenance rates don't all match the annual maintenance rates")

    return scheduled_maintenance
def split_data(ras, decs):
    """Split the RAs and DECs into smaller chunks, which is better for
    cache coherence."""
    size = ceil(len(ras) / 256.0)
    return zip(array_split(ras, size), array_split(decs, size))
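# A hedged usage sketch for split_data above, assuming its imports
# (math.ceil, numpy.array_split) and the function itself are in scope;
# ceil(len/256.0) guarantees every chunk holds at most 256 points.
import numpy as np

ras = np.random.uniform(0, 360, 1000)
decs = np.random.uniform(-90, 90, 1000)
for ra_chunk, dec_chunk in split_data(ras, decs):
    assert len(ra_chunk) == len(dec_chunk) <= 256   # here: four chunks of 250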
def gp2(data, block_size=100, nugget=0.005):
    c = data[0]
    s = data[1]
    s_2 = np.array_split(s, len(s) / block_size + 1)
    c_2 = np.array_split(c, len(s) / block_size + 1)
    sapflux_pred = []
    nug = nugget
    for a in range(0, len(s_2)):
        t0 = time.time()
        X = np.atleast_2d(c_2[a]).T
        y = np.atleast_2d(s_2[a]).T
        gproc = gaussian_process.GaussianProcess(theta0=0.01, thetaL=1e-4,
                                                 thetaU=1e-1, nugget=nug)
        gproc.fit(X, y)
        y_pred, sigma2_pred = gproc.predict(X, eval_MSE=True)
        sapflux_pred.extend(y_pred.ravel())
        t1 = time.time()
        print t1 - t0
    return np.array([c, s, np.array(sapflux_pred)])
def ensemble_maker_inner(train_mat, labels, model_gen_function, info_dict, num=10):
    ## contains core functions to make ensemble models
    ## from training data and labels
    ## model_gen_function is a function that takes NO arguments and returns a keras model
    ## info_dict is a dictionary of training info
    train_mat, labels = shuffle(train_mat, labels)
    train_mat = np.array_split(train_mat, num, axis=0)
    labels = np.array_split(labels, num, axis=0)
    earlystop = EarlyStopping(monitor=info_dict['monitor'],
                              min_delta=info_dict['min_delta'],
                              patience=info_dict['patience'],
                              verbose=0,
                              mode='auto')
    callbacks_list = [earlystop]
    model_list = []
    for ii in range(num):
        train_feature = array_stack(train_mat, ii)
        train_labels = array_stack(labels, ii)
        loaded_model = model_gen_function()  # note the call to gen new model
        current_model = reset_weights(loaded_model)
        history = current_model.fit(train_feature, train_labels,
                                    epochs=info_dict['epochs'],
                                    verbose=0,
                                    batch_size=info_dict['batch_size'],
                                    callbacks=callbacks_list)
        model_list.append(current_model)
    return model_list
def get_gradient(theta):
    global fractional_counts, event_index, feature_index, event_grad, rc, N
    assert len(theta) == len(feature_index)
    event_grad = {}
    cpu_count = multiprocessing.cpu_count()
    pool = Pool(processes=cpu_count)  # uses all available CPUs
    batches_fractional_counts = np.array_split(range(len(event_index)), cpu_count)
    events_to_split = events_to_features.keys()
    batches_events_to_features = np.array_split(events_to_split, cpu_count)
    # for batch_of_fc in batches_fractional_counts:
    for batch_of_fc in batches_events_to_features:
        pool.apply_async(batch_gradient, args=(theta, batch_of_fc),
                         callback=batch_accumilate_gradient)
    pool.close()
    pool.join()
    # grad = np.zeros_like(theta)
    grad = -2 * rc * theta  # l2 regularization with lambda 0.5
    for e in event_grad:
        feats = events_to_features.get(e, [])
        for f in feats:
            grad[feature_index[f]] += event_grad[e]

    # for s in seen_index:
    #     grad[s] += -theta[s]  # l2 regularization with lambda 0.5

    assert len(grad) == len(feature_index)
    return -grad
def generateTrainAndTest(self):
    """
    Generate train and test data and then yield
    :return:
    """
    partitions = np.array_split(self.dataset, self.numOfFolds)
    labels_partitions = np.array_split(self.labels, self.numOfFolds)
    for fold in range(self.numOfFolds):
        self.test = partitions[fold]
        self.labels_test = labels_partitions[fold]
        fold_left = partitions[:fold]
        fold_right = partitions[fold + 1:]
        labels_fold_left = labels_partitions[:fold]
        labels_fold_right = labels_partitions[fold + 1:]
        if len(fold_left) == 0:
            self.train = np.concatenate(fold_right)
            self.labels_train = np.concatenate(labels_fold_right)
        elif len(fold_right) == 0:
            self.train = np.concatenate(fold_left)
            self.labels_train = np.concatenate(labels_fold_left)
        else:
            self.train = np.concatenate((np.concatenate(fold_left),
                                         np.concatenate(fold_right)))
            self.labels_train = np.concatenate(
                (np.concatenate(labels_fold_left),
                 np.concatenate(labels_fold_right)))
        yield
def parallelMorton(iMortonRanges, xMortonRanges, childMethod, numProcessesQuery):
    if iMortonRanges is not None:
        numMRanges = max((len(iMortonRanges), len(xMortonRanges)))
        if numMRanges > numProcessesQuery:
            numChunks = numProcessesQuery
        else:
            numChunks = numMRanges
        ichunks = numpy.array_split(iMortonRanges, numChunks)
        xchunks = numpy.array_split(xMortonRanges, numChunks)
    else:
        numMRanges = len(xMortonRanges)
        if numMRanges > numProcessesQuery:
            numChunks = numProcessesQuery
        else:
            numChunks = numMRanges
        ichunks = numpy.array_split([], numChunks)
        xchunks = numpy.array_split(xMortonRanges, numChunks)
    children = []
    for i in range(numChunks):
        children.append(multiprocessing.Process(target=childMethod,
                                                args=(ichunks[i], xchunks[i])))
        children[-1].start()
    # wait for all children to finish their execution
    for i in range(numChunks):
        children[i].join()
def make_batches(x, y, batch_size=128, shuffle=True, nest=True):
    for i in range(len(x)):
        x[i] = atleast_4d(x[i])
    y = atleast_4d(y)

    num_batches = (y.shape[0] // batch_size)
    if y.shape[0] % batch_size != 0:
        num_batches += 1

    if shuffle:
        shuffled_arrays = sk.utils.shuffle(*x, y)
        x = shuffled_arrays[:len(x)]
        y = shuffled_arrays[-1]

    x_batches_list = []
    for i in range(len(x)):
        x_batches_list.append(np.array_split(x[i], num_batches))

    if nest:
        x_batches = []
        for i in range(num_batches):
            x_batch = []
            for x_input in x_batches_list:
                x_batch.append(x_input[i])
            x_batches.append(x_batch)
    else:
        x_batches = x_batches_list

    y_batches = np.array_split(y, num_batches)

    return x_batches, y_batches, num_batches
def make_batches(X, y, batch_size=128, shuffle=True, nest=True):
    for i in range(len(X)):
        X[i] = atleast_4d(X[i])
    y = atleast_4d(y)

    num_batches = (y.shape[0] // batch_size)
    if y.shape[0] % batch_size != 0:
        num_batches += 1

    if shuffle:
        shuffled_arrays = sk.utils.shuffle(*X, y)
        X = shuffled_arrays[:len(X)]
        y = shuffled_arrays[-1]

    X_batches_list = []
    for i in range(len(X)):
        X_batches_list.append(np.array_split(X[i], num_batches))

    if nest:
        X_batches = []
        for i in range(num_batches):
            X_batch = []
            for X_input in X_batches_list:
                X_batch.append(X_input[i])
            X_batches.append(X_batch)
    else:
        X_batches = X_batches_list

    y_batches = np.array_split(y, num_batches)

    return X_batches, y_batches, num_batches
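# A quick sketch of the batching math in the two make_batches variants above,
# with toy shapes: floor division plus a remainder check gives a ceiling
# batch count, and np.array_split then produces near-equal batches.
import numpy as np

y = np.zeros((300, 1, 1, 1))
batch_size = 128
num_batches = y.shape[0] // batch_size
if y.shape[0] % batch_size != 0:
    num_batches += 1
print(num_batches)                                            # 3
print([b.shape[0] for b in np.array_split(y, num_batches)])   # [100, 100, 100]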
def apply_by_multiprocessing_list_to_list(df, func, **kwargs):
    workers = kwargs.pop('workers')
    pool = Pool(processes=workers)
    result = pool.map(apply_list,
                      [(d, func, kwargs) for d in np.array_split(df, workers)])
    pool.close()
    return result
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
pred_data = pd.DataFrame()

# Train model.
with tf.Session() as session:
    epochs = 100
    batch_size = 25

    session.run(init)
    session.run(local_init)

    num_batches = int(l_t_matrix.shape[0] / batch_size)
    l_t_matrix = np.array_split(l_t_matrix, num_batches)

    for i in range(epochs):
        avg_cost = 0
        for batch in l_t_matrix:
            _, l = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += l
        avg_cost /= num_batches
        print("epoch: {} Loss: {}".format(i + 1, avg_cost))

    l_t_matrix = np.concatenate(l_t_matrix, axis=0)
    preds = session.run(decoder_op, feed_dict={X: l_t_matrix})
    preds = pd.DataFrame(preds)
def classify(data, house, f):
    data, old_labels = relabel(data)
    res_obj = {"y_pred": [], "y_true": [], "acc": []}
    #data.iloc[::6]  # WWW
    # dividing the data into training and testing
    #trainDf, testDf, trainLens, testLens, testFrac = split.trainTest(
    #    data, 5400, 5400*2, testSize=0.3)
    # e.g. structure of the array
    # X = [np.array([ [f1],[f2],[f3 ] ... [ N days], dtype=uint8 )]
    # Y = [np.array([ a, b , c])]
    # splitting so that we get a fraction of the day for training the labels
    #X_train = np.array(np.array_split(trainDf.values[:, :trainDf.shape[1] - 2], 10))
    #y_train = np.array(np.array_split(trainDf.values[:, trainDf.shape[1] - 1], 10))
    # test dataset - dividing into subsequences
    #X_test = np.array(np.array_split(testDf.values[:, :testDf.shape[1] - 2], 30))
    #y_test = np.array(np.array_split(testDf.values[:, testDf.shape[1] - 1], 30))
    # WWW
    X_train = np.array(data.values[:, :data.shape[1] - 2])
    y_train = np.array(data.values[:, data.shape[1] - 1])
    #print X_train.shape
    #test_SSVM(X_train, X_test, y_train, y_test)
    #exit()
    # 5 fold validation
    #label = np.unique(data['activity'])
    kf = StratifiedKFold(data['activity'], n_folds=5)
    clfs = []
    accuracies = []
    # cross validation
    for i, (train_index, test_index) in enumerate(kf):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train1, X_test1 = X_train[train_index], X_train[test_index]
        y_train1, y_test1 = y_train[train_index], y_train[test_index]
        #print np.unique(np.concatenate(y_train1).ravel())
        #print np.unique(np.concatenate(y_test1).ravel())
        X_train1 = np.array_split(X_train1, 100)
        X_test1 = np.array_split(X_test1, 10)
        y_train1 = np.array_split(y_train1, 100)
        y_test1 = np.array_split(y_test1, 10)
        #print X_train1.shape, y_train1.shape
        #print X_train1[0].shape, y_train1[0].shape
        fname = 'ssvm_models/ssvm_' + house + f + str(i) + '.pkl'
        if os.path.isfile(fname):
            pkl_file = open(fname, 'rb')
            clf = pickle.load(pkl_file)
            print i, ". Classifier Loaded:", house, f, clf
        else:
            clf = train_SSVM(X_train1, y_train1)
            output = open(fname, 'wb')
            pickle.dump(clf, output)
        accuracy, y_pred, y_true = test_SSVM(clf, X_test1, y_test1)
        y_pred = map(lambda x: old_labels[int(x)], y_pred)
        y_true = map(lambda x: old_labels[int(x)], y_true)
        # save the model
        res_obj['y_pred'].append(y_pred)
        res_obj['y_true'].append(y_true)
        res_obj['acc'].append(accuracy)
        obj = {"y_pred": y_pred, "y_true": y_true, "acc": accuracy}
        # write the results:
        with gzip.open('ssvm_models/ssvm_' + house + f + str(i) + '.json.gz', 'w') as out:
            json.dump(obj, out)
        #clfs.append(clf)
        accuracies.append(accuracy)
    print 'House:', house, 'Feature:', f,
    print accuracies
    with gzip.open('ssvm_models/ssvm_' + house + f + '_all.json.gz', 'w') as out:
        json.dump(res_obj, out)
    #ssvm = clfs[np.argmax(accuracies)]
    #print "Learning complete..."
    #accuracy = ssvm.score(X_test, y_test)
    #print("Test score with chain CRF: %f" % accuracy)
    print "Learning SVM complete."
def process_manager(args):

    (path_to_neatmo_ppt_hdf5,
     path_to_prim_netw_ppt_hdf5,
     path_to_filtered_pws) = args

    #=========================================================
    HDF5_pws_ppt = HDF5(infile=path_to_neatmo_ppt_hdf5)
    all_pws_ids = HDF5_pws_ppt.get_all_names()

    pws_coords = HDF5_pws_ppt.get_coordinates(all_pws_ids)

    pws_in_coords_df = pd.DataFrame(index=all_pws_ids,
                                    data=pws_coords['easting'],
                                    columns=['X'])
    y_pws_coords = pws_coords['northing']
    pws_in_coords_df.loc[:, 'Y'] = y_pws_coords
    pws_in_coords_df.dropna(how='all', inplace=True)

    assert pws_in_coords_df.isna().sum().sum() == 0
    #=========================================================
    HDF5_prim_netw_ppt = HDF5(infile=path_to_prim_netw_ppt_hdf5)
    all_prim_netw_stns_ids = HDF5_prim_netw_ppt.get_all_names()

    prim_netw_coords = HDF5_prim_netw_ppt.get_coordinates(
        all_prim_netw_stns_ids)

    prim_netw_in_coords_df = pd.DataFrame(index=all_prim_netw_stns_ids,
                                          data=prim_netw_coords['easting'],
                                          columns=['X'])
    y_prim_netw_coords = prim_netw_coords['northing']
    prim_netw_in_coords_df.loc[:, 'Y'] = y_prim_netw_coords
    prim_netw_in_coords_df.dropna(how='all', inplace=True)

    assert prim_netw_in_coords_df.isna().sum().sum() == 0
    #=========================================================
    # select only 'good' pws
    ids_pws_to_use = pd.read_csv(path_to_filtered_pws,
                                 index_col=0).index.to_list()
    #=========================================================
    date_range = pd.date_range(start=start_date, end=end_date, freq='H')

    date_range_summer = pd.DatetimeIndex([
        date_ for date_ in date_range
        if date_.month not in not_convective_season
    ])

    print('Using Workers: ', n_workers)

    # divide timestamps among workers
    all_timestamps_worker = np.array_split(date_range_summer, n_workers)
    args_worker = []

    for time_list in all_timestamps_worker:
        empty_data = np.zeros(shape=(len(time_list), len(all_pws_ids)))
        empty_data[empty_data == 0] = np.nan

        df_save_results = pd.DataFrame(index=time_list,
                                       columns=all_pws_ids,
                                       data=empty_data)
        # args_workers = list(repeat(args, n_worker))
        args_worker.append((path_to_prim_netw_ppt_hdf5,
                            prim_netw_in_coords_df,
                            path_to_neatmo_ppt_hdf5,
                            pws_in_coords_df,
                            ids_pws_to_use,
                            time_list,
                            df_save_results))

    my_pool = mp.Pool(processes=n_workers)
    # TODO: Check number of accounts

    results = my_pool.map(on_evt_filter_pws, args_worker)

    # my_pool.terminate()

    my_pool.close()
    my_pool.join()

    results_df = pd.concat(results)

    results_df.to_csv(os.path.join(out_save_dir,
                                   'pws_flagged_%s.csv' % (_year)),
                      sep=';')

    return
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input', type=str, default=None,
        help='a directory with remapped/precomputed-*, config.pkl, and graph.pkl')
    parser.add_argument('--output', type=str, default=None,
                        help='output_directory')
    parser.add_argument('--resolution', type=str, default='6,6,40')
    parser.add_argument('--chunk_size', type=str, default='256,256,64')
    parser.add_argument(
        '--batch_scale', type=int, default=1,
        help='Controls how much data is loaded from h5 each time, by multiplying chunk_size')
    parser.add_argument('--global_offset', type=str, default='0,0,0')
    parser.add_argument('--flip_h5', type=bool, default=False)
    parser.add_argument('--verbose', type=bool, default=True)
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level='DEBUG')
    else:
        logging.basicConfig(level='ERROR')

    resolution = [int(i) for i in args.resolution.split(',')]
    chunk_size = [int(i) for i in args.chunk_size.split(',')]
    global_offset = [int(i) for i in args.global_offset.split(',')]

    if args.output is None:
        output = args.input

    config_path = os.path.join(args.input, 'config.pkl')
    if mpi_rank == 0:
        assert os.path.exists(config_path), 'Run reconciliate_remap first'
        with open(config_path, 'rb') as fp:
            seg_map = pickle.load(fp)
        os.makedirs(output, exist_ok=True)
    else:
        seg_map = None
    seg_map = mpi_comm.bcast(seg_map, 0)

    merge_output = os.path.join(output, 'agglomerated')
    h5_path = os.path.join(output, 'intermediate.h5')

    if mpi_rank == 0:
        union_bbox, cv_merge_path = get_union_bbox_and_merge_path(
            seg_map, merge_output, global_offset)
        # preset precomputed
        union_offset = np.array(union_bbox.minpt)
        union_size = np.array(union_bbox.maxpt) - np.array(union_bbox.minpt)
        cv_merge = prepare_precomputed(cv_merge_path, offset=union_offset,
                                       size=union_size, resolution=resolution,
                                       chunk_size=chunk_size)
        # sub divide aligned bboxes
        sub_bbox_size = [i * args.batch_scale for i in chunk_size]
        bbs = get_chunk_bboxes(union_bbox, sub_bbox_size)
        sub_bbs = np.array_split(bbs, mpi_size)
        logging.warn('write shapes %s %s', union_bbox, sub_bbox_size)
    else:
        # union_bbox = None
        union_offset = None
        cv_merge_path = None
        sub_bbs = None

    union_offset = mpi_comm.bcast(union_offset, 0)
    cv_merge_path = mpi_comm.bcast(cv_merge_path, 0)
    sub_bbs = mpi_comm.scatter(sub_bbs, 0)

    h5_to_cloudvolume(h5_path, cv_merge_path, union_offset, sub_bbs,
                      resolution, chunk_size, args.flip_h5)
    sys.exit()
def main(_):
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%y-%m-%d %H:%M:%S')

    logging.info("job_name:%s, task_index:%d" % (job_name, task_index))

    ps_hosts = cluster_spec['ps']
    worker_hosts = cluster_spec['worker']
    # allows this node to know about all other nodes
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    if job_name == 'ps':  # checks if parameter server
        server = tf.train.Server(cluster, job_name="ps", task_index=task_index)
        server.join()
    else:  # it must be a worker server
        is_chief = (task_index == 0)  # checks if this is the chief node
        server = tf.train.Server(cluster, job_name="worker", task_index=task_index)

        logging.info("Loading data from worker index = %d" % task_index)

        if "TRAINING_DATA_PATH" in os.environ:
            logging.info("This is a normal worker..")
            training_data_path = os.environ["TRAINING_DATA_PATH"]
        else:
            logging.info("This is a backup worker")
            # watching certain file in hdfs which contains its training data

        # Read model structure info from ModelConfig
        with open('./ModelConfig.json') as f:
            model_conf = json.load(f)
            logging.info("model" + str(model_conf))
            EPOCH = int(model_conf['train']['numTrainEpochs'])
            global VALID_TRAINING_DATA_RATIO
            VALID_TRAINING_DATA_RATIO = model_conf['train']['validSetRate']
            is_continue_train = model_conf['train']['isContinuous']
            global BATCH_SIZE
            if "MiniBatchs" in model_conf['train']['params']:
                BATCH_SIZE = model_conf['train']['params']['MiniBatchs']
            logging.info("Batch size: " + str(BATCH_SIZE) +
                         ", VALID_TRAINING_DATA_RATIO: " + str(VALID_TRAINING_DATA_RATIO))

        # import data
        context = load_data(training_data_path)

        # split data into batches
        total_batch = int(len(context["train_data"]) / BATCH_SIZE)
        x_batch = np.array_split(context["train_data"], total_batch)
        y_batch = np.array_split(context["train_target"], total_batch)
        sample_w_batch = np.array_split(context["train_data_sample_weight"], total_batch)

        logging.info("Testing set size: %d" % len(context['valid_data']))
        logging.info("Training set size: %d" % len(context['train_data']))

        valid_x = np.asarray(context["valid_data"])
        valid_y = np.asarray(context["valid_target"])
        valid_sample_w = np.asarray(context["valid_data_sample_weight"])

        # Graph
        worker_device = "/job:%s/task:%d" % (job_name, task_index)
        with tf.device(tf.train.replica_device_setter(
                #ps_tasks=n_pss,
                cluster=cluster,
                worker_device=worker_device)):

            input_placeholder = tf.placeholder(dtype=tf.float32,
                                               shape=(None, FEATURE_COUNT),
                                               name="shifu_input_0")
            label_placeholder = tf.placeholder(dtype=tf.int32, shape=(None, 1))
            sample_weight_placeholder = tf.placeholder(dtype=tf.float32, shape=(None, 1))

            opt, train_step, loss, global_step, y = model(
                input_placeholder, label_placeholder,
                sample_weight_placeholder, model_conf)

            # init ops
            init_tokens_op = opt.get_init_tokens_op()
            # initialize local step
            local_init = opt.local_step_init_op
            if is_chief:
                # initializes token queue
                local_init = opt.chief_init_op

            # checks if global vars are init
            ready_for_local_init = opt.ready_for_local_init_op

            # Initializing the variables
            init_op = tf.initialize_all_variables()
            logging.info("---Variables initialized---")

        # ******************************************************************
        # Session
        sync_replicas_hook = opt.make_session_run_hook(is_chief)
        stop_hook = tf.train.StopAtStepHook(num_steps=EPOCH)
        chief_hooks = [sync_replicas_hook, stop_hook]
        if is_continue_train:
            scaff = None
        else:
            scaff = tf.train.Scaffold(init_op=init_op,
                                      local_init_op=local_init,
                                      ready_for_local_init_op=ready_for_local_init)
        # Configure
        if "IS_BACKUP" in os.environ:
            config = tf.ConfigProto(log_device_placement=False,
                                    allow_soft_placement=True,
                                    device_filters=['/job:ps',
                                                    '/job:worker/task:0',
                                                    '/job:worker/task:%d' % task_index])
        else:
            config = tf.ConfigProto(log_device_placement=False,
                                    allow_soft_placement=True)

        # Create a "supervisor", which oversees the training process.
        sess = tf.train.MonitoredTrainingSession(master=server.target,
                                                 is_chief=is_chief,
                                                 config=config,
                                                 scaffold=scaff,
                                                 hooks=chief_hooks,
                                                 stop_grace_period_secs=10,
                                                 checkpoint_dir=tmp_model_path)

        if is_chief and not is_continue_train:
            sess.run(init_tokens_op)
            #start_tensorboard(tmp_model_path)
            logging.info("chief start waiting 40 sec")
            time.sleep(40)  # grace period to wait on other workers before starting training
            logging.info("chief finish waiting 40 sec")

        # Train until hook stops session
        logging.info('Starting training on worker %d' % task_index)

        run_metadata = tf.RunMetadata()
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

        while not sess.should_stop():
            try:
                start = time.time()
                for i in range(total_batch):
                    train_feed = {input_placeholder: x_batch[i],
                                  label_placeholder: y_batch[i],
                                  sample_weight_placeholder: sample_w_batch[i]}
                    _, l, gs = sess.run([train_step, loss, global_step],
                                        feed_dict=train_feed,
                                        options=run_options,
                                        run_metadata=run_metadata)
                training_time = time.time() - start

                valid_start = time.time()
                # compute validation loss TODO, check if batch compute
                valid_loss, gs = sess.run(
                    [loss, global_step],
                    feed_dict={input_placeholder: valid_x,
                               label_placeholder: valid_y,
                               sample_weight_placeholder: valid_sample_w})
                valid_time = time.time() - valid_start
                logging.info('Step: ' + str(gs) + ' worker: ' + str(task_index) +
                             " training loss:" + str(l) +
                             " training time:" + str(training_time) +
                             " valid loss:" + str(valid_loss) +
                             " valid time:" + str(valid_time))

                # Send intermediate result to master
                message = "worker_index:{},time:{},current_epoch:{},training_loss:{},valid_loss:{},valid_time:{}\n".format(
                    str(task_index), str(training_time), str(gs), str(l),
                    str(valid_loss), str(valid_time))
                if sys.version_info < (3, 0):
                    socket_client.send(bytes(message))
                else:
                    socket_client.send(bytes(message), 'utf8')
            except RuntimeError as re:
                if 'Run called even after should_stop requested.' == re.args[0]:
                    logging.info('About to execute sync_clean_up_op!')
                else:
                    raise

        logging.info('Done' + str(task_index))

        # We just need to make sure the chief worker exits with success status
        if is_chief:
            tf.reset_default_graph()

            # add placeholders for input images (and optional labels)
            x = tf.placeholder(dtype=tf.float32, shape=(None, FEATURE_COUNT),
                               name="shifu_input_0")
            with tf.get_default_graph().as_default():
                if BUILD_MODEL_BY_CONF_ENABLE and model_conf is not None:
                    output_digits, output_nodes = generate_from_modelconf(x, model_conf)
                else:
                    output_digits = nn_layer(x, FEATURE_COUNT, HIDDEN_NODES_COUNT,
                                             act_op_name="hidden_layer1")
                    output_nodes = HIDDEN_NODES_COUNT
                logging.info("output_nodes : " + str(output_nodes))
                prediction = nn_layer(output_digits, output_nodes, 1,
                                      act=tf.nn.sigmoid,
                                      act_op_name="shifu_output_0")

            # restore from last checkpoint
            saver = tf.train.Saver()
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(tmp_model_path)
                logging.info("ckpt: {}".format(ckpt))
                assert ckpt, "Invalid model checkpoint path: {}".format(tmp_model_path)
                saver.restore(sess, ckpt.model_checkpoint_path)

                logging.info("Exporting saved_model to: {}".format(final_model_path))

                # exported signatures defined in code
                simple_save(session=sess,
                            export_dir=final_model_path,
                            inputs={"shifu_input_0": x},
                            outputs={"shifu_output_0": prediction})
                logging.info("Exported saved_model")

            tl = timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            logging.info("ctf:" + str(ctf))
            f = tf.gfile.GFile(final_model_path + "/timeline.json", mode="w+")
            f.write(ctf)

        time.sleep(40)  # grace period to wait before closing session
        #sess.close()
        logging.info('Session from worker %d closed cleanly' % task_index)
        sys.exit()
from glob import glob
import h5py
from joblib import delayed, Parallel
import numpy as np
import os
import pandas as pd
import sys

from icae.tools.config_loader import config
from icae.tools import performance

# -
in_files = config.root + config.data.raw + "*.hdf"
errors = 0
files = glob(in_files)
batches = np.array_split(files, 24)  # adjust to available RAM

# + {}
import tables


def try_read(file):
    try:
        return pd.read_hdf(file)
    except OSError:
        return None


def process_batch(batch_files, frame_counter):
    print("Reading…")
    read_errors = 0
    unique_frames_seen = 0
def prepareDataC(function, splitting=False, percentage=100):
    """
    Reads train and test data, prepares data, converts sentence representations
    """
    print(datetime.now())
    # define paths to train and test pickles
    input_train_covid = '../../Covid_data_11nov/traindata_covidbatch.pkl'
    input_test_covid = '../../Covid_data_11nov/testdata_covidbatch.pkl'

    print("Reading pickle files...")
    # read pickle files
    with open(input_train_covid, "rb") as pkl_file:
        traindata_c = pickle.load(pkl_file)
    with open(input_test_covid, "rb") as pkl_file:
        testdata_c = pickle.load(pkl_file)

    print("Creating and filtering dataframes...")
    # prepare training dataframes
    df_tr_c = function(traindata_c)[0]
    # take out sentences with labels that we should ignore (background,
    # target, view_patient, view_thirdparty, info_thirdparty)
    rows_to_delete_tr_c, filtered_df_tr_c = filterDataframe(df_tr_c)

    # prepare test dataframes
    df_te_c = function(testdata_c)[0]
    rows_to_delete_te_c, filtered_df_te_c = filterDataframe(df_te_c)

    # extract test labels
    filtered_labels_te_c = filtered_df_te_c['domain'].to_list()
    filtered_encodings_te_c = filtered_df_te_c['encoding'].tolist()

    print("Retrieve note id's...")
    # get note id's for aggregation
    try:
        ids_c = []
        list_keys_c = filtered_df_te_c['key'].tolist()
        for key in list_keys_c:
            y = key.split('--')[3]
            ids_c.append(y)
    except KeyError:
        ids_c = []

    print("Downsampling training labels...")
    # Original code to randomly select indices of negative examples for downsampling
    # Get original support of the 0 class
    #seriesObj = filtered_df_tr.apply(lambda x: True if x['domain'] == 'None' else False, axis=1)
    # Count number of True in series
    #numOfRows = len(seriesObj[seriesObj == True].index)
    #print('Number of Rows in dataframe in which domain is None =', numOfRows)
    #per_50 = (numOfRows/2)
    #per_25 = (per_50/2)
    #per_125 = (per_25/2)
    #per_625 = (per_625/2)
    #per_3125 = (per_3125/2)
    #N = int(per_50)  #+ int(per_25) + int(per_125) + int(per_625) + int(per_3125)
    #down_df_tr, indices = downsample(filtered_df_tr, N)
    with open("down_indices_covid2.pkl", "rb") as f:
        indices = pickle.load(f)
    down_df_tr_c = filtered_df_tr_c.drop(indices)

    if splitting == False:
        downsampled_filtered_labels_tr_c = down_df_tr_c['domain'].to_list()
        downsampled_filtered_encodings_tr_c = down_df_tr_c['encoding'].tolist()

    if splitting == True:
        # splitting final dataframe
        shuffled = down_df_tr_c.sample(frac=1)
        parts = np.array_split(shuffled, 4)
        df_25 = parts[0]
        df_50 = df_25.append(parts[1])
        df_75 = df_50.append(parts[2])
        if percentage == 25:
            # extract training labels
            downsampled_filtered_labels_tr_c = df_25['domain'].to_list()
            downsampled_filtered_encodings_tr_c = df_25['encoding'].tolist()
        if percentage == 50:
            downsampled_filtered_labels_tr_c = df_50['domain'].to_list()
            downsampled_filtered_encodings_tr_c = df_50['encoding'].tolist()
        if percentage == 75:
            downsampled_filtered_labels_tr_c = df_75['domain'].to_list()
            downsampled_filtered_encodings_tr_c = df_75['encoding'].tolist()

    print('Converting encodings...')
    sen_reps_tr_c = []
    for entry in downsampled_filtered_encodings_tr_c:
        entry2 = entry[-4:]
        # take mean of last 4 layers to create sentence representation
        entry3 = torch.mean(entry2, dim=0)
        # convert to numpy array
        array = entry3.numpy()
        sen_reps_tr_c.append(array)

    # prepare test features
    #filtered_encodings_te = [i for j, i in enumerate(encodings_te) if j not in set(rows_to_delete_te)]
    sen_reps_te_c = []
    for entry in filtered_encodings_te_c:
        entry2 = entry[-4:]
        # take mean of last 4 layers to create sentence representation
        entry3 = torch.mean(entry2, dim=0)
        # convert to numpy array
        array = entry3.numpy()
        sen_reps_te_c.append(array)

    return (downsampled_filtered_labels_tr_c, filtered_labels_te_c,
            sen_reps_tr_c, sen_reps_te_c, ids_c)
ind = pkl.load(open('pkls/gaus_trans.pkl', 'rb'))
inds = ind
data = parallel_rw_pkl(None, 'inter_sent%i' % 4, 'r')
mask = parallel_rw_pkl(None, 'inter_sentm%i' % 4, 'r')
sent = readfile([11, ], fhead)['B11']
sent = ScaleExtent(sent, (10980, 10980))
cm = parallel_rw_pkl(None, '0510diacm', 'r')
sent[cm] = np.nan
stm = parallel_rw_pkl(None, 'std_m', 'r')
print 'finished reading data'

data[mask] = np.nan
modis_sent = np.array(data)
Sent = sent
Stm = stm

patches = np.array(zip(np.mgrid[0:10, 0:10][0].ravel(),
                       np.mgrid[0:10, 0:10][1].ravel()))
pros = np.array(np.array_split(patches, 16))

par = partial(applied)
pool = multiprocessing.Pool(processes=16)
data = pool.map(par, pros)
pool.close()
pool.join()
parallel_rw_pkl(data, 'psfb11_modis', 'w')
print 'lol finished psf b11!!!!!'
#     # break
# # tweets_filtered = tweets_filtered[:100]
# print('time taken to load keyword filtered sample:', str(time.time() - start_time), 'seconds')
# print(tweets_filtered.shape)

print('Load Random Tweets:')
# random contains 7.3G of data!!
start_time = time.time()
paths_to_random = list(np.array_split(
    # glob(os.path.join(path_to_data, 'random', '*.parquet')),
    glob(os.path.join(path_to_data, 'random_10perct_sample', '*.parquet')),
    # glob(os.path.join(path_to_data, 'random_1perct_sample', '*.parquet')),
    SLURM_ARRAY_TASK_COUNT)[SLURM_ARRAY_TASK_ID])
print('#files:', len(paths_to_random))

tweets_random = pd.DataFrame()
for file in paths_to_random:
    print(file)
    tweets_random = pd.concat([tweets_random,
                               pd.read_parquet(file)[['tweet_id', 'text']]])
    print(tweets_random.shape)
    # break

# tweets_random = tweets_random[:100]
print('time taken to load random sample:', str(time.time() - start_time), 'seconds')
* sec_loc : knowing that the dimension of the sample is (Ux, Uy, Uz) = (4, 4, 2),
  and because we slice in the X-dir and Y-dir, we define sec_loc, which gives
  the location of the slices in the X-dir and Y-dir
* patch_1, patch_2 : since we need to make two 129x129 images (patches) from
  257x129 images, we define patch_1 and patch_2, which hold the indices we
  need to this end.
"""
width = 257
height = 129
sec_loc = np.linspace(0.0, 4.0, width)
numb_sections = len(sec_loc[np.arange(0, width, 5)])
patch_1 = np.array_split(np.arange(height * width).reshape(height, width),
                         2, axis=1)[0].flatten()
patch_2 = patch_1 + height - 1

"""
Reading, slicing, and saving the dataset

* time_interval : a numpy array used to define the time interval over which
  we extract information from the solution saved in the dataset. Note that we
  start from time step number 5 because solutions at the very beginning of the
  dataset are affected by the initial states introduced to the system, and the
  width of the interfaces is pretty large.
* df : a pandas dataframe which includes the (X, Y, Z, phi) features
* saving_frames : a numpy array in which we save the phi values of all the
  slices we created.
"""

"""
Iterating over names of the files (*.h5) in the reading_directory
"""
        if i % 500 == 0:
            figshow(pred[0, 0, :, :])
            plt.savefig("./png/model_" + modelstr + "." + str(i) + ".pred.png")
            figshow(np.log(((target_r + eps) / (normmat_r + eps)))[0, :, :], np=True)
            plt.savefig("./png/model_" + modelstr + "." + str(i) + ".label.png")
            torch.save(net.state_dict(), "./models/model_" + modelstr + ".checkpoint")
            torch.save(optimizer.state_dict(), "./models/model_" + modelstr + ".optimizer")

        if i % 2000 == 0:
            net.eval()
            corr = []
            mse = []
            mseloss = nn.MSELoss()
            t = 0
            for sequence, target in zip(
                np.array_split(validation_sequences, 256),
                np.array_split(validation_targets, 256),
            ):
                pred = net(torch.Tensor(sequence).transpose(1, 2).cuda())
                target_r = np.nanmean(
                    np.nanmean(np.reshape(target, (target.shape[0], 250, 4, 250, 4)), axis=4),
                    axis=2,
                )
                if t < 10:
                    figshow(pred[0, 0, :, :])
                    plt.savefig("./png/model_" + modelstr + ".test" + str(t) + ".pred.png")
                    figshow(np.log(((target_r + eps) / (normmat_r + eps)))[0, :, :], np=True)
                    plt.savefig("./png/model_" + modelstr + ".test" + str(t) + ".label.png")
                t += 1
                if np.mean(np.isnan(target_r)) < 0.7:
                    target_cuda = torch.Tensor(
def paaTransformData(self, ts, n_pieces):
    # Piecewise Aggregate Approximation: reduce each segment to its mean.
    # A list comprehension replaces the original map() call, which returns a
    # lazy iterator in Python 3 and would break np.asarray.
    splitted = np.array_split(ts, n_pieces)
    return np.asarray([xs.mean(axis=0) for xs in splitted])
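# A short usage sketch of the PAA transform above with made-up values:
# np.array_split lets the series length be non-divisible by n_pieces, so the
# final segment is simply shorter.
import numpy as np

ts = np.array([1., 1., 2., 2., 10.])
pieces = np.array_split(ts, 2)            # [1, 1, 2] and [2, 10]
print([p.mean(axis=0) for p in pieces])   # [1.333..., 6.0]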
        psuX.append(data.columns[-1])
        mList.append(psuX)
print(" M LIST ", mList)
# j=0

# In[6]:

a = 0
pre = 0
re = 0
fM = 0
for train, test in kf.split(vector):
    newData = data
    trainResult = np.array_split(train, split)
    testResult = np.array(test)
    test = vector[testResult]
    testList = []
    for x in range(len(mList)):
        testList.append(test[:, mList[x]])
    groundTruth = test[:, -1]
    decisionTreeLst = []
    # // TREE FORMATION
    for tr in range(len(trainResult)):
        # m =
        dataSet = vector[trainResult[tr]]
def calcIntensitiesCUDA(x, waveNumber, y1Vals, y2Vals, y1Amps):
    # number of sections to divide y array into: N/x
    t0 = time.time()
    y1Vals = np.array(y1Vals)
    y2Vals = np.array(y2Vals)
    #print(y1Vals, y2Vals)
    """ Break y1 and y2 into sections """
    """ Need to change this yourself """
    numSections = 20
    #y1sections = np.array([np.array_split(y1Vals, numSections)])
    #y2sections = np.array([np.array_split(y2Vals, numSections)])
    y1secs = np.array_split(y1Vals, numSections)
    y2secs = np.array_split(y2Vals, numSections)
    #y1amps =
    # Need to look through these later for more gratings
    y1AmpSecs = np.array_split(y1Amps, numSections)
    y1sections = np.array(y1secs)
    y2sections = np.array(y2secs)
    y1Amps = np.array(y1AmpSecs)
    ampInc = 0
    for y2section in y2sections:
        inc = 0
        ampColumns = [None] * numSections
        #ampColumns = [[0],[0],[0],[0]]
        # Make double for loop
        for y1section in y1sections:
            #print(y1section)
            #print(y2section)
            y1s, y2s = np.meshgrid(y1section, y2section)
            rArray = np.sqrt(x**2 + (y2s - y1s)**2)
            #rArray = np.transpose(rArray)
            waveNumArray = np.full(rArray.shape, waveNumber)
            #y1Amps = np.transpose(np.repeat(np.array([y1Amps]), rArray.shape[1], 0))
            y1Amps = np.repeat(np.array([y1Amps[0]]), rArray.shape[0], 0)
            ampComponentArray = np.zeros_like(rArray, dtype=complex)
            ampComponentArray[:, :] = complexAmplitudeCUDA(
                y1Amps[:, :], waveNumArray[:, :], rArray[:, :])
            Amps = ampComponentArray.sum(axis=1)
            ampColumns[inc] = Amps
            inc += 1
        ampColumns = np.array(ampColumns)
        summedAmps = ampColumns.sum(axis=0)
        #ampColumns[0] + ampColumns[1] + ampColumns[2] + ampColumns[3]
        ampInc += 1
        summedAmps = (summedAmps * np.conjugate(summedAmps)).real
        #print(summedAmps)
        sendSumAndSendTo("tempData.txt", summedAmps, 'a')
    intensities = readFromFile("tempData.txt")
    return intensities
def get_window_mean(ls): WINDOW_COUNT = 30 return [np.mean(x) for x in np.array_split(ls, WINDOW_COUNT)]
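# Quick illustration (values assumed): with 95 samples, np.array_split yields 5 windows of length 4 and 25 of length 3, so get_window_mean always returns exactly WINDOW_COUNT means regardless of divisibility.
import numpy as np

means = get_window_mean(np.arange(95))
assert len(means) == 30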
def reorder_axis(arr, ax): pos_part, neg_part = np.array_split(arr, 2, axis=ax) rejoined_arr = np.concatenate((neg_part, pos_part), axis=ax) return rejoined_arr
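# Sketch of the effect (example values assumed): reorder_axis swaps the two halves of an axis, which agrees with np.fft.fftshift along that axis, since np.array_split gives the first ("pos") half the extra element for odd lengths.
import numpy as np

arr = np.arange(6)
print(reorder_axis(arr, 0))  # [3 4 5 0 1 2]
print(np.fft.fftshift(arr))  # [3 4 5 0 1 2]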
def cross_reactivity_density_paratope_epitope_ppi_mpi(nodefile, edgefile, sourcetag, targettag): ''' prep neat data for cross-reactivity density plots, both paratope and epitope; retrofitted for ppi usage :return: ''' # edge_files = fifi('abdb_outfiles_2019', 'internet_edges.csv') # node_files = fifi('abdb_outfiles_2019', 'internet_nodes.csv') # infiles = edge_files + node_files # print(infiles) # sys.exit() peinfile = 'abdb_outfiles_2019/ppi_internet_edges.csv' rinfile = 'abdb_outfiles_2019/downsampled_ppi_internet_edges.csv' peinfile_nodes = 'abdb_outfiles_2019/ppi_internet_nodes.csv' rinfile_nodes = 'abdb_outfiles_2019/downsampled_ppi_internet_nodes.csv' penodesdf = pd.read_csv(peinfile_nodes) rnodesdf = pd.read_csv(rinfile_nodes) pedf = pd.read_csv(peinfile).iloc[:] rdf = pd.read_csv(rinfile) print(pedf.head()) data = [] data2 = [] n = 4 nodes_paratope = [ item for item in penodesdf.id.tolist() if '*' not in item ][:] nodes_epitope = [item for item in penodesdf.id.tolist() if '*' in item][:] # chunk the list chunks = np.array_split(nodes_paratope, n) chunks2 = np.array_split(nodes_epitope, n) print(len(chunks[0]), len(nodes_paratope)) # scatter the params comm = MPI.COMM_WORLD print(comm.Get_size()) if comm.rank == 0: params = chunks params2 = chunks2 else: params = None params2 = None params = comm.scatter(params, root=0) params2 = comm.scatter(params2, root=0) outdir = 'supfig12outs' # clear outdir before making a new one; os.rmdir fails on non-empty directories, so use shutil.rmtree (requires `import shutil`) shutil.rmtree(outdir, ignore_errors=True) os.mkdir(outdir) outname = outdir + '/' + nodefile.split( '.')[0] + 'rep%s_%s' % (comm.rank, sourcetag) + '.csv' outname2 = outdir + '/' + nodefile.split( '.')[0] + 'rep%s_%s' % (comm.rank, targettag) + '.csv' print(outname) print(outname2) print(params, comm.rank) print(params2, comm.rank) for motif in params: mdf = pedf[pedf.source == motif] partners = mdf.target for motif2 in nodes_paratope: mdf2 = pedf[pedf.source == motif2] partners2 = mdf2.target intersect = set(partners) & set(partners2) percent_overlap = round( len(intersect) / float(len(partners)) * 100, 1) # print(motif, percent_overlap) datum = [motif, motif2, percent_overlap, 'ppimotif'] data.append(datum) colnames = ['motif1', 'motif2', 'percent_overlap', 'motif_source'] outdf1 = pd.DataFrame(data, columns=colnames) outdf1.to_csv(outname, index=False) for motif in params2: mdf = pedf[pedf.target == motif] partners = mdf.source for motif2 in nodes_epitope: mdf2 = pedf[pedf.target == motif2] partners2 = mdf2.source intersect = set(partners) & set(partners2) percent_overlap = round( len(intersect) / float(len(partners)) * 100, 1) # print(motif, percent_overlap) datum = [motif, motif2, percent_overlap, 'ppimotifpartner'] data2.append(datum) # collect into data2, the list outdf2 is built from outdf2 = pd.DataFrame(data2, columns=colnames) outdf2.to_csv(outname2, index=False)
def _down_sample(ltable, rtable, y_param, show_progress=True, verbose=False, seed=None, rem_puncs=True, rem_stop_words=True, n_ltable_chunks=-1, n_rtable_chunks=-1): """ Down sampling command implementation. We have reproduced the down sample command because the input to the down sample command is the down sampled right table. """ if not isinstance(ltable, pd.DataFrame): logger.error('Input table A (ltable) is not of type pandas DataFrame') raise AssertionError( 'Input table A (ltable) is not of type pandas DataFrame') if not isinstance(rtable, pd.DataFrame): logger.error('Input table B (rtable) is not of type pandas DataFrame') raise AssertionError( 'Input table B (rtable) is not of type pandas DataFrame') if len(ltable) == 0 or len(rtable) == 0: logger.error('Size of the input table is 0') raise AssertionError('Size of the input table is 0') if y_param == 0: logger.error('y cannot be zero (3rd and 4th parameter of downsample)') raise AssertionError( 'y_param cannot be zero (3rd and 4th parameter of downsample)') if seed is not None and not isinstance(seed, int): logger.error('Seed is not of type integer') raise AssertionError('Seed is not of type integer') validate_object_type(verbose, bool, 'Parameter verbose') validate_object_type(show_progress, bool, 'Parameter show_progress') validate_object_type(rem_stop_words, bool, 'Parameter rem_stop_words') validate_object_type(rem_puncs, bool, 'Parameter rem_puncs') validate_object_type(n_ltable_chunks, int, 'Parameter n_ltable_chunks') validate_object_type(n_rtable_chunks, int, 'Parameter n_rtable_chunks') # rtable_sampled = sample_right_table(rtable, size) rtable_sampled = rtable ltbl_str_cols = _get_str_cols_list(ltable) proj_ltable = ltable[ltable.columns[ltbl_str_cols]] if n_ltable_chunks == -1: n_ltable_chunks = multiprocessing.cpu_count() ltable_chunks = np.array_split(proj_ltable, n_ltable_chunks) preprocessed_tokenized_tbl = [] start_row_id = 0 for i in range(len(ltable_chunks)): result = delayed(process_tokenize_concat_strings)(ltable_chunks[i], start_row_id, rem_puncs, rem_stop_words) preprocessed_tokenized_tbl.append(result) start_row_id += len(ltable_chunks[i]) preprocessed_tokenized_tbl = delayed(wrap)(preprocessed_tokenized_tbl) if show_progress: with ProgressBar(): logger.info('Preprocessing/tokenizing ltable') preprocessed_tokenized_tbl_vals = preprocessed_tokenized_tbl.compute( scheduler="processes", num_workers=multiprocessing.cpu_count()) else: preprocessed_tokenized_tbl_vals = preprocessed_tokenized_tbl.compute( scheduler="processes", num_workers=multiprocessing.cpu_count()) ltable_processed_dict = {} for i in range(len(preprocessed_tokenized_tbl_vals)): ltable_processed_dict.update(preprocessed_tokenized_tbl_vals[i]) inverted_index = build_inverted_index(ltable_processed_dict) rtbl_str_cols = _get_str_cols_list(rtable_sampled) proj_rtable_sampled = rtable_sampled[rtable_sampled.columns[rtbl_str_cols]] if n_rtable_chunks == -1: n_rtable_chunks = multiprocessing.cpu_count() rtable_chunks = np.array_split(proj_rtable_sampled, n_rtable_chunks) probe_result = [] for i in range(len(rtable_chunks)): result = delayed(probe)(rtable_chunks[i], y_param, len(proj_ltable), inverted_index, rem_puncs, rem_stop_words, seed) probe_result.append(result) probe_result = delayed(wrap)(probe_result) if show_progress: with ProgressBar(): logger.info('Probing using rtable') probe_result = probe_result.compute( scheduler="processes", num_workers=multiprocessing.cpu_count()) else: probe_result = probe_result.compute( scheduler="processes", 
num_workers=multiprocessing.cpu_count()) probe_result = map(list, probe_result) l_tbl_indices = set(sum(probe_result, [])) l_tbl_indices = list(l_tbl_indices) ltable_sampled = ltable.iloc[l_tbl_indices] # update catalog if cm.is_dfinfo_present(ltable): cm.copy_properties(ltable, ltable_sampled) if cm.is_dfinfo_present(rtable): cm.copy_properties(rtable, rtable_sampled) return ltable_sampled, rtable_sampled
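# The chunk-and-delay pattern used above, reduced to a self-contained sketch; the worker and table here are illustrative stand-ins, not py_entitymatching code.
import multiprocessing
import numpy as np
import pandas as pd
from dask import delayed

def tokenize_chunk(chunk, start_row_id):
    # placeholder per-chunk work; the real code tokenizes and concatenates string columns
    return {start_row_id + i: ' '.join(map(str, row)) for i, row in enumerate(chunk.itertuples(index=False))}

tbl = pd.DataFrame({'name': ['a', 'b', 'c', 'd'], 'city': ['w', 'x', 'y', 'z']})
chunks = np.array_split(tbl, 2)
tasks, start = [], 0
for c in chunks:
    tasks.append(delayed(tokenize_chunk)(c, start))
    start += len(c)
merged = delayed(list)(tasks).compute(scheduler='threads')  # the original uses 'processes'; threads keep this sketch import-safe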
for learnername, learner in classalgs.items(): meanErrParam = [] nameParam = [] params = "" # print(numparams) for p in range(numparams): params = parameters[p] learner.reset(params) print('Running learner = ' + learnername + ' on parameters ' + str(learner.getparams())) trainset1 = trainset[1].reshape(trainset[1].shape[0], 1) XSplitter = np.array_split(trainset[0], 5) YSplitter = np.array_split(trainset1, 5) avgError = [] nameParam.append(params) for k in range(k_fold): trainX1 = XSplitter[k] # the k-th fold is held out for validation trainY1 = YSplitter[k] trainX0 = np.array([], dtype=np.int64).reshape(0, 9) trainY0 = np.array([], dtype=np.int64).reshape(0, 1)
# In[16]: num_folds = 5 k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100] X_train_folds = [] y_train_folds = [] ################################################################################ # TODO: # # Split up the training data into folds. After splitting, X_train_folds and # # y_train_folds should each be lists of length num_folds, where # # y_train_folds[i] is the label vector for the points in X_train_folds[i]. # # Hint: Look up the numpy array_split function. # ################################################################################ X_train_folds = np.array_split(X_train, num_folds) y_train_folds = np.array_split(y_train, num_folds) ################################################################################ # END OF YOUR CODE # ################################################################################ # A dictionary holding the accuracies for different values of k that we find # when running cross-validation. After running cross-validation, # k_to_accuracies[k] should be a list of length num_folds giving the different # accuracy values that we found when using that value of k. k_to_accuracies = {} ################################################################################ # TODO: # # Perform k-fold cross validation to find the best value of k. For each # # possible value of k, run the k-nearest-neighbor algorithm num_folds times, #
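# One way to fill in the TODO above -- a sketch assuming the assignment's KNearestNeighbor classifier with train() and predict(..., k=k) methods:
for k in k_choices:
    k_to_accuracies[k] = []
    for fold in range(num_folds):
        # hold out fold `fold` for validation, train on the remaining folds
        X_tr = np.concatenate([X_train_folds[i] for i in range(num_folds) if i != fold])
        y_tr = np.concatenate([y_train_folds[i] for i in range(num_folds) if i != fold])
        classifier = KNearestNeighbor()
        classifier.train(X_tr, y_tr)
        y_pred = classifier.predict(X_train_folds[fold], k=k)
        k_to_accuracies[k].append(np.mean(y_pred == y_train_folds[fold]))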
def batch_right_rate(output_label, y_variable): predict_max_pos = torch.max(output_label, 1)[1] right_vector = torch.eq(predict_max_pos, y_variable) count = torch.sum(right_vector) count = float(count.cpu().data.numpy()) return count / len(output_label) criterion = nn.CrossEntropyLoss() last_epoch_loss = 0 for epoch in range(1000): epoch_loss_list = [] epoch_right_list = [] all_labels = np.arange(0, len(x_train)) np.random.shuffle(all_labels) batched_labels = np.array_split(all_labels, int(len(x_train) / batch_size)) for label_of_label in tqdm(range(len(batched_labels))): batched_label = batched_labels[label_of_label] # np.array_split may yield chunks of batch_size +/- 1, so size the buffers by the actual chunk length to avoid an IndexError on the longer chunks input_image_matrix = np.zeros((len(batched_label), 224, 224, 3), dtype=np.float32) input_label = np.zeros((len(batched_label)), dtype=np.float32) for i, ele in enumerate(batched_label): input_image_matrix[i] = (x_train[ele] - 127.5) / 127.5 input_label[i] = y_train[ele] x_variable = Variable( torch.from_numpy(input_image_matrix).permute(0, 3, 1, 2)).type( torch.FloatTensor).cuda() y_variable = Variable(torch.from_numpy(input_label)).type( torch.LongTensor).cuda() output_label = net(x_variable) epoch_right_list.append(batch_right_rate(output_label, y_variable))
def simulate_latent_space(t, labels, seed=None, var=.2, split_prob=.1, gap=.75): """ Simulate splitting events in the latent space. The input time t is a one dimensional array holding the times. labels is an int array-like which holds the labels for the wanted cell types. Basically it is an array of repetitions of 1 to the number of cell types, e.g.: array([1..1,2..2,3..3,4..4]) for 4 cell types. :param array_like t: the time as [nx1] array, where n is the number of cells. :param array_like labels: the labels for the cells before splitting. :param int seed: the seed for this splitting, for reproducibility. :param scalar var: the variance of spread of the first split, increasing after that. :param [0,1] split_prob: probability of a split in the beginning; it doubles with each cell stage change (capped at 1). :param [0,1] gap: the gap size between the end of one split and the beginning of the next. The method returns Xsim, seed, labels, time:: - Xsim is the two dimensional latent space with splits included. - seed is the seed generated, for reproducibility. - labels are the corrected labels, for split events. - time is the corrected timeline for split events. """ seed = seed or np.random.randint(1000,10000) np.random.seed(seed) n_data = t.shape[0] newlabs = [] assert np.issubdtype(labels.dtype, np.int_) and np.greater(labels, 0).all(), "labels need to be of positive integer dtype, 0 is not allowed" ulabs = [] for x in range(n_data): if labels[x] not in ulabs: ulabs.append(labels[x]) Xsim = np.zeros((n_data, 2)) split_ends = [Xsim[0]] prev_ms = [[.1,.1]] split_end_times = [t[labels==ulabs[0]].max()] t = np.sort(t.copy(), 0) tmax = t.max() for lab in ulabs: fil = (lab==labels).nonzero()[0] # zero out, for simulating linear relation within cluster: new_se = [] new_m = [] new_set = [] splits = np.array_split(fil, len(split_ends)) i = 1 for s in range(len(split_ends)): # for all previously done splits: prev_m = prev_ms[s] split = splits[s] split_end = split_ends[s] split_end_time = split_end_times[s] pre_theta = None prev_split_time = None for split in np.array_split(split, np.random.binomial(1, split_prob)+1): newlabs.extend(["{} {}".format(_c, i) for _c in labels[split]]) i += 1 # If we split a collection into two, we want the two times to match up now: if prev_split_time is None: prev_split_time = t[split].ptp() else: t[split.min():] -= prev_split_time t[split] -= (t[split.min()]-split_end_time) # make splits longer, the farther in we are into # the split process, it scales with sqrt(<split#>) x = t[split].copy() x -= x.min() x /= x.max() x *= np.sqrt(lab) # rotate m away a little from the previous direction: if pre_theta is None: pre_theta = theta = np.random.uniform(-45, 45) else: theta = ((pre_theta+90)%90)-90 theta *= (np.pi/180.)
# radians for rotation matrix rot_m = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) m = np.dot(rot_m, prev_m) # later splits have bigger spread: v = (x.mean(0) - np.abs((-x+x.mean(0)))) v -= v.min(0)-1e-6 v /= v.max(0) v *= var*t[split]/tmax # make the split Xsim[split] = np.random.normal(split_end + m*x, v) # put a gap between this and the next split: p = m*x[-1] #p /= np.sqrt(GPy.util.linalg.tdot(p)) # save the new sets of splits new_se.append(split_end + (1+gap)*p) new_m.append(m) new_set.append(t[split.max()]) split_ends = new_se prev_ms = new_m split_end_times = new_set # The split probability goes up every time the cell stage changes: split_prob = min(1., split_prob*2) Xsim -= Xsim.mean(0) Xsim /= Xsim.std(0) #Xsim += np.random.normal(0,var,Xsim.shape) from scipy.stats import t as tdist Xsim += tdist.rvs(3, loc=0, scale=.1*var, size=Xsim.shape) #Add outliers return Xsim, seed, np.asarray(newlabs), t
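# Example invocation (inputs assumed, not from the source): 300 cells over 4 cell types with pseudotime in [0, 1].
import numpy as np

t = np.random.uniform(0, 1, (300, 1))
labels = np.repeat(np.arange(1, 5), 75)  # array([1..1, 2..2, 3..3, 4..4])
Xsim, seed, newlabs, t_corrected = simulate_latent_space(t, labels, seed=1234)
print(Xsim.shape, seed)  # (300, 2) 1234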
#print("class : ",content.split()[1]) c = int(content.split()[1]) - 2 l = int(Lines[count].split()[1]) - 1 ilp = float(Lines[count + 2].split()[0]) sn = float(Lines[count + 4].split()[3]) #print(ilp) List_ILP.append(ilp) List_SN.append(sn) #print("\tIteration : ",Lines[count].split()[1]) #ILP_String=str(c)+" "+str(l)+" "+str(ilp) #SN_String=str(c)+" "+str(l)+" "+str(sn) #List_ILP.append(ILP_String) count = count + 1 #print(len(List_ILP)) #print(len(List_SN)) split_ilp = np.array_split(List_ILP, n) split_sn = np.array_split(List_SN, n) # In[5]: #split_ilp[2][23] # In[6]: accuracy_ILP = [] std_devn_ILP = [] label = [] for i in range(0, n): #print(i) label.append(i + 2) #print(list(split_ilp[i]))
def shuffle_batch(X, y, batch_size): rnd_idx = np.random.permutation(len(X)) n_batches = len(X) // batch_size for batch_idx in np.array_split(rnd_idx, n_batches): X_batch, y_batch = X[batch_idx], y[batch_idx] yield X_batch, y_batch
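# Typical usage (arrays assumed): one epoch of shuffled mini-batches; with 1005 rows and batch_size=100, np.array_split yields batches of 100 or 101 rows.
import numpy as np

X = np.random.rand(1005, 4).astype(np.float32)
y = np.random.randint(0, 2, size=1005)
for X_batch, y_batch in shuffle_batch(X, y, batch_size=100):
    pass  # feed each batch to the training step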
def reformat(files, features, cls): #Mean Absolute Deviation (computed about the mean, not the median) def mad(data, axis=None): return np.mean(np.absolute(data - np.mean(data, axis)), axis) #Median Filter def strided_app(a, L, S): # Window len = L, Stride len/stepsize = S nrows = ((a.size-L)//S)+1 n = a.strides[0] return np.lib.stride_tricks.as_strided(a, shape=(nrows,L), strides=(S*n,n)) appended_features_all=[] appended_features_df = pd.DataFrame() wrist_class = {'Brush_teeth':0, 'Climb_stairs':4, 'Comb_hair':2, 'Descend_stairs':3, 'Drink_glass':1, 'Eat_meat':5, 'Eat_soup':6, 'Getup_bed':7, 'Liedown_bed':8, 'Pour_water':9, 'Sitdown_chair':10, 'Standup_chair':11, 'Use_telephone':12, 'Walk':13 } #Binarization dictionary wrist_class_binary = {'Other':0, 'Drink_glass':1, } if number_components == 3: #FOR XYZ data = pd.read_csv(files, sep=' ', header=None, names=['x', 'y', 'z']) #Conversion from 0-63 to m/s^2 df_x = -14.709 + (data.iloc[:,0:1]/63)*(2*14.709) df_y = -14.709 + (data.iloc[:,1:2]/63)*(2*14.709) df_z = -14.709 + (data.iloc[:,2:3]/63)*(2*14.709) #Median filtering x = np.median(strided_app(df_x.values.flatten(), 3,1),axis=1) y = np.median(strided_app(df_y.values.flatten(), 3,1),axis=1) z = np.median(strided_app(df_z.values.flatten(), 3,1),axis=1) df_x = pd.DataFrame(x, columns=['x']) df_y = pd.DataFrame(y, columns=['y']) df_z = pd.DataFrame(z, columns=['z']) data_x = df_x.values data_y = df_y.values data_z = df_z.values #Divide data in segments split_index=5 #Number of segments data_split_x=np.array_split(data_x, split_index) data_split_y=np.array_split(data_y, split_index) data_split_z=np.array_split(data_z, split_index) #Features Calculation appended_before=['data_split_x[2].min(axis=0)', 'data_x.min(axis=0)','data_split_x[2].mean(axis=0)', 'np.median(data_split_x[2],axis=0)','data_split_x[1].min(axis=0)','data_split_x[1].mean(axis=0)', 'data_x.mean(axis=0)','np.median(data_split_x[1],axis=0)','np.median(data_x,axis=0)', 'data_split_x[3].mean(axis=0)','np.median(data_split_x[3],axis=0)','data_split_x[3].min(axis=0)', 'np.median(data_z,axis=0)','data_split_x[4].min(axis=0)','data_split_x[2].max(axis=0)', 'np.median(data_split_z[2],axis=0)','data_split_x[2].std(axis=0)','data_split_x[4].mean(axis=0)', 'data_split_x[3].max(axis=0)','np.median(data_split_x[4],axis=0)','data_z.std(axis=0)', 'mad(data_split_x[2],axis=0)','np.median(data_split_z[4],axis=0)','data_split_z[2].mean(axis=0)', 'mad(data_z,axis=0)','data_split_z[2].std(axis=0)','data_z.mean(axis=0)', 'data_split_z[4].mean(axis=0)','data_split_x[0].min(axis=0)','data_z.var(axis=0)', 'np.median(data_split_z[3],axis=0)','data_split_z[3].mean(axis=0)','mad(data_split_z[2],axis=0)', 'np.median(data_split_x[0],axis=0)','data_split_x[0].mean(axis=0)','data_split_x[1].max(axis=0)', 'data_z.min(axis=0)','data_split_x[4].var(axis=0)','data_split_x[2].var(axis=0)', 'data_split_z[2].var(axis=0)','data_split_z[1].std(axis=0)','data_split_z[2].min(axis=0)', 'data_split_x[4].std(axis=0)','data_split_z[4].var(axis=0)','mad(data_split_z[1],axis=0)', 'mad(data_split_y[3],axis=0)','mad(data_split_x[4],axis=0)','mad(data_y,axis=0)', 'data_split_z[1].var(axis=0)','data_split_z[3].max(axis=0)','data_split_z[4].std(axis=0)', 'mad(data_split_z[4],axis=0)','data_split_z[1].min(axis=0)','data_y.std(axis=0)', 'data_split_y[3].std(axis=0)','data_split_z[4].max(axis=0)','data_split_z[0].min(axis=0)', 'data_split_z[1].mean(axis=0)','data_split_x[0].var(axis=0)','data_split_z[3].min(axis=0)', 'np.median(data_split_z[1],axis=0)','data_x.var(axis=0)','np.median(data_split_z[0],axis=0)',
'data_split_z[4].min(axis=0)','data_y.var(axis=0)','data_split_z[0].mean(axis=0)', 'data_split_x[0].std(axis=0)','kurtosis(data_split_z[4],axis=0)','np.median(data_split_y[2],axis=0)', 'data_split_x[4].max(axis=0)','data_split_y[3].var(axis=0)','data_x.max(axis=0)', 'data_split_z[0].var(axis=0)','data_split_y[2].max(axis=0)','data_split_y[2].mean(axis=0)', 'mad(data_split_x[0],axis=0)','data_split_z[3].var(axis=0)','data_x.std(axis=0)', 'kurtosis(data_split_y[1],axis=0)','data_split_z[0].std(axis=0)','data_split_z[2].max(axis=0)', 'mad(data_split_z[0],axis=0)','kurtosis(data_y,axis=0)','data_split_y[0].min(axis=0)', 'data_split_z[3].std(axis=0)','data_split_x[1].std(axis=0)','kurtosis(data_split_y[0],axis=0)', 'skew(data_z,axis=0)','mad(data_split_z[3],axis=0)','skew(data_split_y[2],axis=0)', 'data_split_x[1].var(axis=0)','data_split_x[0].max(axis=0)','np.median(data_split_y[4],axis=0)', 'data_split_y[4].mean(axis=0)','mad(data_x,axis=0)','data_split_y[0].mean(axis=0)', 'data_split_y[2].var(axis=0)','data_split_z[0].max(axis=0)','np.median(data_split_y[3],axis=0)', 'data_split_z[1].max(axis=0)','data_split_y[2].std(axis=0)','data_split_y[3].max(axis=0)', 'mad(data_split_x[1],axis=0)','np.median(data_split_y[0],axis=0)','mad(data_split_y[1],axis=0)', 'data_split_y[3].mean(axis=0)','mad(data_split_y[2],axis=0)','data_split_y[0].max(axis=0)', 'kurtosis(data_x,axis=0)','data_split_y[1].min(axis=0)','skew(data_split_y[3],axis=0)', 'skew(data_split_x[3],axis=0)','kurtosis(data_split_y[3],axis=0)','data_split_y[4].min(axis=0)', 'data_split_y[0].var(axis=0)','mad(data_split_x[3],axis=0)','data_split_y[1].std(axis=0)', 'kurtosis(data_split_z[1],axis=0)','kurtosis(data_split_y[4],axis=0)','skew(data_split_z[2],axis=0)', 'skew(data_split_x[1],axis=0)','data_split_y[4].max(axis=0)','np.median(data_y,axis=0)', 'data_split_y[4].std(axis=0)','skew(data_split_z[1],axis=0)','kurtosis(data_split_x[2],axis=0)', 'skew(data_split_x[2],axis=0)','data_split_y[1].mean(axis=0)','kurtosis(data_split_y[2],axis=0)', 'skew(data_split_z[0],axis=0)','kurtosis(data_split_x[0],axis=0)','skew(data_split_y[0],axis=0)', 'data_split_y[1].max(axis=0)','skew(data_split_z[3],axis=0)','kurtosis(data_split_x[1],axis=0)', 'kurtosis(data_split_x[3],axis=0)','data_split_x[3].std(axis=0)','skew(data_y,axis=0)', 'data_z.max(axis=0)','mad(data_split_y[4],axis=0)','data_y.mean(axis=0)', 'np.median(data_split_y[1],axis=0)','data_y.max(axis=0)','skew(data_x,axis=0)', 'data_split_y[4].var(axis=0)','mad(data_split_y[0],axis=0)','skew(data_split_y[1],axis=0)', 'kurtosis(data_z,axis=0)','kurtosis(data_split_x[4],axis=0)','data_split_y[2].min(axis=0)', 'kurtosis(data_split_z[2],axis=0)','skew(data_split_y[4],axis=0)','data_split_y[1].var(axis=0)', 'data_split_x[3].var(axis=0)','kurtosis(data_split_z[0],axis=0)','data_split_y[3].min(axis=0)', 'kurtosis(data_split_z[3],axis=0)','data_split_y[0].std(axis=0)','skew(data_split_x[4],axis=0)', 'skew(data_split_x[0],axis=0)','skew(data_split_z[4],axis=0)','data_y.min(axis=0)'] else: # For the most representative component (X) data = pd.read_csv(files, sep=' ', header=None, names=['x']) #Conversion from 0-63 to m/s^2 df_x = -14.709 + (data.iloc[:,0:1]/63)*(2*14.709) """ #Median filtering x = np.median(strided_app(df_x.values.flatten(), 3,1),axis=1) df_x = pd.DataFrame(x, columns=['x']) """ data_x = df_x.values #Divide data in segments split_index=5 #Number of segments data_split_x=np.array_split(data_x, split_index) 
appended_before=['data_split_x[2].min(axis=0)','data_x.min(axis=0)','data_split_x[2].mean(axis=0)', 'np.median(data_split_x[2],axis=0)','data_split_x[1].min(axis=0)','data_split_x[1].mean(axis=0)', 'data_x.mean(axis=0)','np.median(data_split_x[1],axis=0)','np.median(data_x,axis=0)', 'data_split_x[3].mean(axis=0)','np.median(data_split_x[3],axis=0)','data_split_x[3].min(axis=0)', 'data_split_x[4].min(axis=0)','data_split_x[2].max(axis=0)','data_split_x[2].std(axis=0)', 'data_split_x[4].mean(axis=0)','data_split_x[3].max(axis=0)','np.median(data_split_x[4],axis=0)', 'mad(data_split_x[2],axis=0)','data_split_x[0].min(axis=0)','np.median(data_split_x[0],axis=0)', 'data_split_x[0].mean(axis=0)','data_split_x[1].max(axis=0)','data_split_x[4].var(axis=0)', 'data_split_x[2].var(axis=0)','data_split_x[4].std(axis=0)','mad(data_split_x[4],axis=0)', 'data_split_x[0].var(axis=0)','data_x.var(axis=0)','data_split_x[0].std(axis=0)', 'data_split_x[4].max(axis=0)','data_x.std(axis=0)','mad(data_split_x[0],axis=0)', 'data_split_x[1].std(axis=0)','data_x.max(axis=0)','data_split_x[1].var(axis=0)', 'data_split_x[0].max(axis=0)','mad(data_x,axis=0)','kurtosis(data_x,axis=0)', 'mad(data_split_x[1],axis=0)','skew(data_split_x[3],axis=0)','mad(data_split_x[3],axis=0)', 'skew(data_split_x[1],axis=0)','kurtosis(data_split_x[2],axis=0)','skew(data_split_x[2],axis=0)', 'skew(data_x,axis=0)','kurtosis(data_split_x[0],axis=0)','kurtosis(data_split_x[1],axis=0)', 'kurtosis(data_split_x[3],axis=0)','data_split_x[3].std(axis=0)','kurtosis(data_split_x[4],axis=0)', 'data_split_x[3].var(axis=0)','skew(data_split_x[4],axis=0)','skew(data_split_x[0],axis=0)'] #Create initial_features_matrix appended_features_split=[] appended_features=[] for i in range(0, features): appended_features_before = eval(appended_before[i]) appended_features.append(appended_features_before[0]) appended_features_before=[] appended_features_all.append(appended_features) appended_features_df = pd.DataFrame(appended_features_all) #Binarize detection if wrist_class[cls] != 1: wrist_class[cls] = 0 #Classes other than drink are considered CLASS 0. Drink = CLASS 1 #Access the dictionary class number and add it as a feature appended_features_df[-1]= wrist_class[cls] #Return table containing all rows for every class and feature columns #(mean*3, sd*3, Max*3, Min*3, Y). Number of the row is maintained. (0~101) return appended_features_df
def get_spikes(self, label, buffer_manager, region, placements, graph_mapper, application_vertex, machine_time_step): spike_times = list() spike_ids = list() ms_per_tick = machine_time_step / 1000.0 vertices = \ graph_mapper.get_machine_vertices(application_vertex) missing_str = "" progress_bar = ProgressBar(len(vertices), "Getting spikes for {}".format(label)) for vertex in vertices: placement = placements.get_placement_of_vertex(vertex) vertex_slice = graph_mapper.get_slice(vertex) x = placement.x y = placement.y p = placement.p lo_atom = vertex_slice.lo_atom # Read the spikes n_words = int(math.ceil(vertex_slice.n_atoms / 32.0)) n_bytes = n_words * 4 n_words_with_timestamp = n_words + 1 # for buffering output, info is taken from the buffer manager neuron_param_region_data_pointer, data_missing = \ buffer_manager.get_data_for_vertex( placement, region) if data_missing: missing_str += "({}, {}, {}); ".format(x, y, p) record_raw = neuron_param_region_data_pointer.read_all() raw_data = (numpy.asarray(record_raw, dtype="uint8").view( dtype="<i4")).reshape([-1, n_words_with_timestamp]) if len(raw_data) > 0: # splitting at [1, 1] yields three views: [0] the timestamp column, [1] an empty block, [2] the spike bitfield words split_record = numpy.array_split(raw_data, [1, 1], 1) record_time = split_record[0] * float(ms_per_tick) spikes = split_record[2].byteswap().view("uint8") bits = numpy.fliplr( numpy.unpackbits(spikes).reshape((-1, 32))).reshape( (-1, n_bytes * 8)) time_indices, indices = numpy.where(bits == 1) times = record_time[time_indices].reshape((-1)) indices = indices + lo_atom spike_ids.append(indices) spike_times.append(times) progress_bar.update() progress_bar.end() if len(missing_str) > 0: logger.warning( "Population {} is missing spike data in region {} from the" " following cores: {}".format(label, region, missing_str)) if len(spike_ids) == 0: return numpy.zeros((0, 2), dtype="float") spike_ids = numpy.hstack(spike_ids) spike_times = numpy.hstack(spike_times) result = numpy.dstack((spike_ids, spike_times))[0] return result[numpy.lexsort((spike_times, spike_ids))]
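# The bitfield decoding above, isolated with an assumed value: each little-endian 32-bit word carries one bit per neuron, so the words are byteswapped, unpacked to bits, and each 32-bit group flipped before looking up which neurons spiked.
import numpy

words = numpy.array([[0b101]], dtype="<i4")  # neurons 0 and 2 spiked in this word
spikes = words.byteswap().view("uint8")
bits = numpy.fliplr(numpy.unpackbits(spikes).reshape((-1, 32))).reshape((-1, 32))
print(numpy.where(bits == 1)[1])  # [0 2]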
labelMissingGeoTag, user_top_used_geo_tag) return bipolar_tweets_with_geo if __name__ == '__main__': bipolar_tweets = pd.read_csv( '../initial_data/selected_normal_users_tweets_less5.csv') df = bipolar_tweets.groupby(['username', 'tweetLat', 'tweetLong' ])['tweetLong'].agg(count='count') # named aggregation; the dict form .agg({'count': 'count'}) was removed in pandas 1.0 mask = df.groupby(level=0).agg('idxmax') user_top_used_geo_tag = df.loc[mask['count']] user_top_used_geo_tag = user_top_used_geo_tag.reset_index() p = mp.Pool(processes=8) split_dfs = np.array_split(bipolar_tweets, 8) pool_results = p.map(process, zip(split_dfs, repeat(user_top_used_geo_tag))) p.close() p.join() # merging parts processed by different processes parts = pd.concat(pool_results) # merging newly calculated parts to big_df #big_df = pd.concat([big_df, parts], axis=1) parts.to_csv( '../final_data/users_final_normal/labelMIssingGeo_normalusers.csv', index=False, quotechar='"', quoting=csv.QUOTE_ALL) #, encoding='utf-8'
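# Generic form of the split-apply-concat pattern used above (worker and frame are illustrative stand-ins): split a DataFrame into N pieces, map a picklable top-level worker over (piece, shared_arg) pairs, then stitch the outputs back together.
import multiprocessing as mp
from itertools import repeat

import numpy as np
import pandas as pd

def worker(args):
    piece, shared = args
    return piece.assign(tag=shared)  # placeholder per-piece work

if __name__ == '__main__':  # required on spawn-based platforms (Windows, macOS)
    big_df = pd.DataFrame({'v': range(10)})
    with mp.Pool(processes=2) as pool:
        parts = pool.map(worker, zip(np.array_split(big_df, 2), repeat('x')))
    result = pd.concat(parts)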
def test_nn_shallow_mnist_smc_enhanced(): logger.info('test nn shallow in mnist using enhanced smc') logger.info('initialize the crypto system ...') sec_param_config_file = 'config/sec_param.json' # indicate kernel size 5 dlog_table_config_file = 'config/dlog_b8.json' with timer('initialize crypto system, cost time', logger) as t: eta = 1250 sec_param = 256 setup_parties = { 'id_1': 200, 'id_2': 200, 'id_3': 200, 'id_4': 200, 'id_5': 200 } logger.info('loading dlog configuration ...') dlog = load_dlog_table_config(dlog_table_config_file) logger.info('load dlog configuration DONE') sife_tpa = SIFEDynamicTPA(eta, sec_param=sec_param, sec_param_config=sec_param_config_file) sife_tpa.setup() sife_enc_client = SIFEDynamicClient(sec_param=256, role='enc') sife_dec_client = SIFEDynamicClient(sec_param=256, role='dec', dlog=dlog) mife_tpa = MIFEDynamicTPA(sec_param=256, parties=setup_parties, sec_param_config=sec_param_config_file) mife_tpa.setup() mife_enc_client = MIFEDynamicClient(sec_param=256, role='enc') mife_dec_client = MIFEDynamicClient(sec_param=256, role='dec', dlog=dlog) logger.info('the crypto system initialization done!') precision_data = 0 precision_weight = 4 es2pc_client = EnhancedSecure2PCClient(sife=(sife_tpa, sife_enc_client), mife=(mife_tpa, mife_enc_client), precision=precision_data) es2pc_server = EnhancedSecure2PCServer(sife=(sife_tpa, sife_dec_client), mife=(mife_tpa, mife_dec_client), precision=(precision_data, precision_weight)) X_train, y_train = load_mnist_size('datasets/mnist', size=600) X_test, y_test = load_mnist_size('datasets/mnist', size=100, kind='t10k') # X_train, y_train = load_mnist('datasets/mnist') # X_test, y_test = load_mnist('datasets/mnist', kind='t10k') # shuffle X_data, y_data = X_train.copy(), y_train.copy() idx = np.random.permutation(X_data.shape[0]) X_data, y_data = X_data[idx], y_data[idx] features_splits = np.array_split(range(X_data.shape[1]), len(setup_parties)) X_data_lst = [X_data[:, idx] for idx in features_splits] total_mini_batches = 50 nn_server = CryptoNNServer(n_output=10, n_features=X_data.shape[1], hidden_layers=[64], l2=0.1, l1=0.0, epochs=50, eta=0.001, alpha=0.001, decrease_const=0.0001, mini_batches=total_mini_batches, smc=es2pc_server) logger.info('client start to encrypt dataset ...') ct_ff_lst_dict = dict() ct_bp_lst_dict = dict() x_idx_count = 0 final_y_onehot_lst = None for id in setup_parties.keys(): if x_idx_count == (len(setup_parties) - 1): n_features = X_data_lst[x_idx_count].shape[1] + 1 nn_client = CryptoNNClient(n_output=10, mini_batches=total_mini_batches, n_features=n_features, smc=es2pc_client, random_seed=520, id=id) nn_server.register(nn_client) ct_feedforward_lst, ct_backpropagation_lst, y_onehot_lst = nn_client.pre_process( X_data_lst[x_idx_count], y_data) ct_ff_lst_dict[id] = ct_feedforward_lst ct_bp_lst_dict[id] = ct_backpropagation_lst final_y_onehot_lst = y_onehot_lst else: n_features = X_data_lst[x_idx_count].shape[1] nn_client = CryptoNNClient(n_output=10, mini_batches=total_mini_batches, n_features=n_features, smc=es2pc_client, random_seed=520, id=id) nn_server.register(nn_client) ct_feedforward_lst, ct_backpropagation_lst = nn_client.pre_process( X_data_lst[x_idx_count]) ct_ff_lst_dict[id] = ct_feedforward_lst ct_bp_lst_dict[id] = ct_backpropagation_lst x_idx_count = x_idx_count + 1 logger.info('client encrypting DONE') logger.info('server start to train ...') (train_loss_hist, test_acc_hist, train_batch_time_hist, train_time_hist) = nn_server.fit((ct_ff_lst_dict, ct_bp_lst_dict), final_y_onehot_lst, 
X_test, y_test) logger.info('server training DONE') logger.info('training loss: \n\r' + str(train_loss_hist)) logger.info('test acc: \n\r' + str(test_acc_hist))
type=str, help="Config file path", required=True, ) parser.add_argument("--gen", help="Generate subreddit list", action="store_true") args = parser.parse_args() if args.c: config = configparser.ConfigParser() config.read(args.c) subreddit_list_path = config["REDDIT"].get("subreddit_list_path") if args.gen: if not subreddit_list_path: raise Exception("Need to provide path to generate subreddit list.") generate_subreddit_list(subreddit_list_path) else: num_processes = int(config["REDDIT"].get("num_process")) with open(subreddit_list_path) as f: subreddit_list = f.readline().split(",") if not subreddit_list: raise Exception("Empty subreddit list") subreddit_lists = np.array_split(subreddit_list, num_processes) with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor: fs = [executor.submit(scrape, subreddit_list, config) for subreddit_list in subreddit_lists] for future in concurrent.futures.as_completed(fs): print(future.result())