def get(self):
    if self.get_current_user():
        self.redirect('/')
        return
    write_data(self, 'login.html', {
        'login_error': None,
    })
def save_predotar():
    genomes = ppr.get_genomes()
    data = []
    vals = [
        u"none",
        u"mitochondrial",
        u"plastid",
        u"er",
        u"elsewhere",
        u"possibly mitochondrial",
        u"possibly plastid",
        u"possibly er",
        u"possibly elsewhere",
    ]
    for g in genomes:
        pprs = ppr.load_records(g)
        if len(pprs) < 50:
            continue
        row = [0] * len(vals)
        total = float(len(pprs))
        for p in pprs:
            pred = p.annotations["predotar"]
            if pred not in vals:
                raise ValueError("didn't expect {}".format(pred))
            else:
                row[vals.index(pred)] += 1
        row = [float(r) / total for r in row]
        data.append([short_name(g)] + row)
    data.sort(key=lambda d: d[1])
    utils.write_data(["genome"] + vals, data, "output/ppr_predotar.dat")
def post(self):
    if self.get_current_user():
        raise tornado.web.HTTPError(403)
    username = self.get_argument('form-username')
    password = self.get_argument('form-password')
    login_error = None
    #login_error = 'err_userpass'
    if login_error:
        write_data(self, 'login.html', {
            'login_error': login_error,
        })
        return
    self.set_secure_cookie('user', username, settings.cookie_timeout)
    self.redirect(self.get_argument('next', '/'))
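These Tornado handlers (and the other `get` handlers later in this collection) hand rendering off to a `write_data(handler, template, context)` helper that is not shown here. A minimal sketch of what such a helper might look like, with the name and signature inferred purely from the call sites rather than taken from the source:

def write_data(handler, template, context):
    # Hypothetical helper, inferred from the call sites above: render the
    # named template with the supplied context dict on the given handler.
    handler.render(template, **context)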
def get_localization(genome):
    data = []
    for g in genome:
        if len(g.pprs) < 50:
            continue
        total = float(len(g.pprs))
        loc_c = 100.0 * len([1 for p in g.pprs if p.localization() == "C"]) / total
        loc_m = 100.0 * len([1 for p in g.pprs if p.localization() == "M"]) / total
        loc_s = 100.0 * len([1 for p in g.pprs if p.localization() == "S"]) / total
        loc_o = 100.0 * len([1 for p in g.pprs if p.localization() == "_"]) / total
        loc_u = 100.0 * len([1 for p in g.pprs if p.localization() == "*"]) / total
        data.append((short_name(g.name), loc_c, loc_m, loc_s, loc_o, loc_u))
    data.sort(key=lambda d: d[1])
    utils.write_data(("genome", "c", "m", "s", "other", "unknown"), data,
                     "output/ppr_localization.dat")
def ppr_distance():
    genomes = PPR.load_genomes()
    genomes = [g for g in genomes if len(g) > 50]
    data = []
    for i, g in enumerate(genomes, 1):
        print "Genome {}/{}: {} ({})".format(i, len(genomes), g.name, short_name(g.name))
        data.append([short_name(g.name), ] + get_average(g, genomes))
    utils.write_data(['', ] + [reduce_name(g.name) for g in genomes], data,
                     'output/Average_PPR_distance.dat')
def get_length_family(genomes):
    pprs = [p for g in genomes for p in g.pprs]
    type_P = [p for p in pprs if p.family() == "P"]
    type_E = [p for p in pprs if p.family() == "E"]
    type_Ep = [p for p in pprs if p.family() == "E+"]
    type_DYW = [p for p in pprs if p.family() == "DYW"]
    type_PLS = [p for p in pprs if p.family() == "PLS"]
    P_hist = length_hist(type_P)
    E_hist = length_hist(type_E)
    Ep_hist = length_hist(type_Ep)
    DYW_hist = length_hist(type_DYW)
    PLS_hist = length_hist(type_PLS)
    hist = fmt_hist([P_hist, E_hist, Ep_hist, DYW_hist, PLS_hist])
    utils.write_data(("length", "p", "e", "ep", "dyw", "pls"), hist,
                     "output/ppr_family_lengths.dat")
def names(request, person, do_view=False):
    names = set(json.loads(open(settings.NAMES_DB, 'r').read())['names'])
    oked, vetoed = utils.get_data(person)
    if request.method == "POST":
        name = request.POST['name']
        ok = 0
        if 'yes' in request.POST:
            oked.add(name)
            ok = 1
        if 'no' in request.POST:
            vetoed.add(name)
        if 'cancel' in request.POST:
            if name in oked:
                oked.remove(name)
            if name in vetoed:
                vetoed.remove(name)
        utils.write_data(person, oked, vetoed)
        response = HttpResponseRedirect('/names/%s/' % (person,))
        if 'yes' in request.POST or 'no' in request.POST:
            safe_name = urllib.quote(name.encode('UTF-8'))
            response.set_cookie('last_decision', value='%s|%s' % (ok, safe_name),
                                max_age=60 * 60)
        return response
    remaining = names - vetoed - oked
    last_decision = request.COOKIES.get('last_decision', '|').split('|')
    last_decision[1] = urllib.unquote(last_decision[1])
    context = {
        'name': random.choice(list(remaining)) if len(remaining) else None,
        'num_remaining': len(remaining),
        'num_ok': len(oked),
        'num_veto': len(vetoed),
        'last_decision': last_decision,
    }
    if do_view:
        context.update({
            'do_view': True,
            'oked': utils.sort_nameset(oked),
            'vetoed': utils.sort_nameset(vetoed),
        })
    template = 'names/%s.html' % ('namelist' if do_view else 'names',)
    response = HttpResponse(loader.get_template(template).render(
        RequestContext(request, context)))
    response.delete_cookie('last_decision')
    return response
def write_model_parameters(hps, output_fname=None, datasets=None):
    """Save all the model parameters.

    Save all the parameters to hps.lfads_save_dir.

    Args:
      hps: The dictionary of hyperparameters.
      output_fname: The prefix of the file in which to save the generated
        samples.
      datasets: A dictionary of data dictionaries.  The dataset dict is simply a
        name(string)-> data dictionary mapping (See top of lfads.py).
    """
    if not output_fname:
        output_fname = "model_params"
    else:
        output_fname = output_fname + "_model_params"
    fname = os.path.join(hps.lfads_save_dir, output_fname)
    print("Writing model parameters to: ", fname)
    # save the optimizer params as well
    model = build_model(hps, kind="write_model_params", datasets=datasets)
    model_params = model.eval_model_parameters(use_nested=False,
                                               include_strs="LFADS")
    utils.write_data(fname, model_params, compression=None)
    print("Done.")
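The LFADS snippets in this collection call `utils.write_data(fname, data_dict, ...)` with either `compression=None` or `use_json=True`. A plausible minimal sketch of such a writer, assuming HDF5 (via h5py) for dicts of arrays and JSON when requested; the project's actual helper may differ in its details:

import json
import h5py

def write_data(data_fname, data_dict, use_json=False, compression=None):
    # Hedged sketch of the assumed utils.write_data used by the LFADS snippets.
    if use_json:
        with open(data_fname, 'w') as f:
            json.dump(data_dict, f, indent=2, sort_keys=True)
    else:
        with h5py.File(data_fname, 'w') as hf:
            for key, value in data_dict.items():
                hf.create_dataset(key, data=value, compression=compression)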
def main():
    genomes = ppr.get_genomes()
    hist = []
    numbers = []
    locale = []
    family = []
    for g in genomes:
        print g
        pprs = list(ppr.load_pprs(g))
        numbers.append((short_name(g),) + get_numbers(pprs))
        hist.append(length_hist(pprs))
        locale.append((short_name(g),) + get_localization(pprs))
        family.append((short_name(g),) + get_family(pprs))
    numbers.sort(key=lambda n: n[1])
    family.sort(key=lambda f: sum(f[1:]))
    hist = fmt_hist(hist)
    utils.write_data(("genome",) + get_numbers_hdr(), numbers,
                     "output/ppr_numbers.dat")
    utils.write_data(["length"] + [short_name(g) for g in genomes], hist,
                     "output/ppr_lengths.dat")
    utils.write_data(("genome",) + get_localization_hdr(), locale,
                     "output/ppr_localization.dat")
    utils.write_data(("genome",) + get_family_hdr(), family,
                     "output/ppr_families.dat")
def exp_gbrbm(exp_name, T=[]):
    dataset = dg.one_exception_dataset(N=6, n=1500, T=T, lam=5000, exc=1,
                                       noise_k=1)
    utils.write_data(dataset, exp_name, "generated_data")
    new_data = utils.tsne(dataset, exp_name, "generated_data", T, 2)
    utils.write_data(new_data, exp_name, "generated_data_for_tsne")
    _, recovery_sample, decode_res = decoder.gbrbm_decoder(dataset,
                                                           learning_rate=0.1,
                                                           training_epochs=50,
                                                           batch_size=1001,
                                                           n_hidden=2000,
                                                           plot_every=1)
    utils.write_data(decode_res, exp_name, "decoded_data")
    new_data = utils.tsne(decode_res, exp_name, "decoded_data", T, 2)
    utils.write_data(new_data, exp_name, "decoded_tsne_data_for_tsne")
                                     auc_transfer_metric, mixmode='logsigp')
    alphas = np.asarray([frame.sig_alpha for frame in alpha_frames])
    betas = np.asarray([frame.time for frame in alpha_frames])
    gammas = np.asarray([frame.alpha for frame in alpha_frames])
    alpha_list.append(alphas)
    beta_list.append(betas)
    gamma_list = filtered_append(gammas, gamma_list)
    print(i, '/', iterations)

mean_alpha = np.mean(alpha_list, axis=0)
mean_beta = np.mean(beta_list, axis=0)
mean_gamma = np.mean(gamma_list, axis=0)
write_data("results", "baseline.txt", mean_gamma, mean_alpha, mean_beta)
for i, (a, b, c) in enumerate(zip(mean_gamma, mean_alpha, mean_beta)):
    print(i + 1, a, b, c)

fig = plt.figure(figsize=(9, 5))
ax = plt.subplot(1, 1, 1)
ax.tick_params(axis='both', which='major', labelsize=13)
ax.axhline(1, c='lightgray', ls='--')
ax.axhline(0, c='lightgray', ls='--')
ax.plot(alphas, lw=2, color='k', label='N = {0}'.format(10))
ax.set_xlim([0, opt.ALPHA_NUM_ITER - 1])
ax.set_xlabel('Number of episodes', fontsize=14)
ax.set_ylabel(r'$\sigma(\gamma)$', fontsize=14)
def build_model(hps, kind="train", datasets=None):
    """Builds a model from either random initialization, or saved parameters.

    Args:
      hps: The hyper parameters for the model.
      kind: (optional) The kind of model to build.  Training vs inference
        require different graphs.
      datasets: The datasets structure (see top of lfads.py).

    Returns:
      an LFADS model.
    """
    build_kind = kind
    if build_kind == "write_model_params":
        build_kind = "train"
    with tf.variable_scope("LFADS", reuse=None):
        model = LFADS(hps, kind=build_kind, datasets=datasets)

    if not os.path.exists(hps.lfads_save_dir):
        print("Save directory %s does not exist, creating it." % hps.lfads_save_dir)
        os.makedirs(hps.lfads_save_dir)

    cp_pb_ln = hps.checkpoint_pb_load_name
    cp_pb_ln = 'checkpoint' if cp_pb_ln == "" else cp_pb_ln
    if cp_pb_ln == 'checkpoint':
        print("Loading latest training checkpoint in: ", hps.lfads_save_dir)
        saver = model.seso_saver
    elif cp_pb_ln == 'checkpoint_lve':
        print("Loading lowest validation checkpoint in: ", hps.lfads_save_dir)
        saver = model.lve_saver
    else:
        print("Loading checkpoint: ", cp_pb_ln, ", in: ", hps.lfads_save_dir)
        saver = model.seso_saver

    ckpt = tf.train.get_checkpoint_state(hps.lfads_save_dir,
                                         latest_filename=cp_pb_ln)
    session = tf.get_default_session()
    print("ckpt: ", ckpt)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        if kind in ["posterior_sample_and_average", "prior_sample",
                    "write_model_params"]:
            print("Possible error!!! You are running ", kind,
                  " on a newly initialized model!")
            print("Are you sure ", ckpt.model_checkpoint_path, " exists?")
        tf.global_variables_initializer().run()

    if ckpt:
        train_step_str = re.search('-[0-9]+$', ckpt.model_checkpoint_path).group()
    else:
        train_step_str = '-0'

    fname = 'hyperparameters' + train_step_str + '.txt'
    hp_fname = os.path.join(hps.lfads_save_dir, fname)
    hps_for_saving = jsonify_dict(hps)
    utils.write_data(hp_fname, hps_for_saving, use_json=True)

    return model
def build_model(hps, kind="train", datasets=None):
    """Builds a model from either random initialization, or saved parameters.

    Args:
      hps: The hyper parameters for the model.
      kind: (optional) The kind of model to build.  Training vs inference
        require different graphs.
      datasets: The datasets structure (see top of lfads.py).

    Returns:
      an LFADS model.
    """
    build_kind = kind
    if build_kind == "write_model_params":
        build_kind = "train"
    with tf.variable_scope("LFADS", reuse=None):
        model = LFADS(hps, kind=build_kind, datasets=datasets)

    if not os.path.exists(hps.lfads_save_dir):
        print("Save directory %s does not exist, creating it." % hps.lfads_save_dir)
        os.makedirs(hps.lfads_save_dir)

    cp_pb_ln = hps.checkpoint_pb_load_name
    cp_pb_ln = 'checkpoint' if cp_pb_ln == "" else cp_pb_ln
    if cp_pb_ln == 'checkpoint':
        print("Loading latest training checkpoint in: ", hps.lfads_save_dir)
        saver = model.seso_saver
    elif cp_pb_ln == 'checkpoint_lve':
        print("Loading lowest validation checkpoint in: ", hps.lfads_save_dir)
        saver = model.lve_saver
    else:
        print("Loading checkpoint: ", cp_pb_ln, ", in: ", hps.lfads_save_dir)
        saver = model.seso_saver

    ckpt = tf.train.get_checkpoint_state(hps.lfads_save_dir,
                                         latest_filename=cp_pb_ln)
    session = tf.get_default_session()
    print("ckpt: ", ckpt)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        if kind in ["posterior_sample_and_average", "posterior_push_mean",
                    "prior_sample", "write_model_params"]:
            print("Possible error!!! You are running ", kind,
                  " on a newly initialized model!")
            # cannot print ckpt.model_checkpoint_path if no ckpt
            print("Are you sure a checkpoint in ", hps.lfads_save_dir,
                  " exists?")
        tf.global_variables_initializer().run()

    if ckpt:
        train_step_str = re.search('-[0-9]+$', ckpt.model_checkpoint_path).group()
    else:
        train_step_str = '-0'

    fname = 'hyperparameters' + train_step_str + '.txt'
    hp_fname = os.path.join(hps.lfads_save_dir, fname)
    hps_for_saving = jsonify_dict(hps)
    utils.write_data(hp_fname, hps_for_saving, use_json=True)

    return model
def get(self):
    write_data(self, 'monitor/monitor.html', {})
def get(self):
    write_data(self, 'user/admin.html', {})
    rf = RandomForestClassifier(max_features='auto',
                                oob_score=False,
                                class_weight='balanced',
                                random_state=config.set_seed,
                                n_jobs=-1,
                                verbose=config.verbose)
    clf = GridSearchCV(estimator=rf,
                       param_grid=params,
                       scoring='accuracy',
                       cv=cv,
                       n_jobs=-1)
    clf.fit(X, y)
    print('#### Best Params')
    print(clf.best_params_)
    print('#### Best Score')
    print(clf.best_score_)
    return clf.grid_scores_


if __name__ == '__main__':
    # Train Data
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    # Test Data
    test_x = read_data(config.d_xtest)
    report = grid_model(param_grid, train_x, train_y, cv=5)
    write_data(config.grid_report_rf, report)
def generate_lorenz_data(N_trials, N_inits, N_cells, N_steps, N_stepsinbin=1,
                         dt_lorenz=None, dt_spike=None, dt_cal=None,
                         base_firing_rate=5.0, tau_c=0.4, inc_c=1.0, sigma=0.2,
                         trainp=0.8, seed=100, save=True, save_dir='./'):
    print('Generating Lorenz data', flush=True)
    N_lorenz = 3
    assert N_steps % N_stepsinbin == 0, 'Can\'t bin time steps'
    N_steps_bin = int(N_steps / N_stepsinbin)
    if dt_lorenz is None:
        dt_lorenz = np.clip(2.0 / N_steps, 0.005, 0.02)
    if dt_spike is None:
        dt_spike = dt_lorenz
    if dt_cal is None:
        dt_cal = dt_spike * N_stepsinbin
    N_train = int(N_trials * trainp)
    N_steps_burn = max(N_steps, 300)

    y = np.zeros((N_inits, N_steps + N_steps_burn, N_lorenz))
    w_lorenz = [10.0, 28.0, 8.0 / 3.0]
    y[:, 0] = np.random.randn(N_inits, N_lorenz)
    for step in range(1, N_steps + N_steps_burn):
        dy = lorenz_grad(y[:, step - 1], w_lorenz)
        y[:, step] = eulerStep(y[:, step - 1], dy, dt_lorenz)

    print('Converting to rates and spikes', flush=True)
    y = y[:, N_steps_burn:]
    y = normalize(y)
    W = (np.random.rand(N_lorenz, N_cells) + 1) * np.sign(
        np.random.randn(N_lorenz, N_cells))
    b = np.log(base_firing_rate)
    rates = np.exp(y.dot(W) + b)
    spikes = np.array(
        [np.random.poisson(rates * dt_spike) for trial in range(N_trials)])

    if N_stepsinbin > 1:
        from scipy.stats import binned_statistic
        binned_latent = np.zeros((N_trials, N_inits, N_steps_bin, N_lorenz))
        binned_rates = np.zeros((N_trials, N_inits, N_steps_bin, N_cells))
        binned_spikes = np.zeros((N_trials, N_inits, N_steps_bin, N_cells))
        for ix in range(N_trials):
            for jx in range(N_inits):
                binned_spikes[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                         values=spikes[ix, jx].T,
                                                         statistic='sum',
                                                         bins=N_steps_bin)[0].T
                binned_rates[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                        values=rates[jx].T,
                                                        statistic='mean',
                                                        bins=N_steps_bin)[0].T
                binned_latent[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                         values=y[jx].T,
                                                         statistic='mean',
                                                         bins=N_steps_bin)[0].T
        rates = binned_rates
        del binned_rates
        spikes = binned_spikes.astype(int)
        del binned_spikes
        latent = binned_latent
        del binned_latent
    else:
        latent = np.array([y for trial in range(N_trials)])
        rates = np.array([rates for trial in range(N_trials)])

    calcium = np.zeros_like(spikes, dtype=float)
    fluor = np.zeros_like(spikes, dtype=float)
    ct = spikes[:, :, 0, :] * inc_c
    calcium[:, :, 0, :] = ct
    fluor[:, :, 0, :] = ct + np.random.randn(N_trials, N_inits, N_cells) * sigma

    print('Converting to fluorescence', flush=True)
    for step in range(1, N_steps_bin):
        ct = eulerStep(ct, calcium_grad(ct, tau_c), dt_spike)
        ct = ct + inc_c * spikes[:, :, step, :]
        calcium[:, :, step, :] = ct
        fluor[:, :, step, :] = ct + np.random.randn(N_trials, N_inits, N_cells) * sigma

    print('Train and test split')
    data_dict = {}
    for data, name in zip([latent, rates, spikes, calcium, fluor],
                          ['latent', 'rates', 'spikes', 'calcium', 'fluor']):
        data_dict['train_%s' % name] = np.reshape(
            data[:N_train], (N_train * N_inits, N_steps_bin, data.shape[-1]))
        data_dict['valid_%s' % name] = np.reshape(
            data[N_train:],
            ((N_trials - N_train) * N_inits, N_steps_bin, data.shape[-1]))

    if importlib.find_loader('oasis'):
        data_dict['train_oasis'] = deconvolve_calcium(data_dict['train_fluor'],
                                                      g=np.exp(-dt_cal / tau_c))
        data_dict['valid_oasis'] = deconvolve_calcium(data_dict['valid_fluor'],
                                                      g=np.exp(-dt_cal / tau_c))

    data_dict['train_data'] = data_dict['train_spikes']
    data_dict['valid_data'] = data_dict['valid_spikes']
    data_dict['train_truth'] = data_dict['train_rates']
    data_dict['valid_truth'] = data_dict['valid_rates']
    data_dict['dt'] = dt_cal
    data_dict['loading_weights'] = W
    data_dict['conversion_factor'] = 1. / (np.max(rates) * dt_cal)

    print('Saving to %s/synth_data/lorenz_%03d' % (save_dir, seed), flush=True)
    if save:
        utils.write_data('%s/synth_data/lorenz_%03d' % (save_dir, seed),
                         data_dict)
    return data_dict
def save_profile(self):
    profile_name = self.profilename_edit.text()
    profile_data = {
        "profile_name": profile_name,
        "shipping_fname": self.shipping_fname_edit.text(),
        "shipping_lname": self.shipping_lname_edit.text(),
        "shipping_email": self.shipping_email_edit.text(),
        "shipping_phone": self.shipping_phone_edit.text(),
        "shipping_a1": self.shipping_address1_edit.text(),
        "shipping_a2": self.shipping_address2_edit.text(),
        "shipping_city": self.shipping_city_edit.text(),
        "shipping_zipcode": self.shipping_zipcode_edit.text(),
        "shipping_state": self.shipping_state_box.currentText(),
        "shipping_country": self.shipping_country_box.currentText(),
        "billing_fname": self.billing_fname_edit.text(),
        "billing_lname": self.billing_lname_edit.text(),
        "billing_email": self.billing_email_edit.text(),
        "billing_phone": self.billing_phone_edit.text(),
        "billing_a1": self.billing_address1_edit.text(),
        "billing_a2": self.billing_address2_edit.text(),
        "billing_city": self.billing_city_edit.text(),
        "billing_zipcode": self.billing_zipcode_edit.text(),
        "billing_state": self.billing_state_box.currentText(),
        "billing_country": self.billing_country_box.currentText(),
        "card_number": (Encryption().encrypt(
            self.cardnumber_edit.text())).decode("utf-8"),
        "card_month": self.cardmonth_box.currentText(),
        "card_year": self.cardyear_box.currentText(),
        "card_type": self.cardtype_box.currentText(),
        "card_cvv": self.cardcvv_edit.text()
    }
    profiles = return_data("./data/profiles.json")
    for p in profiles:
        if p["profile_name"] == profile_name:
            profiles.remove(p)
            break
    profiles.append(profile_data)
    write_data("./data/profiles.json", profiles)
    if self.loadprofile_box.findText(profile_name) == -1:
        self.loadprofile_box.addItem(profile_name)
        self.parent().parent().createdialog.profile_box.addItem(profile_name)
    QtWidgets.QMessageBox.information(self, "Bird Bot", "Saved Profile")
def main(): args = parser.parse_args() data_name = os.path.basename(args.data_path).split('.')[0] dir_name = os.path.dirname(args.data_path) data_dict = read_data(args.data_path) dt = data_dict['dt'] g = np.exp(-dt / args.tau) train_size, steps_size, state_size = data_dict['train_fluor'].shape valid_size, steps_size, state_size = data_dict['valid_fluor'].shape data_size = train_size + valid_size data = np.zeros((data_size, steps_size, state_size)) if args.undo_train_test_split: train_idx = data_dict['train_idx'] valid_idx = data_dict['valid_idx'] data[train_idx] = data_dict['train_fluor'] data[valid_idx] = data_dict['valid_fluor'] else: data[:train_size] = data_dict['train_fluor'] data[train_size:] = data_dict['valid_fluor'] if args.flatten: data = data.reshape(data_size * steps_size, state_size).transpose() else: data = data.transpose(0, 2, 1) data = data.reshape(data_size * state_size, steps_size) data = np.hstack((np.zeros((data_size * state_size, 1)), data)) if args.known: S, C = deconvolve_calcium_known(data, g=g, s_min=args.scale / 2) else: if args.normalize: data = max_normalize(data.T, axis=0).T S, C, bias, G, gain, rval = deconvolve_calcium_unknown( data, g=g, snr_thresh=args.scale) tau = -dt / (np.log(G)) if args.flatten: data = data.reshape(data_size, steps_size, state_size) S = S.reshape(data_size, steps_size, state_size) C = C.reshape(data_size, steps_size, state_size) else: data = data.reshape(data_size, state_size, steps_size + 1).transpose(0, 2, 1)[:, 1:] S = S.reshape(data_size, state_size, steps_size + 1).transpose(0, 2, 1)[:, 1:] C = C.reshape(data_size, state_size, steps_size + 1).transpose(0, 2, 1)[:, 1:] if not args.known: bias = bias.reshape(data_size, state_size).mean(axis=0) tau = tau.reshape(data_size, state_size).mean(axis=0) gain = gain.reshape(data_size, state_size).mean(axis=0) if args.undo_train_test_split: train_fluor = data[train_idx] valid_fluor = data[valid_idx] train_ospikes = S[train_idx] valid_ospikes = S[valid_idx] train_ocalcium = C[train_idx] valid_ocalcium = C[valid_idx] else: train_fluor = data[:train_size] valid_fluor = data[train_size:] train_ospikes = S[:train_size] valid_ospikes = S[train_size:] train_ocalcium = C[:train_size] valid_ocalcium = C[train_size:] data_dict['train_fluor'] = train_fluor data_dict['valid_fluor'] = valid_fluor data_dict['train_ospikes'] = train_ospikes data_dict['valid_ospikes'] = valid_ospikes data_dict['train_ocalcium'] = train_ocalcium data_dict['valid_ocalcium'] = valid_ocalcium if not args.known: data_dict['obs_gain_init'] = gain data_dict['obs_bias_init'] = bias data_dict['obs_tau_init'] = tau data_dict['obs_var_init'] = (gain / args.scale)**2 arg_string = '_o%s' % ('k' if args.known else 'u') arg_string += '_t%s' % (str(args.tau)) arg_string += '_s%s' % (str(args.scale)) arg_string += '_f' if args.flatten else '' arg_string += '_z' if args.undo_train_test_split else '' arg_string += '_n' if args.normalize else '' write_data(os.path.join(dir_name, data_name) + arg_string, data_dict)
def run_filter():
    data = utils.read_json(Const.origin_train_filename)
    saved_data = utils.filter_out(data)
    f = open(Const.origin_train_filtered_filename, 'w')
    utils.write_data(f, saved_data)
    print 'filter finish, saved in %s' % Const.origin_train_filtered_filename
import sys
sys.path.append("../")
from param_config import config
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
from utils import read_data
from utils import write_data
import numpy as np

np.random.seed(config.set_seed)


def join_split(X, y):
    x_train, x_val, y_train, y_val = train_test_split(
        X, y, stratify=y, test_size=config.train_val_split_pct)
    return x_train, y_train, x_val, y_val


# Try imbalance learn
if __name__ == '__main__':
    train_x = read_data(config.e_xtrain)
    train_y = read_data(config.e_ytrain)

    # split data into train and validation
    # 20 percent stratified
    x_train, y_train, x_val, y_val = join_split(train_x, train_y)

    print('#### Writing Pickle 09: Split ####')
    write_data(config.f_xtrain, x_train)
    write_data(config.f_ytrain, y_train)
    write_data(config.f_xval, x_val)
    write_data(config.f_yval, y_val)
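The param_config pipeline scripts in this collection (missing-value handling, PCA, imputation, oversampling, train/validation split) all pass config paths to `read_data`/`write_data`, and their log lines talk about "Writing Pickle". A minimal sketch of pickle-backed helpers consistent with that usage; the real utils module may use joblib or another serialization format:

import pickle

def read_data(path):
    # Hypothetical counterpart to write_data, assuming pickle serialization.
    with open(path, 'rb') as f:
        return pickle.load(f)

def write_data(path, obj):
    # Hedged sketch: persist an intermediate pipeline artifact to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)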
    c = Counter(lab60)
    # Return most common label. Make sure that ratio between second most common
    # and most common is < 0.8
    counts = c.most_common(2)
    if counts[1][1] / float(counts[0][1]) > 0.8:
        print 'Ambiguous label for sampleid = %d : counts : %s' % (sampleid, counts)
        #print counts
    return counts[0][0]

##
newlabels = np.array([knn_DM(DM, sampleid) for sampleid in xrange(len(labels))])
modified = np.flatnonzero(newlabels - labels)

## Write a new file
with open('out.txt', 'w') as f:
    utils.write_data(f, accel, gyro, labels)

##
sample1 = 18
sample2 = 151
dist, cost, path = mlpy.dtw_std(accel[sample1, 0, :], accel[sample2, 0, :],
                                dist_only=False)

pl.figure()
pl.suptitle('dist = %f' % dist)
pl.subplot(211); pl.title('%d' % sample1); pl.plot(accel[sample1, 0, :]); pl.ylim(0, 5000)
pl.subplot(212); pl.title('%d' % sample2); pl.plot(accel[sample2, 0, :]); pl.ylim(0, 5000)

pl.figure()
pl.title('%d - %d' % (sample1, sample2))
pl.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
pl.plot(path[0], path[1], 'w')
from utils import read_data
from utils import write_data
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

np.random.seed(config.set_seed)

num_cols = ['amount_tsh', 'gps_height', 'population', 'total_missing']


# Scale and Fit
# standard mean and unit variance scale
def pca(x_train, x_test):
    std_clf = make_pipeline(StandardScaler(),
                            PCA(n_components=config.pca_components))
    std_clf.fit(x_train)
    train_pca = std_clf.transform(x_train)
    test_pca = std_clf.transform(x_test)
    return train_pca, test_pca


if __name__ == '__main__':
    train_x = read_data(config.d_xtrain)
    test_x = read_data(config.d_xtest)
    train_pca, test_pca = pca(train_x[num_cols], test_x[num_cols])

    print('#### Writing Pickle 1: PCA ####')
    write_data(config.pca_xtrain, train_pca)
    write_data(config.pca_xtest, test_pca)
def deproject(self, config_path='params.cfg'): ''' Deproject a rotation measure map by dividing it by a smooth model of the cluster with the parameters taken from the configuration file. Args: config_path (str): path to the configuration file ''' # Observed cluster parameters parameters (use only when need to deproject 3D PDS) config = configparser.ConfigParser() try: config.read(config_path) except IOError: print('Error: No config file found at ' + config_path) return # beta model parameters self.cluster_params['ne0'] = config.getfloat("cluster_params", "ne0") self.cluster_params['rc'] = config.getfloat("cluster_params", "rc") self.cluster_params['beta'] = config.getfloat("cluster_params", "beta") # size of pixel in kiloparsecs self.cluster_params['kpc_px'] = config.getfloat( "cluster_params", "kpc_px") # Galactic background to subtract from the observed RM map self.cluster_params['gal_bg'] = config.getfloat( "cluster_params", "gal_bg") # location of the cluster center on the sky in pixels self.cluster_params['center'][0] += config.getint( "cluster_params", "ix0") self.cluster_params['center'][1] += config.getint( "cluster_params", "iy0") #recovery self.recovery_params = {} self.recovery_params['alpha'] = config.getfloat( "recovery_params", "alpha") self.recovery_params['inclin'] = config.getfloat( "recovery_params", "inclin") # depth of the 3D box used to generate the smooth model self.recovery_params['lz'] = config.getint("recovery_params", "Lz") # subtract the background self.data -= self.cluster_params['gal_bg'] kpc_px = self.cluster_params['kpc_px'] lx, ly = self.data.shape print('generate a 3D smooth model of the cluster...') smod = gen3d_smooth_model(shape=(lx, ly), cluster_params=self.cluster_params, recovery_params=self.recovery_params) write_data(self.output_paths['smooth3d'], smod[:, ly // 2, :], ftype='npy') print('deproject the image...') # integrate the smooth model along the line of sight I0 = np.sqrt((smod**2).sum(axis=2)) * kpc_px * 812. write_data(self.output_paths['smooth'], I0, ftype='npy') # divide the image by the smooth model ind = (I0 != 0.) * np.invert(np.isnan(self.data)) self.data[ind] /= I0[ind] write_data(self.output_paths['deproj'], self.data, ftype='npy') print('deprojection done\n')
def generate_chaotic_rnn_data(Ninits=400, Ntrial=10, Ncells=50, Nsteps=200, trainp=0.8, dt_rnn=0.1, dt_spike=0.1, tau=0.25, gamma=1.5, maxRate=5, B=20, tau_c=0.4, inc_c=1.0, sigma=0.2, seed=5, save=False, save_dir='./'): ''' Generate synthetic calcium fluorescence data from chaotic recurrent neural network system Arguments: - T (int or float): total time in seconds to run - dt_rnn (float): time step of chaotic RNN - dt_spike (float): time step of calcium trace - Ninits (int): Number of network initialisations - Ntrial (int): Number of instances with same network initialisations - Ncells (int): Number of cells in network - trainp (float): proportion of dataset to partition into training set - tau (float): time constant of chaotic RNN - gamma (float): - maxRate (float): maximum firing rate of chaotic RNN - B (int, or float): amplitude of perturbation to network - tau_c (float): time constant of calcium decay - inc_c (float): increment size of calcium influx - sigma (float): standard deviation of fluorescence noise - save (bool): save output ''' np.random.seed(seed) Nsteps = int(T / dt_rnn) Ntrial_train = int(trainp * Ntrial) # Chaotic RNN weight matrix W = gamma * np.random.randn(Ncells, Ncells) / np.sqrt(Ncells) rates, spikes, calcium, fluor = np.zeros( (4, Ninits, Ntrial, Nsteps, Ncells)) perturb_steps = [] for init in range(Ninits): y0 = np.random.randn(Ncells) for trial in range(Ntrial): perturb_step = np.random.randint(0.25 * Nsteps, 0.75 * Nsteps) perturb_steps.append(perturb_step) perturb_amp = np.random.randn(Ncells) * B b = 0 yt = y0 rt = rateScale(np.tanh(yt), maxRate=maxRate) st = spikify_rates(rt, dt=dt_spike) ct = inc_c * st rates[init, trial, 0, :] = rt spikes[init, trial, 0, :] = st calcium[init, trial, 0, :] = ct fluor[init, trial, 0, :] = ct + np.random.randn(Ncells) * sigma for step in range(1, Nsteps): yt = eulerStep(yt, RNNgrad(yt + b, W, tau), dt_rnn) ct = eulerStep(ct, calciumgrad(ct, tau_c), dt_spike) if step == perturb_step: b = perturb_amp * dt_rnn / tau else: b = 0 rt = rateScale(np.tanh(yt), maxRate=maxRate) st = spikify_rates(rt, dt=dt_spike) ct = ct + inc_c * st rates[init, trial, step, :] = rt spikes[init, trial, step, :] = st calcium[init, trial, step, :] = ct fluor[init, trial, step, :] = ct + np.random.randn(Ncells) * sigma # Construct data dictionary for data, name in zip([rates, spikes, calcium, fluor], ['rates', 'spikes', 'calcium', 'fluor']): data_dict['train_%s' % name] = np.reshape( data[:N_train], (N_train * N_inits, N_steps_bin, data.shape[-1])) data_dict['valid_%s' % name] = np.reshape( data[N_train:], ((N_trials - N_train) * N_inits, N_steps_bin, data.shape[-1])) if importlib.find_loader('oasis'): data_dict['train_oasis'] = deconvolve_calcium(data_dict['train_fluor'], g=np.exp(-dt_spike / tau_c)) data_dict['valid_oasis'] = deconvolve_calcium(data_dict['valid_fluor'], g=np.exp(-dt_spike / tau_c)) data_dict['train_data'] = data_dict['train_spikes'] data_dict['valid_data'] = data_dict['valid_spikes'] data_dict['train_truth'] = data_dict['train_rates'] data_dict['valid_truth'] = data_dict['valid_rates'] data_dict['dt'] = dt_spike data_dict['perturb_times'] = np.array(perturb_steps) * dt_spike if save: utils.write_data('%s/synth_data/chaotic_rnn_%03d' % (save_dir, seed), data_dict) return data_dict
def get(self):
    write_data(self, 'left.html', {})
def get(self):
    write_data(self, 'manage/manage.html', {})
def most_frequent(x_train):
    # For each column with missing values, record its most frequent value.
    na_cols = {}
    for c in x_train.columns:
        if x_train[c].isnull().sum() > 0:
            na_cols[c] = x_train[c].value_counts().index[0]
    return na_cols


def target_encoding(y_train):
    y_train = [target_mapping[i] for i in y_train]
    return y_train


if __name__ == '__main__':
    train_x = read_data(config.c_xtrain)
    test_x = read_data(config.c_xtest)
    train_y = read_data(config.b_ytrain)
    train_y = target_encoding(train_y)
    impute_data = most_frequent(train_x)
    train_x, test_x = label_encoding(train_x, test_x, impute_data)
    assert len(train_x.columns) == len(test_x.columns)

    print('#### Writing Pickle 04: Imputation ####')
    write_data(config.d_xtrain, train_x)
    write_data(config.d_xtest, test_x)
    write_data(config.d_ytrain, train_y)
def get(self):
    write_data(self, 'index.html', {})
def get_bs2_combined_features(df_policy, df_claim): ''' In: DataFrame(df_policy), DataFrame(df_claim), Out: DataFrame(X_fs), DataFrame(y_fs), Description: create train dataset with additional columns ''' print('Getting labels') y_train_all = read_data('training-set.csv', path='raw') y_test = read_data('testing-set.csv', path='raw') print('Getting neural network processed premiums') X_fs = read_data('premium_60_1.csv') # insured print('Getting column cat_ins') X_fs = X_fs.assign( cat_ins=get_bs2_cat(df_policy, X_fs.index, "Insured's_ID")) print('Getting column cat_assured') X_fs = X_fs.assign( cat_assured=get_bs2_cat(df_policy, X_fs.index, 'fassured')) print('Getting column real_age') X_fs = X_fs.assign(real_age=get_bs2_real_age(df_policy, X_fs.index)) print('Getting column cat_sex') X_fs = X_fs.assign(cat_sex=get_bs2_cat(df_policy, X_fs.index, 'fsex')) print('Getting column cat_marriage') X_fs = X_fs.assign( cat_marriage=get_bs2_cat(df_policy, X_fs.index, 'fmarriage')) # policy print('Getting column real_cancel') X_fs = X_fs.assign(real_cancel=get_bs2_real_cancel(df_policy, X_fs.index)) print('Getting column cat_area') X_fs = X_fs.assign( cat_area=get_bs2_cat(df_policy, X_fs.index, 'iply_area')) print('Getting column cat_ic_combo') X_fs = X_fs.assign( cat_ic_combo=get_bs2_cat_ic_combo(df_policy, X_fs.index)) print('Getting column cat_ic_grp_combo') X_fs = X_fs.assign( cat_ic_grp_combo=get_bs2_cat_ic_grp_combo(df_policy, X_fs.index)) print('Getting column cat_distr') X_fs = X_fs.assign( cat_distr=get_bs2_cat(df_policy, X_fs.index, 'Distribution_Channel')) print('Getting column real_acc_dmg') X_fs = X_fs.assign( real_acc_dmg=get_bs2_cat(df_policy, X_fs.index, 'pdmg_acc')) print('Getting column real_acc_lia') X_fs = X_fs.assign( real_acc_lia=get_bs2_cat(df_policy, X_fs.index, 'plia_acc')) print('Getting column real_dage') X_fs = X_fs.assign(real_dage=get_bs2_real_dage(df_policy, X_fs.index)) print('Getting column real_prem_terminate') X_fs = X_fs.assign( real_prem_terminate=get_bs2_real_prem_terminate(df_policy, X_fs.index)) # vehicle print('Getting column cat_vmm1') X_fs = X_fs.assign( cat_vmm1=get_bs2_cat(df_policy, X_fs.index, 'Vehicle_Make_and_Model1')) print('Getting column cat_vmm2') X_fs = X_fs.assign( cat_vmm2=get_bs2_cat(df_policy, X_fs.index, 'Vehicle_Make_and_Model2')) print('Getting column real_vmy') X_fs = X_fs.assign(real_vmy=get_bs2_real_vmy(df_policy, X_fs.index)) print('Getting column real_vengine') X_fs = X_fs.assign(real_vengine=get_bs2_cat( df_policy, X_fs.index, 'Engine_Displacement_(Cubic_Centimeter)')) print('Getting column cat_vregion') X_fs = X_fs.assign(cat_vregion=get_bs2_cat(df_policy, X_fs.index, 'Imported_or_Domestic_Car')) print('Getting column cat_vc') X_fs = X_fs.assign(cat_vc=get_bs2_cat(df_policy, X_fs.index, 'Coding_of_Vehicle_Branding_&_Type')) print('Getting column real_vqpt') X_fs = X_fs.assign(real_vqpt=get_bs2_cat(df_policy, X_fs.index, 'qpt')) print('Getting column real_vcost') X_fs = X_fs.assign(real_vcost=get_bs2_cat( df_policy, X_fs.index, 'Replacement_cost_of_insured_vehicle')) # claim print('Getting column real_num_claim') X_fs = X_fs.assign( real_num_claim=get_bs2_real_num_claim(df_claim, X_fs.index)) print('Getting column real_nearest_claim') X_fs = X_fs.assign( real_nearest_claim=get_bs2_real_nearest_claim(df_claim, X_fs.index)) print('Getting column cat_claim_cause') X_fs = X_fs.assign( cat_claim_cause=get_bs2_cat_claim_cause(df_claim, X_fs.index)) print('Getting column real_loss') X_fs = X_fs.assign( real_loss=get_bs2_real_claim(df_claim, 
X_fs.index, 'Paid_Loss_Amount')) print('Getting column real_loss_ins') X_fs = X_fs.assign( real_loss_ins=get_bs2_real_loss_ins(df_policy, df_claim, X_fs.index)) print('Getting column real_salvage') X_fs = X_fs.assign(real_salvage=get_bs2_real_claim( df_claim, X_fs.index, 'Salvage_or_Subrogation?')) print('Getting column real_claim_fault') X_fs = X_fs.assign( real_claim_fault=get_bs2_real_claim_fault(df_claim, X_fs.index)) print('Getting column cat_claim_area') X_fs = X_fs.assign( cat_claim_area=get_bs2_cat_claim_area(df_claim, X_fs.index)) print('Getting column real_claimants') X_fs = X_fs.assign( real_claimants=get_bs2_real_claimants(df_claim, X_fs.index)) # helper columns print('Getting column real_prem_plc, for mean encoding use') X_fs = X_fs.assign( real_prem_plc=get_bs2_real_prem_plc(df_policy, X_fs.index)) print('\nSplitting train valid test features\n') X_train_all = X_fs.loc[y_train_all.index] X_test = X_fs.loc[y_test.index] # add mean encoding on mean of diff btw next_premium and premium cols_cat = ['cat_vmm1', 'cat_vmm2', 'cat_vc'] for col_cat in cols_cat: col_mean = col_cat.replace('cat_', 'real_mc_mean_diff_') print('Getting column ' + col_mean) X_test[col_mean] = get_bs2_real_mc_mean_diff(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_mean] = get_bs2_real_mc_mean_diff( col_cat, X_train_all, y_train_all, X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) # add mean encoding on mean of dividend btw next_premium and premium cols_cat = ['cat_claim_cause'] for col_cat in cols_cat: col_mean = col_cat.replace('cat_', 'real_mc_mean_div_') print('Getting column ' + col_mean) X_test[col_mean] = get_bs2_real_mc_mean_div(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_mean] = get_bs2_real_mc_mean_div( col_cat, X_train_all, y_train_all, X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) # add median encoding on median of next_premium cols_cat = [ 'cat_ins', 'cat_assured', 'cat_sex', 'cat_distr', 'cat_ic_combo' ] for col_cat in cols_cat: col_median = col_cat.replace('cat_', 'real_mc_median_') print('Getting column ' + col_median) X_test[col_median] = get_bs2_real_mc_median(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_median] = get_bs2_real_mc_median( col_cat, X_train_all, y_train_all, X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) # add median encoding on median of diff btw next_premium and premium cols_cat = ['cat_assured', 'cat_sex'] for col_cat in cols_cat: col_median = col_cat.replace('cat_', 'real_mc_median_diff_') print('Getting column ' + col_median) X_test[col_median] = get_bs2_real_mc_median_diff(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_median] = get_bs2_real_mc_median_diff( col_cat, X_train_all, y_train_all, X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) # add median encoding on median of div btw next_premium and premium cols_cat = [ 'cat_ins', 'cat_assured', 'cat_sex', 'cat_distr', 'cat_ic_combo', 'cat_ic_grp_combo', 'cat_area', 'cat_vregion' ] for col_cat in cols_cat: col_median = col_cat.replace('cat_', 'real_mc_median_div_') print('Getting column ' + col_median) X_test[col_median] = get_bs2_real_mc_median_div(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_median] = get_bs2_real_mc_median_div( col_cat, X_train_all, y_train_all, 
X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) # add mean encoding on probability of next_premium being 0 cols_cat = ['cat_ins', 'cat_marriage', 'cat_claim_cause', 'cat_claim_area'] for col_cat in cols_cat: col_prob = col_cat.replace('cat_', 'real_mc_prob_') print('Getting column ' + col_prob) X_test[col_prob] = get_bs2_real_mc_prob(col_cat, X_train_all, y_train_all, X_valid=X_test, train_only=False, fold=5, prior=1000) X_train_all[col_prob] = get_bs2_real_mc_prob(col_cat, X_train_all, y_train_all, X_valid=pd.DataFrame(), train_only=True, fold=5, prior=1000) print('Writing results to file') write_data(X_train_all, "X_train_all_bs2.csv") write_data(y_train_all, "y_train_all_bs2.csv") write_data(X_test, "X_test_bs2.csv") write_data(y_test, "y_test_bs2.csv") return (None)
def get(self):
    write_data(self, 'main.html', {})
def main(args):
    # Parameters
    initial_seed = 123456  # Used to generate the set of seeds for repetitions
    n_repetitions = 30
    n_iterations = args.iterations
    initial_pheromone = 0.5
    t_min = 0.001  # Min pheromone level
    t_max = 0.999  # Max pheromone level
    rho = args.rho  # Pheromone decay rate
    alpha = args.alpha
    beta = args.beta

    # Initializations
    random_seeds = utils.generate_seeds(initial_seed, n_repetitions)
    n, p, nodes = utils.read_data(args.dataset)
    world = World(n, p, nodes)
    n_ants = (n - p) if args.ants is None else args.ants
    colony = Colony(n_ants)
    ni = aco.information_heuristic(world)  # Information Heuristic
    dataset_name = args.dataset.split('/')[-1].split('.')[0]
    output = np.zeros((n_repetitions, n_iterations, 3))
    output_dir = "../results/{}it{}rho{}alpha{}beta{}ants{}/".format(
        dataset_name, n_iterations, rho, alpha, beta, n_ants)

    # Main loop
    for repetition in range(n_repetitions):
        np.random.seed(random_seeds[repetition])

        # Reset things for new repetition
        g_best = Solution(distance=np.inf)
        world.reset_pheromones(initial_pheromone)

        print("Repetition {}\n".format(repetition))
        for iteration in tqdm(range(n_iterations)):
            for ant in colony.ants:
                ant.build_solution(world, ni, alpha, beta)

            l_best, l_worst = aco.evaluate_solutions(world, colony)
            world.update_pheromones(rho, g_best, l_best, l_worst)

            # Check algorithm stagnation
            if aco.is_stagnated(world, t_min, t_max):
                world.reset_pheromones(initial_pheromone)

            # Update global solution
            if l_best.distance < g_best.distance:
                g_best = l_best

            # Reset for next iteration
            colony.reset_solutions()

            # Store output data
            output[repetition][iteration][0] = g_best.distance
            output[repetition][iteration][1] = l_best.distance
            output[repetition][iteration][2] = l_worst.distance

        print("\nBest solution\n"
              "-------------\n"
              "Distance: {}\n"
              "Medians: {}\n".format(g_best.distance, g_best.medians))

    utils.write_data(output_dir, output)
#!/usr/bin/env python3
import sys
sys.path.append("../")
from param_config import config
from imblearn.over_sampling import RandomOverSampler, SMOTE
from utils import read_data, write_data
import numpy as np
from scipy import stats

np.random.seed(config.set_seed)


def imbalance_split(X, y):
    ros = SMOTE(random_state=config.set_seed)
    X_res, y_res = ros.fit_sample(X, y)
    return X_res, y_res


# Try imbalance learn
if __name__ == '__main__':
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    X, y = imbalance_split(train_x, train_y)
    write_data(config.e_xtrain, X)
    write_data(config.e_ytrain, y)
def total_missing(train, test):
    header_str = 'total_missing'
    train[header_str] = train.isnull().sum(axis=1)
    test[header_str] = test.isnull().sum(axis=1)
    return train, test


def missing(x_train, x_test):
    train_missing = get_missing(x_train)
    test_missing = get_missing(x_test)
    assert len(train_missing) == len(test_missing)
    train = create_missing(train_missing.values, x_train)
    test = create_missing(train_missing.values, x_test)
    train, test = total_missing(train, test)
    return train, test


if __name__ == '__main__':
    train_x = pd.read_csv(config.train_file)
    test_x = pd.read_csv(config.test_file)

    # Shuffle training data
    train_x = shuffle(train_x, random_state=config.set_seed)
    train_x, test_x = missing(train_x, test_x)

    print('#### Writing Pickle 01: Missing ####')
    write_data(config.a_xtrain, train_x)
    write_data(config.a_xtest, test_x)
def get(self):
    write_data(self, 'server/server.html', {})
def main(corpus_dir, labels_dir, output_dir, sample_rate=16000, use_reference=False): if not os.path.exists(output_dir): os.makedirs(output_dir) datadir = os.path.join(corpus_dir, 'core') wav_base = 'FILEID sox WAVPATH -r {0} -t .wav - |'.format(sample_rate) if use_reference: ref_dir = os.path.join(labels_dir, 'reference_labels', 'speaker_labels', 'lab') reference_list = [f.replace('.lab', '') for f in os.listdir(ref_dir)] # utterances with issues, ignore these reject_list = ['02F-Therapy_07-004A', '20M-BL2-009A'] speaker_utts = {} text, wav = [], [] utt2spk, spk2utt = [], [] utt2dur = [] speakers = os.listdir(datadir) for speaker in speakers: sessions = os.listdir(os.path.join(datadir, speaker)) for session in sessions: session_dir = os.path.join(datadir, speaker, session) flist = [f for f in os.listdir(session_dir) if f.endswith('.wav')] for f in flist: f = f.replace('.wav', '') fileid = '-'.join([speaker, session, f]) if fileid in reject_list: continue if use_reference: if fileid not in reference_list: continue # use prompt for text, although it will be ignored for decoding txt_f = os.path.join(session_dir, f+'.txt') with open(txt_f, 'r') as fid: txt = fid.readline().rstrip() words = [] for w in txt.split(): w = w.upper() words.append(w) words = ' '.join([fileid] + words) text.append(words) # prepare wav.scp wavpath = os.path.join(session_dir, f+'.wav') file_wav = wav_base.replace('FILEID', fileid) file_wav = file_wav.replace('WAVPATH', wavpath) wav.append(file_wav) # prepare utt2dur dur = get_duration(wavpath) utt2dur.append('{0} {1}'.format(fileid, dur)) # prepare utt2spk utt2spk.append('{0} {1}'.format(fileid, speaker)) if speaker in speaker_utts: speaker_utts[speaker].append(fileid) else: speaker_utts[speaker] = [fileid] # prepare spk2utt for speaker in speaker_utts: spk_utts = '{0} {1}'.format(speaker, ' '.join(sorted(speaker_utts[speaker]))) spk2utt.append(spk_utts) text_f = os.path.join(output_dir, 'text') wav_f = os.path.join(output_dir, 'wav.scp') utt2spk_f = os.path.join(output_dir, 'utt2spk') spk2utt_f = os.path.join(output_dir, 'spk2utt') utt2dur_f = os.path.join(output_dir, 'utt2dur') write_data(text, text_f) write_data(wav, wav_f) write_data(utt2spk, utt2spk_f) write_data(spk2utt, spk2utt_f) write_data(utt2dur, utt2dur_f) # validate data directory validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(output_dir) os.system(validate_cmd)
MOVEMENT_REGEXP = r'(\w+)_movement_\d+_\d+.txt'
mvtfiles = os.listdir(dirname)
mvtfiles = filter(lambda f: re.match(MOVEMENT_REGEXP, f), mvtfiles)
mvtfiles = sorted(mvtfiles)

all_accel = []
all_gyro = []
all_labels = []
for fname in mvtfiles:
    with open(os.path.join(dirname, fname)) as f:
        accel, gyro, labels = utils.load_data(f)
    shimmerid = re.match(MOVEMENT_REGEXP, fname).group(1)
    print shimmerid
    acalib = utils.load_calibration_from_properties(
        os.path.join(calib_dirname, '1_5_%s.accel.properties' % shimmerid))
    gcalib = utils.load_calibration_from_properties(
        os.path.join(calib_dirname, '1_5_%s.gyro.properties' % shimmerid))
    accel = 9.81 * (accel - acalib['offset'][:, None]) / acalib['gain'][:, None]
    gyro = (gyro - gcalib['offset'][:, None]) / gcalib['gain'][:, None]
    all_accel.append(accel)
    all_gyro.append(gyro)
    all_labels.append(labels)

##
all_accel = np.squeeze(np.array(all_accel))
all_gyro = np.squeeze(np.array(all_gyro))
all_labels = np.squeeze(np.array(all_labels))

##
outfname = os.path.join(dirname, 'out_calib.txt')
with open(outfname, 'w') as f:
    utils.write_data(f, all_accel, all_gyro, all_labels)

##
os.system('touch %s' % data_file)
it = 0

## Coarse calibration
original_use_auto_judge = use_auto_judge
use_auto_judge = True
if not use_mp:
    for i in range(len(path)):
        target = path[i]
        im_name, index, p_robot = move_shoot(target, mode)
        p_camera = circle_fitting(im_name, ("flag" + str(i) + "_" + index),
                                  need_judge=False)
        write_data(data_file, [p_robot, p_camera], index, type="point_pair")
if use_mp:
    for i in range(len(path)):
        print("Working Process")
        print(working_worker_pipe)
        target = path[i]
        im_name, index, p_robot = move_shoot(target, mode)
        worker[i % worker_num].inQ.put(
            [im_name, ("flag" + str(i) + "_" + index), p_robot])
        working_worker_pipe.append(i % worker_num)
        if len(working_worker_pipe) == worker_num:
            last_idx = working_worker_pipe[0]
            result = worker[last_idx].outQ.get()
model.add(Dense(num_classes, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)

yp = model.predict(X_dev)
yp = np.argmax(yp, axis=1)
y_dev = np.argmax(y_dev, axis=1)

# performance on dev set
print_classfication_report('Keras', y_dev, yp, stem='keras_dev')
pr_curve(y_dev, yp, num_classes, 'keras_dev')

# prediction is done for every sample, and the prediction for a whole block
# is made by consensus voting; blocks_consensus reports the per-block
# prediction distribution, to make sure there is no bimodal distribution
blocks_pred, blocks_consensus = score_model(model, X_test)

# writing csv file
write_data(blocks_pred, 'khan_speaker_labels_MLP.csv')
def __init__(self, input_path, output_dir='out'): ''' Loads and preprocesses the data, generates the mask. Args: input_path (str): path to the input file, overrides the config file output_dir (str): path to the output directory ''' # Input (path to the input image) self.input_path = input_path input_fname_parts = os.path.split(self.input_path)[-1].split('.') if len(input_fname_parts) == 1: print( 'Error: the input file has no extension, please use .npy or .fits' ) sys.exit() input_fname = ('.').join( input_fname_parts[:-1]) # file name w/o extension # output folder if not os.path.exists(output_dir): os.mkdir(output_dir) # output file names self.output_paths = {} out_files = ['deproj', 'smooth', 'mask', 'smooth3d'] for f in out_files: self.output_paths[f] = os.path.join(output_dir, f + '_' + input_fname + '.npy') self.output_paths['pds'] = os.path.join(output_dir, 'pds_' + input_fname + '.txt') #input # mask_path = config.get("input", "mask_path") # # #output # rm_out_path = config.get("output", "rm_out_path") # # if regime=="2d": # #gen2d_params # lx = config.getint("gen2d_params", "Lx") # ly = config.getint("gen2d_params", "Ly") # p1 = config.getfloat("gen2d_params", "p1") # p2 = config.getfloat("gen2d_params", "p2") # kb = config.getfloat("gen2d_params", "kb") # C = config.getfloat("gen2d_params","C") # apply_mask = config.getboolean("gen2d_params", "apply_mask") # bw = config.getfloat("gen2d_params", "beam_width") # # elif regime=="3d": # #gen3d_params # lx = config.getint("gen3d_params", "Lx") # ly = config.getint("gen3d_params", "Ly") # lz = config.getint("gen3d_params", "Lz") # p1 = config.getfloat("gen3d_params", "p1") # p2 = config.getfloat("gen3d_params", "p2") # kb = config.getfloat("gen3d_params", "kb") # C = config.getfloat("gen3d_params", "C") # apply_mask = config.getboolean("gen3d_params", "apply_mask") # inclin = config.getfloat("gen3d_params", "inclin") # alpha = config.getfloat("gen3d_params", "alpha") ftype = self.input_path.split('.')[-1].lower() # load data d = load_data(self.input_path, ftype=ftype, print_info=True) # crop it ir, jr = np.nonzero(np.invert(np.isnan(d))) imin, jmin, imax, jmax = ir.min(), jr.min(), ir.max(), jr.max() self.data = d[imin:imax + 1, jmin:jmax + 1] self.data_dim = self.data.shape # adjust the cluster center location after the cropping self.cluster_params = {} self.cluster_params['center'] = [-imin, -jmin] # make the mask given the cropped input data self.mask = np.array(np.invert(np.isnan(self.data)), dtype=float) # save the mask write_data(self.output_paths['mask'], self.mask, ftype='npy')
# This code generates fake weather data with Location, Latitude, Longitude, Local Time,
# Weather condition, Temperature, Pressure and Humidity
##########################
import utils


# Returns the full weather data for all cities
def generate_weather_data(cities):
    full_weather_data = []
    for city in cities:
        latitude, longitude = utils.get_lat_and_lon()
        altitude = utils.get_altitude()
        local_time = utils.get_local_time()
        temperature = utils.get_temperature()
        pressure = utils.get_pressure()
        weather_condition = utils.get_condition(temperature)
        humidity = utils.get_humidity()
        entry = utils.create_weather_entry(city, latitude, longitude, altitude,
                                           local_time, weather_condition,
                                           temperature, pressure, humidity)
        full_weather_data.append(entry)
    return full_weather_data


if __name__ == '__main__':
    city_names = utils.read_cities()
    final_weather_data = generate_weather_data(city_names)
    utils.write_data(final_weather_data)
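The weather generator above calls `utils.write_data(final_weather_data)` with no explicit destination. One hypothetical sink, writing one generated entry per line to a text file; the real helper's format and output path are not shown in the source:

def write_data(weather_data, output_path='weather_data.txt'):
    # Hypothetical sink for the generated entries; the real utils.write_data
    # may format or store them differently.
    with open(output_path, 'w') as f:
        for entry in weather_data:
            f.write('{}\n'.format(entry))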
def set_data(self): settings_default = return_data("./data/settings_default.json") if data_exists("./data/settings.json"): settings = return_data("./data/settings.json") else: logger.alt( "Set-Settings-Data", "No existing settings found to be parsed, creating new from default." ) write_data("./data/settings.json", settings_default) settings = return_data("./data/settings.json") if not validate_data(settings, settings_default): logger.error( "Set-Settings-Data", "Parsed settings data is malformed! " "This will most likely cause a fatal exception. " "Try removing existing settings.json") self.webhook_edit.setText(settings["webhook"]) if settings["webhookonbrowser"]: self.browser_checkbox.setChecked(True) if settings["webhookonorder"]: self.order_checkbox.setChecked(True) if settings["webhookonfailed"]: self.paymentfailed_checkbox.setChecked(True) if settings["browseronfailed"]: self.onfailed_checkbox.setChecked(True) if settings["runheadless"]: self.headless_checkbox.setChecked(True) if settings["bb_ac_beta"]: self.bb_ac_beta_checkbox.setChecked(True) if settings['onlybuyone']: self.buy_one_checkbox.setChecked(True) if settings['dont_buy']: self.dont_buy_checkbox.setChecked(True) if settings['random_delay_start']: self.random_delay_start.setText(settings["random_delay_start"]) if settings['random_delay_stop']: self.random_delay_stop.setText(settings["random_delay_stop"]) self.geckodriver_path = settings["geckodriver"] # try: # self.geckodriver.setText(settings["geckodriver"]) # except: # self.geckodriver.setText("") try: self.bestbuy_user_edit.setText(settings["bestbuy_user"]) except: self.bestbuy_user_edit.setText("") try: self.bestbuy_pass_edit.setText((Encryption().decrypt( settings["bestbuy_pass"].encode("utf-8"))).decode("utf-8")) except: self.bestbuy_pass_edit.setText("") try: self.target_user_edit.setText(settings["target_user"]) except: self.target_user_edit.setText("") try: self.target_pass_edit.setText((Encryption().decrypt( settings["target_pass"].encode("utf-8"))).decode("utf-8")) except: self.target_pass_edit.setText("") try: self.gamestop_user_edit.setText(settings["gamestop_user"]) except: self.gamestop_user_edit.setText("") try: self.gamestop_pass_edit.setText((Encryption().decrypt( settings["gamestop_pass"].encode("utf-8"))).decode("utf-8")) except: self.gamestop_pass_edit.setText("") self.update_settings(settings)