Example #1
 def get(self):
     if self.get_current_user():
         self.redirect('/')
         return
     write_data(self, 'login.html', {
         'login_error':None,
         })
def save_predotar():
    genomes = ppr.get_genomes()
    data = []
    vals = [
        u"none",
        u"mitochondrial",
        u"plastid",
        u"er",
        u"elsewhere",
        u"possibly mitochondrial",
        u"possibly plastid",
        u"possibly er",
        u"possibly elsewhere",
    ]
    for g in genomes:
        pprs = ppr.load_records(g)
        if len(pprs) < 50:
            continue

        row = [0] * len(vals)

        total = float(len(pprs))

        for p in pprs:
            pred = p.annotations["predotar"]
            if pred not in vals:
                raise ValueError("didn't expect {}".format(pred))
            else:
                row[vals.index(pred)] += 1
        row = [float(r) / total for r in row]
        data.append([short_name(g)] + row)

    data.sort(key=lambda d: d[1])
    utils.write_data(["genome"] + vals, data, "output/ppr_predotar.dat")
Example #3
 def post(self):
     if self.get_current_user():
         raise tornado.web.HTTPError(403)
     username = self.get_argument('form-username')
     password = self.get_argument('form-password')
     login_error = None
     #login_error = 'err_userpass'
     if login_error:
         write_data(self, 'login.html', {
             'login_error':login_error,
             })
         return
     self.set_secure_cookie('user',username,settings.cookie_timeout)
     self.redirect(self.get_argument('next','/'))
def get_localization(genome):
    data = []
    for g in genome:
        if len(g.pprs) < 50:
            continue
        total = float(len(g.pprs))
        loc_c = 100.0 * len([1 for p in g.pprs if p.localization() == "C"]) / total
        loc_m = 100.0 * len([1 for p in g.pprs if p.localization() == "M"]) / total
        loc_s = 100.0 * len([1 for p in g.pprs if p.localization() == "S"]) / total
        loc_o = 100.0 * len([1 for p in g.pprs if p.localization() == "_"]) / total
        loc_u = 100.0 * len([1 for p in g.pprs if p.localization() == "*"]) / total
        data.append((short_name(g.name), loc_c, loc_m, loc_s, loc_o, loc_u))

    data.sort(key=lambda d: d[1])
    utils.write_data(("genome", "c", "m", "s", "other", "unknown"), data, "output/ppr_localization.dat")
Example #5
def ppr_distance():
	genomes = PPR.load_genomes()

	genomes = [g for g in genomes if len(g) > 50]

	data = []

	for i,g in enumerate(genomes, 1):
		print "Genome {}/{}: {} ({})".format(i, len(genomes),
				g.name,short_name(g.name))
		data.append([short_name(g.name),] + get_average(g,genomes))


	utils.write_data(['',] + [reduce_name(g.name) for g in genomes],
			data,
			'output/Average_PPR_distance.dat')
def get_length_family(genomes):
    pprs = [p for g in genomes for p in g.pprs]
    type_P = [p for p in pprs if p.family() == "P"]
    type_E = [p for p in pprs if p.family() == "E"]
    type_Ep = [p for p in pprs if p.family() == "E+"]
    type_DYW = [p for p in pprs if p.family() == "DYW"]
    type_PLS = [p for p in pprs if p.family() == "PLS"]

    P_hist = length_hist(type_P)
    E_hist = length_hist(type_E)
    Ep_hist = length_hist(type_Ep)
    DYW_hist = length_hist(type_DYW)
    PLS_hist = length_hist(type_PLS)

    hist = fmt_hist([P_hist, E_hist, Ep_hist, DYW_hist, PLS_hist])

    utils.write_data(("length", "p", "e", "ep", "dyw", "pls"), hist, "output/ppr_family_lengths.dat")
Example #7
def names(request,person,do_view=False):
	names = set(json.loads(open(settings.NAMES_DB,'r').read())['names'])
	oked, vetoed = utils.get_data(person)

	if request.method == "POST":
		name = request.POST['name']
		ok = 0
		if 'yes' in request.POST:
			oked.add(name)
			ok = 1
		if 'no' in request.POST:
			vetoed.add(name)
		if 'cancel' in request.POST:
			if name in oked:
				oked.remove(name)
			if name in vetoed:
				vetoed.remove(name)
		utils.write_data(person, oked, vetoed)
		response = HttpResponseRedirect('/names/%s/' % (person,))
		if 'yes' in request.POST or 'no' in request.POST:
			safe_name = urllib.quote(name.encode('UTF-8'))
			response.set_cookie('last_decision',value='%s|%s' % (ok,safe_name),max_age=60*60) 
		return response

	remaining = names - vetoed - oked

	last_decision = request.COOKIES.get('last_decision','|').split('|')
	last_decision[1] = urllib.unquote(last_decision[1])
	context = {
			'name': random.choice(list(remaining)) if len(remaining) else None
			, 'num_remaining': len(remaining)
			, 'num_ok': len(oked)
			, 'num_veto': len(vetoed)
			, 'last_decision': last_decision
			}
	if do_view:
		context.update({'do_view':True, 'oked': utils.sort_nameset(oked) , 'vetoed': utils.sort_nameset(vetoed)})

	template = 'names/%s.html' % ('namelist' if do_view else 'names',)
	response = HttpResponse(loader.get_template(template).render(RequestContext(request,context)))
	response.delete_cookie('last_decision')
	return response
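This view persists each person's accepted and vetoed name sets through utils.get_data and utils.write_data. A minimal sketch of that pair, assuming one JSON file per person under an assumed DATA_DIR (not the project's actual storage layout):

import json
import os

DATA_DIR = 'data'  # assumed location for the per-person JSON files


def _path(person):
    return os.path.join(DATA_DIR, '%s.json' % person)


def get_data(person):
    """Return (oked, vetoed) as sets; empty sets if nothing was saved yet."""
    try:
        with open(_path(person), 'r') as fh:
            saved = json.load(fh)
    except IOError:
        return set(), set()
    return set(saved.get('oked', [])), set(saved.get('vetoed', []))


def write_data(person, oked, vetoed):
    """Persist both sets; sorted lists keep the file stable between writes."""
    with open(_path(person), 'w') as fh:
        json.dump({'oked': sorted(oked), 'vetoed': sorted(vetoed)}, fh)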
Example #8
def write_model_parameters(hps, output_fname=None, datasets=None):
  """Save all the model parameters

  Save all the parameters to hps.lfads_save_dir.

  Args:
    hps: The dictionary of hyperparameters.
    output_fname: The prefix of the file in which to save the generated
      samples.
    datasets: A dictionary of data dictionaries.  The dataset dict is simply a
      name(string)-> data dictionary mapping (See top of lfads.py).
  """
  if not output_fname:
    output_fname = "model_params"
  else:
    output_fname = output_fname + "_model_params"
  fname = os.path.join(hps.lfads_save_dir, output_fname)
  print("Writing model parameters to: ", fname)
  # save the optimizer params as well
  model = build_model(hps, kind="write_model_params", datasets=datasets) 
  model_params = model.eval_model_parameters(use_nested=False,
                                             include_strs="LFADS")
  utils.write_data(fname, model_params, compression=None)
  print("Done.")
def main():
    genomes = ppr.get_genomes()
    hist = []
    numbers = []
    locale = []
    family = []
    for g in genomes:
        print g
        pprs = list(ppr.load_pprs(g))
        numbers.append((short_name(g),) + get_numbers(pprs))
        hist.append(length_hist(pprs))
        locale.append((short_name(g),) + get_localization(pprs))
        family.append((short_name(g),) + get_family(pprs))

    numbers.sort(key=lambda n: n[1])
    family.sort(key=lambda f: sum(f[1:]))
    hist = fmt_hist(hist)

    utils.write_data(("genome",) + get_numbers_hdr(), numbers, "output/ppr_numbers.dat")
    utils.write_data(["length"] + [short_name(g) for g in genomes], hist, "output/ppr_lengths.dat")
    utils.write_data(("genome",) + get_localization_hdr(), locale, "output/ppr_localization.dat")
    utils.write_data(("genome",) + get_family_hdr(), family, "output/ppr_families.dat")
def exp_gbrbm(exp_name, T=[]):
    dataset = dg.one_exception_dataset(
        N=6,
        n=1500,
        T=T,
        lam=5000,
        exc=1,
        noise_k=1
    )
    utils.write_data(dataset, exp_name, "generated_data")
    new_data = utils.tsne(dataset, exp_name, "generated_data", T, 2)
    utils.write_data(new_data, exp_name, "generated_data_for_tsne")

    _, recovery_sample, decode_res = decoder.gbrbm_decoder(
        dataset,
        learning_rate=0.1,
        training_epochs=50,
        batch_size=1001,
        n_hidden=2000,
        plot_every=1
    )
    utils.write_data(decode_res, exp_name, "decoded_data")
    new_data = utils.tsne(decode_res, exp_name, "decoded_data", T, 2)
    utils.write_data(new_data, exp_name, "decoded_tsne_data_for_tsne")
                               auc_transfer_metric, mixmode='logsigp')

    alphas = np.asarray([frame.sig_alpha for frame in alpha_frames])
    betas = np.asarray([frame.time for frame in alpha_frames])
    gammas = np.asarray([frame.alpha for frame in alpha_frames])

    alpha_list.append(alphas)
    beta_list.append(betas)
    gamma_list = filtered_append(gammas, gamma_list)
    print(i, '/', iterations)

mean_alpha = np.mean(alpha_list, axis=0)
mean_beta = np.mean(beta_list, axis=0)
mean_gamma = np.mean(gamma_list, axis=0)

write_data("results", "baseline.txt", mean_gamma, mean_alpha, mean_beta)

for i, (a, b, c) in enumerate(zip(mean_gamma, mean_alpha, mean_beta)):
    print(i + 1, a, b, c)

fig = plt.figure(figsize=(9, 5))
ax = plt.subplot(1, 1, 1)

ax.tick_params(axis='both', which='major', labelsize=13)
ax.axhline(1, c='lightgray', ls='--')
ax.axhline(0, c='lightgray', ls='--')
ax.plot(alphas, lw=2, color='k', label='N = {0}'.format(10))

ax.set_xlim([0, opt.ALPHA_NUM_ITER - 1])
ax.set_xlabel('Number of episodes', fontsize=14)
ax.set_ylabel(r'$\sigma(\gamma)$', fontsize=14)
Example #12
def build_model(hps, kind="train", datasets=None):
  """Builds a model from either random initialization, or saved parameters.

  Args:
    hps: The hyper parameters for the model.
    kind: (optional) The kind of model to build.  Training vs inference require
      different graphs.
    datasets: The datasets structure (see top of lfads.py).

  Returns:
    an LFADS model.
  """

  build_kind = kind
  if build_kind == "write_model_params":
    build_kind = "train"
  with tf.variable_scope("LFADS", reuse=None):
    model = LFADS(hps, kind=build_kind, datasets=datasets)

  if not os.path.exists(hps.lfads_save_dir):
    print("Save directory %s does not exist, creating it." % hps.lfads_save_dir)
    os.makedirs(hps.lfads_save_dir)

  cp_pb_ln = hps.checkpoint_pb_load_name
  cp_pb_ln = 'checkpoint' if cp_pb_ln == "" else cp_pb_ln
  if cp_pb_ln == 'checkpoint':
    print("Loading latest training checkpoint in: ", hps.lfads_save_dir)
    saver = model.seso_saver
  elif cp_pb_ln == 'checkpoint_lve':
    print("Loading lowest validation checkpoint in: ", hps.lfads_save_dir)
    saver = model.lve_saver
  else:
    print("Loading checkpoint: ", cp_pb_ln, ", in: ", hps.lfads_save_dir)
    saver = model.seso_saver

  ckpt = tf.train.get_checkpoint_state(hps.lfads_save_dir,
                                       latest_filename=cp_pb_ln)

  session = tf.get_default_session()
  print("ckpt: ", ckpt)
  if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    if kind in ["posterior_sample_and_average", "prior_sample",
                "write_model_params"]:
      print("Possible error!!! You are running ", kind, " on a newly \
      initialized model!")
      print("Are you sure you sure ", ckpt.model_checkpoint_path, " exists?")

    tf.global_variables_initializer().run()

  if ckpt:
    train_step_str = re.search('-[0-9]+$', ckpt.model_checkpoint_path).group()
  else:
    train_step_str = '-0'

  fname = 'hyperparameters' + train_step_str + '.txt'
  hp_fname = os.path.join(hps.lfads_save_dir, fname)
  hps_for_saving = jsonify_dict(hps)
  utils.write_data(hp_fname, hps_for_saving, use_json=True)

  return model
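Here utils.write_data stores the hyperparameters as JSON (use_json=True), while Example #8 above uses it with compression=None to dump a dictionary of model parameter arrays. A sketch of a writer covering both call patterns, assuming HDF5 via h5py for array dictionaries; this is a simplification, not the library's exact implementation:

import json
import os

import h5py


def write_data(data_fname, data_dict, use_json=False, compression=None):
    """Sketch: dump data_dict either as JSON or as one HDF5 dataset per key."""
    dirname = os.path.dirname(data_fname)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    if use_json:
        with open(data_fname, 'w') as fh:
            json.dump(data_dict, fh, indent=2)
    else:
        with h5py.File(data_fname, 'w') as hf:
            for key, value in data_dict.items():
                hf.create_dataset(key, data=value, compression=compression)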
Example #13
def build_model(hps, kind="train", datasets=None):
  """Builds a model from either random initialization, or saved parameters.

  Args:
    hps: The hyper parameters for the model.
    kind: (optional) The kind of model to build.  Training vs inference require
      different graphs.
    datasets: The datasets structure (see top of lfads.py).

  Returns:
    an LFADS model.
  """

  build_kind = kind
  if build_kind == "write_model_params":
    build_kind = "train"
  with tf.variable_scope("LFADS", reuse=None):
    model = LFADS(hps, kind=build_kind, datasets=datasets)

  if not os.path.exists(hps.lfads_save_dir):
    print("Save directory %s does not exist, creating it." % hps.lfads_save_dir)
    os.makedirs(hps.lfads_save_dir)

  cp_pb_ln = hps.checkpoint_pb_load_name
  cp_pb_ln = 'checkpoint' if cp_pb_ln == "" else cp_pb_ln
  if cp_pb_ln == 'checkpoint':
    print("Loading latest training checkpoint in: ", hps.lfads_save_dir)
    saver = model.seso_saver
  elif cp_pb_ln == 'checkpoint_lve':
    print("Loading lowest validation checkpoint in: ", hps.lfads_save_dir)
    saver = model.lve_saver
  else:
    print("Loading checkpoint: ", cp_pb_ln, ", in: ", hps.lfads_save_dir)
    saver = model.seso_saver

  ckpt = tf.train.get_checkpoint_state(hps.lfads_save_dir,
                                       latest_filename=cp_pb_ln)

  session = tf.get_default_session()
  print("ckpt: ", ckpt)
  if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    if kind in ["posterior_sample_and_average", "posterior_push_mean",
                "prior_sample", "write_model_params"]:
      print("Possible error!!! You are running ", kind, " on a newly \
      initialized model!")
      # cannot print ckpt.model_checkpoint_path if there is no ckpt
      print("Are you sure a checkpoint in ", hps.lfads_save_dir,
            " exists?")

    tf.global_variables_initializer().run()

  if ckpt:
    train_step_str = re.search('-[0-9]+$', ckpt.model_checkpoint_path).group()
  else:
    train_step_str = '-0'

  fname = 'hyperparameters' + train_step_str + '.txt'
  hp_fname = os.path.join(hps.lfads_save_dir, fname)
  hps_for_saving = jsonify_dict(hps)
  utils.write_data(hp_fname, hps_for_saving, use_json=True)

  return model
Example #14
 def get(self):
     write_data(self, 'monitor/monitor.html',{
         })
Example #15
 def get(self):
     write_data(self, 'user/admin.html',{
         })
Example #16
    rf = RandomForestClassifier(max_features='auto',
                                oob_score=False,
                                class_weight='balanced',
                                random_state=config.set_seed,
                                n_jobs=-1,
                                verbose=config.verbose)
    clf = GridSearchCV(estimator=rf,
                       param_grid=params,
                       scoring='accuracy',
                       cv=cv,
                       n_jobs=-1)
    clf.fit(X, y)

    print('#### Best Params')
    print(clf.best_params_)
    print('#### Best Score')
    print(clf.best_score_)
    return clf.cv_results_  # cv_results_ supersedes the removed grid_scores_ attribute


if __name__ == '__main__':
    # Train Data
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)

    # Test Data
    test_x = read_data(config.d_xtest)
    report = grid_model(param_grid, train_x, train_y, cv=5)

    write_data(config.grid_report_rf, report)
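Several of the pipeline steps here, and in Examples #21, #28, #33 and #34 below, pass config.* paths to read_data/write_data. A sketch of a pickle-based pair matching those call sites, assuming the config values are plain file paths (the real helpers might use joblib or another serializer):

import pickle


def read_data(path):
    """Load a previously pickled object (DataFrame, array, list, ...)."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def write_data(path, data):
    """Pickle data to path, overwriting any existing file."""
    with open(path, 'wb') as fh:
        pickle.dump(data, fh, protocol=pickle.HIGHEST_PROTOCOL)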
Example #17
def generate_lorenz_data(N_trials,
                         N_inits,
                         N_cells,
                         N_steps,
                         N_stepsinbin=1,
                         dt_lorenz=None,
                         dt_spike=None,
                         dt_cal=None,
                         base_firing_rate=5.0,
                         tau_c=0.4,
                         inc_c=1.0,
                         sigma=0.2,
                         trainp=0.8,
                         seed=100,
                         save=True,
                         save_dir='./'):

    print('Generating Lorenz data', flush=True)
    N_lorenz = 3
    assert N_steps % N_stepsinbin == 0, 'Can\'t bin time steps'
    N_steps_bin = int(N_steps / N_stepsinbin)
    if dt_lorenz is None:
        dt_lorenz = np.clip(2.0 / N_steps, 0.005, 0.02)

    if dt_spike is None:
        dt_spike = dt_lorenz

    if dt_cal is None:
        dt_cal = dt_spike * N_stepsinbin

    N_train = int(N_trials * trainp)
    N_steps_burn = max(N_steps, 300)

    y = np.zeros((N_inits, N_steps + N_steps_burn, N_lorenz))

    w_lorenz = ([10.0, 28.0, 8.0 / 3.0])
    y[:, 0] = np.random.randn(N_inits, N_lorenz)
    for step in range(1, N_steps + N_steps_burn):
        dy = lorenz_grad(y[:, step - 1], w_lorenz)
        y[:, step] = eulerStep(y[:, step - 1], dy, dt_lorenz)

    print('Converting to rates and spikes', flush=True)

    y = y[:, N_steps_burn:]
    y = normalize(y)

    W = (np.random.rand(N_lorenz, N_cells) + 1) * np.sign(
        np.random.randn(N_lorenz, N_cells))
    b = np.log(base_firing_rate)

    rates = np.exp(y.dot(W) + b)
    spikes = np.array(
        [np.random.poisson(rates * dt_spike) for trial in range(N_trials)])

    if N_stepsinbin > 1:
        from scipy.stats import binned_statistic
        binned_latent = np.zeros((N_trials, N_inits, N_steps_bin, N_lorenz))
        binned_rates = np.zeros((N_trials, N_inits, N_steps_bin, N_cells))
        binned_spikes = np.zeros((N_trials, N_inits, N_steps_bin, N_cells))
        for ix in range(N_trials):
            for jx in range(N_inits):
                binned_spikes[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                         values=spikes[ix,
                                                                       jx].T,
                                                         statistic='sum',
                                                         bins=N_steps_bin)[0].T
                binned_rates[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                        values=rates[jx].T,
                                                        statistic='mean',
                                                        bins=N_steps_bin)[0].T
                binned_latent[ix, jx] = binned_statistic(x=np.arange(N_steps),
                                                         values=y[jx].T,
                                                         statistic='mean',
                                                         bins=N_steps_bin)[0].T
        rates = binned_rates
        del binned_rates
        spikes = binned_spikes.astype(int)
        del binned_spikes
        latent = binned_latent
        del binned_latent
    else:
        latent = np.array([y for trial in range(N_trials)])
        rates = np.array([rates for trial in range(N_trials)])

    calcium = np.zeros_like(spikes, dtype=float)
    fluor = np.zeros_like(spikes, dtype=float)

    ct = spikes[:, :, 0, :] * inc_c
    calcium[:, :, 0, :] = ct
    fluor[:, :,
          0, :] = ct + np.random.randn(N_trials, N_inits, N_cells) * sigma

    print('Converting to fluorescence', flush=True)
    for step in range(1, N_steps_bin):
        ct = eulerStep(ct, calcium_grad(ct, tau_c), dt_spike)
        ct = ct + inc_c * spikes[:, :, step, :]
        calcium[:, :, step, :] = ct
        fluor[:, :, step, :] = ct + np.random.randn(N_trials, N_inits,
                                                    N_cells) * sigma

    print('Train and test split')
    data_dict = {}
    for data, name in zip([latent, rates, spikes, calcium, fluor],
                          ['latent', 'rates', 'spikes', 'calcium', 'fluor']):
        data_dict['train_%s' % name] = np.reshape(
            data[:N_train], (N_train * N_inits, N_steps_bin, data.shape[-1]))
        data_dict['valid_%s' % name] = np.reshape(
            data[N_train:],
            ((N_trials - N_train) * N_inits, N_steps_bin, data.shape[-1]))

    if importlib.find_loader('oasis'):
        data_dict['train_oasis'] = deconvolve_calcium(data_dict['train_fluor'],
                                                      g=np.exp(-dt_cal /
                                                               tau_c))
        data_dict['valid_oasis'] = deconvolve_calcium(data_dict['valid_fluor'],
                                                      g=np.exp(-dt_cal /
                                                               tau_c))

    data_dict['train_data'] = data_dict['train_spikes']
    data_dict['valid_data'] = data_dict['valid_spikes']
    data_dict['train_truth'] = data_dict['train_rates']
    data_dict['valid_truth'] = data_dict['valid_rates']
    data_dict['dt'] = dt_cal

    data_dict['loading_weights'] = W

    data_dict['conversion_factor'] = 1. / (np.max(rates) * dt_cal)

    print('Saving to %s/synth_data/lorenz_%03d' % (save_dir, seed), flush=True)
    if save:
        utils.write_data('%s/synth_data/lorenz_%03d' % (save_dir, seed),
                         data_dict)

    return data_dict
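generate_lorenz_data leans on a few small helpers (lorenz_grad, eulerStep, calcium_grad, normalize). A sketch of what they plausibly compute, using the standard Lorenz equations with w = [sigma, rho, beta]; the normalization and calcium decay are assumptions about the project's helpers:

import numpy as np


def lorenz_grad(y, w):
    """Lorenz system gradient for states y of shape (..., 3), w = [sigma, rho, beta]."""
    sigma, rho, beta = w
    dy = np.empty_like(y)
    dy[..., 0] = sigma * (y[..., 1] - y[..., 0])
    dy[..., 1] = y[..., 0] * (rho - y[..., 2]) - y[..., 1]
    dy[..., 2] = y[..., 0] * y[..., 1] - beta * y[..., 2]
    return dy


def eulerStep(y, dy, dt):
    """One explicit Euler integration step."""
    return y + dt * dy


def calcium_grad(c, tau_c):
    """Exponential calcium decay with time constant tau_c (assumed form)."""
    return -c / tau_c


def normalize(y):
    """Zero-mean, unit-variance scaling of each latent dimension (assumed form)."""
    return (y - y.mean(axis=(0, 1), keepdims=True)) / y.std(axis=(0, 1), keepdims=True)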
Example #18
 def save_profile(self):
     profile_name = self.profilename_edit.text()
     profile_data = {
         "profile_name":
         profile_name,
         "shipping_fname":
         self.shipping_fname_edit.text(),
         "shipping_lname":
         self.shipping_lname_edit.text(),
         "shipping_email":
         self.shipping_email_edit.text(),
         "shipping_phone":
         self.shipping_phone_edit.text(),
         "shipping_a1":
         self.shipping_address1_edit.text(),
         "shipping_a2":
         self.shipping_address2_edit.text(),
         "shipping_city":
         self.shipping_city_edit.text(),
         "shipping_zipcode":
         self.shipping_zipcode_edit.text(),
         "shipping_state":
         self.shipping_state_box.currentText(),
         "shipping_country":
         self.shipping_country_box.currentText(),
         "billing_fname":
         self.billing_fname_edit.text(),
         "billing_lname":
         self.billing_lname_edit.text(),
         "billing_email":
         self.billing_email_edit.text(),
         "billing_phone":
         self.billing_phone_edit.text(),
         "billing_a1":
         self.billing_address1_edit.text(),
         "billing_a2":
         self.billing_address2_edit.text(),
         "billing_city":
         self.billing_city_edit.text(),
         "billing_zipcode":
         self.billing_zipcode_edit.text(),
         "billing_state":
         self.billing_state_box.currentText(),
         "billing_country":
         self.billing_country_box.currentText(),
         "card_number": (Encryption().encrypt(
             self.cardnumber_edit.text())).decode("utf-8"),
         "card_month":
         self.cardmonth_box.currentText(),
         "card_year":
         self.cardyear_box.currentText(),
         "card_type":
         self.cardtype_box.currentText(),
         "card_cvv":
         self.cardcvv_edit.text()
     }
     profiles = return_data("./data/profiles.json")
     for p in profiles:
         if p["profile_name"] == profile_name:
             profiles.remove(p)
             break
     profiles.append(profile_data)
     write_data("./data/profiles.json", profiles)
     if self.loadprofile_box.findText(profile_name) == -1:
         self.loadprofile_box.addItem(profile_name)
         self.parent().parent().createdialog.profile_box.addItem(
             profile_name)
     QtWidgets.QMessageBox.information(self, "Bird Bot", "Saved Profile")
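save_profile round-trips ./data/profiles.json through return_data and write_data. A minimal sketch of that pair, assuming the file is plain JSON (card-number encryption happens before the dict reaches write_data, so it is not handled here):

import json


def return_data(path):
    """Load a JSON file and return its contents (a list of profile dicts here)."""
    with open(path, 'r') as fh:
        return json.load(fh)


def write_data(path, data):
    """Overwrite the JSON file with data."""
    with open(path, 'w') as fh:
        json.dump(data, fh, indent=4)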
Example #19
def main():
    args = parser.parse_args()
    data_name = os.path.basename(args.data_path).split('.')[0]
    dir_name = os.path.dirname(args.data_path)

    data_dict = read_data(args.data_path)
    dt = data_dict['dt']
    g = np.exp(-dt / args.tau)

    train_size, steps_size, state_size = data_dict['train_fluor'].shape
    valid_size, steps_size, state_size = data_dict['valid_fluor'].shape
    data_size = train_size + valid_size
    data = np.zeros((data_size, steps_size, state_size))

    if args.undo_train_test_split:
        train_idx = data_dict['train_idx']
        valid_idx = data_dict['valid_idx']
        data[train_idx] = data_dict['train_fluor']
        data[valid_idx] = data_dict['valid_fluor']

    else:
        data[:train_size] = data_dict['train_fluor']
        data[train_size:] = data_dict['valid_fluor']

    if args.flatten:
        data = data.reshape(data_size * steps_size, state_size).transpose()
    else:
        data = data.transpose(0, 2, 1)
        data = data.reshape(data_size * state_size, steps_size)
        data = np.hstack((np.zeros((data_size * state_size, 1)), data))

    if args.known:
        S, C = deconvolve_calcium_known(data, g=g, s_min=args.scale / 2)
    else:
        if args.normalize:
            data = max_normalize(data.T, axis=0).T
        S, C, bias, G, gain, rval = deconvolve_calcium_unknown(
            data, g=g, snr_thresh=args.scale)
        tau = -dt / (np.log(G))

    if args.flatten:
        data = data.reshape(data_size, steps_size, state_size)
        S = S.reshape(data_size, steps_size, state_size)
        C = C.reshape(data_size, steps_size, state_size)

    else:
        data = data.reshape(data_size, state_size,
                            steps_size + 1).transpose(0, 2, 1)[:, 1:]
        S = S.reshape(data_size, state_size,
                      steps_size + 1).transpose(0, 2, 1)[:, 1:]
        C = C.reshape(data_size, state_size,
                      steps_size + 1).transpose(0, 2, 1)[:, 1:]

        if not args.known:
            bias = bias.reshape(data_size, state_size).mean(axis=0)
            tau = tau.reshape(data_size, state_size).mean(axis=0)
            gain = gain.reshape(data_size, state_size).mean(axis=0)

    if args.undo_train_test_split:
        train_fluor = data[train_idx]
        valid_fluor = data[valid_idx]
        train_ospikes = S[train_idx]
        valid_ospikes = S[valid_idx]
        train_ocalcium = C[train_idx]
        valid_ocalcium = C[valid_idx]

    else:
        train_fluor = data[:train_size]
        valid_fluor = data[train_size:]
        train_ospikes = S[:train_size]
        valid_ospikes = S[train_size:]
        train_ocalcium = C[:train_size]
        valid_ocalcium = C[train_size:]

    data_dict['train_fluor'] = train_fluor
    data_dict['valid_fluor'] = valid_fluor

    data_dict['train_ospikes'] = train_ospikes
    data_dict['valid_ospikes'] = valid_ospikes

    data_dict['train_ocalcium'] = train_ocalcium
    data_dict['valid_ocalcium'] = valid_ocalcium

    if not args.known:
        data_dict['obs_gain_init'] = gain
        data_dict['obs_bias_init'] = bias
        data_dict['obs_tau_init'] = tau
        data_dict['obs_var_init'] = (gain / args.scale)**2

    arg_string = '_o%s' % ('k' if args.known else 'u')
    arg_string += '_t%s' % (str(args.tau))
    arg_string += '_s%s' % (str(args.scale))
    arg_string += '_f' if args.flatten else ''
    arg_string += '_z' if args.undo_train_test_split else ''
    arg_string += '_n' if args.normalize else ''

    write_data(os.path.join(dir_name, data_name) + arg_string, data_dict)
Example #20
def run_filter():
    data = utils.read_json(Const.origin_train_filename)
    saved_data = utils.filter_out(data)
    f = open(Const.origin_train_filtered_filename, 'w')
    utils.write_data(f, saved_data)
    print 'filter finish, saved in %s' % Const.origin_train_filtered_filename
Example #21
sys.path.append("../")
from param_config import config
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
from utils import read_data
from utils import write_data
import numpy as np
np.random.seed(config.set_seed)


def join_split(X, y):
    x_train, x_val, y_train, y_val = train_test_split(
        X, y, stratify=y, test_size=config.train_val_split_pct)
    return x_train, y_train, x_val, y_val


# Try imbalance learn
if __name__ == '__main__':
    train_x = read_data(config.e_xtrain)
    train_y = read_data(config.e_ytrain)

    # split data into train and validation
    # 20 percent stratified
    x_train, y_train, x_val, y_val = join_split(train_x, train_y)

    print('#### Writing Pickle 09: Split ####')
    write_data(config.f_xtrain, x_train)
    write_data(config.f_ytrain, y_train)
    write_data(config.f_xval, x_val)
    write_data(config.f_yval, y_val)
    c = Counter(lab60)
    # Return most common label. Make sure that ratio between second most common
    # and most common is < 0.8
    counts = c.most_common(2)
    if counts[1][1] / float(counts[0][1]) > 0.8:
        print 'Ambiguous label for sampleid = %d : counts : %s' % (sampleid,
                counts)
    #print counts
    return counts[0][0]
##
newlabels = np.array([knn_DM(DM, sampleid) for sampleid in xrange(len(labels))])

modified = np.flatnonzero(newlabels - labels)
## Write a new file
with open('out.txt', 'w') as f:
    utils.write_data(f, accel, gyro, labels)

##
sample1 = 18
sample2 = 151

dist, cost, path = mlpy.dtw_std(accel[sample1,0,:], accel[sample2,0,:],
                                dist_only=False)
pl.figure()
pl.suptitle('dist = %f' % dist)
pl.subplot(211)
pl.title('%d' % sample1)
pl.plot(accel[sample1, 0, :])
pl.ylim(0, 5000)
pl.subplot(212)
pl.title('%d' % sample2)
pl.plot(accel[sample2, 0, :])
pl.ylim(0, 5000)

pl.figure()
pl.title('%d - %d' % (sample1, sample2))
pl.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
pl.plot(path[0], path[1], 'w')
Example #23
from utils import read_data
from utils import write_data
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

np.random.seed(config.set_seed)

num_cols = ['amount_tsh', 'gps_height', 'population', 'total_missing']


# Scale and Fit
# standard mean and unit variance scale
def pca(x_train, x_test):
    std_clf = make_pipeline(StandardScaler(),
                            PCA(n_components=config.pca_components))
    std_clf.fit(x_train)
    train_pca = std_clf.transform(x_train)
    test_pca = std_clf.transform(x_test)
    return train_pca, test_pca


if __name__ == '__main__':
    train_x = read_data(config.d_xtrain)
    test_x = read_data(config.d_xtest)
    train_pca, test_pca = pca(train_x[num_cols], test_x[num_cols])

    print('#### Writing Pickle 1: PCA ####')
    write_data(config.pca_xtrain, train_pca)
    write_data(config.pca_xtest, test_pca)
Example #24
    def deproject(self, config_path='params.cfg'):
        '''
        Deproject a rotation measure map by dividing it by a smooth model
        of the cluster with the parameters taken from the configuration file.

        Args:
            config_path (str): path to the configuration file
        '''

        # Observed cluster parameters (used only when the 3D PDS needs to be deprojected)

        config = configparser.ConfigParser()
        try:
            config.read(config_path)
        except IOError:
            print('Error: No config file found at ' + config_path)
            return

        # beta model parameters
        self.cluster_params['ne0'] = config.getfloat("cluster_params", "ne0")
        self.cluster_params['rc'] = config.getfloat("cluster_params", "rc")
        self.cluster_params['beta'] = config.getfloat("cluster_params", "beta")
        # size of pixel in kiloparsecs
        self.cluster_params['kpc_px'] = config.getfloat(
            "cluster_params", "kpc_px")
        # Galactic background to subtract from the observed RM map
        self.cluster_params['gal_bg'] = config.getfloat(
            "cluster_params", "gal_bg")
        # location of the cluster center on the sky in pixels
        self.cluster_params['center'][0] += config.getint(
            "cluster_params", "ix0")
        self.cluster_params['center'][1] += config.getint(
            "cluster_params", "iy0")

        #recovery
        self.recovery_params = {}
        self.recovery_params['alpha'] = config.getfloat(
            "recovery_params", "alpha")
        self.recovery_params['inclin'] = config.getfloat(
            "recovery_params", "inclin")
        # depth of the 3D box used to generate the smooth model
        self.recovery_params['lz'] = config.getint("recovery_params", "Lz")

        # subtract the background
        self.data -= self.cluster_params['gal_bg']

        kpc_px = self.cluster_params['kpc_px']
        lx, ly = self.data.shape

        print('generate a 3D smooth model of the cluster...')
        smod = gen3d_smooth_model(shape=(lx, ly),
                                  cluster_params=self.cluster_params,
                                  recovery_params=self.recovery_params)
        write_data(self.output_paths['smooth3d'],
                   smod[:, ly // 2, :],
                   ftype='npy')

        print('deproject the image...')

        # integrate the smooth model along the line of sight
        I0 = np.sqrt((smod**2).sum(axis=2)) * kpc_px * 812.
        write_data(self.output_paths['smooth'], I0, ftype='npy')

        # divide the image by the smooth model
        ind = (I0 != 0.) * np.invert(np.isnan(self.data))
        self.data[ind] /= I0[ind]
        write_data(self.output_paths['deproj'], self.data, ftype='npy')

        print('deprojection done\n')
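Every intermediate product of deproject is written with ftype='npy'. A sketch of a writer that dispatches on ftype, assuming NumPy .npy for arrays and a plain-text fallback (the extra branch is an assumption):

import numpy as np


def write_data(path, data, ftype='npy'):
    """Sketch: save data to path in the requested format."""
    if ftype == 'npy':
        np.save(path, data)      # np.save appends .npy if the path lacks it
    elif ftype == 'txt':
        np.savetxt(path, data)   # assumed plain-text option
    else:
        raise ValueError('unsupported ftype: %s' % ftype)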
Example #25
def generate_chaotic_rnn_data(Ninits=400,
                              Ntrial=10,
                              Ncells=50,
                              Nsteps=200,
                              trainp=0.8,
                              dt_rnn=0.1,
                              dt_spike=0.1,
                              tau=0.25,
                              gamma=1.5,
                              maxRate=5,
                              B=20,
                              tau_c=0.4,
                              inc_c=1.0,
                              sigma=0.2,
                              seed=5,
                              save=False,
                              save_dir='./'):
    '''
    Generate synthetic calcium fluorescence data from a chaotic recurrent neural network.

    Arguments:
        - Nsteps (int): number of simulation time steps per trial
        - dt_rnn (float): time step of chaotic RNN
        - dt_spike (float): time step of calcium trace
        - Ninits (int): Number of network initialisations
        - Ntrial (int): Number of instances with same network initialisations
        - Ncells (int): Number of cells in network
        - trainp (float): proportion of dataset to partition into training set
        - tau (float): time constant of chaotic RNN
        - gamma (float): scaling of the recurrent weight matrix (controls how chaotic the dynamics are)
        - maxRate (float): maximum firing rate of chaotic RNN
        - B (int or float): amplitude of perturbation to network
        - tau_c (float): time constant of calcium decay
        - inc_c (float): increment size of calcium influx
        - sigma (float): standard deviation of fluorescence noise
        - save (bool): save output
    '''

    np.random.seed(seed)

    # total simulated time per trial is Nsteps * dt_rnn; Nsteps comes from the argument
    Ntrial_train = int(trainp * Ntrial)

    # Chaotic RNN weight matrix
    W = gamma * np.random.randn(Ncells, Ncells) / np.sqrt(Ncells)

    rates, spikes, calcium, fluor = np.zeros(
        (4, Ninits, Ntrial, Nsteps, Ncells))

    perturb_steps = []

    for init in range(Ninits):
        y0 = np.random.randn(Ncells)

        for trial in range(Ntrial):
            perturb_step = np.random.randint(0.25 * Nsteps, 0.75 * Nsteps)
            perturb_steps.append(perturb_step)
            perturb_amp = np.random.randn(Ncells) * B
            b = 0

            yt = y0
            rt = rateScale(np.tanh(yt), maxRate=maxRate)
            st = spikify_rates(rt, dt=dt_spike)
            ct = inc_c * st

            rates[init, trial, 0, :] = rt
            spikes[init, trial, 0, :] = st
            calcium[init, trial, 0, :] = ct
            fluor[init, trial, 0, :] = ct + np.random.randn(Ncells) * sigma

            for step in range(1, Nsteps):
                yt = eulerStep(yt, RNNgrad(yt + b, W, tau), dt_rnn)
                ct = eulerStep(ct, calciumgrad(ct, tau_c), dt_spike)

                if step == perturb_step:
                    b = perturb_amp * dt_rnn / tau
                else:
                    b = 0

                rt = rateScale(np.tanh(yt), maxRate=maxRate)
                st = spikify_rates(rt, dt=dt_spike)
                ct = ct + inc_c * st

                rates[init, trial, step, :] = rt
                spikes[init, trial, step, :] = st
                calcium[init, trial, step, :] = ct
                fluor[init, trial,
                      step, :] = ct + np.random.randn(Ncells) * sigma

    # Construct data dictionary: split trials (axis 1) into train/valid and
    # collapse the (init, trial) axes into a single batch dimension
    data_dict = {}

    for data, name in zip([rates, spikes, calcium, fluor],
                          ['rates', 'spikes', 'calcium', 'fluor']):
        data_dict['train_%s' % name] = np.reshape(
            data[:, :Ntrial_train],
            (Ninits * Ntrial_train, Nsteps, Ncells))
        data_dict['valid_%s' % name] = np.reshape(
            data[:, Ntrial_train:],
            (Ninits * (Ntrial - Ntrial_train), Nsteps, Ncells))

    if importlib.find_loader('oasis'):
        data_dict['train_oasis'] = deconvolve_calcium(data_dict['train_fluor'],
                                                      g=np.exp(-dt_spike /
                                                               tau_c))
        data_dict['valid_oasis'] = deconvolve_calcium(data_dict['valid_fluor'],
                                                      g=np.exp(-dt_spike /
                                                               tau_c))

    data_dict['train_data'] = data_dict['train_spikes']
    data_dict['valid_data'] = data_dict['valid_spikes']
    data_dict['train_truth'] = data_dict['train_rates']
    data_dict['valid_truth'] = data_dict['valid_rates']
    data_dict['dt'] = dt_spike
    data_dict['perturb_times'] = np.array(perturb_steps) * dt_spike

    if save:
        utils.write_data('%s/synth_data/chaotic_rnn_%03d' % (save_dir, seed),
                         data_dict)

    return data_dict
Example #26
 def get(self):
     write_data(self, 'left.html', {})
Example #27
 def get(self):
     write_data(self, 'manage/manage.html',{
         })
Example #28
def most_frequent(x_train):
    na_cols = {}
    for c in x_train.columns:
        if x_train[c].isnull().sum() > 0:
            # impute with the most frequent value of the training column
            na_cols[c] = x_train[c].value_counts().index[0]
    return na_cols


def target_encoding(y_train):
    y_train = [target_mapping[i] for i in y_train]
    return y_train


if __name__ == '__main__':
    train_x = read_data(config.c_xtrain)
    test_x = read_data(config.c_xtest)

    train_y = read_data(config.b_ytrain)
    train_y = target_encoding(train_y)

    impute_data = most_frequent(train_x)
    train_x, test_x = label_encoding(train_x, test_x, impute_data)

    assert len(train_x.columns) == len(test_x.columns)

    print('#### Writing Pickle 04: Imputation ####')
    write_data(config.d_xtrain, train_x)
    write_data(config.d_xtest, test_x)
    write_data(config.d_ytrain, train_y)
Example #29
 def get(self):
     write_data(self, 'index.html', {
         })
Example #30
def get_bs2_combined_features(df_policy, df_claim):
    '''
    In:
        DataFrame(df_policy),
        DataFrame(df_claim),

    Out:
        DataFrame(X_fs),
        DataFrame(y_fs),

    Description:
        create train dataset with additional columns
    '''
    print('Getting labels')
    y_train_all = read_data('training-set.csv', path='raw')
    y_test = read_data('testing-set.csv', path='raw')

    print('Getting neural network processed premiums')
    X_fs = read_data('premium_60_1.csv')

    # insured
    print('Getting column cat_ins')
    X_fs = X_fs.assign(
        cat_ins=get_bs2_cat(df_policy, X_fs.index, "Insured's_ID"))

    print('Getting column cat_assured')
    X_fs = X_fs.assign(
        cat_assured=get_bs2_cat(df_policy, X_fs.index, 'fassured'))

    print('Getting column real_age')
    X_fs = X_fs.assign(real_age=get_bs2_real_age(df_policy, X_fs.index))

    print('Getting column cat_sex')
    X_fs = X_fs.assign(cat_sex=get_bs2_cat(df_policy, X_fs.index, 'fsex'))

    print('Getting column cat_marriage')
    X_fs = X_fs.assign(
        cat_marriage=get_bs2_cat(df_policy, X_fs.index, 'fmarriage'))

    # policy
    print('Getting column real_cancel')
    X_fs = X_fs.assign(real_cancel=get_bs2_real_cancel(df_policy, X_fs.index))

    print('Getting column cat_area')
    X_fs = X_fs.assign(
        cat_area=get_bs2_cat(df_policy, X_fs.index, 'iply_area'))

    print('Getting column cat_ic_combo')
    X_fs = X_fs.assign(
        cat_ic_combo=get_bs2_cat_ic_combo(df_policy, X_fs.index))

    print('Getting column cat_ic_grp_combo')
    X_fs = X_fs.assign(
        cat_ic_grp_combo=get_bs2_cat_ic_grp_combo(df_policy, X_fs.index))

    print('Getting column cat_distr')
    X_fs = X_fs.assign(
        cat_distr=get_bs2_cat(df_policy, X_fs.index, 'Distribution_Channel'))

    print('Getting column real_acc_dmg')
    X_fs = X_fs.assign(
        real_acc_dmg=get_bs2_cat(df_policy, X_fs.index, 'pdmg_acc'))

    print('Getting column real_acc_lia')
    X_fs = X_fs.assign(
        real_acc_lia=get_bs2_cat(df_policy, X_fs.index, 'plia_acc'))

    print('Getting column real_dage')
    X_fs = X_fs.assign(real_dage=get_bs2_real_dage(df_policy, X_fs.index))

    print('Getting column real_prem_terminate')
    X_fs = X_fs.assign(
        real_prem_terminate=get_bs2_real_prem_terminate(df_policy, X_fs.index))

    # vehicle
    print('Getting column cat_vmm1')
    X_fs = X_fs.assign(
        cat_vmm1=get_bs2_cat(df_policy, X_fs.index, 'Vehicle_Make_and_Model1'))

    print('Getting column cat_vmm2')
    X_fs = X_fs.assign(
        cat_vmm2=get_bs2_cat(df_policy, X_fs.index, 'Vehicle_Make_and_Model2'))

    print('Getting column real_vmy')
    X_fs = X_fs.assign(real_vmy=get_bs2_real_vmy(df_policy, X_fs.index))

    print('Getting column real_vengine')
    X_fs = X_fs.assign(real_vengine=get_bs2_cat(
        df_policy, X_fs.index, 'Engine_Displacement_(Cubic_Centimeter)'))

    print('Getting column cat_vregion')
    X_fs = X_fs.assign(cat_vregion=get_bs2_cat(df_policy, X_fs.index,
                                               'Imported_or_Domestic_Car'))

    print('Getting column cat_vc')
    X_fs = X_fs.assign(cat_vc=get_bs2_cat(df_policy, X_fs.index,
                                          'Coding_of_Vehicle_Branding_&_Type'))

    print('Getting column real_vqpt')
    X_fs = X_fs.assign(real_vqpt=get_bs2_cat(df_policy, X_fs.index, 'qpt'))

    print('Getting column real_vcost')
    X_fs = X_fs.assign(real_vcost=get_bs2_cat(
        df_policy, X_fs.index, 'Replacement_cost_of_insured_vehicle'))

    # claim
    print('Getting column real_num_claim')
    X_fs = X_fs.assign(
        real_num_claim=get_bs2_real_num_claim(df_claim, X_fs.index))

    print('Getting column real_nearest_claim')
    X_fs = X_fs.assign(
        real_nearest_claim=get_bs2_real_nearest_claim(df_claim, X_fs.index))

    print('Getting column cat_claim_cause')
    X_fs = X_fs.assign(
        cat_claim_cause=get_bs2_cat_claim_cause(df_claim, X_fs.index))

    print('Getting column real_loss')
    X_fs = X_fs.assign(
        real_loss=get_bs2_real_claim(df_claim, X_fs.index, 'Paid_Loss_Amount'))

    print('Getting column real_loss_ins')
    X_fs = X_fs.assign(
        real_loss_ins=get_bs2_real_loss_ins(df_policy, df_claim, X_fs.index))

    print('Getting column real_salvage')
    X_fs = X_fs.assign(real_salvage=get_bs2_real_claim(
        df_claim, X_fs.index, 'Salvage_or_Subrogation?'))

    print('Getting column real_claim_fault')
    X_fs = X_fs.assign(
        real_claim_fault=get_bs2_real_claim_fault(df_claim, X_fs.index))

    print('Getting column cat_claim_area')
    X_fs = X_fs.assign(
        cat_claim_area=get_bs2_cat_claim_area(df_claim, X_fs.index))

    print('Getting column real_claimants')
    X_fs = X_fs.assign(
        real_claimants=get_bs2_real_claimants(df_claim, X_fs.index))

    # helper columns
    print('Getting column real_prem_plc, for mean encoding use')
    X_fs = X_fs.assign(
        real_prem_plc=get_bs2_real_prem_plc(df_policy, X_fs.index))

    print('\nSplitting train valid test features\n')
    X_train_all = X_fs.loc[y_train_all.index]
    X_test = X_fs.loc[y_test.index]

    # add mean encoding on mean of diff btw next_premium and premium
    cols_cat = ['cat_vmm1', 'cat_vmm2', 'cat_vc']
    for col_cat in cols_cat:
        col_mean = col_cat.replace('cat_', 'real_mc_mean_diff_')
        print('Getting column ' + col_mean)
        X_test[col_mean] = get_bs2_real_mc_mean_diff(col_cat,
                                                     X_train_all,
                                                     y_train_all,
                                                     X_valid=X_test,
                                                     train_only=False,
                                                     fold=5,
                                                     prior=1000)
        X_train_all[col_mean] = get_bs2_real_mc_mean_diff(
            col_cat,
            X_train_all,
            y_train_all,
            X_valid=pd.DataFrame(),
            train_only=True,
            fold=5,
            prior=1000)

    # add mean encoding on mean ratio (division) btw next_premium and premium
    cols_cat = ['cat_claim_cause']
    for col_cat in cols_cat:
        col_mean = col_cat.replace('cat_', 'real_mc_mean_div_')
        print('Getting column ' + col_mean)
        X_test[col_mean] = get_bs2_real_mc_mean_div(col_cat,
                                                    X_train_all,
                                                    y_train_all,
                                                    X_valid=X_test,
                                                    train_only=False,
                                                    fold=5,
                                                    prior=1000)
        X_train_all[col_mean] = get_bs2_real_mc_mean_div(
            col_cat,
            X_train_all,
            y_train_all,
            X_valid=pd.DataFrame(),
            train_only=True,
            fold=5,
            prior=1000)

    # add median encoding on median of next_premium
    cols_cat = [
        'cat_ins', 'cat_assured', 'cat_sex', 'cat_distr', 'cat_ic_combo'
    ]
    for col_cat in cols_cat:
        col_median = col_cat.replace('cat_', 'real_mc_median_')
        print('Getting column ' + col_median)
        X_test[col_median] = get_bs2_real_mc_median(col_cat,
                                                    X_train_all,
                                                    y_train_all,
                                                    X_valid=X_test,
                                                    train_only=False,
                                                    fold=5,
                                                    prior=1000)
        X_train_all[col_median] = get_bs2_real_mc_median(
            col_cat,
            X_train_all,
            y_train_all,
            X_valid=pd.DataFrame(),
            train_only=True,
            fold=5,
            prior=1000)

    # add median encoding on median of diff btw next_premium and premium
    cols_cat = ['cat_assured', 'cat_sex']
    for col_cat in cols_cat:
        col_median = col_cat.replace('cat_', 'real_mc_median_diff_')
        print('Getting column ' + col_median)
        X_test[col_median] = get_bs2_real_mc_median_diff(col_cat,
                                                         X_train_all,
                                                         y_train_all,
                                                         X_valid=X_test,
                                                         train_only=False,
                                                         fold=5,
                                                         prior=1000)
        X_train_all[col_median] = get_bs2_real_mc_median_diff(
            col_cat,
            X_train_all,
            y_train_all,
            X_valid=pd.DataFrame(),
            train_only=True,
            fold=5,
            prior=1000)

    # add median encoding on median of div btw next_premium and premium
    cols_cat = [
        'cat_ins', 'cat_assured', 'cat_sex', 'cat_distr', 'cat_ic_combo',
        'cat_ic_grp_combo', 'cat_area', 'cat_vregion'
    ]
    for col_cat in cols_cat:
        col_median = col_cat.replace('cat_', 'real_mc_median_div_')
        print('Getting column ' + col_median)
        X_test[col_median] = get_bs2_real_mc_median_div(col_cat,
                                                        X_train_all,
                                                        y_train_all,
                                                        X_valid=X_test,
                                                        train_only=False,
                                                        fold=5,
                                                        prior=1000)
        X_train_all[col_median] = get_bs2_real_mc_median_div(
            col_cat,
            X_train_all,
            y_train_all,
            X_valid=pd.DataFrame(),
            train_only=True,
            fold=5,
            prior=1000)

    # add mean encoding on probability of next_premium being 0
    cols_cat = ['cat_ins', 'cat_marriage', 'cat_claim_cause', 'cat_claim_area']
    for col_cat in cols_cat:
        col_prob = col_cat.replace('cat_', 'real_mc_prob_')
        print('Getting column ' + col_prob)
        X_test[col_prob] = get_bs2_real_mc_prob(col_cat,
                                                X_train_all,
                                                y_train_all,
                                                X_valid=X_test,
                                                train_only=False,
                                                fold=5,
                                                prior=1000)
        X_train_all[col_prob] = get_bs2_real_mc_prob(col_cat,
                                                     X_train_all,
                                                     y_train_all,
                                                     X_valid=pd.DataFrame(),
                                                     train_only=True,
                                                     fold=5,
                                                     prior=1000)

    print('Writing results to file')
    write_data(X_train_all, "X_train_all_bs2.csv")
    write_data(y_train_all, "y_train_all_bs2.csv")
    write_data(X_test, "X_test_bs2.csv")
    write_data(y_test, "y_test_bs2.csv")

    return (None)
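This script reads the raw label files with read_data('...', path='raw') and writes its feature matrices with write_data(df, 'X_train_all_bs2.csv'). A sketch of those helpers, assuming a data/raw vs. data/interim layout and an index column in every CSV (both are assumptions about the project):

import os

import pandas as pd

DATA_DIR = 'data'  # assumed layout: data/raw for inputs, data/interim for features


def read_data(filename, path='interim', index_col=0):
    """Load a CSV from the chosen sub-directory, keeping the first column as the index."""
    return pd.read_csv(os.path.join(DATA_DIR, path, filename), index_col=index_col)


def write_data(df, filename, path='interim'):
    """Write a DataFrame back out, index included so it can be re-loaded the same way."""
    df.to_csv(os.path.join(DATA_DIR, path, filename))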
Example #31
 def get(self):
     write_data(self, 'main.html', {})
Example #32
def main(args):
    # Parameters
    initial_seed = 123456  # Used to generate the set of seeds for repetitions
    n_repetitions = 30
    n_iterations = args.iterations
    initial_pheromone = 0.5
    t_min = 0.001  # Min pheromone level
    t_max = 0.999  # Max pheromone level
    rho = args.rho  # Pheromone decay rate
    alpha = args.alpha
    beta = args.beta

    # Initializations
    random_seeds = utils.generate_seeds(initial_seed, n_repetitions)
    n, p, nodes = utils.read_data(args.dataset)
    world = World(n, p, nodes)
    n_ants = (n - p) if args.ants is None else args.ants
    colony = Colony(n_ants)
    ni = aco.information_heuristic(world)  # Information Heuristic
    dataset_name = args.dataset.split('/')[-1].split('.')[0]
    output = np.zeros((n_repetitions, n_iterations, 3))
    output_dir = "../results/{}it{}rho{}alpha{}beta{}ants{}/".format(
        dataset_name, n_iterations, rho, alpha, beta, n_ants)

    # Main loop
    for repetition in range(n_repetitions):
        np.random.seed(random_seeds[repetition])

        # Reset things for new repetition
        g_best = Solution(distance=np.inf)
        world.reset_pheromones(initial_pheromone)

        print("Repetition {}\n".format(repetition))

        for iteration in tqdm(range(n_iterations)):
            for ant in colony.ants:
                ant.build_solution(world, ni, alpha, beta)

            l_best, l_worst = aco.evaluate_solutions(world, colony)

            world.update_pheromones(rho, g_best, l_best, l_worst)

            # Check algorithm stagnation
            if aco.is_stagnated(world, t_min, t_max):
                world.reset_pheromones(initial_pheromone)

            # Update global solution
            if l_best.distance < g_best.distance:
                g_best = l_best

            # Reset for next iteration
            colony.reset_solutions()

            # Store output data
            output[repetition][iteration][0] = g_best.distance
            output[repetition][iteration][1] = l_best.distance
            output[repetition][iteration][2] = l_worst.distance

        print("\nBest solution\n"
              "-------------\n"
              "Distance: {}\n"
              "Medians: {}\n".format(g_best.distance, g_best.medians))

    utils.write_data(output_dir, output)
Example #33
#!/usr/bin/env python3

import sys
sys.path.append("../")
from param_config import config
from imblearn.over_sampling import RandomOverSampler, SMOTE
from utils import read_data, write_data
import numpy as np
from scipy import stats
np.random.seed(config.set_seed)


def imbalance_split(X, y):
    ros = SMOTE(random_state=config.set_seed)
    X_res, y_res = ros.fit_sample(X, y)
    return X_res, y_res


# Try imbalance learn
if __name__ == '__main__':
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    X, y = imbalance_split(train_x, train_y)

    write_data(config.e_xtrain, X)
    write_data(config.e_ytrain, y)

Example #34
def total_missing(train, test):
    header_str = 'total_missing'
    train[header_str] = train.isnull().sum(axis=1)
    test[header_str] = test.isnull().sum(axis=1)
    return train, test


def missing(x_train, x_test):
    train_missing = get_missing(x_train)
    test_missing = get_missing(x_test)
    assert len(train_missing) == len(test_missing)

    train = create_missing(train_missing.values, x_train)
    test = create_missing(train_missing.values, x_test)

    train, test = total_missing(train, test)
    return train, test


if __name__ == '__main__':
    train_x = pd.read_csv(config.train_file)
    test_x = pd.read_csv(config.test_file)

    # Shuffle training data
    train_x = shuffle(train_x, random_state=config.set_seed)
    train_x, test_x = missing(train_x, test_x)

    print('#### Writing Pickle 01: Missing ####')
    write_data(config.a_xtrain, train_x)
    write_data(config.a_xtest, test_x)
Example #35
 def get(self):
     write_data(self, 'server/server.html',{
         })
def main(corpus_dir, labels_dir, output_dir, sample_rate=16000, use_reference=False):

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datadir  = os.path.join(corpus_dir, 'core')
    wav_base = 'FILEID sox WAVPATH -r {0} -t .wav - |'.format(sample_rate)

    if use_reference:
        ref_dir = os.path.join(labels_dir, 'reference_labels', 'speaker_labels', 'lab')
        reference_list = [f.replace('.lab', '') for f in os.listdir(ref_dir)]

    # utterances with issues, ignore these
    reject_list = ['02F-Therapy_07-004A', '20M-BL2-009A']

    speaker_utts = {}
    text, wav = [], []
    utt2spk, spk2utt = [], []
    utt2dur = []

    speakers = os.listdir(datadir)

    for speaker in speakers:
        sessions = os.listdir(os.path.join(datadir, speaker))

        for session in sessions:

            session_dir = os.path.join(datadir, speaker, session)
            flist = [f for f in os.listdir(session_dir) if f.endswith('.wav')]

            for f in flist:
                f = f.replace('.wav', '')
                fileid = '-'.join([speaker, session, f])

                if fileid in reject_list:
                    continue

                if use_reference:
                    if fileid not in reference_list:
                        continue

                # use prompt for text, although it will be ignored for decoding
                txt_f = os.path.join(session_dir, f+'.txt')
                with open(txt_f, 'r') as fid:
                    txt = fid.readline().rstrip()

                words = []
                for w in txt.split():
                    w = w.upper()
                    words.append(w)

                words = ' '.join([fileid] + words)
                text.append(words)

                # prepare wav.scp
                wavpath = os.path.join(session_dir, f+'.wav')
                file_wav = wav_base.replace('FILEID', fileid)
                file_wav = file_wav.replace('WAVPATH', wavpath)
                wav.append(file_wav)

                # prepare utt2dur
                dur = get_duration(wavpath)
                utt2dur.append('{0} {1}'.format(fileid, dur))

                # prepare utt2spk
                utt2spk.append('{0} {1}'.format(fileid, speaker))

                if speaker in speaker_utts:
                    speaker_utts[speaker].append(fileid)
                else:
                    speaker_utts[speaker] = [fileid]

    # prepare spk2utt
    for speaker in speaker_utts:
        spk_utts = '{0} {1}'.format(speaker, ' '.join(sorted(speaker_utts[speaker])))
        spk2utt.append(spk_utts)

    text_f    = os.path.join(output_dir, 'text')
    wav_f     = os.path.join(output_dir, 'wav.scp')
    utt2spk_f = os.path.join(output_dir, 'utt2spk')
    spk2utt_f = os.path.join(output_dir, 'spk2utt')
    utt2dur_f = os.path.join(output_dir, 'utt2dur')
    
    write_data(text, text_f)
    write_data(wav, wav_f)
    write_data(utt2spk, utt2spk_f)
    write_data(spk2utt, spk2utt_f)
    write_data(utt2dur, utt2dur_f)

    # validate data directory
    validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(output_dir)
    os.system(validate_cmd)
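
`write_data` and `get_duration` are project helpers that are not shown here. One plausible shape for them is sketched below, assuming `write_data` writes one entry per line in sorted order (Kaldi's `validate_data_dir.sh` expects key-sorted files) and `get_duration` reads the duration in seconds via `soxi`; both bodies are assumptions, not the original implementations.

import subprocess


def write_data(lines, path):
    # one entry per line, sorted by utterance/speaker key (assumed behaviour)
    with open(path, 'w') as fid:
        for line in sorted(lines):
            fid.write(line + '\n')


def get_duration(wavpath):
    # duration in seconds as reported by soxi (assumed behaviour)
    out = subprocess.check_output(['soxi', '-D', wavpath])
    return float(out.decode().strip())
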
MOVEMENT_REGEXP = r'(\w+)_movement_\d+_\d+\.txt'
mvtfiles = os.listdir(dirname)
mvtfiles = [f for f in mvtfiles if re.match(MOVEMENT_REGEXP, f)]
mvtfiles = sorted(mvtfiles)
all_accel = []
all_gyro = []
all_labels = []
for fname in mvtfiles:
    with open(os.path.join(dirname, fname)) as f:
        accel, gyro, labels = utils.load_data(f)
    shimmerid = re.match(MOVEMENT_REGEXP, fname).group(1)
    print(shimmerid)
    acalib = utils.load_calibration_from_properties(os.path.join(calib_dirname,
        '1_5_%s.accel.properties' % shimmerid))
    gcalib = utils.load_calibration_from_properties(os.path.join(calib_dirname,
        '1_5_%s.gyro.properties' % shimmerid))
    # convert raw readings to calibrated values using the per-axis offset/gain
    accel = 9.81 * (accel - acalib['offset'][:,None]) / acalib['gain'][:,None]
    gyro = (gyro - gcalib['offset'][:,None]) / gcalib['gain'][:,None]
    all_accel.append(accel)
    all_gyro.append(gyro)
    all_labels.append(labels)
##
all_accel = np.squeeze(np.array(all_accel))
all_gyro = np.squeeze(np.array(all_gyro))
all_labels = np.squeeze(np.array(all_labels))
##
outfname = os.path.join(dirname, 'out_calib.txt')
with open(outfname, 'w') as f:
    utils.write_data(f, all_accel, all_gyro, all_labels)
##
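
The two calibration lines above invert a linear sensor model (raw = offset + gain * value) per axis. A small worked sketch with made-up offset/gain values is shown below; the real values come from the `.properties` files.

import numpy as np

# hypothetical calibration; real offsets/gains are loaded from the .properties files
acalib = {'offset': np.array([2048.0, 2048.0, 2048.0]),
          'gain': np.array([83.0, 83.0, 83.0])}

# one raw 3-axis accelerometer sample (ADC counts), shape (3, 1)
raw = np.array([[2131.0], [2048.0], [1965.0]])

# same transform as above: subtract the offset, divide by the gain, scale to m/s^2
accel = 9.81 * (raw - acalib['offset'][:, None]) / acalib['gain'][:, None]
print(accel)  # approximately [[ 9.81], [ 0.  ], [-9.81]]
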
Example #38
        os.system('touch %s' % data_file)

        it = 0

        ## Coarse calibration
        # temporarily force automatic judging during the coarse pass
        original_use_auto_judge = use_auto_judge
        use_auto_judge = True
        if not use_mp:
            for i in range(len(path)):
                target = path[i]
                im_name, index, p_robot = move_shoot(target, mode)
                p_camera = circle_fitting(im_name,
                                          ("flag" + str(i) + "_" + index),
                                          need_judge=False)
                write_data(data_file, [p_robot, p_camera],
                           index,
                           type="point_pair")

        if use_mp:
            for i in range(len(path)):
                print("Working Process")
                print(working_worker_pipe)
                target = path[i]
                im_name, index, p_robot = move_shoot(target, mode)
                worker[i % worker_num].inQ.put(
                    [im_name, ("flag" + str(i) + "_" + index), p_robot])
                working_worker_pipe.append(i % worker_num)

                if len(working_worker_pipe) == worker_num:
                    last_idx = working_worker_pipe[0]
                    result = worker[last_idx].outQ.get()
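
The multiprocessing branch relies on `worker` objects exposing `inQ`/`outQ` queues that run `circle_fitting` out of process; their definition is not part of this snippet. Below is a minimal sketch of such a worker, assuming a `multiprocessing.Process` wrapping two queues and reusing `circle_fitting` and `worker_num` from the surrounding script. It illustrates the pattern only and is not the original class.

import multiprocessing as mp


def _fitting_loop(inQ, outQ):
    # hypothetical worker loop: fit circles out of process, forever
    while True:
        im_name, tag, p_robot = inQ.get()
        p_camera = circle_fitting(im_name, tag, need_judge=False)
        outQ.put((p_robot, p_camera, tag))


class FittingWorker:
    def __init__(self):
        self.inQ = mp.Queue()
        self.outQ = mp.Queue()
        self.proc = mp.Process(target=_fitting_loop,
                               args=(self.inQ, self.outQ), daemon=True)
        self.proc.start()


# hypothetical pool matching the worker[i % worker_num] usage above
worker = [FittingWorker() for _ in range(worker_num)]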
Example #39
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

history = model.fit(X_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)
yp = model.predict(X_dev)

yp = np.argmax(yp, axis=1)
y_dev = np.argmax(y_dev, axis=1)

# performance on the dev set
print_classfication_report('Keras', y_dev, yp, stem='keras_dev')
pr_curve(y_dev, yp, num_classes, 'keras_dev')

# prediction is done for every sample and the prediction for a whole
# block is obtained through consensus (majority) voting.
# blocks_consensus reports the per-block prediction distribution,
# to make sure there is no bimodal distribution
# (a sketch of this consensus step follows this example)
blocks_pred, blocks_consensus = score_model(model, X_test)

# write the block predictions to a CSV file
write_data(blocks_pred, 'khan_speaker_labels_MLP.csv')
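
`score_model` itself is not shown above. The sketch below illustrates the consensus-voting step described in the comments, assuming `X_test` is an iterable of blocks (one 2-D sample array per block); the block layout and the returned consensus measure are assumptions, not the original code.

import numpy as np
from collections import Counter


def score_model(model, blocks):
    # per-sample predictions, then a majority vote per block (assumed layout)
    blocks_pred, blocks_consensus = [], []
    for block in blocks:
        sample_pred = np.argmax(model.predict(block), axis=1)
        counts = Counter(sample_pred.tolist())
        label, votes = counts.most_common(1)[0]
        blocks_pred.append(label)
        # fraction of samples that agree with the winning class
        blocks_consensus.append(votes / float(len(sample_pred)))
    return blocks_pred, blocks_consensus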
Example #40
    def __init__(self, input_path, output_dir='out'):
        '''
        Loads and preprocesses the data, generates the mask.

        Args:
            input_path (str):  path to the input file, overrides the config file
            output_dir (str):  path to the output directory
        '''

        # Input (path to the input image)

        self.input_path = input_path

        input_fname_parts = os.path.split(self.input_path)[-1].split('.')
        if len(input_fname_parts) == 1:
            print(
                'Error: the input file has no extension, please use .npy or .fits'
            )
            sys.exit()
        input_fname = '.'.join(input_fname_parts[:-1])  # file name w/o extension

        # output folder
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        # output file names
        self.output_paths = {}
        out_files = ['deproj', 'smooth', 'mask', 'smooth3d']
        for f in out_files:
            self.output_paths[f] = os.path.join(output_dir,
                                                f + '_' + input_fname + '.npy')
        self.output_paths['pds'] = os.path.join(output_dir,
                                                'pds_' + input_fname + '.txt')

        #input
        # mask_path = config.get("input", "mask_path")
        #
        # #output
        # rm_out_path = config.get("output", "rm_out_path")
        #
        # if regime=="2d":
        #     #gen2d_params
        #     lx = config.getint("gen2d_params", "Lx")
        #     ly = config.getint("gen2d_params", "Ly")
        #     p1 = config.getfloat("gen2d_params", "p1")
        #     p2 = config.getfloat("gen2d_params", "p2")
        #     kb = config.getfloat("gen2d_params", "kb")
        #     C = config.getfloat("gen2d_params","C")
        #     apply_mask = config.getboolean("gen2d_params", "apply_mask")
        #     bw = config.getfloat("gen2d_params", "beam_width")
        #
        # elif regime=="3d":
        #     #gen3d_params
        #     lx = config.getint("gen3d_params", "Lx")
        #     ly = config.getint("gen3d_params", "Ly")
        #     lz = config.getint("gen3d_params", "Lz")
        #     p1 = config.getfloat("gen3d_params", "p1")
        #     p2 = config.getfloat("gen3d_params", "p2")
        #     kb = config.getfloat("gen3d_params", "kb")
        #     C = config.getfloat("gen3d_params", "C")
        #     apply_mask = config.getboolean("gen3d_params", "apply_mask")
        #     inclin = config.getfloat("gen3d_params", "inclin")
        #     alpha = config.getfloat("gen3d_params", "alpha")

        ftype = self.input_path.split('.')[-1].lower()

        # load data
        d = load_data(self.input_path, ftype=ftype, print_info=True)

        # crop it
        ir, jr = np.nonzero(np.invert(np.isnan(d)))
        imin, jmin, imax, jmax = ir.min(), jr.min(), ir.max(), jr.max()
        self.data = d[imin:imax + 1, jmin:jmax + 1]
        self.data_dim = self.data.shape

        # adjust the cluster center location after the cropping
        self.cluster_params = {}
        self.cluster_params['center'] = [-imin, -jmin]

        # make the mask given the cropped input data
        self.mask = np.array(np.invert(np.isnan(self.data)), dtype=float)

        # save the mask
        write_data(self.output_paths['mask'], self.mask, ftype='npy')
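
`load_data` and `write_data` come from the surrounding module. A minimal sketch of a plausible implementation for the two file types handled here (`.npy` via NumPy, `.fits` via astropy) follows; the bodies are assumptions for illustration.

import numpy as np
from astropy.io import fits


def load_data(path, ftype='npy', print_info=False):
    # read a 2-D image from .npy or .fits (assumed behaviour)
    if ftype == 'npy':
        data = np.load(path)
    elif ftype == 'fits':
        data = fits.getdata(path)
    else:
        raise ValueError('unsupported file type: {}'.format(ftype))
    if print_info:
        print('loaded {} with shape {}'.format(path, data.shape))
    return data


def write_data(path, data, ftype='npy'):
    # save an array; only the .npy branch is used above (assumed behaviour)
    if ftype == 'npy':
        np.save(path, data)
    else:
        np.savetxt(path, data)
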
# This code generates fake weather data with Location, Latitude, Longitude, Local Time,
# Weather condition, Temperature, Pressure and Humidity
##########################

import utils


# Returns the full weather data for all cities
def generate_weather_data(cities):
    full_weather_data = []
    for city in cities:
        latitude, longitude = utils.get_lat_and_lon()
        altitude = utils.get_altitude()
        local_time = utils.get_local_time()
        temperature = utils.get_temperature()
        pressure = utils.get_pressure()
        weather_condition = utils.get_condition(temperature)
        humidity = utils.get_humidity()

        entry = utils.create_weather_entry(city, latitude, longitude, altitude,
                                           local_time, weather_condition,
                                           temperature, pressure, humidity)
        full_weather_data.append(entry)
    return full_weather_data


if __name__ == '__main__':
    city_names = utils.read_cities()
    final_weather_data = generate_weather_data(city_names)
    utils.write_data(final_weather_data)
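
The formatting helpers live in `utils.py` and are not shown. The sketch below gives one possible shape for `create_weather_entry` and `write_data`, assuming a pipe-delimited record and a plain text output file; the field order, separators and file name are assumptions, not the original format.

def create_weather_entry(city, lat, lon, alt, local_time, condition,
                         temperature, pressure, humidity):
    # pipe-delimited record; the real field order/format may differ
    position = '{},{},{}'.format(lat, lon, alt)
    return '|'.join(str(v) for v in
                    (city, position, local_time, condition,
                     temperature, pressure, humidity))


def write_data(entries, path='weather_data.txt'):
    # one record per line (assumed output format)
    with open(path, 'w') as fid:
        fid.write('\n'.join(entries) + '\n')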
Example #42
    def set_data(self):

        settings_default = return_data("./data/settings_default.json")
        if data_exists("./data/settings.json"):
            settings = return_data("./data/settings.json")
        else:
            logger.alt(
                "Set-Settings-Data",
                "No existing settings found to be parsed, creating new from default."
            )
            write_data("./data/settings.json", settings_default)
            settings = return_data("./data/settings.json")

        if not validate_data(settings, settings_default):
            logger.error(
                "Set-Settings-Data", "Parsed settings data is malformed! "
                "This will most likely cause a fatal exception. "
                "Try removing existing settings.json")

        self.webhook_edit.setText(settings["webhook"])
        if settings["webhookonbrowser"]:
            self.browser_checkbox.setChecked(True)
        if settings["webhookonorder"]:
            self.order_checkbox.setChecked(True)
        if settings["webhookonfailed"]:
            self.paymentfailed_checkbox.setChecked(True)
        if settings["browseronfailed"]:
            self.onfailed_checkbox.setChecked(True)
        if settings["runheadless"]:
            self.headless_checkbox.setChecked(True)
        if settings["bb_ac_beta"]:
            self.bb_ac_beta_checkbox.setChecked(True)
        if settings['onlybuyone']:
            self.buy_one_checkbox.setChecked(True)
        if settings['dont_buy']:
            self.dont_buy_checkbox.setChecked(True)
        if settings['random_delay_start']:
            self.random_delay_start.setText(settings["random_delay_start"])
        if settings['random_delay_stop']:
            self.random_delay_stop.setText(settings["random_delay_stop"])

        self.geckodriver_path = settings["geckodriver"]

        # try:
        #     self.geckodriver.setText(settings["geckodriver"])
        # except:
        #     self.geckodriver.setText("")

        try:
            self.bestbuy_user_edit.setText(settings["bestbuy_user"])
        except Exception:
            self.bestbuy_user_edit.setText("")

        try:
            self.bestbuy_pass_edit.setText((Encryption().decrypt(
                settings["bestbuy_pass"].encode("utf-8"))).decode("utf-8"))
        except Exception:
            self.bestbuy_pass_edit.setText("")

        try:
            self.target_user_edit.setText(settings["target_user"])
        except Exception:
            self.target_user_edit.setText("")

        try:
            self.target_pass_edit.setText((Encryption().decrypt(
                settings["target_pass"].encode("utf-8"))).decode("utf-8"))
        except Exception:
            self.target_pass_edit.setText("")

        try:
            self.gamestop_user_edit.setText(settings["gamestop_user"])
        except Exception:
            self.gamestop_user_edit.setText("")

        try:
            self.gamestop_pass_edit.setText((Encryption().decrypt(
                settings["gamestop_pass"].encode("utf-8"))).decode("utf-8"))
        except Exception:
            self.gamestop_pass_edit.setText("")

        self.update_settings(settings)
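
The settings helpers used above (`data_exists`, `return_data`, `write_data`, `validate_data`) are defined elsewhere in the project. A minimal JSON-backed sketch is shown below, with `validate_data` assumed to check that every key from the default settings is present in the parsed file; these bodies are assumptions, not the original code.

import json
import os


def data_exists(path):
    # assumed: a settings file "exists" if it is present on disk
    return os.path.isfile(path)


def return_data(path):
    # assumed: settings are stored as plain JSON
    with open(path, 'r') as fid:
        return json.load(fid)


def write_data(path, data):
    with open(path, 'w') as fid:
        json.dump(data, fid, indent=4)


def validate_data(settings, default):
    # assumed: parsed settings are valid when no default key is missing
    return all(key in settings for key in default)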