def run(patch_size, n_batch, pca_frac, overcomplete, learning_rate,
        final_learning_rate, n_grad_step, loss_type, n_gauss_dim, n_lat_samp,
        seed, param_save_freq, log_freq, sigma, s1, s2, S, device,
        PCA_truncation, dataset):

    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    np.random.seed(seed)  # set the RNG seed for reproducibility

    dirname = util.get_directory(direc="./model_output/",
                                 tag=loss_type + "_{}".format(n_gauss_dim))

    params = {
        "dataset": dataset,
        "patch_size": patch_size,
        "n_batch": n_batch,
        "pca_frac": pca_frac,
        "overcomplete": overcomplete,
        "learning_rate": np.float32(learning_rate),
        "final_learning_rate": np.float32(final_learning_rate),
        "pca_truncation": PCA_truncation,
        "n_grad_step": n_grad_step,
        "loss_type": loss_type,
        "n_gauss_dim": n_gauss_dim,
        "n_lat_samp": n_lat_samp,
        "sigma": np.float32(sigma),
        "param_save_freq": param_save_freq,
        "log_freq": log_freq,
        "s1": np.float32(s1),
        "s2": np.float32(s2),
        "S": np.float32(S)
    }

    util.dump_file(dirname + "/model_params", params)

    LOG = log.log(dirname + "/logfile.csv")

    netpar = prepare_network(params)

    var = netpar["variance"]
    loss_exp = netpar["loss_exp"]
    recon_err = netpar["recon_err"]
    images = netpar["images"]
    data = netpar["data"]
    varif = netpar["vardat"]

    # get the per-step factor to multiply the learning rate by
    if final_learning_rate < learning_rate:
        LR_factor = np.float32(
            np.exp(-np.log(learning_rate / final_learning_rate) / n_grad_step))
    else:
        print("Final LR must be lower than initial LR! Overriding with LR_factor = 1")
        LR_factor = np.float32(1)

    LR = tf.Variable(np.float32(learning_rate), trainable=False)

    adam = tf.train.AdamOptimizer(learning_rate=LR)  # set up the Adam optimizer
    train = adam.minimize(loss_exp)  # training op
    update_LR = tf.assign(LR, LR * LR_factor)  # multiplicative LR decay, applied once per gradient step

    run_training_loop(data, varif, images, netpar["mean"], n_batch, train,
                      loss_exp, recon_err, LOG, dirname, log_freq, n_grad_step,
                      param_save_freq, update_LR)
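# NOTE: the decay factor above is chosen so that n_grad_step multiplicative
# updates carry the learning rate exactly from learning_rate down to
# final_learning_rate: lr0 * factor**n = lr0 * exp(-log(lr0 / lr1)) = lr1.
# A minimal standalone check of that identity (values below are illustrative):
#
#     import numpy as np
#     lr0, lr1, n = 1e-3, 1e-5, 10000
#     factor = np.exp(-np.log(lr0 / lr1) / n)
#     assert np.isclose(lr0 * factor**n, lr1)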
def run(patch_size, n_batch, pca_frac, overcomplete, learning_rate,
        n_grad_step, loss_type, n_gauss_dim, n_lat_samp, seed,
        param_save_freq, log_freq, sigma, s1, s2, S, device, PCA_truncation,
        dataset):

    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    np.random.seed(seed)

    dirname = util.get_directory(direc="./model_output/",
                                 tag=loss_type + "_{}".format(n_gauss_dim))

    params = {
        "dataset": dataset,
        "patch_size": patch_size,
        "n_batch": n_batch,
        "pca_frac": pca_frac,
        "overcomplete": overcomplete,
        "learning_rate": np.float32(learning_rate),
        "pca_truncation": PCA_truncation,
        "n_grad_step": n_grad_step,
        "loss_type": loss_type,
        "n_gauss_dim": n_gauss_dim,
        "n_lat_samp": n_lat_samp,
        "sigma": np.float32(sigma),
        "param_save_freq": param_save_freq,
        "log_freq": log_freq,
        "s1": np.float32(s1),
        "s2": np.float32(s2),
        "S": np.float32(S)
    }

    util.dump_file(dirname + "/model_params", params)

    LOG = log.log(dirname + "/logfile.csv")

    netpar = prepare_network(params)

    var = netpar["variance"]
    loss_exp = netpar["loss_exp"]
    recon_err = netpar["recon_err"]
    images = netpar["images"]
    data = netpar["data"]
    varif = netpar["vardat"]

    LR = tf.Variable(np.float32(learning_rate), trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=LR)
    train = adam.minimize(loss_exp)

    run_training_loop(data, varif, images, n_batch, train, loss_exp,
                      recon_err, LOG, dirname, log_freq, n_grad_step,
                      param_save_freq)
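# Example invocation of run() with placeholder hyperparameters (every value
# below is hypothetical, not a setting shipped with the repo):
#
#     run(patch_size=12, n_batch=128, pca_frac=1.0, overcomplete=1.0,
#         learning_rate=1e-3, n_grad_step=100000, loss_type="gauss",
#         n_gauss_dim=2, n_lat_samp=10, seed=0, param_save_freq=10000,
#         log_freq=100, sigma=1.0, s1=1.0, s2=1.0, S=0.5, device=0,
#         PCA_truncation=False, dataset="BSDS")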
def run_training_loop(data, vdata, input_tensor, pos_mean, batch_size,
                      train_op, loss_op, recerr_op, log, dirname, log_freq,
                      n_grad_step, save_freq, update_LR):

    def var_loss(session, vdat, nbatch=10):
        # sub-function to compute the validation loss of the VAE
        D = split_by_batches(vdat, batch_size, shuffle=False)  # partition the data into batches
        loss = 0  # initialize the loss to zero
        rerr = 0  # initialize the reconstruction loss to zero
        nb = 0  # counter over the number of batches
        means = []  # storage for the posterior means
        for d in D:  # loop through the batches
            nb += 1  # update the counter
            l, r, m = session.run([loss_op, recerr_op, pos_mean],
                                  {input_tensor: d})  # evaluate loss, rec. error, and posterior means on this batch
            loss += l  # accumulate the loss
            rerr += r  # accumulate the reconstruction error
            means.append(m)  # store the posterior means
            if nb == nbatch:  # stop after nbatch batches
                break
        loss /= nbatch  # normalize the loss by the number of batches
        rerr /= nbatch  # normalize the reconstruction error by the number of batches
        return loss, rerr, np.concatenate(means, axis=0)  # return loss, rec. error, and means

    init = tf.global_variables_initializer()

    config = tf.ConfigProto()  # tensorflow session configuration
    config.gpu_options.allow_growth = True  # allow GPU memory growth
    sess = tf.Session(config=config)  # start a tensorflow session
    K.set_session(sess)
    sess.run(init)

    nloss = 0  # initialize a loss
    t1 = time.time()  # record start time
    av_time = -1
    efrac = .9

    log.log(["grad_step", "loss", "recloss", "var_loss", "var_rec",
             "learning_rate", "time_rem"], PRINT=True)

    t_loss_temp = []
    t_rec_temp = []
    lrflag = True
    saver = tf.train.Saver(max_to_keep=1000)

    for grad_step in range(n_grad_step + 1):
        batch = data[np.random.choice(np.arange(len(data)), batch_size)]  # sample a batch of data
        _, loss, recloss, newLR = sess.run(
            [train_op, loss_op, recerr_op, update_LR],
            {input_tensor: batch})  # take a gradient step; fetch the loss and reconstruction error
        t_loss_temp.append(loss)  # accumulate losses for this logging window
        t_rec_temp.append(recloss)  # accumulate reconstruction errors for this logging window

        if grad_step % log_freq == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac * av_time + (1. - efrac) * (time.time() - t1)
            t1 = time.time()

            trem = av_time * ((n_grad_step) + 1 - grad_step)  # estimated time remaining
            trem = trem / log_freq / 60. / 60.  # convert to hours

            loss = np.mean(t_loss_temp)
            recloss = np.mean(t_rec_temp)
            vloss, vrec, means = var_loss(sess, vdata)

            log.log([grad_step, loss, recloss, vloss, vrec, newLR, trem],
                    PRINT=True)

            t_loss_temp = []
            t_rec_temp = []

        if grad_step % save_freq == 0:
            util.dump_file(dirname + "/training_means_{}.pkl".format(grad_step), means)
            saver.save(sess, dirname + "/saved_params/saved_model_{}".format(str(grad_step)))

    saver.save(sess, dirname + "/saved_params/saved_model_{}".format("final"))
    sess.close()
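# `split_by_batches` is called by var_loss above but defined elsewhere in the
# repo. A minimal sketch of behavior consistent with its usage here (an
# assumption, not the repo's actual implementation):
def split_by_batches(data, batch_size, shuffle=True):
    inds = np.arange(len(data))
    if shuffle:
        np.random.shuffle(inds)  # optionally shuffle before partitioning
    # yield successive batches of batch_size rows; any final partial batch is dropped
    for b in range(len(data) // batch_size):
        yield data[inds[b * batch_size:(b + 1) * batch_size]]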
def train_network(args):
    ##setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])
    dataset = args["dataset"]
    tag = args["tag"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)
    util.save_dict(direc + "/training_params.csv", args)
    #######

    ##get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]
    # shuffle_batch preserves the [data, label] order of its input
    tr_dat, tr_lab = tf.train.shuffle_batch(data,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]
    te_dat, te_lab = tf.train.shuffle_batch(tedata,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)
    ##########

    ##Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)

    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor, args["nenc"], args["nstate"], True,
        syspicktype=args["syspick"])
    ###############

    ##Losses
    lik = losses.likelihood_loss(enc, pred, prob)
    rms = losses.rms_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)
    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - 1., 0)
    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)
    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = lik

    adamopt = tf.train.AdamOptimizer(learning_rate=.001)
    fulltrain = adamopt.minimize(loss)

    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()
    sess = tf.Session()
    sess.run(init)
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(direc + "/logfile.log", name="epoch,prederr,prior_entropy")

    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])
        tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})

        if k % 50 == 0:
            rms_error = 0
            for t in range(len(test)):
                dat, lab = test[t]
                r = sess.run(prederr, {input_tensor: dat})
                rms_error += r
            rms_error /= len(test)
            LOG.log("{}\t{}\t{}".format(k, rms_error, pe))

    ###make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []
    NN = args["ntestbatch"]
    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])
        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})
        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])
    sysdense = sess.run(trainable("syspick"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)
    sess.close()
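# `trainable(scope)` is used above and in the second train_network variant
# below, but is not defined in this file. Under TF1 it is presumably a thin
# wrapper like the following sketch (an assumption, not the repo's definition):
def trainable(scope=None):
    # return the trainable variables whose names fall under `scope`
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)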
def run(dirname, save_weights, test_gratings, RF_comp, test_loglik,
        train_loglik, plot_loglik, plot_train_loglik, save_test_latents,
        n_data_samp, n_ais_step, n_prior_samp, n_hast_step, eps, n_ham_step,
        use_prior, full, fast, seed, AIS_test):

    np.random.seed(seed)

    LOG = log.log(dirname + "/analysis_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    # handle legacy data files that didn't have the CNN keyword
    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca,
                                         MP["dataset"], MP["whiten"],
                                         MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)

    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except:
        LOG.log("Final params not available")
        FINAL = False

    if save_weights or full or fast:
        # W[0] is [datsize, n_lat]; the PCA variance has size n_pca. Take the
        # PCA variance, inverse-transform the weights, and normalize by it.
        if MP["CNN"]:
            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            Wout = PCA.inverse_transform(PCA.transform(np.transpose(W[0])) * w_norm)
        else:
            Wout = PCA.inverse_transform(np.transpose(W[0]))

        LOG.log("Saving Weights")
        np.savetxt(MP["dirname"] + "weights.csv", Wout)

        if FINAL:
            if MP["CNN"]:
                w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
                Wout = PCA.inverse_transform(PCA.transform(np.transpose(Wf[0])) * w_norm)
            else:
                Wout = PCA.inverse_transform(np.transpose(Wf[0]))
            np.savetxt(MP["dirname"] + "weights_final.csv", Wout)

    if save_test_latents or full or fast:
        LOG.log("Saving Latents")
        mean, var, trans = get_latents(test[:np.min([10 * n_data_samp, len(test)])],
                                       MP, W, PCA, SAVE=True)
        trans1 = np.array([np.diag(x) for x in trans])
        trans2 = trans[:, 0, :]
        np.savetxt(MP['dirname'] + "test_means_best.csv", mean)
        np.savetxt(MP['dirname'] + "test_sample_best.csv",
                   np.array([np.random.multivariate_normal(mean[v], var[v])
                             for v in range(len(var))]))
        np.savetxt(MP['dirname'] + "test_trans_diag_best.csv", trans1)
        np.savetxt(MP['dirname'] + "test_trans_trans_best.csv", trans2)

        if FINAL:
            mean, var, trans = get_latents(test[:np.min([10 * n_data_samp, len(test)])],
                                           MP, Wf, PCA, SAVE=True)
            trans1 = np.array([np.diag(x) for x in trans])
            trans2 = trans[:, 0, :]
            np.savetxt(MP['dirname'] + "test_means_final.csv", mean)
            np.savetxt(MP['dirname'] + "test_trans_diag_final.csv", trans1)
            np.savetxt(MP['dirname'] + "test_trans_trans_final.csv", trans2)

        if MP["CNN"]:
            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            out = PCA.inverse_transform(
                PCA.transform(test[:np.min([10 * n_data_samp, len(test)])]) * w_norm)
        else:
            out = PCA.inverse_transform(test[:np.min([10 * n_data_samp, len(test)])])

        np.savetxt(MP["dirname"] + "test_images.csv", out)

    if test_gratings or full or fast:
        LOG.log("Processing Gratings")
        mean, lab, grats = grating_test(MP, PCA)
        np.savetxt(MP["dirname"] + "test_grating.csv", mean)
        np.savetxt(MP["dirname"] + "test_grating_labels.csv", lab)
        np.savetxt(MP["dirname"] + "test_grating_images.csv", grats)

    if RF_comp or full or fast:
        LOG.log("Calculating RFs")
        for scale in [.4, .5, .6]:
            RFs = RF_test(MP, PCA, scale)
            for k in range(len(RFs)):
                np.savetxt(MP["dirname"] + "receptive_fields_{}_{}.csv".format(k, scale),
                           RFs[k])

    if test_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if train_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_train_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if AIS_test:
        test_loglikelihood(test[:2], MP, "best", n_ais_step, n_prior_samp,
                           n_hast_step, eps, n_ham_step, use_prior, LOG)
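# The weight export above leans on sklearn's PCA round-trip: decoder columns
# live in PCA space, and inverse_transform maps them back to pixel space. A
# self-contained toy illustration of the same pattern (the shapes and data
# here are made up for the example, not taken from the repo's pipeline):
def _pca_roundtrip_demo():
    from sklearn.decomposition import PCA as skPCA
    X = np.random.randn(1000, 144)       # toy 12x12 "patch" data, flattened
    pca = skPCA(n_components=72).fit(X)  # keep half the components
    W0 = np.random.randn(72, 36)         # toy decoder weights, [n_pca, n_lat]
    Wout = pca.inverse_transform(W0.T)   # latent filters in pixel space, [n_lat, 144]
    return Wout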
def run_training_loop(data, vdata, input_tensor, batch_size, train_op,
                      loss_op, recerr_op, log, dirname, log_freq, n_grad_step,
                      save_freq):

    def var_loss(session, vdat, nbatch=10):
        # sub-function to compute the validation loss of the VAE
        D = split_by_batches(vdat, batch_size, shuffle=False)  # partition the data into batches
        loss = 0  # initialize the loss to zero
        rerr = 0  # initialize the reconstruction loss to zero
        nb = 0  # counter over the number of batches
        for d in D:  # loop through the batches
            nb += 1  # update the counter
            l, r = session.run([loss_op, recerr_op],
                               {input_tensor: d})  # evaluate loss and reconstruction error on this batch
            loss += l  # accumulate the loss
            rerr += r  # accumulate the reconstruction error
            if nb == nbatch:  # stop after nbatch batches
                break
        loss /= nbatch  # normalize the loss by the number of batches
        rerr /= nbatch  # normalize the reconstruction error by the number of batches
        return loss, rerr  # return the loss and the reconstruction error

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    nloss = 0  # initialize a loss
    t1 = time.time()  # record start time
    av_time = -1
    efrac = .9

    log.log(["grad_step", "loss", "recloss", "var_loss", "var_rec", "time_rem"],
            PRINT=True)

    t_loss_temp = []
    t_rec_temp = []
    lrflag = True
    saver = tf.train.Saver(max_to_keep=1000)

    for grad_step in range(n_grad_step + 1):
        batch = data[np.random.choice(np.arange(len(data)), batch_size)]  # sample a batch of data
        _, loss, recloss = sess.run(
            [train_op, loss_op, recerr_op],
            {input_tensor: batch})  # take a gradient step; fetch the loss and reconstruction error
        t_loss_temp.append(loss)  # accumulate losses for this logging window
        t_rec_temp.append(recloss)  # accumulate reconstruction errors for this logging window

        if grad_step % log_freq == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac * av_time + (1. - efrac) * (time.time() - t1)
            t1 = time.time()

            trem = av_time * ((n_grad_step) + 1 - grad_step)  # estimated time remaining
            trem = trem / log_freq / 60. / 60.  # convert to hours

            loss = np.mean(t_loss_temp)
            recloss = np.mean(t_rec_temp)
            vloss, vrec = var_loss(sess, vdata)

            log.log([grad_step, loss, recloss, vloss, vrec, trem], PRINT=True)

            t_loss_temp = []
            t_rec_temp = []

        if grad_step % save_freq == 0:
            saver.save(sess, dirname + "/saved_params/saved_model_{}".format(str(grad_step)))

    saver.save(sess, dirname + "/saved_params/saved_model_{}".format("final"))
    sess.close()
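# The `trem` arithmetic above: av_time is an exponential moving average (decay
# efrac) of the wall time per logging window of log_freq steps, so the time
# remaining in hours is av_time * (steps left) / log_freq / 3600. The same
# computation as a standalone helper (illustrative, not part of the repo):
def hours_remaining(av_time, grad_step, n_grad_step, log_freq):
    # av_time: seconds per logging window; returns estimated hours to finish
    return av_time * (n_grad_step + 1 - grad_step) / log_freq / 3600.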
def run_training_loop(data, vdata, input_tensor, pos_mean, batch_size,
                      train_op, loss_op, recerr_op, log, dirname, log_freq,
                      n_grad_step, save_freq, update_LR):

    def var_loss(session, vdat, nbatch=10):
        D = split_by_batches(vdat, batch_size, shuffle=False)
        loss = 0
        rerr = 0
        nb = 0
        means = []
        for d in D:
            nb += 1
            l, r, m = session.run([loss_op, recerr_op, pos_mean],
                                  {input_tensor: d})
            loss += l
            rerr += r
            means.append(m)
            if nb == nbatch:
                break
        loss /= nbatch
        rerr /= nbatch
        return loss, rerr, np.concatenate(means, axis=0)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    nloss = 0
    t1 = time.time()
    av_time = -1
    efrac = .9

    log.log([
        "grad_step", "loss", "recloss", "var_loss", "var_rec",
        "learning_rate", "time_rem"
    ], PRINT=True)

    t_loss_temp = []
    t_rec_temp = []
    lrflag = True
    saver = tf.train.Saver(max_to_keep=1000)

    for grad_step in range(n_grad_step + 1):
        batch = data[np.random.choice(np.arange(len(data)), batch_size)]
        _, loss, recloss, newLR = sess.run(
            [train_op, loss_op, recerr_op, update_LR],
            {input_tensor: batch})  # take a gradient step; fetch the loss and reconstruction error
        t_loss_temp.append(loss)
        t_rec_temp.append(recloss)

        if grad_step % log_freq == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac * av_time + (1. - efrac) * (time.time() - t1)
            t1 = time.time()

            trem = av_time * ((n_grad_step) + 1 - grad_step)
            trem = trem / log_freq / 60. / 60.

            loss = np.mean(t_loss_temp)
            recloss = np.mean(t_rec_temp)
            vloss, vrec, means = var_loss(sess, vdata)

            log.log([grad_step, loss, recloss, vloss, vrec, newLR, trem],
                    PRINT=True)

            t_loss_temp = []
            t_rec_temp = []

        if grad_step % save_freq == 0:
            util.dump_file(dirname + "/training_means_{}.pkl".format(grad_step), means)
            saver.save(sess, dirname + "/saved_params/saved_model_{}".format(str(grad_step)))

    saver.save(sess, dirname + "/saved_params/saved_model_{}".format("final"))
    sess.close()
def train_network(args):
    ##setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])
    dataset = args["dataset"]
    tag = args["tag"]
    train_mode = args["train_mode"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)
    util.save_dict(direc + "/training_params", args)
    #######

    ##get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]
    tr_dat, tr_lab = tf.train.shuffle_batch(data,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]
    te_dat, te_lab = tf.train.shuffle_batch(tedata,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)
    ##########

    ##Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)

    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor, args["nenc"], args["nstate"], False,
        syspicktype=args["syspick"])
    ###############

    ##Losses
    rms = losses.likelihood_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)

    minevar = trainable(scope="MINE")
    minereg = tf.reduce_max([tf.reduce_max(k**2) for k in minevar])
    othervar = trainable(scope="enc")
    otherreg = tf.reduce_max([tf.reduce_max(k**2) for k in othervar])

    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - .5, 0)
    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)
    pererr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        ((tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)) / tf.reduce_mean(
            tf.expand_dims((enc[:, :-1] - enc[:, 1:]), 2)**2)
    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = args["likloss"] * rms

    reg = args["regloss"] * (mine + scalereg + varreg + minereg + otherreg)
    reg += args["ent_loss"] * post_ent

    minegradreg = losses.MINE_grad_regularization(enc)
    reg += args["MINE_grad_reg"] * minegradreg
    ########

    adamopt = tf.train.AdamOptimizer(learning_rate=.0001)
    fulltrain = adamopt.minimize(loss + reg)
    minetrain = adamopt.minimize(reg, var_list=trainable("MINE") + trainable("enc"))
    systtrain = adamopt.minimize(loss, var_list=trainable("sys"))
    ########

    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()
    sess = tf.Session()
    sess.run(init)
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    ########TRAINING
    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(["epoch", "percenterr", "prior_entropy", "encmean", "mine"],
                  PRINT=True)

    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])  # get a data batch

        if train_mode == "full":
            tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})
        elif train_mode == "minefirst":
            if k < args["epochs"] / 2:
                tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
            else:
                tr, pe = sess.run([systtrain, pre_ent], {input_tensor: dat})
        elif train_mode == "mineonly":
            tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
        else:
            print("Training mode not recognized")
            exit()

        if k % 50 == 0:
            teloss = 0
            tmean = 0
            mineloss = 0
            per_error = 0
            for t in range(len(test)):
                dat, lab = test[t]
                l, e, m, r = sess.run([meanediff, enc, mine, pererr],
                                      {input_tensor: dat})
                teloss += l
                tmean += np.max(e**2)
                mineloss += m
                per_error += r
            teloss /= len(test)
            tmean /= len(test)
            mineloss /= len(test)
            per_error /= len(test)
            LOG.log([k, per_error, pe, tmean, mineloss])
            LOG.save(direc + "/logfile.json")

    ###make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []
    NN = args["ntestbatch"]
    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])
        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})
        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])
    sysdense = sess.run(trainable("syspick_dense"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)
    sess.close()
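# A hypothetical configuration dict for train_network; every key below is read
# by the function, but the values are placeholders, not published settings:
#
#     args = {
#         "batchsize": 32, "seed": 0, "dataset": "nascar", "tag": "demo",
#         "train_mode": "full", "device": 0, "nenc": 2, "nstate": 4,
#         "syspick": "dense", "likloss": 1., "regloss": 1., "ent_loss": 1.,
#         "MINE_grad_reg": 0., "epochs": 10000, "ntestbatch": 10,
#     }
#     train_network(args)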
def run(dirname):

    LOG = log.log(dirname + "/weight_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    # handle legacy data files that didn't have the CNN keyword
    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca,
                                         MP["dataset"], MP["whiten"],
                                         MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)

    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except:
        LOG.log("Final params not available")
        FINAL = False

    LOG.log(np.std(test))

    sp1 = np.random.randn(test.shape[0], n_lat) * MP["s1"]
    sp2 = np.random.randn(test.shape[0], n_lat) * MP["s2"]
    S = MP["S"]

    LOG.log("sp1 {}".format(np.std(sp1)))
    LOG.log("sp2 {}".format(np.std(sp2)))
    LOG.log("Wsp1 {}".format(np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))
    LOG.log("Wsp2 {}".format(np.std(np.tensordot(sp2, W[0], axes=[1, 1]))))
    LOG.log("SW {}".format(S * np.std(np.tensordot(sp2, W[0], axes=[1, 1])) +
                           (1. - S) * np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))

    A = get_file(MP["dirname"] + "/test_means_best.csv")

    LOG.log("RV {}".format(np.std(np.tensordot(W[0], A, axes=[1, 1]))))
    LOG.log("DV {}".format(np.std(var)))