Example 1
def run(patch_size,n_batch,pca_frac,overcomplete,learning_rate,final_learning_rate,n_grad_step,loss_type,n_gauss_dim,n_lat_samp,seed,param_save_freq,log_freq,sigma,s1,s2,S,device,PCA_truncation,dataset):

    os.environ["CUDA_VISIBLE_DEVICES"]=str(device)                  # 
    np.random.seed(seed)                                            # Set the RNG seed to ensure randomness 
            
    dirname = util.get_directory(direc="./model_output/",tag = loss_type + "_{}".format(n_gauss_dim))
    params  = {
        "dataset":dataset,
        "patch_size":patch_size,
        "n_batch":n_batch,
        "pca_frac":pca_frac,
        "overcomplete":overcomplete,
        "learning_rate":np.float32(learning_rate),
        "final_learning_rate":np.float32(final_learning_rate),
        "pca_truncation":PCA_truncation,
        "n_grad_step":n_grad_step,
        "loss_type":loss_type,
        "n_gauss_dim":n_gauss_dim,
        "n_lat_samp":n_lat_samp,
        "sigma":np.float32(sigma),
        "param_save_freq":param_save_freq,
        "log_freq":log_freq,
        "s1":np.float32(s1),
        "s2":np.float32(s2),
        "S":np.float32(S)
    }
    
    util.dump_file(dirname +"/model_params",params)
    
    LOG = log.log(dirname + "/logfile.csv")

    netpar    = prepare_network(params)                             # Build the network graph and collect its tensors
    var       = netpar["variance"]                                  # Latent variance tensor (not used below)
    loss_exp  = netpar["loss_exp"]                                  # Training loss expression
    recon_err = netpar["recon_err"]                                 # Reconstruction-error expression
    images    = netpar["images"]                                    # Input placeholder fed during training
    data      = netpar["data"]                                      # Training data
    varif     = netpar["vardat"]                                    # Validation data
    
    #get factor to multiply LR by:
    if final_learning_rate < learning_rate:
        LR_factor = np.float32(np.exp(-np.log(learning_rate/final_learning_rate)/n_grad_step))
    else:
        print("Final LR must be lower than initial LR! Overriding with LR_factor = 1")
        LR_factor = np.float32(1)
        
    LR    = tf.Variable(np.float32(learning_rate),trainable = False)# Learning rate as a non-trainable variable
    adam  = tf.train.AdamOptimizer(learning_rate = LR)              # Set up the Adam optimizer
    train = adam.minimize(loss_exp)                                 # Op that applies one gradient step
    update_LR = tf.assign(LR,LR*LR_factor)                          # Op that decays the learning rate by LR_factor
    run_training_loop(data,varif,images,netpar["mean"],n_batch,train,loss_exp,recon_err,LOG,dirname,log_freq,n_grad_step,param_save_freq,update_LR)
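
A note on the decay schedule above: multiplying LR by LR_factor once per gradient step shrinks it geometrically from learning_rate to roughly final_learning_rate after n_grad_step steps. A minimal numpy sketch with hypothetical values:

import numpy as np

learning_rate, final_learning_rate, n_grad_step = 1e-3, 1e-5, 20000    # hypothetical values
LR_factor = np.float32(np.exp(-np.log(learning_rate / final_learning_rate) / n_grad_step))
final_LR = learning_rate * LR_factor ** n_grad_step                    # ~= final_learning_rate
print(LR_factor, final_LR)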
Example 2
def run(patch_size, n_batch, pca_frac, overcomplete, learning_rate,
        n_grad_step, loss_type, n_gauss_dim, n_lat_samp, seed, param_save_freq,
        log_freq, sigma, s1, s2, S, device, PCA_truncation, dataset):

    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    np.random.seed(seed)

    dirname = util.get_directory(direc="./model_output/",
                                 tag=loss_type + "_{}".format(n_gauss_dim))

    params = {
        "dataset": dataset,
        "patch_size": patch_size,
        "n_batch": n_batch,
        "pca_frac": pca_frac,
        "overcomplete": overcomplete,
        "learning_rate": np.float32(learning_rate),
        "pca_truncation": PCA_truncation,
        "n_grad_step": n_grad_step,
        "loss_type": loss_type,
        "n_gauss_dim": n_gauss_dim,
        "n_lat_samp": n_lat_samp,
        "sigma": np.float32(sigma),
        "param_save_freq": param_save_freq,
        "log_freq": log_freq,
        "s1": np.float32(s1),
        "s2": np.float32(s2),
        "S": np.float32(S)
    }

    util.dump_file(dirname + "/model_params", params)

    LOG = log.log(dirname + "/logfile.csv")

    netpar = prepare_network(params)
    var = netpar["variance"]
    loss_exp = netpar["loss_exp"]
    recon_err = netpar["recon_err"]
    images = netpar["images"]
    data = netpar["data"]
    varif = netpar["vardat"]

    LR = tf.Variable(np.float32(learning_rate), trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=LR)
    train = adam.minimize(loss_exp)

    run_training_loop(data, varif, images, n_batch, train, loss_exp, recon_err,
                      LOG, dirname, log_freq, n_grad_step, param_save_freq)
Example 3
def run_training_loop(data,vdata,input_tensor,pos_mean,batch_size,train_op,loss_op,recerr_op,log,dirname,log_freq,n_grad_step,save_freq,update_LR):
    # Train the model, logging periodically and checkpointing the weights.
    def var_loss(session,vdat,nbatch = 10):
        # SUB-FUNCTION TO CALCULATE THE LOSS OF THE VAE
        D     = split_by_batches(vdat,batch_size,shuffle = False)   # Partition the data into batches (no shuffling)
        loss  = 0                                                   # Initialize the loss to zero
        rerr  = 0                                                   # Initialize the reconstruction loss to zero
        nb    = 0                                                   # Initialize a counter over the number of batches
        means = []                                                  # Initialize mean storage array to an empty array
        for d in D:                                                 # Loop through the different batches
            nb   += 1                                               # Update counter
            l,r,m = session.run([loss_op,recerr_op,pos_mean],{input_tensor:d}) # Evaluate loss, reconstruction error, and posterior mean on this batch
            loss += l                                               # Update the loss function
            rerr += r                                               # Update the reconstruction error 
            means.append(m)                                         # Append the mean to the mean storage array
            if nb == nbatch:                                        # Check if passed the number of batches
                break                                               #   ... if so, BREAK
        loss /= nbatch                                              # Normalize the loss to the number of batches
        rerr /= nbatch                                              # Normalize the reconstruction error to the number of batches
        return loss,rerr,np.concatenate(means,axis = 0)             # Return the loss, reconstruction error, and concatenated means
    
    init   = tf.global_variables_initializer()                      # Variable initializer op
    config = tf.ConfigProto()                                       # TensorFlow session configuration
    config.gpu_options.allow_growth = True                          # Allow GPU memory to grow as needed
    sess   = tf.Session(config=config)                              # Start a TensorFlow session
    K.set_session(sess)                                             # Register the session with the Keras backend
    sess.run(init)                                                  # Initialize all variables
    

    nloss   = 0                                                     # Loss counter (not used below)
    t1      = time.time()                                           # Record start time
    av_time = -1                                                    # Moving average of time per log period (-1 = unset)
    efrac   = .9                                                    # Smoothing factor for the moving average

    log.log(["grad_step","loss","recloss","var_loss","var_rec","learning_rate","time_rem"],PRINT = True)

    t_loss_temp = []                                                # Losses accumulated since the last log entry
    t_rec_temp  = []                                                # Reconstruction errors since the last log entry
    lrflag      = True                                              # Learning-rate flag (not used below)
    saver       = tf.train.Saver(max_to_keep = 1000)                # Checkpoint saver
    
    for grad_step in range(n_grad_step + 1):                        # Main training loop
        batch          = data[np.random.choice(np.arange(len(data)),batch_size)]     # Sample a random batch of data
        _,loss,recloss,newLR = sess.run([train_op,loss_op,recerr_op,update_LR],{input_tensor:batch}) # Apply one gradient step, decay the LR, and fetch the losses
        t_loss_temp.append(loss)                                    # Accumulate loss for the current log period
        t_rec_temp.append(recloss)                                  # Accumulate reconstruction error for the current log period
        
        if grad_step % log_freq  == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and  av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac*av_time + (1. - efrac)*(time.time() - t1)
                
            t1               = time.time()                          # Reset the timer for the next log period
            trem             = av_time*((n_grad_step)+1-grad_step)  # Time per log period times steps remaining
            trem             = trem / log_freq / 60. / 60.          # ... divided by steps per period, converted to hours
            loss             = np.mean(t_loss_temp)                 # Mean training loss over the log period
            recloss          = np.mean(t_rec_temp)                  # Mean reconstruction error over the log period
            vloss,vrec,means = var_loss(sess,vdata)                 # Validation loss, reconstruction error, and means
            log.log([grad_step,loss,recloss,vloss,vrec,newLR,trem],PRINT = True) # Write a log entry
            t_loss_temp = []                                        # Reset the accumulators
            t_rec_temp  = []

        if grad_step % save_freq == 0:                              #
            util.dump_file(dirname + "/training_means_{}.pkl".format(grad_step),means)
            saver.save(sess,dirname + "/saved_params/saved_model_{}".format(str(grad_step))) #

    saver.save(sess,dirname + "/saved_params/saved_model_{}".format("final")) #
    sess.close()                                                    # 
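
The var_loss helper above calls a split_by_batches utility that is not part of this listing; a minimal sketch of a compatible implementation (the interface is an assumption, inferred from how it is called) could be:

import numpy as np

def split_by_batches(data, batch_size, shuffle=True):
    # Yield successive batches of `batch_size` rows, optionally shuffling the rows first.
    idx = np.arange(len(data))
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, len(data) - batch_size + 1, batch_size):
        yield data[idx[start:start + batch_size]]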
Example 4
def train_network(args):

    ##setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])

    dataset = args["dataset"]

    tag = args["tag"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)

    util.save_dict(direc + "/training_params.csv", args)
    #######

    ##get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]

    tr_dat, tr_lab = tf.train.shuffle_batch(data,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]

    te_dat, te_lab = tf.train.shuffle_batch(tedata,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)
    ##########

    ##Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)

    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor,
        args["nenc"],
        args["nstate"],
        True,
        syspicktype=args["syspick"])
    ###############

    ##Losses
    lik = losses.likelihood_loss(enc, pred, prob)
    rms = losses.rms_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)

    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - 1.,
                        0)

    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)

    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = lik

    adamopt = tf.train.AdamOptimizer(learning_rate=.001)

    fulltrain = adamopt.minimize(loss)

    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()

    sess = tf.Session()
    sess.run(init)
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(direc + "/logfile.log", name="epoch,prederr,prior_entropy")
    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])
        tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})

        if k % 50 == 0:
            rms_error = 0

            for t in range(len(test)):
                dat, lab = test[t]

                r = sess.run(prederr, {input_tensor: dat})
                rms_error += r

            rms_error /= len(test)

            LOG.log("{}\t{}\t{}".format(k, rms_error, pe))

    ###make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []

    NN = args["ntestbatch"]

    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])

        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})

        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])

    sysdense = sess.run(trainable("syspick"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)

    sess.close()
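
train_network reads its whole configuration from the args dictionary; the keys below are the ones the function accesses, with illustrative placeholder values (the values are assumptions, not taken from the source):

args = {
    "batchsize": 32,       # batch size for tf.train.shuffle_batch
    "seed": 0,             # numpy / TensorFlow RNG seed
    "dataset": "nascar",   # name passed to get_data.get_data (placeholder)
    "tag": "demo",         # tag used to name the output directory
    "device": 0,           # CUDA device index
    "nenc": 2,             # encoding dimension passed to net.build_network (placeholder)
    "nstate": 4,           # number of discrete states passed to net.build_network (placeholder)
    "syspick": "dense",    # syspicktype argument (placeholder)
    "epochs": 1000,        # number of training iterations
    "ntestbatch": 10,      # number of batches written out after training
}
train_network(args)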
Example 5
def run(dirname, save_weights, test_gratings, RF_comp, test_loglik,
        train_loglik, plot_loglik, plot_train_loglik, save_test_latents,
        n_data_samp, n_ais_step, n_prior_samp, n_hast_step, eps, n_ham_step,
        use_prior, full, fast, seed, AIS_test):

    np.random.seed(seed)

    LOG = log.log(dirname + "/analysis_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    #this is to handle legacy data files that didn't have the CNN keyword

    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca,
                                         MP["dataset"], MP["whiten"],
                                         MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)

    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except Exception:
        LOG.log("Final params not available")
        FINAL = False

    if save_weights or full or fast:

        # W[0] is [144, NLAT]. The PCA variance has size n_pca: take the PCA variance, inverse-transform it, and then normalize by it.
        if MP["CNN"]:
            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            Wout = PCA.inverse_transform(
                PCA.transform(np.transpose(W[0])) * w_norm)
        else:
            Wout = PCA.inverse_transform(np.transpose(W[0]))

        LOG.log("Saving Weights")
        np.savetxt(MP["dirname"] + "weights.csv", Wout)

        if FINAL:
            if MP["CNN"]:
                w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
                Wout = PCA.inverse_transform(
                    PCA.transform(np.transpose(Wf[0])) * w_norm)
                #w_norm = PCA.inverse_transform(np.sqrt(np.reshape(PCA.explained_variance_,[1,-1])))
                #Wout = np.transpose(Wf[0])*w_norm
            else:
                Wout = PCA.inverse_transform(np.transpose(Wf[0]))

            np.savetxt(MP["dirname"] + "weights_final.csv", Wout)

    if save_test_latents or full or fast:
        LOG.log("Saving Latents")
        mean, var, trans = get_latents(
            test[:np.min([10 * n_data_samp, len(test)])],
            MP,
            W,
            PCA,
            SAVE=True)
        trans1 = np.array([np.diag(x) for x in trans])
        trans2 = trans[:, 0, :]

        np.savetxt(MP['dirname'] + "test_means_best.csv", mean)
        np.savetxt(
            MP['dirname'] + "test_sample_best.csv",
            np.array([
                np.random.multivariate_normal(mean[v], var[v])
                for v in range(len(var))
            ]))
        np.savetxt(MP['dirname'] + "test_trans_diag_best.csv", trans1)
        np.savetxt(MP['dirname'] + "test_trans_trans_best.csv", trans2)

        if FINAL:
            mean, var, trans = get_latents(
                test[:np.min([10 * n_data_samp, len(test)])],
                MP,
                Wf,
                PCA,
                SAVE=True)
            trans1 = np.array([np.diag(x) for x in trans])
            trans2 = trans[:, 0, :]

            np.savetxt(MP['dirname'] + "test_means_final.csv", mean)
            np.savetxt(MP['dirname'] + "test_trans_diag_final.csv", trans1)
            np.savetxt(MP['dirname'] + "test_trans_trans_final.csv", trans2)

        if MP["CNN"]:

            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            out = PCA.inverse_transform(
                PCA.transform(
                    test[:np.min([10 *
                                  n_data_samp, len(test)])]) * w_norm)
        else:
            out = PCA.inverse_transform(
                test[:np.min([10 * n_data_samp, len(test)])])

        np.savetxt(
            MP["dirname"] + "test_images.csv", out
        )  #PCA.inverse_transform(test[:np.min([n_data_samp,len(test)])]))

    if test_gratings or full or fast:
        LOG.log("Processing Gratings")
        mean, lab, grats = grating_test(MP, PCA)

        np.savetxt(MP["dirname"] + "test_grating.csv", mean)
        np.savetxt(MP["dirname"] + "test_grating_labels.csv", lab)
        np.savetxt(MP["dirname"] + "test_grating_images.csv", grats)

    if RF_comp or full or fast:
        LOG.log("Calculating RFs")
        for scale in [.4, .5, .6]:
            RFs = RF_test(MP, PCA, scale)
            for k in range(len(RFs)):
                np.savetxt(
                    MP["dirname"] +
                    "receptive_fields_{}_{}.csv".format(k, scale), RFs[k])

    if test_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if train_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_train_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if AIS_test:
        test_loglikelihood(test[:2], MP, "best", n_ais_step, n_prior_samp,
                           n_hast_step, eps, n_ham_step, use_prior, LOG)
Example 6
def run_training_loop(data, vdata, input_tensor, batch_size, train_op, loss_op,
                      recerr_op, log, dirname, log_freq, n_grad_step,
                      save_freq):
    def var_loss(session, vdat, nbatch=10):
        # SUB-FUNCTION TO CALCULATE THE LOSS OF THE VAE
        D = split_by_batches(
            vdat, batch_size,
            shuffle=False)  # Partition the data into batches (no shuffling)
        loss = 0  # Initialize the loss to zero
        rerr = 0  # Initialize the reconstruction loss to zero
        nb = 0  # Initialize a counter over the number of batches
        for d in D:  # Loop through the different batches
            nb += 1  # Update counter
            l, r = session.run(
                [loss_op, recerr_op],
                {input_tensor: d})  # Evaluate the loss and reconstruction error on this batch
            loss += l  # Update the loss function
            rerr += r  # Update the reconstruction error
            if nb == nbatch:  # Check if passed the number of batches
                break  #   ... if so, BREAK
        loss /= nbatch  # Normalize the loss to the number of batches
        rerr /= nbatch  # Normalize the reconstruction error to the number of batches
        return loss, rerr  # Return the loss and the reconstruction error

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    nloss = 0  # Loss counter (not used below)
    t1 = time.time()  # Record start time
    av_time = -1
    efrac = .9

    log.log(
        ["grad_step", "loss", "recloss", "var_loss", "var_rec", "time_rem"],
        PRINT=True)

    t_loss_temp = []
    t_rec_temp = []
    lrflag = True
    saver = tf.train.Saver(max_to_keep=1000)

    for grad_step in range(n_grad_step + 1):

        batch = data[np.random.choice(np.arange(len(data)),
                                      batch_size)]  # Get a batch of data
        _, loss, recloss = sess.run(
            [train_op, loss_op, recerr_op],
            {input_tensor: batch
             })  # Apply one gradient step and fetch the loss / reconstruction error

        t_loss_temp.append(loss)  # Accumulate loss for the current log period
        t_rec_temp.append(recloss)  # Accumulate reconstruction error for the current log period

        if grad_step % log_freq == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac * av_time + (1. - efrac) * (time.time() - t1)

            t1 = time.time()
            trem = av_time * ((n_grad_step) + 1 - grad_step)
            trem = trem / log_freq / 60. / 60.

            loss = np.mean(t_loss_temp)
            recloss = np.mean(t_rec_temp)

            vloss, vrec = var_loss(sess, vdata)

            log.log([grad_step, loss, recloss, vloss, vrec, trem], PRINT=True)

            t_loss_temp = []
            t_rec_temp = []

        if grad_step % save_freq == 0:
            saver.save(
                sess, dirname +
                "/saved_params/saved_model_{}".format(str(grad_step)))

    saver.save(sess, dirname + "/saved_params/saved_model_{}".format("final"))
    sess.close()
Example 7
def run_training_loop(data, vdata, input_tensor, pos_mean, batch_size,
                      train_op, loss_op, recerr_op, log, dirname, log_freq,
                      n_grad_step, save_freq, update_LR):
    def var_loss(session, vdat, nbatch=10):
        D = split_by_batches(vdat, batch_size, shuffle=False)
        loss = 0
        rerr = 0
        nb = 0
        means = []
        for d in D:
            nb += 1
            l, r, m = session.run([loss_op, recerr_op, pos_mean],
                                  {input_tensor: d})
            loss += l
            rerr += r
            means.append(m)
            if nb == nbatch:
                break
        loss /= nbatch
        rerr /= nbatch

        return loss, rerr, np.concatenate(means, axis=0)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    nloss = 0

    t1 = time.time()
    av_time = -1
    efrac = .9

    log.log([
        "grad_step", "loss", "recloss", "var_loss", "var_rec", "learning_rate",
        "time_rem"
    ],
            PRINT=True)

    t_loss_temp = []
    t_rec_temp = []

    lrflag = True

    saver = tf.train.Saver(max_to_keep=1000)

    for grad_step in range(n_grad_step + 1):

        batch = data[np.random.choice(np.arange(len(data)), batch_size)]

        _, loss, recloss, newLR = sess.run(
            [train_op, loss_op, recerr_op, update_LR],
            {input_tensor: batch
             })  # Apply one gradient step, decay the LR, and fetch the losses

        t_loss_temp.append(loss)
        t_rec_temp.append(recloss)

        if grad_step % log_freq == 0:
            if grad_step == 0:
                av_time = -1
            elif grad_step != 0 and av_time < 0:
                av_time = (time.time() - t1)
            else:
                av_time = efrac * av_time + (1. - efrac) * (time.time() - t1)

            t1 = time.time()
            trem = av_time * ((n_grad_step) + 1 - grad_step)
            trem = trem / log_freq / 60. / 60.

            loss = np.mean(t_loss_temp)
            recloss = np.mean(t_rec_temp)

            vloss, vrec, means = var_loss(sess, vdata)

            log.log([grad_step, loss, recloss, vloss, vrec, newLR, trem],
                    PRINT=True)  #

            t_loss_temp = []
            t_rec_temp = []

        if grad_step % save_freq == 0:
            util.dump_file(
                dirname + "/training_means_{}.pkl".format(grad_step), means)
            saver.save(
                sess, dirname +
                "/saved_params/saved_model_{}".format(str(grad_step)))

    saver.save(sess, dirname + "/saved_params/saved_model_{}".format("final"))
    sess.close()
Example 8
def train_network(args):

    ##setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])

    dataset = args["dataset"]

    tag = args["tag"]

    train_mode = args["train_mode"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)

    util.save_dict(direc + "/training_params", args)
    #######

    ##get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]

    tr_dat, tr_lab = tf.train.shuffle_batch(data,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]

    te_dat, te_lab = tf.train.shuffle_batch(tedata,
                                            batch_size,
                                            capacity=30,
                                            min_after_dequeue=10,
                                            seed=0)
    ##########

    ##Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)

    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor,
        args["nenc"],
        args["nstate"],
        False,
        syspicktype=args["syspick"])
    ###############

    ##Losses
    rms = losses.likelihood_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)

    minevar = trainable(scope="MINE")
    minereg = tf.reduce_max([tf.reduce_max(k**2) for k in minevar])

    othervar = trainable(scope="enc")
    otherreg = tf.reduce_max([tf.reduce_max(k**2) for k in othervar])

    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - .5,
                        0)

    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)

    pererr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        ((tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)) / tf.reduce_mean(
            tf.expand_dims((enc[:, :-1] - enc[:, 1:]), 2)**2)

    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = args["likloss"] * rms
    reg = args["regloss"] * (mine + scalereg + varreg + minereg + otherreg)

    reg += args["ent_loss"] * post_ent

    minegradreg = losses.MINE_grad_regularization(enc)

    reg += args["MINE_grad_reg"] * minegradreg

    ########

    adamopt = tf.train.AdamOptimizer(learning_rate=.0001)

    fulltrain = adamopt.minimize(loss + reg)

    minetrain = adamopt.minimize(reg,
                                 var_list=trainable("MINE") + trainable("enc"))
    systtrain = adamopt.minimize(loss, var_list=trainable("sys"))

    ########

    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()

    sess = tf.Session()

    sess.run(init)

    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    ########TRAINING

    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(["epoch", "percenterr", "prior_entropy", "encmean", "mine"],
                  PRINT=True)

    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])  #get data batch

        if train_mode == "full":
            tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})
        elif train_mode == "minefirst":
            if k < args["epochs"] / 2:
                tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
            else:
                tr, pe = sess.run([systtrain, pre_ent], {input_tensor: dat})
        elif train_mode == "mineonly":
            tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
        else:
            print("Training mode not recognized")
            exit()

        if k % 50 == 0:
            teloss = 0
            tmean = 0
            mineloss = 0
            per_error = 0

            for t in range(len(test)):
                dat, lab = test[t]

                l, e, m, r = sess.run([meanediff, enc, mine, pererr],
                                      {input_tensor: dat})
                teloss += l
                tmean += np.max(e**2)
                mineloss += m
                per_error += r

            teloss /= len(test)
            tmean /= len(test)
            mineloss /= len(test)
            per_error /= len(test)

            LOG.log([k, per_error, pe, tmean, mineloss])

    LOG.save(direc + "/logfile.json")
    ###make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []

    NN = args["ntestbatch"]

    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])

        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})
        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])

    sysdense = sess.run(trainable("syspick_dense"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr,
                                                     [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)

    sess.close()
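
Example 8 reads additional keys from args beyond those listed after Example 4: a train_mode and several loss weights. A hypothetical extension of that sketch (values are placeholders, not taken from the source):

args.update({
    "train_mode": "full",    # one of "full", "minefirst", "mineonly"
    "likloss": 1.0,          # weight on the likelihood loss
    "regloss": 1.0,          # weight on the MINE / scale / variance regularizers
    "ent_loss": 0.1,         # weight on the posterior-entropy term
    "MINE_grad_reg": 0.1,    # weight on the MINE gradient regularizer
})
train_network(args)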
Example 9
def run(dirname):

    LOG = log.log(dirname + "/weight_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    #this is to handle legacy data files that didn't have the CNN keyword

    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca,
                                         MP["dataset"], MP["whiten"],
                                         MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)

    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except Exception:
        LOG.log("Final params not available")
        FINAL = False

    LOG.log(np.std(test))

    sp1 = np.random.randn(test.shape[0], n_lat) * MP["s1"]

    sp2 = np.random.randn(test.shape[0], n_lat) * MP["s2"]

    S = MP["S"]

    LOG.log("sp1 {}".format(np.std(sp1)))
    LOG.log("sp2 {}".format(np.std(sp2)))

    LOG.log("Wsp1 {}".format(np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))
    LOG.log("Wsp2 {}".format(np.std(np.tensordot(sp2, W[0], axes=[1, 1]))))

    LOG.log("SW {}".format(S * np.std(np.tensordot(sp2, W[0], axes=[1, 1])) +
                           (1. - S) *
                           np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))

    A = get_file(MP["dirname"] + "/test_means_best.csv")

    LOG.log("RV {}".format(np.std(np.tensordot(W[0], A, axes=[1, 1]))))
    LOG.log("DV {}".format(np.std(var)))