def group_list_setup(self):
    utilities.get_directory(DATA_PATH)
    if not os.path.exists(GROUPS_LOCATION):
        project_data = {'diffuse': '_d',
                        'specular': '_s',
                        'normal': '_n',
                        'bump': '_b',
                        'occlusion': '_o',
                        'luminous': '_l'}
        utilities.write_json_file(GROUPS_LOCATION, project_data)
def change_image_format_transform(path_to_file, new_format,
                                  updated_page_name="cmprs_img_index.html"):
    # Rewrite the src of every <img> tag to the new image format and write the
    # updated page alongside the original.
    root_directory = os.getcwd()
    directory_to_write_in = get_directory(path_to_file)
    html_string = ""
    with open(path_to_file, "rb") as f:
        os.chdir(directory_to_write_in)
        soup = BeautifulSoup(f.read(), "html.parser")
        for tag in soup.findAll("img"):
            tag["src"] = change_image_format(tag["src"], new_format)
        os.chdir(root_directory)
        html_string = str(soup)
    with open("{}{}".format(directory_to_write_in, updated_page_name), "w") as fw:
        fw.write(html_string)
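# A hedged usage sketch: the page path and target format below are hypothetical
# and assume change_image_format() takes a src path plus a format string and
# returns the rewritten path.
if __name__ == "__main__":
    # Point every <img> at a .webp variant and write cmprs_img_index.html next
    # to the original page.
    change_image_format_transform("example_site/index.html", "webp")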
def compress_image_by_rid_transform(path_to_file, compression_rate, rid,
                                    updated_page_name="cmprs_img_index.html"):
    # Compress only the image whose r_id attribute matches `rid`, then write
    # the updated page alongside the original.
    root_directory = os.getcwd()
    directory_to_write_in = get_directory(path_to_file)
    html_string = ""
    with open(path_to_file, "rb") as f:
        os.chdir(directory_to_write_in)
        soup = BeautifulSoup(f.read(), "html.parser")
        tag = soup.find(['img'], r_id=rid)
        if tag:
            tag["src"] = compress_image_by_percentage(tag["src"], compression_rate)
        os.chdir(root_directory)
        html_string = str(soup)
    with open("{}{}".format(directory_to_write_in, updated_page_name), "w") as fw:
        fw.write(html_string)
def page_stats(path_to_page, path_to_json_file):
    directory_to_write_in = get_directory(path_to_page)
    # data_list handles all the entries, keyed by tag type.
    data_list = {
        "script": {"count": 0, "all": []},
        "video": {"count": 0, "all": []},
        "img": {"count": 0, "all": []},
        "total_size": 0
    }
    soup = None
    with open(path_to_page, "rb") as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    for key in data_list.keys():
        tags = soup.find_all(key)
        for tag in tags:
            try:
                data_list[key]["count"] += 1
                data_list[key]["all"].append(
                    anotate_tag(tag, key, directory_to_write_in))
                # print("tag found")
            except KeyError:
                print("tag not labeled")
    for a in data_list["img"]["all"]:
        data_list["total_size"] += a["mem_footprint"]
    print(len(data_list["img"]["all"]))
    with open("{}".format(path_to_json_file), "w") as fw:
        fw.write(json.dumps(data_list))
    return data_list["total_size"]
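# page_stats() only relies on each annotated entry exposing a "mem_footprint"
# key (size in bytes). A minimal sketch of such an annotator, assuming the tag
# sources are local file paths, might look like this (hypothetical helper, not
# the project's anotate_tag):
def _annotate_tag_sketch(tag, tag_type, base_directory):
    source = tag.get("src", "")
    full_path = os.path.join(base_directory, source)
    return {
        "type": tag_type,
        "src": source,
        # Bytes on disk; 0 when the asset is remote or missing.
        "mem_footprint": os.path.getsize(full_path) if os.path.isfile(full_path) else 0,
    }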
print("Min : {}".format(np.min(np.reshape(DATA, [-1])))) print("IQR : {}".format(IQR(np.reshape(DATA, [-1])))) np.random.shuffle(DATA) #run the fit C, Q, F, P, LOUT = TRAIN.fit_general_MGSM(DATA, segmentation, EMreps=args["em_steps"], batchsize=args["minibatch_size"], lr=args["learning_rate"], ngradstep=args["n_grad_steps"], buff=args["stochastic_buffer"], fq_shared=args["fq_shared"], f_ID=args["f_ID"]) #once it is complete, make the directory and save the data direc = utils.get_directory(direc="./model_files/", tag="model_file") np.savetxt(direc + "/fac.csv", fac) np.savetxt(direc + "/train_log.csv", LOUT) utils.save_dict(direc + "/parameters", args) utils.dump_file(direc + "/paths.pkl", paths) utils.dump_file(direc + "/segs.pkl", segmentation) utils.dump_file(direc + "/kernels.pkl", kernels) utils.dump_file(direc + "/C.pkl", C) utils.dump_file(direc + "/Q.pkl", Q) utils.dump_file(direc + "/F.pkl", F) utils.dump_file(direc + "/P.pkl", P)
def train_network(args):
    ## setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])

    dataset = args["dataset"]
    tag = args["tag"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)
    util.save_dict(direc + "/training_params.csv", args)

    ## get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]
    tr_dat, tr_lab = tf.train.shuffle_batch(data, batch_size, capacity=30,
                                            min_after_dequeue=10, seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]
    te_dat, te_lab = tf.train.shuffle_batch(tedata, batch_size, capacity=30,
                                            min_after_dequeue=10, seed=0)

    ## Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)
    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor, args["nenc"], args["nstate"], True,
        syspicktype=args["syspick"])

    ## Losses
    lik = losses.likelihood_loss(enc, pred, prob)
    rms = losses.rms_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)
    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - 1., 0)
    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)
    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = lik

    adamopt = tf.train.AdamOptimizer(learning_rate=.001)
    fulltrain = adamopt.minimize(loss)

    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()
    sess = tf.Session()
    sess.run(init)
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(direc + "/logfile.log", name="epoch,prederr,prior_entropy")

    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])
        tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})

        if k % 50 == 0:
            rms_error = 0
            for t in range(len(test)):
                dat, lab = test[t]
                r = sess.run(prederr, {input_tensor: dat})
                rms_error += r
            rms_error /= len(test)
            LOG.log("{}\t{}\t{}".format(k, rms_error, pe))

    ## make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []

    NN = args["ntestbatch"]
    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])
        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})
        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])
    sysdense = sess.run(trainable("syspick"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)
    sess.close()
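# trainable() is used above but not defined in this excerpt. One common TF1
# implementation is a collection lookup; this is a sketch of that assumption,
# not necessarily the project's version:
def _trainable_sketch(scope=None):
    # Return the trainable variables whose names fall under `scope`.
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)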
def run(patch_size, n_batch, pca_frac, overcomplete, learning_rate,
        final_learning_rate, n_grad_step, loss_type, n_gauss_dim, n_lat_samp,
        seed, param_save_freq, log_freq, sigma, s1, s2, S, device,
        PCA_truncation, dataset):

    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    np.random.seed(seed)

    dirname = util.get_directory(direc="./model_output/",
                                 tag=loss_type + "_{}".format(n_gauss_dim))

    params = {
        "dataset": dataset,
        "patch_size": patch_size,
        "n_batch": n_batch,
        "pca_frac": pca_frac,
        "overcomplete": overcomplete,
        "learning_rate": np.float32(learning_rate),
        "final_learning_rate": np.float32(final_learning_rate),
        "pca_truncation": PCA_truncation,
        "n_grad_step": n_grad_step,
        "loss_type": loss_type,
        "n_gauss_dim": n_gauss_dim,
        "n_lat_samp": n_lat_samp,
        "sigma": np.float32(sigma),
        "param_save_freq": param_save_freq,
        "log_freq": log_freq,
        "s1": np.float32(s1),
        "s2": np.float32(s2),
        "S": np.float32(S)
    }

    util.dump_file(dirname + "/model_params", params)

    LOG = log.log(dirname + "/logfile.csv")

    netpar = prepare_network(params)

    var = netpar["variance"]
    loss_exp = netpar["loss_exp"]
    recon_err = netpar["recon_err"]
    images = netpar["images"]
    data = netpar["data"]
    varif = netpar["vardat"]

    # get factor to multiply LR by:
    if final_learning_rate < learning_rate:
        LR_factor = np.float32(
            np.exp(-np.log(learning_rate / final_learning_rate) / n_grad_step))
    else:
        print("Final LR must be lower than initial LR! Overriding with LR_factor = 1")
        LR_factor = np.float32(1)

    LR = tf.Variable(np.float32(learning_rate), trainable=False)

    adam = tf.train.AdamOptimizer(learning_rate=LR)  # set up the Adam optimization
    train = adam.minimize(loss_exp)  # run training
    update_LR = tf.assign(LR, LR * LR_factor)

    run_training_loop(data, varif, images, netpar["mean"], n_batch, train,
                      loss_exp, recon_err, LOG, dirname, log_freq, n_grad_step,
                      param_save_freq, update_LR)
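# A hedged example invocation of run(); every hyperparameter value below is a
# placeholder chosen for illustration, and the real project presumably drives
# this from a CLI/argument parser:
if __name__ == "__main__":
    run(patch_size=16, n_batch=32, pca_frac=1.0, overcomplete=1.0,
        learning_rate=1e-3, final_learning_rate=1e-4, n_grad_step=10000,
        loss_type="gauss", n_gauss_dim=2, n_lat_samp=1, seed=0,
        param_save_freq=1000, log_freq=100, sigma=1.0, s1=1.0, s2=1.0, S=1.0,
        device=0, PCA_truncation=0.0, dataset="natural_images")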
def page_prettify(path_to_page, updated_page_name="pretty_index.html"):
    directory_to_write_in = get_directory(path_to_page)
    path_to_output = "{}{}".format(directory_to_write_in, updated_page_name)
    prettify_html(path_to_page, path_to_output)
def train_network(args):
    ## setup
    batch_size = args["batchsize"]
    np.random.seed(args["seed"])
    tf.set_random_seed(args["seed"])

    dataset = args["dataset"]
    tag = args["tag"]
    train_mode = args["train_mode"]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args["device"])

    direc = util.get_directory(direc="./outputs/", tag=tag)
    util.save_dict(direc + "/training_params", args)

    ## get data
    data = get_data.get_data(dataset, "train")
    data = [tf.expand_dims(data[0], -1), data[1]]
    tr_dat, tr_lab = tf.train.shuffle_batch(data, batch_size, capacity=30,
                                            min_after_dequeue=10, seed=0)

    tedata = get_data.get_data(dataset, "test")
    tedata = [tf.expand_dims(tedata[0], -1), tedata[1]]
    te_dat, te_lab = tf.train.shuffle_batch(tedata, batch_size, capacity=30,
                                            min_after_dequeue=10, seed=0)

    ## Build Network
    input_tensor = tf.placeholder(tf.float32, tr_dat.shape)
    enc, prob, pred, syst, off, init_prob = net.build_network(
        input_tensor, args["nenc"], args["nstate"], False,
        syspicktype=args["syspick"])

    ## Losses
    rms = losses.likelihood_loss(enc, pred, prob)
    mine = losses.MINE_loss(enc, prob)

    minevar = trainable(scope="MINE")
    minereg = tf.reduce_max([tf.reduce_max(k**2) for k in minevar])
    othervar = trainable(scope="enc")
    otherreg = tf.reduce_max([tf.reduce_max(k**2) for k in othervar])

    pre_ent = losses.sys_prior_ent_loss(prob)
    post_ent = losses.sys_posterior_ent_loss(prob)

    emean = tf.reduce_mean(enc, axis=[0, 1], keepdims=True)
    varreg = tf.maximum((1. / (.001 + tf.reduce_mean((enc - emean)**2))) - .5, 0)
    meanediff = tf.reduce_mean((enc[:, :-1] - enc[:, 1:])**2)
    prederr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        (tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)
    pererr = tf.reduce_mean(
        tf.expand_dims(prob[:, :-1], -1) *
        ((tf.expand_dims(enc[:, 1:], 2) - pred[:, :-1])**2)) / tf.reduce_mean(
            tf.expand_dims((enc[:, :-1] - enc[:, 1:]), 2)**2)
    scalereg = tf.reduce_mean(tf.reduce_sum(enc**2, 2))

    loss = args["likloss"] * rms
    reg = args["regloss"] * (mine + scalereg + varreg + minereg + otherreg)
    reg += args["ent_loss"] * post_ent

    minegradreg = losses.MINE_grad_regularization(enc)
    reg += args["MINE_grad_reg"] * minegradreg

    ## Optimizers
    adamopt = tf.train.AdamOptimizer(learning_rate=.0001)
    fulltrain = adamopt.minimize(loss + reg)
    minetrain = adamopt.minimize(reg, var_list=trainable("MINE") + trainable("enc"))
    systtrain = adamopt.minimize(loss, var_list=trainable("sys"))

    ## Session setup
    init = tf.global_variables_initializer()
    coord = tf.train.Coordinator()
    sess = tf.Session()
    sess.run(init)
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    ## TRAINING
    test = [sess.run([te_dat, te_lab]) for k in range(3)]

    LOG = log.log(["epoch", "percenterr", "prior_entropy", "encmean", "mine"],
                  PRINT=True)

    dat, lab = sess.run([tr_dat, tr_lab])

    for k in range(args["epochs"]):
        dat, lab = sess.run([tr_dat, tr_lab])  # get data batch

        if train_mode == "full":
            tr, pe = sess.run([fulltrain, pre_ent], {input_tensor: dat})
        elif train_mode == "minefirst":
            if k < args["epochs"] / 2:
                tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
            else:
                tr, pe = sess.run([systtrain, pre_ent], {input_tensor: dat})
        elif train_mode == "mineonly":
            tr, pe = sess.run([minetrain, pre_ent], {input_tensor: dat})
        else:
            print("Training mode not recognized")
            exit()

        if k % 50 == 0:
            teloss = 0
            tmean = 0
            mineloss = 0
            per_error = 0
            for t in range(len(test)):
                dat, lab = test[t]
                l, e, m, r = sess.run([meanediff, enc, mine, pererr],
                                      {input_tensor: dat})
                teloss += l
                tmean += np.max(e**2)
                mineloss += m
                per_error += r
            teloss /= len(test)
            tmean /= len(test)
            mineloss /= len(test)
            per_error /= len(test)

            LOG.log([k, per_error, pe, tmean, mineloss])
            LOG.save(direc + "/logfile.json")

    ## make test data
    lab = []
    dat = []
    e = []
    p = []
    pr = []

    NN = args["ntestbatch"]
    for k in range(NN):
        d, l = sess.run([tr_dat, tr_lab])
        en, pp, ppr = sess.run([enc, prob, pred], {input_tensor: d})
        lab.append(l)
        dat.append(d)
        e.append(en)
        p.append(pp)
        pr.append(ppr)

    lab = np.concatenate(lab)
    dat = np.concatenate(dat)
    e = np.concatenate(e)
    p = np.concatenate(p)
    pr = np.concatenate(pr)

    sys, O = sess.run([syst, off])
    sysdense = sess.run(trainable("syspick_dense"))

    for s in range(len(sysdense)):
        np.savetxt(direc + "/nascar_syspick_{}.csv".format(s), sysdense[s])

    np.savetxt(direc + "/nascar_lab.csv", np.reshape(lab, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_dat.csv", np.reshape(dat, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_enc.csv", np.reshape(e, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pro.csv", np.reshape(p, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_pre.csv", np.reshape(pr, [batch_size * NN, -1]))
    np.savetxt(direc + "/nascar_sys.csv", np.reshape(sys, [len(sys), -1]))
    np.savetxt(direc + "/nascar_O.csv", O)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=5)
    sess.close()
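# A hedged example of the args dict train_network() expects. The values are
# placeholders, but the keys match every lookup made in the function above:
if __name__ == "__main__":
    example_args = {
        "batchsize": 32, "seed": 0, "dataset": "nascar", "tag": "debug_run",
        "train_mode": "full", "device": 0, "nenc": 2, "nstate": 4,
        "syspick": "dense", "likloss": 1.0, "regloss": 1.0, "ent_loss": 0.0,
        "MINE_grad_reg": 0.0, "epochs": 1000, "ntestbatch": 10,
    }
    train_network(example_args)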