path_ckpt = "../trial/ckpt/kudo396" path_emb = "../data/stance_emb.npy" path_emb_sp = "../data/stance_emb_sample.npy" path_vocab = "../trial/data/vocab.model" path_data = "../data/stance.npz" from model import tf, vAe from util import partial from util_io import load_txt from util_np import np, partition, vpack import util_sp as sp from util_np import vpack # load test sentences text = np.load(path_data)['text'] # load sentencepiece model vocab = sp.load_spm(path_vocab) # Load the model model = vAe('infer') # Restore the session sess = tf.InteractiveSession() tf.train.Saver().restore(sess, path_ckpt) ################################ # deterministic representation # ################################ # encode text with sentence piece model
from util_sp import load_spm, encode_capped, encode_capped_sample_pair
from util_tf import tf, pipe

config = load_json(A.config)
P = Record(config['paths'])
C = Record(config['model'])
T = Record(config['train'])

tf.set_random_seed(A.seed)

#############
# load data #
#############

vocab = load_spm(P.vocab)
valid = np.load(P.valid)

def batch(size=T.batch_train, path=P.train, vocab=vocab, seed=A.seed, kudo=A.sample, max_len=T.max_len):
    pac = lambda arrs: vpack(arrs, (size, max(map(len, arrs))), eos, np.int32)
    enc = encode_capped_sample_pair if kudo else encode_capped
    raw = tuple(load_txt(path))
    eos = vocab.eos_id()
    bat = []
    for i in sample(len(raw), seed):
        if size == len(bat):
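            # assumption: the rest of this loop is reconstructed.  Presumably
            # the collected batch is packed and yielded once it is full, then
            # refilled; the exact call signature of `enc` is an assumption.
            yield pac(bat)
            bat = []
        bat.append(enc(vocab, raw[i], max_len))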
from trial import config as C, paths as P, train as T
from util import partial, comp, select
from util_io import pform, load_txt, save_txt
from util_np import np, partition, batch_sample
from util_sp import load_spm, encode, decode
from util_tf import tf, pipe

tf.set_random_seed(C.seed)
C.trial = 'm4_'

#############
# load data #
#############

# valid_en, train_en = np.load(pform(P.data, "valid_en.npy")), np.load(pform(P.data, "train_en.npy"))
valid_nl, train_nl = np.load(pform(P.data, "valid_nl.npy")), np.load(pform(P.data, "train_nl.npy"))
# valid_de, train_de = np.load(pform(P.data, "valid_de.npy")), np.load(pform(P.data, "train_de.npy"))
valid_da, train_da = np.load(pform(P.data, "valid_da.npy")), np.load(pform(P.data, "train_da.npy"))
# valid_sv, train_sv = np.load(pform(P.data, "valid_sv.npy")), np.load(pform(P.data, "train_sv.npy"))

train_nl = train_nl[:2**17].copy()
train_da = train_da[:2**17].copy()

data_index = 1, 3
data_valid = valid_nl, valid_da
data_train = train_nl, train_da

def batch(arrs, size= C.batch_train, seed= C.seed):
    size //= len(arrs) * (len(arrs) - 1)
    for i in batch_sample(len(arrs[0]), size, seed):
        yield tuple(arr[i] for arr in arrs)
from itertools import permutations
from tqdm import tqdm
from trial import config as C, paths as P, train as T
from util import partial, comp, select
from util_io import pform, load_txt, save_txt
from util_np import np, partition, batch_sample
from util_sp import load_spm, encode, decode
from util_tf import tf, pipe

tf.set_random_seed(C.seed)
C.trial = 'm1_'

#############
# load data #
#############

valid_en, train_en = np.load(pform(P.data, "valid_en.npy")), np.load(pform(P.data, "train_en.npy"))
# valid_nl, train_nl = np.load(pform(P.data, "valid_nl.npy")), np.load(pform(P.data, "train_nl.npy"))
valid_de, train_de = np.load(pform(P.data, "valid_de.npy")), np.load(pform(P.data, "train_de.npy"))
# valid_da, train_da = np.load(pform(P.data, "valid_da.npy")), np.load(pform(P.data, "train_da.npy"))
valid_sv, train_sv = np.load(pform(P.data, "valid_sv.npy")), np.load(pform(P.data, "train_sv.npy"))

data_index = 0, 2, 4
data_valid = valid_en, valid_de, valid_sv
data_train = train_en, train_de, train_sv

def batch(arrs, size= C.batch_train, seed= C.seed):
    size //= len(arrs) * (len(arrs) - 1)
    for i in batch_sample(len(arrs[0]), size, seed):
        yield tuple(arr[i] for arr in arrs)

perm = comp(tuple, partial(permutations, r= 2))
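# usage sketch (not in the original file): `perm` enumerates the ordered
# language pairs, and `batch` divides the configured batch size by the number
# of those pairs (len(arrs) * (len(arrs) - 1)), so every src->tgt permutation
# drawn from one yield still fits the overall budget.  The feeding step below
# is assumed.
#
#   for en, de, sv in batch(data_train):
#       for (i_src, src), (i_tgt, tgt) in perm(zip(data_index, (en, de, sv))):
#           ...  # feed src -> tgt with language indices i_src, i_tgt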
path_ckpt = "../trial/ckpt/kudo396" path_emb = "../data/test_data_emb.npy" path_emb_sp = "../data/test_data_emb_sample.npy" path_vocab = "../trial/data/vocab.model" path_data = "../data/test_data.npz" from model import tf, vAe, encode, decode from util import partial from util_io import load_txt from util_np import np, partition, vpack import util_sp as sp from util_np import vpack # load test sentences text = np.load(path_data)["posts"] # load sentencepiece model vocab = sp.load_spm(path_vocab) # Load the model model = vAe('infer') # Restore the session sess = tf.InteractiveSession() tf.train.Saver().restore(sess, path_ckpt) ################################ # deterministic representation # ################################ # encode text with sentence piece model
path_csv = "../docs/results_iac/clustering.csv" path_emb = "../data/test_data_emb.npy" path_emb_sp = "../data/test_data_emb_sample.npy" from model import tf, vAe, encode, decode from util import partial from util_io import load_txt, save_txt from util_np import np, partition, vpack from util_np import vpack import pandas as pd import util_sp as sp # load data df = pd.read_csv(path_csv) emb = np.load(path_emb) emb_sp = np.load(path_emb_sp) # load sentencepiece model vocab = sp.load_spm(path_vocab) # Load the model model = vAe('infer') # Restore the session sess = tf.InteractiveSession() tf.train.Saver().restore(sess, path_ckpt) ########################### # generate from centroids # ###########################
from model import Transformer
from os.path import expanduser, join
from tqdm import tqdm
from util import PointedIndex
from util_io import decode
from util_np import np, permute
from util_tf import tf, batch

logdir = expanduser("~/cache/tensorboard-logdir/explicharr")
tf.set_random_seed(0)

###############
# preparation #
###############

src_train = np.load("trial/data/train_src.npy")
tgt_train = np.load("trial/data/train_tgt.npy")
src_valid = np.load("trial/data/valid_src.npy")
tgt_valid = np.load("trial/data/valid_tgt.npy")

assert src_train.shape[1] <= len_cap
assert tgt_train.shape[1] <= len_cap
assert src_valid.shape[1] <= len_cap
assert tgt_valid.shape[1] <= len_cap

epoch = len(src_train) // batch_size

# # for profiling
# from util_tf import profile
# m = Transformer.new().data()
# forcing = m.forcing(trainable= False)
# autoreg = m.autoreg(trainable= False)
# feed = {m.src_: src_train[:batch_size], m.tgt_: tgt_train[:batch_size]}
from model import Transformer
from os.path import expanduser, join
from tqdm import tqdm
from util import comp
from util_io import path, save
from util_np import np, vpack, c2r
from util_tf import tf, batch

logdir = expanduser("~/cache/tensorboard-logdir/i-synth")
tf.set_random_seed(0)

###############
# preparation #
###############

index = np.load("trial/data/index.npy").item()
texts = np.load("trial/data/texts.npy")
names = np.load("trial/data/names.npy")

epoch, split = divmod(len(texts), batch_size)
print("{} batches of {} training instances, {} validation".format(
    epoch, batch_size, split))

def load_batch(names, load=comp(np.load, "trial/data/grams/{}.npy".format)):
    names = names.astype(np.str)
    x = vpack(map(load, names), complex('(nan+nanj)'), 1, 1)
    # x = vpack(map(comp(load, path), names), complex('(nan+nanj)'), 1, 1)
    x[:, 0] = 0j
    x = c2r(x)
    _, t, d = x.shape
    assert t <= len_cap
def train(anomaly_class=8, dataset="cifar", n_dis=1, epochs=25, dim_btlnk=32, batch_size=64, loss="mean", context_weight=1, dim_d=64, dim_g=64, extra_layers=0, gpu="0"): #set gpu os.environ["CUDA_VISIBLE_DEVICES"] = gpu path_log = f"/cache/tensorboard-logdir/{dataset}" path_ckpt = "/project/multi-discriminator-gan/ckpt" path_data = "/project/multi-discriminator-gan/data" #reset graphs and fix seeds tf.reset_default_graph() if 'sess' in globals(): sess.close() rand = RandomState(0) tf.set_random_seed(0) #load data if dataset == "ucsd1": x_train = np.load("./data/ucsd1_train_x.npz")["arr_0"] / 255 y_train = np.load("./data/ucsd1_train_y.npz")["arr_0"] x_test = np.load("./data/ucsd1_test_x.npz")["arr_0"] / 255 y_test = np.load("./data/ucsd1_test_y.npz")["arr_0"] elif dataset == "uscd2": x_train = np.load("./data/ucsd2_train_x.npz")["arr_0"] y_train = np.load("./data/ucsd2_train_y.npz")["arr_0"] x_test = np.load("./data/ucsd2_test_x.npz")["arr_0"] y_test = np.load("./data/ucsd2_test_y.npz")["arr_0"] else: if dataset == "mnist": (train_images, train_labels), ( test_images, test_labels) = tf.keras.datasets.mnist.load_data() train_images = resize_images(train_images) test_images = resize_images(test_images) else: (train_images, train_labels), ( test_images, test_labels) = tf.keras.datasets.cifar10.load_data() train_labels = np.reshape(train_labels, len(train_labels)) test_labels = np.reshape(test_labels, len(test_labels)) inlier = train_images[train_labels != anomaly_class] #data_size = prod(inlier[0].sha x_train = inlier / 255 #x_train = np.reshape(inlier, (len(inlier), data_size))/255 #y_train = train_labels[train_labels!=anomaly_class] y_train = np.zeros(len(x_train), dtype=np.int8) # dummy outlier = train_images[train_labels == anomaly_class] x_test = np.concatenate([outlier, test_images]) / 255 #x_test = np.reshape(np.concatenate([outlier, test_images]) # ,(len(outlier)+len(test_images), data_size))/255 y_test = np.concatenate( [train_labels[train_labels == anomaly_class], test_labels]) y_test = [0 if y != anomaly_class else 1 for y in y_test] x_test, y_test = unison_shfl(x_test, np.array(y_test)) img_size_x = x_train[0].shape[0] img_size_y = x_train[0].shape[1] channel = x_train[0].shape[-1] trial = f"{dataset}_{loss}_dis{n_dis}_{anomaly_class}_w{context_weight}_btlnk{dim_btlnk}_d{dim_d}_g{dim_g}e{extra_layers}" # data pipeline batch_fn = lambda: batch2(x_train, y_train, batch_size) x, y = pipe(batch_fn, (tf.float32, tf.float32), prefetch=4) #z = tf.random_normal((batch_size, z_dim)) # load graph mg_gan = MG_GAN.new(img_size_x, channel, dim_btlnk, dim_d, dim_g, n_dis, extra_layers=0) model = MG_GAN.build(mg_gan, x, y, context_weight, loss) # start session, initialize variables sess = tf.InteractiveSession() saver = tf.train.Saver() wrtr = tf.summary.FileWriter(pform(path_log, trial)) wrtr.add_graph(sess.graph) ### if load pretrained model # pretrain = "modelname" #saver.restore(sess, pform(path_ckpt, pretrain)) ### else: auc_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='AUC') init = tf.group(tf.global_variables_initializer(), tf.variables_initializer(var_list=auc_vars)) sess.run(init) #if "ucsd" in dataset: summary_test = tf.summary.merge([ tf.summary.scalar('g_loss', model.g_loss), tf.summary.scalar("lambda", model.lam), tf.summary.scalar("gl_rec", model.gl_rec), tf.summary.scalar("gl_adv", model.gl_adv), tf.summary.scalar("gl_lam", model.gl_lam), tf.summary.scalar('d_loss_mean', model.d_loss_mean), tf.summary.scalar('d_max', model.d_max) #, tf.summary.scalar('d_loss', 
model.d_loss) , tf.summary.scalar("AUC_gx", model.auc_gx) ]) if dataset == "ucsd1": summary_images = tf.summary.merge( (tf.summary.image("gx", model.gx, max_outputs=8), tf.summary.image("x", model.x, max_outputs=8), tf.summary.image( 'gx400', spread_image(tf.concat([model.gx, model.x], axis=1), 8, 2, img_size_x, img_size_y, channel)))) else: summary_images = tf.summary.merge( (tf.summary.image("gx", model.gx, max_outputs=8), tf.summary.image( 'gx400', spread_image(model.gx[:400], 20, 20, img_size_x, img_size_y, channel)), tf.summary.image("x", model.x, max_outputs=8))) if n_dis > 1: d_wrtr = { i: tf.summary.FileWriter(pform(path_log, trial + f"d{i}")) for i in range(n_dis) } summary_discr = { i: tf.summary.scalar('d_loss_multi', model.d_loss[i]) for i in range(n_dis) } def summ(step): fetches = model.g_loss, model.lam, model.d_loss_mean, model.auc_gx results = map( np.mean, zip(*(sess.run(fetches, { model['x']: x_test[i:j], model['y']: y_test[i:j] }) for i, j in partition(len(x_test), batch_size, discard=False)))) results = list(results) wrtr.add_summary(sess.run(summary_test, dict(zip(fetches, results))), step) if dataset == "ucsd1": # bike, skateboard, grasswalk, shopping cart, car, normal, normal, grass wrtr.add_summary( sess.run( summary_images, { model.x: x_test[[990, 1851, 2140, 2500, 2780, 2880, 3380, 3580]] }), step) else: wrtr.add_summary(sess.run(summary_images, {model.x: x_test}), step) wrtr.flush() def summ_discr(step): fetches = model.d_loss results = map( np.mean, zip(*(sess.run(fetches, { model['x']: x_test[i:j], model['y']: y_test[i:j] }) for i, j in partition(len(x_test), batch_size, discard=False)))) results = list(results) if n_dis > 1: # put all losses of the discriminators in one plot for i in range(n_dis): d_wrtr[i].add_summary( sess.run(summary_discr[i], dict(zip(fetches, results))), step) #d_wrtr[i].add_summary(sess.run(summary_discr[i], dict([(fetches[i], results[i])])), step) d_wrtr[i].flush() #def log(step # , wrtr= wrtr # , log = tf.summary.merge([tf.summary.scalar('g_loss', model.g_loss) # , tf.summary.scalar('d_loss', tf.reduce_mean(model.d_loss)) # , tf.summary.scalar("lambda", model.lam) # , tf.summary.image("gx", model.gx, max_outputs=5) # , tf.summary.image('gx400', spread_image(model.gx[:400], 20,20, img_size, img_size, channel)) # #, tf.summary.scalar("AUC_dgx", model.auc_dgx) # #, tf.summary.scalar("AUC_dx", model.auc_dx) # , tf.summary.scalar("AUC_gx", model.auc_gx)]) # , y= y_test # , x= x_test): # wrtr.add_summary(sess.run(log, {model["x"]:x # , model["y"]:y}) # , step) # wrtr.flush() steps_per_epoch = len(x_train) // batch_size - 1 for epoch in tqdm(range(epochs)): for i in range(steps_per_epoch): #sess.run(model["train_step"]) sess.run(model['d_step']) sess.run(model['g_step']) # tensorboard writer #if "ucsd" in dataset: summ(sess.run(model["step"]) // steps_per_epoch) #else: # log(sess.run(model["step"])//steps_per_epoch) if n_dis > 1: summ_discr(sess.run(model["step"]) // steps_per_epoch) saver.save(sess, pform(path_ckpt, trial), write_meta_graph=False)
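# usage sketch (not part of the original script): run the default CIFAR-10
# setup once per held-out anomaly class, using the parameters of train() above.
if __name__ == "__main__":
    for anomaly_class in range(10):
        train(anomaly_class=anomaly_class, dataset="cifar", epochs=25)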