def sr_generate(self, data):
    """Restore the trained SR checkpoint and super-resolve every image in
    `data.filelist`, writing each reconstructed result back to disk."""
    ckpt_path = os.path.join(
        config.MODEL_DIR,
        "model" + str(config.BATCH_SIZE) + "_" + str(config.NUM_EPOCHS) + ".ckpt")
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)
        for img_file in data.filelist:
            # Cut the current image into patches and run them through the net.
            data.process_img(img_file)
            patch_batch = np.asarray(data.batch)
            sr_patches = sess.run(self.output, feed_dict={self.inputs: patch_batch})
            #utils.display_batch_patch(patch_batch, sr_patches)
            # Reassemble the upscaled patches and persist the full image.
            full_image = utils.stitch(sr_patches)
            utils.reconstruct(full_image, img_file)
def linear(x, output_size, scope, add_tanh=False, wd=None):
    """Dense layer over the last dimension of an arbitrary-rank tensor.

    The input is flattened to rank 2 (keeping the last dim), pushed through
    W*x + b (optionally tanh), then reshaped back to the input's rank.
    When `wd` is given, a weight-decay term is registered via add_wd.
    """
    with tf.variable_scope(scope):
        # Flatten everything but the last dimension:
        # e.g. [N,M,JX,JQ,2d] => [N*M*JX*JQ,2d]
        keep_last = 1
        flat_in = flatten(x, keep_last)
        # output_size may arrive as a tf Dimension; unwrap to a plain int.
        if type(output_size) is not int:
            output_size = output_size.value
        in_dim = flat_in.get_shape()[-1].value
        W = tf.get_variable(
            "W", dtype="float",
            initializer=tf.truncated_normal([in_dim, output_size], stddev=0.1))
        bias = tf.get_variable(
            "b", dtype="float",
            initializer=tf.constant(0.0, shape=[output_size]))
        flat_out = tf.matmul(flat_in, W) + bias
        if add_tanh:
            flat_out = tf.tanh(flat_out, name="tanh")
        if wd is not None:
            add_wd(wd)
        # Restore the original leading dimensions.
        return reconstruct(flat_out, x, keep_last)
def softmax(logits, scope=None):
    """Softmax over the last dimension of an arbitrary-rank tensor."""
    # name_scope (not variable_scope): no variables are created here.
    with tf.name_scope(scope or "softmax"):
        return reconstruct(tf.nn.softmax(flatten(logits, 1)), logits, 1)
def _discriminator_output(self, generated_image, inputs_var, real=True):
    """Score an image with the discriminator.

    In the `real` branch the middle contour from `inputs_var` is set into
    the generated image before scoring; otherwise the generated image is
    scored directly.
    """
    if not real:
        return lasagne.layers.get_output(self.discriminator, generated_image)
    # We set the middle contour from the inputs into the generated image.
    contour = utils.reconstruct(inputs_var[1], generated_image, flag=1)
    return lasagne.layers.get_output(self.discriminator, contour)
def main(args):
    """Run a saved Keras model over every dataset found in a directory and
    save the inputs (I) and predictions (P) side by side as .mat files.

    Args:
        args: parsed CLI namespace with `verbose`, `directory` and `model`.
    """
    # Settings
    VERBOSE = args.verbose
    DIR = args.directory
    MODEL = args.model
    # Load Model
    model = tf.keras.models.load_model(MODEL, compile=False)
    # Create Data Pipeline
    pipe = utils.DataPipe(DIR)
    assert len(pipe.files) > 0, "No tif files found"
    for name, DS in pipe.image_as_DS():
        if VERBOSE:
            print(f"Size of DS is currently: {sys.getsizeof(DS)}")
        # Reconstruct I: gather every batch, then concatenate once.
        # (The previous per-batch np.append re-copied the whole accumulated
        # array each iteration, making this step O(n^2).)
        # NOTE(review): assumes DS can be iterated twice (here and in
        # model.predict below) — holds for tf.data datasets; confirm.
        I = np.concatenate([np.asarray(elem) for elem in DS], axis=0)
        # Predict
        P = model.predict(DS)
        # Reconstruct
        P = utils.reconstruct(P, name)
        I = utils.reconstruct(I, name)
        if VERBOSE:
            print(f"Size of P is currently: {sys.getsizeof(P)}")
            print(f"Size of I is currently: {sys.getsizeof(I)}")
        # Save
        savemat(name.replace('tfrecord.gz', 'mat'), {'I': I, 'P': P})
        if VERBOSE:
            print(f"Saving: {name.replace('tfrecord.gz','mat')}")
def main():
    """End-to-end local-feature evaluation: build a hybrid feature database,
    match + geometrically verify, run COLMAP reconstruction, and dump the
    collected stats as JSON lines to a per-experiment text file."""
    # Set CUDA.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")  # NOTE(review): `device` is never used below — confirm intent
    torch.set_grad_enabled(False)  # inference only; no autograd needed

    # Parse arguments.
    args = parse_args()

    # All derived paths live under the dataset directory, keyed by exp_name.
    paths = types.SimpleNamespace()
    paths.database_path = os.path.join(args.dataset_path, '%s.db' % args.exp_name)
    paths.image_path = os.path.join(args.dataset_path, 'images')
    paths.match_list_path = os.path.join(args.dataset_path, 'match-list-exh.txt')
    paths.sparse_path = os.path.join(args.dataset_path, 'sparse-%s' % args.exp_name)
    paths.output_path = os.path.join(args.dataset_path, 'stats-%s.txt' % args.exp_name)

    # Copy reference database (refuse to clobber an existing run).
    if os.path.exists(paths.database_path):
        raise FileExistsError('Database file already exists.')
    shutil.copy(os.path.join(args.dataset_path, 'database.db'), paths.database_path)

    # Build and translate database.
    image_features = build_hybrid_database(args.features, args.dataset_path, paths.database_path)

    # Matching + GV + reconstruction.
    # NOTE(review): `args.features` is used above but `args.feature`
    # (singular) here — verify both attributes exist on the parsed args.
    match_features_subset(args.feature, image_features, args.colmap_path, paths.database_path, paths.image_path, paths.match_list_path)
    torch.cuda.empty_cache()  # free matcher memory before verification
    matching_stats = geometric_verification(args.colmap_path, paths.database_path, paths.match_list_path + '.aux')
    os.remove(paths.match_list_path + '.aux')  # the .aux list is temporary
    largest_model_path, reconstruction_stats = reconstruct(args.colmap_path, paths.database_path, paths.image_path, paths.sparse_path)
    extra_stats = compute_extra_stats(image_features, largest_model_path)

    # One JSON object per line: matching, reconstruction, extra stats.
    with open(paths.output_path, 'w') as f:
        f.write(json.dumps(matching_stats))
        f.write('\n')
        f.write(json.dumps(reconstruction_stats))
        f.write('\n')
        f.write(json.dumps(extra_stats))
        f.write('\n')
def main(_):
    """Entry point: dispatch on config.mode to training, data preparation,
    generation/reconstruction, or one of the component self-tests."""
    config = flags.FLAGS
    mode = config.mode
    if mode == "train":
        assert config.dataset in ("mnist", "cifar10")
        config.in_shape = (config.batch_size, 32, 32, 3)
        # NOTE(review): eval() on flag strings is acceptable only for trusted
        # CLI input — never route untrusted data through these.
        config.block_list = [eval(x) for x in config.block_list]
        config.stride_list = [eval(x) for x in config.stride_list]
        config.channel_list = [eval(x) for x in config.channel_list]
        train(config)
    elif mode == "debug":
        # Tiny fixed architecture and a single step for quick smoke runs.
        config.train_steps = 1
        config.viz_steps = 1
        config.block_list = [2, 2, 2]
        config.channel_list = [3, 4, 5]
        config.stride_list = [1, 1, 2]
        config.in_shape = (config.batch_size, 28, 28, 1)
        train(config, debug=True)
    else:
        # The remaining modes map one-to-one onto a callable; unknown modes
        # are a silent no-op, matching the original elif chain.
        actions = {
            "prepare": lambda: download_dataset(config),
            "sn": test_spectral_norm,
            "iresnet": test_iresnet,
            "trace": test_trace_approximation,
            "inverse": test_block_inversion,
            "squeeze": test_squeeze,
            "trace_sn": test_trace_sn,
            "generate": lambda: generate(config),
            "reconstruct": lambda: reconstruct(config),
        }
        action = actions.get(mode)
        if action is not None:
            action()
def linear(x, output_size, scope, add_tanh=False, wd=None, bn=False, bias=False, is_train=None, ln=False):
    """Dense layer over the last dim of an arbitrary-rank tensor, with
    optional batch norm, layer norm, bias, tanh and weight decay.

    Args:
        x: input tensor of any rank; only the last dim is transformed.
        output_size: int (or tf Dimension) output width.
        scope: variable scope name.
        add_tanh: apply tanh to the output.
        wd: weight-decay coefficient, registered via add_wd when not None.
        bn: apply batch_norm to the flattened input (requires is_train).
        bias: add a learned bias term.
        is_train: training-phase flag; must be provided when bn is set.
        ln: apply layer_norm to the flattened input.

    Returns:
        Tensor with x's leading dims and last dim `output_size`.
    """
    with tf.variable_scope(scope):
        # Flatten everything but the last dim:
        # e.g. [N,M,JX,JQ,2d] => [N*M*JX*JQ,2d]
        keep = 1
        flat_x = flatten(x, keep)
        bias_start = 0.0
        # output_size may arrive as a tf Dimension; unwrap to a plain int.
        if not (type(output_size) == type(1)):
            output_size = output_size.value
        # optional normalization of the flattened input
        if bn:
            assert is_train is not None
            flat_x = batch_norm(flat_x, scope="bn", is_train=is_train)
        if ln:
            flat_x = layer_norm(flat_x, scope="ln")
        W = tf.get_variable("W", dtype="float", initializer=tf.truncated_normal([flat_x.get_shape()[-1].value, output_size], stddev=0.1))
        flat_out = tf.matmul(flat_x, W)
        if bias:
            # Fixed: use a separate name `b` instead of shadowing the
            # boolean `bias` parameter with the bias variable.
            b = tf.get_variable("b", dtype="float", initializer=tf.constant(bias_start, shape=[output_size]))
            flat_out += b
        if add_tanh:
            flat_out = tf.tanh(flat_out, name="tanh")
        #flat_out = tf.nn.dropout(flat_out,keep_prob)
        if wd is not None:
            add_wd(wd)
        # Restore the original leading dimensions.
        out = reconstruct(flat_out, x, keep)
        return out
def eval():
    """Evaluate learned codebooks on SIFT1M: report total quantization
    distortion and write recall@1..100 against the ground truth."""
    config = read_config()
    C, B = restore(config.M, config.K)
    base, _, query, gt = sift1m_read()
    D = base.shape[1]
    del config
    # Decode the database vectors from codebooks C and codes B.
    quantized = reconstruct(C, B)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        total_distortion = np.mean(np.sum(np.square(quantized - base), axis=1))
        print('total distortion:', total_distortion)
        recall = np.zeros([query.shape[0], 100], dtype=np.float32)
        # Brute-force exact search over the quantized database on the GPU.
        database = tf.constant(quantized, dtype=tf.float32)
        query_gpu = tf.placeholder(tf.float32, shape=(1, D))
        print(database.get_shape().as_list())
        distance = tf.norm(query_gpu - database, ord='euclidean', axis=1)
        print(distance.get_shape().as_list())
        # top_k over negated distances == the 100 nearest neighbours.
        _, index = tf.nn.top_k(-distance, k=100, sorted=True)
        print(index.get_shape().as_list())
        sess.run(tf.global_variables_initializer())
        for qi in trange(query.shape[0], ncols=50):
            top100 = sess.run(index, {query_gpu: query[qi][None, :]})
            # recall@k is cumulative: a hit at rank r counts for every k >= r.
            recall[qi] = np.cumsum(np.isin(top100, gt[qi][0]))
        recall = np.mean(recall, 0)
        np.savetxt('./result/recall.txt', recall)
def my_run():
    """Two-view 3D reconstruction demo: match features between the first two
    images, recover relative pose from the essential matrix, triangulate a
    point cloud, then save and display it."""
    img_root = './data/'
    imgs, feats, K = utils.build_img_info(img_root)
    F, pair, match = utils.build_F_pair_match(feats)
    # F, H, pair, match = utils.build_F_H_pair_match(feats)
    # points, edges, tracks, G = utils.extract_points_edges_tracks_G(pair, len(feats))
    idx_a, idx_b = 0, 1
    matches = pair[(idx_a, idx_b)]
    pts_a, pts_b = matches['pts1'], matches['pts2']
    K_a, K_b = K[idx_a], K[idx_b]
    E = utils.get_E_from_F(F, K_a, K_b)
    # Initialize the reconstruction: first camera sits at the origin.
    R_a = np.eye(3, 3)
    T_a = np.zeros((3, 1))
    R_b, T_b = utils.extract_R_T(E, K_a, R_a, T_a, K_b, pts_a, pts_b, 5)
    cloud3d = utils.reconstruct(K_a, R_a, T_a, K_b, R_b, T_b, pts_a, pts_b)
    utils.save_3d(cloud3d, './my_out/mycloud_3D_euclid.ply')
    # np.save('./cloud3d.npy', cloud3d)
    print(cloud3d.shape)
    #utils.draw_v1(cloud3d)
    utils.show3d('./my_out/mycloud_3D_euclid.ply', 'the result')
def q2(path):
    """Flag anomalously-behaving cars via k-means reconstruction error.

    Reads one CSV of signal values per car from `path`, clusters windowed
    segments of the healthy cars' signals with KMeans, then reconstructs
    every car's signal from the cluster centroids and flags cars whose
    98th-percentile reconstruction error exceeds a fixed threshold.

    Args:
        path: directory containing one CSV per car with a `sig_value` column.
    """
    print("Reading data...")
    start = time.time()
    filepath = os.path.abspath(path)
    # Load every CSV in the directory; keys are the bare file names.
    all_cars = {f: pd.read_csv(os.path.join(filepath, f)) for f in os.listdir(filepath)}
    print(len(all_cars))

    print('Preparing data for clustering...')
    # Select signal time-series data from cars behaving correctly (the
    # known-anomalous cars are excluded from training).
    anomalous_cars = [3, 7]
    signal_values = []
    for fname, car_df in all_cars.items():
        # NOTE(review): takes the LAST character of the file stem as the car
        # number, so this only works for single-digit car ids — confirm.
        car_num = int(fname.split('/')[-1].split('.')[0][-1])
        if car_num not in anomalous_cars:
            signal_values.extend(np.array(car_df.sig_value))
    print('Length signal_values:', len(signal_values))

    # Split the signal data into overlapping segments.
    segment_len = 100  # try with 100
    slide_len = 10  # try with 10
    segments = utils.sliding_segments(signal_values, segment_len, slide_len)
    print("Produced %d signal values segments" % len(segments))

    # Apply a window function forcing each segment's start/end to zero.
    window_rads = np.linspace(0, np.pi, segment_len)
    window = np.sin(window_rads)**2
    windowed_segments = utils.get_windowed_segments(segments, segment_len, window)

    # Apply k-means clustering on the windowed segments.
    print('Clustering...')
    n_clusters = 150  # test different values; was named `k`, which the loop below shadowed
    clusterer = KMeans(n_clusters=n_clusters)
    clusterer.fit(windowed_segments)

    # Reconstruct the training signal from the learned centroids.
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    print('Reconstructing...')
    reconstructed = utils.reconstruct(signal_values, window, clusterer)

    # Anomaly detection: reconstruct each car's own signal and compare.
    print('Anomaly Detection...')
    # Avg. 98th-percentile reconstruction error for healthy cars is ~105;
    # the known-bad cars (3 and 7) score ~320-340.
    error_threshold = 150
    for fname, car_df in all_cars.items():
        car_num = int(fname.split('/')[-1].split('.')[0][-1])
        car_signal_data = car_df.sig_value
        reconstructed = utils.reconstruct(car_signal_data, window, clusterer)
        error = reconstructed - car_signal_data
        error_98th_percentile = np.percentile(error, 98)
        if error_98th_percentile > error_threshold:
            print("Car %d not performing appropriately due to potential damage. Needs revison."%(car_num))
    end = time.time()
    print('Function took {:.3f} ms'.format((end-start)*1000.0))
def main(args):
    """Train (or evaluate / decode from) a text VAE.

    Side effects: creates the experiment directory, writes checkpoints and
    logs, and rebinds the module-level `logging` helper.

    NOTE(review): relies on module-level globals not visible here
    (decay_epoch, lr_decay, max_decay, clip_grad, ns) — confirm they are
    defined at file scope.
    """
    global logging
    # don't make an exp dir when only reconstructing / evaluating
    debug = (args.reconstruct_from != "" or args.eval == True)
    logging = create_exp_dir(args.exp_dir, scripts_to_save=None, debug=debug)
    if args.cuda:
        logging('using cuda')
    logging(str(args))
    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    # Data: the vocab is built from the training set, shared with val/test.
    train_data = MonoTextData(args.train_data, label=args.label)
    vocab = train_data.vocab
    vocab_size = len(vocab)
    val_data = MonoTextData(args.val_data, label=args.label, vocab=vocab)
    test_data = MonoTextData(args.test_data, label=args.label, vocab=vocab)
    logging('Train data: %d samples' % len(train_data))
    logging('finish reading datasets, vocab size is %d' % len(vocab))
    logging('dropped sentences: %d' % train_data.dropped)
    #sys.stdout.flush()
    # log roughly 10 times per epoch
    log_niter = (len(train_data) // args.batch_size) // 10

    model_init = uniform_initializer(0.01)
    emb_init = uniform_initializer(0.1)
    #device = torch.device("cuda" if args.cuda else "cpu")
    device = "cuda" if args.cuda else "cpu"
    args.device = device

    if args.enc_type == 'lstm':
        encoder = GaussianLSTMEncoder(args, vocab_size, model_init, emb_init)
        args.enc_nh = args.dec_nh
    else:
        raise ValueError("the specified encoder type is not supported")
    decoder = LSTMDecoder(args, vocab, model_init, emb_init)
    vae = VAE(encoder, decoder, args).to(device)

    if args.load_path:
        loaded_state_dict = torch.load(args.load_path)
        #curr_state_dict = vae.state_dict()
        #curr_state_dict.update(loaded_state_dict)
        vae.load_state_dict(loaded_state_dict)
        logging("%s loaded" % args.load_path)
        if args.reset_dec:
            # re-initialize the decoder while keeping the loaded encoder
            vae.decoder.reset_parameters(model_init, emb_init)

    if args.eval:
        # evaluation-only path: test metrics + importance-weighted NLL
        logging('begin evaluation')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(batch_size=args.batch_size, device=device, batch_first=True)
            test(vae, test_data_batch, "TEST", args)
            au, au_var = calc_au(vae, test_data_batch)
            logging("%d active units" % au)
            # print(au_var)
            # batch_size=1 for the importance-weighted estimate
            test_data_batch = test_data.create_data_batch(batch_size=1, device=device, batch_first=True)
            nll, ppl = calc_iwnll(vae, test_data_batch, args)
            logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))
        return

    if args.reconstruct_from != "":
        # decoding-only path: reconstruct the test set from a checkpoint
        print("begin decoding")
        sys.stdout.flush()
        vae.load_state_dict(torch.load(args.reconstruct_from))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(batch_size=args.batch_size, device=device, batch_first=True)
            # test(vae, test_data_batch, "TEST", args)
            reconstruct(vae, test_data_batch, vocab, args.decoding_strategy, args.reconstruct_to)
        return

    # separate optimizers for encoder and decoder
    if args.opt == "sgd":
        enc_optimizer = optim.SGD(vae.encoder.parameters(), lr=args.lr, momentum=args.momentum)
        dec_optimizer = optim.SGD(vae.decoder.parameters(), lr=args.lr, momentum=args.momentum)
        opt_dict['lr'] = args.lr
    elif args.opt == "adam":
        enc_optimizer = optim.Adam(vae.encoder.parameters(), lr=0.001)
        dec_optimizer = optim.Adam(vae.decoder.parameters(), lr=0.001)
        opt_dict['lr'] = 0.001
    else:
        raise ValueError("optimizer not supported")

    iter_ = decay_cnt = 0
    best_loss = 1e4
    best_kl = best_nll = best_ppl = 0
    pre_mi = 0
    vae.train()
    start = time.time()

    train_data_batch = train_data.create_data_batch(batch_size=args.batch_size, device=device, batch_first=True)
    val_data_batch = val_data.create_data_batch(batch_size=args.batch_size, device=device, batch_first=True)
    test_data_batch = test_data.create_data_batch(batch_size=args.batch_size, device=device, batch_first=True)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(args.epochs):
            report_kl_loss = report_rec_loss = report_loss = 0
            report_num_words = report_num_sents = 0
            # shuffle batch order each epoch
            for i in np.random.permutation(len(train_data_batch)):
                batch_data = train_data_batch[i]
                batch_size, sent_len = batch_data.size()

                # not predict start symbol
                report_num_words += (sent_len - 1) * batch_size
                report_num_sents += batch_size

                kl_weight = args.beta

                enc_optimizer.zero_grad()
                dec_optimizer.zero_grad()

                if args.iw_train_nsamples < 0:
                    loss, loss_rc, loss_kl = vae.loss(batch_data, kl_weight, nsamples=args.nsamples)
                else:
                    # importance-weighted training objective
                    loss, loss_rc, loss_kl = vae.loss_iw(batch_data, kl_weight, nsamples=args.iw_train_nsamples, ns=ns)
                loss = loss.mean(dim=-1)

                loss.backward()
                torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

                loss_rc = loss_rc.sum()
                loss_kl = loss_kl.sum()

                enc_optimizer.step()
                dec_optimizer.step()

                report_rec_loss += loss_rc.item()
                report_kl_loss += loss_kl.item()
                report_loss += loss.item() * batch_size

                if iter_ % log_niter == 0:
                    #train_loss = (report_rec_loss + report_kl_loss) / report_num_sents
                    train_loss = report_loss / report_num_sents
                    logging('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f,' \
                            'time elapsed %.2fs, kl_weight %.4f' % (epoch, iter_, train_loss, report_kl_loss / report_num_sents, report_rec_loss / report_num_sents, time.time() - start, kl_weight))
                    #sys.stdout.flush()
                    report_rec_loss = report_kl_loss = report_loss = 0
                    report_num_words = report_num_sents = 0

                iter_ += 1

            logging('kl weight %.4f' % kl_weight)

            # end-of-epoch validation
            vae.eval()
            with torch.no_grad():
                loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
                au, au_var = calc_au(vae, val_data_batch)
                logging("%d active units" % au)
                # print(au_var)

            if args.save_ckpt > 0 and epoch <= args.save_ckpt:
                logging('save checkpoint')
                torch.save(vae.state_dict(), os.path.join(args.exp_dir, f'model_ckpt_{epoch}.pt'))

            if loss < best_loss:
                logging('update best loss')
                best_loss = loss
                best_nll = nll
                best_kl = kl
                best_ppl = ppl
                torch.save(vae.state_dict(), args.save_path)

            # lr decay with best-checkpoint reload when validation stalls
            if loss > opt_dict["best_loss"]:
                opt_dict["not_improved"] += 1
                if opt_dict["not_improved"] >= decay_epoch and epoch >= args.load_best_epoch:
                    opt_dict["best_loss"] = loss
                    opt_dict["not_improved"] = 0
                    opt_dict["lr"] = opt_dict["lr"] * lr_decay
                    vae.load_state_dict(torch.load(args.save_path))
                    logging('new lr: %f' % opt_dict["lr"])
                    decay_cnt += 1
                    enc_optimizer = optim.SGD(vae.encoder.parameters(), lr=opt_dict["lr"], momentum=args.momentum)
                    dec_optimizer = optim.SGD(vae.decoder.parameters(), lr=opt_dict["lr"], momentum=args.momentum)
            else:
                opt_dict["not_improved"] = 0
                opt_dict["best_loss"] = loss

            if decay_cnt == max_decay:
                break

            if epoch % args.test_nepoch == 0:
                with torch.no_grad():
                    loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST", args)

            if args.save_latent > 0 and epoch <= args.save_latent:
                visualize_latent(args, epoch, vae, "cuda", test_data)

            vae.train()

    except KeyboardInterrupt:
        logging('-' * 100)
        logging('Exiting from training early')

    # compute importance weighted estimate of log p(x) on the best model
    vae.load_state_dict(torch.load(args.save_path))
    vae.eval()
    with torch.no_grad():
        loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST", args)
        au, au_var = calc_au(vae, test_data_batch)
        logging("%d active units" % au)
        # print(au_var)

    test_data_batch = test_data.create_data_batch(batch_size=1, device=device, batch_first=True)
    with torch.no_grad():
        nll, ppl = calc_iwnll(vae, test_data_batch, args)
        logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))
# --- Per-tile evaluation step. Fragment: `mask`, `idx`, `model`, `path`,
# --- `dataset_add`, `dataset_mean_ind`, `dataset_std_ind` come from the
# --- enclosing loop, which is outside this excerpt.
# Scale the 8-bit mask to [0,1] and give it an explicit channel axis.
mask = mask/255.
mask = np.expand_dims(mask, axis=2)
# Load and normalize the five quarterly acquisitions for this tile
# (2019-01 through 2020-01).
img1 = U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/"+ path + "_2019-01-01.tif")), mean=dataset_mean_ind, std=dataset_std_ind)
img2 = U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/"+ path + "_2019-04-01.tif")), mean=dataset_mean_ind, std=dataset_std_ind)
img3 = U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/"+ path + "_2019-07-01.tif")), mean=dataset_mean_ind, std=dataset_std_ind)
img4 = U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/"+ path + "_2019-10-01.tif")), mean=dataset_mean_ind, std=dataset_std_ind)
img5 = U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/"+ path + "_2020-01-01.tif")), mean=dataset_mean_ind, std=dataset_std_ind)
# Stack the five dates channel-wise (10 channels total, per forward_crop below).
imgcon = np.concatenate((img1,img2,img3,img4,img5),axis=2)
# Tile the image and mask into overlapping 32x32 patches, stride 4.
data = U.forward_crop(imgcon, window=(32,32), channels=10, stride=4)
labels = U.forward_crop(mask, (32,32), channels=1, stride=4)  # NOTE(review): `labels` unused in this excerpt — confirm
pred = model.predict(data, batch_size=8, verbose=100)
# Stitch patch predictions back to the full mask-sized map.
pred = U.reconstruct(pred, mask.shape, window=(32,32), channels=1, stride=4)
# Flatten to column vectors and binarize both scores and labels at 0.5.
y_scores_i = pred.reshape(pred.shape[0]*pred.shape[1]*pred.shape[2], 1)
y_true_i = mask.reshape(mask.shape[0]*mask.shape[1]*mask.shape[2], 1)
y_scores_i = np.where(y_scores_i>0.5, 1, 0)
y_true_i = np.where(y_true_i>0.5, 1, 0)
# Accumulate across tiles: the first tile initializes the aggregates; later
# tiles are merged (the else-branch continues past this excerpt).
if idx == 0:
    y_scores = y_scores_i
    y_true = y_true_i
else:
    overlap = y_scores_i*y_true_i # Logical AND
def build_forward(self):
    """Build the forward pass of the multiple-choice QA model.

    Embeds every album field (title, description, when, where, photo titles,
    photos), the question and the candidate answers; encodes each with
    shared bi-LSTMs; mean-pools everything into one context vector; then
    scores the `num_choice` answers. Sets self.logits and self.yp.
    """
    config = self.config
    VW = self.VW  # word vocab size
    VC = self.VC  # char vocab size
    W = self.W    # max word length in chars
    N = self.N    # batch size
    # dynamically decided steps, for sequence lengths
    M = tf.shape(self.pis)[1]   # photo num
    JXA = tf.shape(self.at)[2]  # for album title, photo title
    JD = tf.shape(self.ad)[2]   # description length
    JT = tf.shape(self.when)[2]
    JG = tf.shape(self.where)[2]
    JI = tf.shape(self.pis)[2]  # used for photo_title, photo
    JXP = tf.shape(self.pts)[3]
    JQ = tf.shape(self.q)[1]
    JA = tf.shape(self.choices)[2]
    # embedding sizes
    cdim, wdim, cwdim = self.cd, self.wd, self.cwd  # cwd: char -> word output dimension
    # image feature dim
    idim = self.idim  # image_feat dimension

    # all inputs:
    #   at, ad, when, where,
    #   pts, pis,
    #   q, choices

    # embedding
    with tf.variable_scope('emb'):
        # char stuff
        if config.use_char:
            with tf.variable_scope("var"):
                # the embedding for each character
                char_emb = tf.get_variable("char_emb", shape=[VC, cdim], dtype="float")
            # [N,M,JXA,W]
            Aat_c = tf.nn.embedding_lookup(char_emb, self.at_c)
            # [N,M,JD,W]
            Aad_c = tf.nn.embedding_lookup(char_emb, self.ad_c)
            # [N,M,JT,W]
            Awhen_c = tf.nn.embedding_lookup(char_emb, self.when_c)
            # [N,M,JG,W]
            Awhere_c = tf.nn.embedding_lookup(char_emb, self.where_c)
            # [N,M,JI,JXP,W] -> [N,M,JI,JXP,W,cdim]
            Apts_c = tf.nn.embedding_lookup(char_emb, self.pts_c)
            # [N,JQ,W]
            Aq_c = tf.nn.embedding_lookup(char_emb, self.q_c)
            Achoices_c = tf.nn.embedding_lookup(char_emb, self.choices_c)

            # flatten for conv2d input, like images
            Aat_c = tf.reshape(Aat_c, [-1, JXA, W, cdim])
            Aad_c = tf.reshape(Aad_c, [-1, JD, W, cdim])
            Awhen_c = tf.reshape(Awhen_c, [-1, JT, W, cdim])
            Awhere_c = tf.reshape(Awhere_c, [-1, JG, W, cdim])
            # [N*M*JI,JXP,W,cdim]
            Apts_c = tf.reshape(Apts_c, [-1, JXP, W, cdim])
            Aq_c = tf.reshape(Aq_c, [-1, JQ, W, cdim])
            # [N*4,]
            Achoices_c = tf.reshape(Achoices_c, [-1, JA, W, cdim])

            # char CNN: one shared conv1d reused for every field
            filter_size = cwdim  # output size for each word
            filter_height = 5
            with tf.variable_scope("conv"):
                xat = conv1d(Aat_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                tf.get_variable_scope().reuse_variables()
                xad = conv1d(Aad_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                xwhen = conv1d(Awhen_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                xwhere = conv1d(Awhere_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                xpts = conv1d(Apts_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                qq = conv1d(Aq_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                qchoices = conv1d(Achoices_c, filter_size, filter_height, config.keep_prob, self.is_train, wd=config.wd, scope="conv1d")
                # reshape them back
                xat = tf.reshape(xat, [-1, M, JXA, cwdim])
                xad = tf.reshape(xad, [-1, M, JD, cwdim])
                xwhen = tf.reshape(xwhen, [-1, M, JT, cwdim])
                xwhere = tf.reshape(xwhere, [-1, M, JG, cwdim])
                xpts = tf.reshape(xpts, [-1, M, JI, JXP, cwdim])
                qq = tf.reshape(qq, [-1, JQ, cwdim])
                # [N,num_choice,JA,cwdim]
                qchoices = tf.reshape(qchoices, [-1, self.num_choice, JA, cwdim])

        # word stuff
        with tf.variable_scope('word'):
            with tf.variable_scope("var"):
                # get the word embedding for new words
                if config.is_train:
                    # for new words; just randomly initialized
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype="float", shape=[VW, wdim], initializer=get_initializer(config.emb_mat))
                else:
                    # save time loading the emb during test
                    word_emb_mat = tf.get_variable("word_emb_mat", dtype="float", shape=[VW, wdim])
                # concat with the pretrained vectors:
                # indices 0..VW-1 are for new words, the rest for pretrained
                # vectors, and the pretrained part is fixed
                word_emb_mat = tf.concat([word_emb_mat, self.existing_emb_mat], 0)
            # [N,M,JXA] -> [N,M,JXA,wdim]
            Aat = tf.nn.embedding_lookup(word_emb_mat, self.at)
            Aad = tf.nn.embedding_lookup(word_emb_mat, self.ad)
            Awhen = tf.nn.embedding_lookup(word_emb_mat, self.when)
            Awhere = tf.nn.embedding_lookup(word_emb_mat, self.where)
            Apts = tf.nn.embedding_lookup(word_emb_mat, self.pts)
            Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)
            Achoices = tf.nn.embedding_lookup(word_emb_mat, self.choices)

        # concat char and word representations
        if config.use_char:
            xat = tf.concat([xat, Aat], 3)
            xad = tf.concat([xad, Aad], 3)
            xwhen = tf.concat([xwhen, Awhen], 3)
            xwhere = tf.concat([xwhere, Awhere], 3)
            # [N,M,JI,JX,wdim+cwdim]
            xpts = tf.concat([xpts, Apts], 4)
            # [N,JQ,wdim+cwdim]
            qq = tf.concat([qq, Aq], 2)
            qchoices = tf.concat([qchoices, Achoices], 3)
        else:
            xat = Aat
            xad = Aad
            xwhen = Awhen
            xwhere = Awhere
            xpts = Apts
            qq = Aq
            qchoices = Achoices
    # all of the above share the same last dim: [wdim+cwdim] or just [wdim]

    # get the image feature
    with tf.variable_scope("image"):
        # [N,M,JI] -> [N,M,JI,idim]
        xpis = tf.nn.embedding_lookup(self.image_emb_mat, self.pis)

    d = config.hidden_size
    cell_text = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
    cell_img = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
    # add dropout (disabled at test time via the is_train cond)
    keep_prob = tf.cond(self.is_train, lambda: tf.constant(config.keep_prob), lambda: tf.constant(1.0))
    cell_text = tf.nn.rnn_cell.DropoutWrapper(cell_text, keep_prob)
    cell_img = tf.nn.rnn_cell.DropoutWrapper(cell_img, keep_prob)
    # it is important to think about which LSTM is shared with which

    # sequence lengths for each field, derived from the padding masks
    at_len = tf.reduce_sum(tf.cast(self.at_mask, "int32"), 2)  # [N,M], each album title's length
    ad_len = tf.reduce_sum(tf.cast(self.ad_mask, "int32"), 2)
    when_len = tf.reduce_sum(tf.cast(self.when_mask, "int32"), 2)
    where_len = tf.reduce_sum(tf.cast(self.where_mask, "int32"), 2)  # [N,M]
    pis_len = tf.reduce_sum(tf.cast(self.pis_mask, "int32"), 2)  # [N,M,JI] -> [N,M]
    pts_len = tf.reduce_sum(tf.cast(self.pts_mask, "int32"), 3)  # [N,M,JI,JXP] -> [N,M,JI]
    q_len = tf.reduce_sum(tf.cast(self.q_mask, "int32"), 1)  # [N], each question's length
    choices_len = tf.reduce_sum(tf.cast(self.choices_mask, "int32"), 2)  # [N,4]

    # xat -> [N,M,JXA,wdim+cwdim]; xad -> [N,M,JD,wdim+cwdim]
    # xwhen/xwhere -> [N,M,JT/JG,wdim+cwdim]; xpts -> [N,M,JI,JXP,wdim+cwdim]
    # xpis -> [N,M,JI,idim]; qq -> [N,JQ,wdim+cwdim]; qchoices -> [N,4,JA,wdim+cwdim]

    # roll each sentence through an LSTM; e.g. from [N,M,JI,JX] -> [N,M,2d]
    with tf.variable_scope("reader"):
        with tf.variable_scope("text"):
            (fw_hq, bw_hq), (fw_lq, bw_lq) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, qq, sequence_length=q_len, dtype="float", scope="utext")
            # concat the fw and backward lstm output / last states
            hq = tf.concat([fw_hq, bw_hq], 2)
            lq = tf.concat([fw_lq.h, bw_lq.h], 1)  # LSTM cell: .h is the hidden state
            tf.get_variable_scope().reuse_variables()

            # flatten everything so each field is a rank-3 batch of sequences
            flat_qchoices = flatten(qchoices, 2)  # [N,4,JA,dim] -> [N*4,JA,dim]
            flat_xat = flatten(xat, 2)  # [N,M,JXA,dim] -> [N*M,JXA,dim]
            flat_xad = flatten(xad, 2)
            flat_xwhen = flatten(xwhen, 2)
            flat_xwhere = flatten(xwhere, 2)
            flat_xpts = flatten(xpts, 2)  # [N,M,JI,JXP,dim] -> [N*M*JI,JXP,dim]

            # matching sequence lengths, all flattened to one dim
            flat_qchoices_len = flatten(choices_len, 0)  # [N*4]
            flat_xat_len = flatten(at_len, 0)  # [N*M]
            flat_xad_len = flatten(ad_len, 0)  # [N*M]
            flat_xwhen_len = flatten(when_len, 0)  # [N*M]
            flat_xwhere_len = flatten(where_len, 0)  # [N*M]
            flat_xpts_len = flatten(pts_len, 0)  # [N*M*JI]

            # put all through the shared text LSTM
            # album title  [N*M,JXA,d]
            (fw_hat_flat, bw_hat_flat), (fw_lat_flat, bw_lat_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_xat, sequence_length=flat_xat_len, dtype="float", scope="utext")
            fw_hat = reconstruct(fw_hat_flat, xat, 2)
            bw_hat = reconstruct(bw_hat_flat, xat, 2)
            hat = tf.concat([fw_hat, bw_hat], 3)  # [N,M,JXA,2d]
            fw_lat = tf.reshape(fw_lat_flat.h, [N, M, d])  # [N*M,d] -> [N,M,d]
            bw_lat = tf.reshape(bw_lat_flat.h, [N, M, d])
            lat = tf.concat([fw_lat, bw_lat], 2)  # [N,M,2d]

            # album description  [N*M,JD,d]
            (fw_had_flat, bw_had_flat), (fw_lad_flat, bw_lad_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_xad, sequence_length=flat_xad_len, dtype="float", scope="utext")
            fw_had = reconstruct(fw_had_flat, xad, 2)
            bw_had = reconstruct(bw_had_flat, xad, 2)
            had = tf.concat([fw_had, bw_had], 3)  # [N,M,JD,2d]
            fw_lad = tf.reshape(fw_lad_flat.h, [N, M, d])  # [N*M,d] -> [N,M,d]
            bw_lad = tf.reshape(bw_lad_flat.h, [N, M, d])
            lad = tf.concat([fw_lad, bw_lad], 2)  # [N,M,2d]

            # when
            (fw_hwhen_flat, bw_hwhen_flat), (fw_lwhen_flat, bw_lwhen_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_xwhen, sequence_length=flat_xwhen_len, dtype="float", scope="utext")
            fw_hwhen = reconstruct(fw_hwhen_flat, xwhen, 2)
            bw_hwhen = reconstruct(bw_hwhen_flat, xwhen, 2)
            hwhen = tf.concat([fw_hwhen, bw_hwhen], 3)  # [N,M,JT,2d]
            fw_lwhen = tf.reshape(fw_lwhen_flat.h, [N, M, d])  # [N*M,d] -> [N,M,d]
            bw_lwhen = tf.reshape(bw_lwhen_flat.h, [N, M, d])
            lwhen = tf.concat([fw_lwhen, bw_lwhen], 2)  # [N,M,2d]

            # where
            (fw_hwhere_flat, bw_hwhere_flat), (fw_lwhere_flat, bw_lwhere_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_xwhere, sequence_length=flat_xwhere_len, dtype="float", scope="utext")
            fw_hwhere = reconstruct(fw_hwhere_flat, xwhere, 2)
            bw_hwhere = reconstruct(bw_hwhere_flat, xwhere, 2)
            hwhere = tf.concat([fw_hwhere, bw_hwhere], 3)  # [N,M,JG,2d]
            fw_lwhere = tf.reshape(fw_lwhere_flat.h, [N, M, d])  # [N*M,d] -> [N,M,d]
            bw_lwhere = tf.reshape(bw_lwhere_flat.h, [N, M, d])
            lwhere = tf.concat([fw_lwhere, bw_lwhere], 2)  # [N,M,2d]

            # photo titles  [N*M*JI,JXP,d]
            (fw_hpts_flat, bw_hpts_flat), (fw_lpts_flat, bw_lpts_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_xpts, sequence_length=flat_xpts_len, dtype="float", scope="utext")
            fw_hpts = reconstruct(fw_hpts_flat, xpts, 2)
            bw_hpts = reconstruct(bw_hpts_flat, xpts, 2)  # [N,M,JI,JXP,d]
            hpts = tf.concat([fw_hpts, bw_hpts], 4)  # [N,M,JI,JXP,2d]
            fw_lpts = tf.reshape(fw_lpts_flat.h, [N, M, JI, d])  # [N*M*JI,d] -> [N,M,JI,d]
            bw_lpts = tf.reshape(bw_lpts_flat.h, [N, M, JI, d])
            lpts = tf.concat([fw_lpts, bw_lpts], 3)  # [N,M,JI,2d]

            # choices
            (fw_hchoices_flat, bw_hchoices_flat), (fw_lchoices_flat, bw_lchoices_flat) = tf.nn.bidirectional_dynamic_rnn(cell_text, cell_text, flat_qchoices, sequence_length=flat_qchoices_len, dtype="float", scope="utext")
            fw_hchoices = reconstruct(fw_hchoices_flat, qchoices, 2)
            bw_hchoices = reconstruct(bw_hchoices_flat, qchoices, 2)
            hchoices = tf.concat([fw_hchoices, bw_hchoices], 3)  # [N,4,JA,2d]
            fw_lchoices = tf.reshape(fw_lchoices_flat.h, [N, -1, d])  # [N*4,d] -> [N,4,d]
            bw_lchoices = tf.reshape(bw_lchoices_flat.h, [N, -1, d])
            lchoices = tf.concat([fw_lchoices, bw_lchoices], 2)  # [N,4,2d]

        with tf.variable_scope("image"):
            # photos
            flat_xpis = flatten(xpis, 2)  # [N,M,JI,idim] -> [N*M,JI,idim]
            flat_xpis_len = flatten(pis_len, 0)  # [N*M]
            # photos use a different LSTM  [N*M,JXP,d]
            (fw_hpis_flat, bw_hpis_flat), (fw_lpis_flat, bw_lpis_flat) = tf.nn.bidirectional_dynamic_rnn(cell_img, cell_img, flat_xpis, sequence_length=flat_xpis_len, dtype="float", scope="uimage")
            fw_hpis = reconstruct(fw_hpis_flat, xpis, 2)
            bw_hpis = reconstruct(bw_hpis_flat, xpis, 2)
            hpis = tf.concat([fw_hpis, bw_hpis], 3)  # [N,M,JI,2d]
            fw_lpis = tf.reshape(fw_lpis_flat.h, [N, M, d])  # [N*M,d] -> [N,M,d]
            bw_lpis = tf.reshape(bw_lpis_flat.h, [N, M, d])
            lpis = tf.concat([fw_lpis, bw_lpis], 2)  # [N,M,2d]

    if config.wd is not None:
        # l2 weight decay for the reader
        add_wd(config.wd)

    # all rnn outputs:
    #   hq -> [N,JQ,2d]; hat -> [N,M,JXA,2d]; had -> [N,M,JD,2d]
    #   hwhen -> [N,M,JT,2d]; hwhere -> [N,M,JG,2d]
    #   hpts -> [N,M,JI,JXP,2d]; hpis -> [N,M,JI,2d]; hchoices -> [N,4,JA,2d]
    # last states:
    #   lq -> [N,2d]; lat/lad/lwhen/lwhere/lpis -> [N,M,2d]
    #   lpts -> [N,M,JI,2d]; lchoices -> [N,4,2d]

    # use the last lstm output (last hidden state);
    # outputs_fw[k, X_len[k]-1] == states_fw.h[k]
    g0at = lat  # [N,M,2d]
    g0ad = lad  # [N,M,2d]
    g0when = lwhen
    g0where = lwhere
    g0pts = tf.reduce_mean(lpts, 2)  # [N,M,JI,2d] -> [N,M,2d]
    g0pis = lpis

    # album level attention: mean-pool each modality over albums
    g1at = tf.reduce_mean(g0at, 1)  # [N,2d]
    g1ad = tf.reduce_mean(g0ad, 1)
    g1when = tf.reduce_mean(g0when, 1)
    g1where = tf.reduce_mean(g0where, 1)
    g1pts = tf.reduce_mean(g0pts, 1)
    g1pis = tf.reduce_mean(g0pis, 1)
    g1 = tf.stack([g1at, g1ad, g1when, g1where, g1pts, g1pis], axis=1)
    g1_all = tf.reduce_mean(g1, 1)

    with tf.variable_scope("choices_emb"):
        # embed the choices: last LSTM state for each choice
        gchoices = lchoices  # [N,4,2d]

    with tf.variable_scope("question_emb"):
        # the last hidden state of each question  [N,2d]
        gq = lq

    # the modeling layer
    with tf.variable_scope("output"):
        # g1_all [N,2d] can be viewed as an answer representation; together
        # with the choice and question embeddings we do a single-layer
        # multi-class classification.
        # tile g1_all [N,2d] -> [N,1,2d] -> [N,4,2d] to concat with gchoices
        g1_a_t = tf.tile(tf.expand_dims(g1_all, 1), [1, self.num_choice, 1])
        # tile gq for all choices  [N,4,2d]
        gq = tf.tile(tf.expand_dims(gq, 1), [1, self.num_choice, 1])
        # [N,4,2d] -> [N*4,1]  # TODO: consider a different similarity matrix
        logits = linear(tf.concat([gq, g1_a_t, gchoices, g1_a_t * gchoices, gq * gchoices], 2), output_size=1, scope="choicelogits")
        logits = tf.squeeze(logits, 2)  # [N,4,1] -> [N,4]
        yp = tf.nn.softmax(logits)  # [N,4]
        # exposed for the loss and forward pass
        self.logits = logits
        self.yp = yp
def main(args, args_model):
    """Train or evaluate a text VAE.

    Depending on flags in ``args`` this either dispatches to one of several
    evaluation / export modes (each of which returns early), or runs the full
    training loop with KL-weight annealing, free-bits variants (``args.fb``),
    checkpointing, and SGD learning-rate decay on validation plateau.

    Args:
        args: parsed command-line namespace (data paths, optimizer, KL/fb
            settings, eval-mode switches, ...).
        args_model: model-specific config forwarded to ``create_model``.
    """
    global logging
    # Any of these flags means "evaluation only" — used below to avoid
    # creating a fresh experiment directory.
    eval_mode = (args.reconstruct_from != "" or args.eval or args.eval_iw_elbo
                 or args.eval_valid_elbo or args.export_avg_loss_per_ts
                 or args.study_pooling
                 )

    # don't make exp dir for reconstruction
    logging = create_exp_dir(args.exp_dir, scripts_to_save=None, debug=eval_mode)

    if args.cuda:
        logging('using cuda')
    logging(str(args))

    # Mutable optimizer state shared by the plateau-decay logic below.
    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    # Optionally pre-load a fixed vocabulary file (one token per line);
    # otherwise the vocab is built from the training data.
    vocab = {}
    if getattr(args, 'vocab_file', None):
        with open(args.vocab_file, 'r', encoding='utf-8') as fvocab:
            for i, line in enumerate(fvocab):
                vocab[line.strip()] = i
        vocab = VocabEntry(vocab)

    train_data = MonoTextData(args.train_data, label=args.label, vocab=vocab)

    vocab = train_data.vocab
    vocab_size = len(vocab)

    val_data = MonoTextData(args.val_data, label=args.label, vocab=vocab)
    test_data = MonoTextData(args.test_data, label=args.label, vocab=vocab)

    logging('Train data: %d samples' % len(train_data))
    logging('finish reading datasets, vocab size is %d' % len(vocab))
    logging('dropped sentences: %d' % train_data.dropped)
    #sys.stdout.flush()

    # Log roughly 10 times per epoch.
    log_niter = max((len(train_data) // args.batch_size) // 10, 1)

    device = torch.device("cuda" if args.cuda else "cpu")

    vae = create_model(vocab, args, args_model, logging, eval_mode)

    # ---- evaluation-only modes; each returns without training -------------
    if args.eval:
        logging('begin evaluation')
        vae.eval()
        with torch.no_grad():
            # NOTE(review): evaluates on val_data despite the variable name
            # test_data_batch — confirm this is intentional.
            test_data_batch = val_data.create_data_batch(batch_size=1,
                                                         device=device,
                                                         batch_first=True)
            nll, ppl = calc_iwnll(vae, test_data_batch, args, ns=250)
            logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))
        return

    if args.eval_iw_elbo:
        logging('begin evaluation')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(batch_size=1,
                                                          device=device,
                                                          batch_first=True)
            nll, ppl = calc_iw_elbo(vae, test_data_batch, args)
            logging('iw ELBo: %.4f, iw PPL*: %.4f' % (nll, ppl))
        return

    if args.eval_valid_elbo:
        logging('begin evaluation on validation set')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        val_data_batch = val_data.create_data_batch(batch_size=args.batch_size,
                                                    device=device,
                                                    batch_first=True)
        with torch.no_grad():
            loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
            logging('nll: %.4f, iw ppl: %.4f' % (nll, ppl))
        return

    if args.study_pooling:
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            data_batch = train_data.create_data_batch(
                batch_size=args.batch_size, device=device, batch_first=True)
            model_dir = os.path.dirname(args.load_path)
            archive_npy = os.path.join(model_dir, 'pooling.npy')
            random.shuffle(data_batch)
            #logs = study_pooling(vae, data_batch, "TRAIN", args, min_doc_size=16)
            logs = study_pooling(vae, data_batch, args, min_doc_size=4)
            logs['exp_dir'] = model_dir
            np.save(archive_npy, logs)
        return

    if args.export_avg_loss_per_ts:
        print("MODEL")
        print(vae)
        export_avg_loss_per_ts(
            vae,
            train_data,
            device,
            args.batch_size,
            args.load_path,
            args.export_avg_loss_per_ts,
        )
        return

    if args.reconstruct_from != "":
        print("begin decoding")
        vae.load_state_dict(torch.load(args.reconstruct_from))
        vae.eval()
        with torch.no_grad():
            if args.reconstruct_add_labels_to_source:
                # Deterministic batching so data and label batches stay aligned.
                test_data_batch, test_labels_batch = test_data.create_data_batch_labels(
                    batch_size=args.reconstruct_batch_size,
                    device=device,
                    batch_first=True,
                    deterministic=True)
                c = list(zip(test_data_batch, test_labels_batch))
                #random.shuffle(c)
                test_data_batch, test_labels_batch = zip(*c)
            else:
                test_data_batch = test_data.create_data_batch(
                    batch_size=args.reconstruct_batch_size,
                    device=device,
                    batch_first=True)
                test_labels_batch = None
                #random.shuffle(test_data_batch)
            # test(vae, test_data_batch, "TEST", args)
            reconstruct(vae, test_data_batch, vocab, args.decoding_strategy,
                        args.reconstruct_to, test_labels_batch,
                        args.reconstruct_max_examples,
                        args.force_absolute_length, args.no_unk)
        return

    # ---- training ---------------------------------------------------------
    # Optionally train only the encoder's final linear projection.
    if args.freeze_encoder_exc:
        assert args.enc_type == 'lstm'
        enc_params = vae.encoder.linear.parameters()
    else:
        enc_params = vae.encoder.parameters()
    dec_params = vae.decoder.parameters()

    if args.opt == 'sgd':
        optimizer_fn = optim.SGD
    elif args.opt == 'adam':
        optimizer_fn = optim.Adam
    else:
        raise ValueError("optimizer not supported")

    # NOTE(review): optim.Adam does not accept a `momentum` keyword, so this
    # will raise TypeError when args.opt == 'adam' — confirm and special-case.
    def optimizer_fn_(params):
        return optimizer_fn(params, lr=args.lr, momentum=args.momentum)

    # Separate optimizers so the encoder can be frozen independently.
    enc_optimizer = optimizer_fn_(enc_params)
    dec_optimizer = optimizer_fn_(dec_params)

    iter_ = decay_cnt = 0
    best_loss = 1e4
    best_kl = best_nll = best_ppl = 0
    vae.train()
    start = time.time()

    # Linearly anneal the KL weight from kl_start to 1 over warm_up epochs.
    kl_weight = args.kl_start
    if args.warm_up > 0:
        anneal_rate = (1.0 - args.kl_start) / (args.warm_up *
                                               (len(train_data) / args.batch_size))
    else:
        anneal_rate = 0

    # Per-dimension free-bits target, used by the fb == 2 variant.
    dim_target_kl = args.target_kl / float(args.nz)

    train_data_batch = train_data.create_data_batch(batch_size=args.batch_size,
                                                    device=device,
                                                    batch_first=True)

    val_data_batch = val_data.create_data_batch(batch_size=args.batch_size,
                                                device=device,
                                                batch_first=True)

    test_data_batch = test_data.create_data_batch(batch_size=args.batch_size,
                                                  device=device,
                                                  batch_first=True)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(args.epochs):
            report_kl_loss = report_rec_loss = report_loss = 0
            report_num_words = report_num_sents = 0

            # Shuffle batch order each epoch.
            for i in np.random.permutation(len(train_data_batch)):
                batch_data = train_data_batch[i]
                batch_size, sent_len = batch_data.size()

                # not predict start symbol
                report_num_words += (sent_len - 1) * batch_size
                report_num_sents += batch_size

                kl_weight = min(1.0, kl_weight + anneal_rate)

                enc_optimizer.zero_grad()
                dec_optimizer.zero_grad()

                if args.fb == 0:
                    # Plain annealed ELBO.
                    loss, loss_rc, loss_kl = vae.loss(batch_data,
                                                      kl_weight,
                                                      nsamples=args.nsamples)
                elif args.fb == 1:
                    # Free bits on the whole KL term: only penalize KL above
                    # the target threshold.
                    loss, loss_rc, loss_kl = vae.loss(batch_data,
                                                      kl_weight,
                                                      nsamples=args.nsamples,
                                                      sum_over_len=False)
                    kl_mask = (loss_kl > args.target_kl).float()
                    loss_rc = loss_rc.sum(-1)
                    loss = loss_rc + kl_mask * kl_weight * loss_kl
                elif args.fb == 2:
                    # Per-dimension free bits: mask each latent dim whose KL
                    # is below dim_target_kl before summing.
                    mu, logvar = vae.encoder(batch_data)
                    z = vae.encoder.reparameterize(mu, logvar, args.nsamples)
                    loss_kl = 0.5 * (mu.pow(2) + logvar.exp() - logvar - 1)
                    kl_mask = (loss_kl > dim_target_kl).float()
                    fake_loss_kl = (kl_mask * loss_kl).sum(dim=1)
                    loss_rc = vae.decoder.reconstruct_error(batch_data, z).mean(dim=1)
                    loss = loss_rc + kl_weight * fake_loss_kl

                loss = loss.mean(dim=-1)

                loss.backward()
                # NOTE(review): clip_grad appears to be a module-level
                # hyperparameter defined outside this view — confirm.
                torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

                loss_rc = loss_rc.sum()
                loss_kl = loss_kl.sum()

                if not args.freeze_encoder:
                    enc_optimizer.step()
                dec_optimizer.step()

                report_rec_loss += loss_rc.item()
                report_kl_loss += loss_kl.item()
                report_loss += loss_rc.item() + loss_kl.item()

                if iter_ % log_niter == 0:
                    train_loss = report_loss / report_num_sents
                    logging('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f,' \
                            'time %.2fs, kl_weight %.4f' %
                            (epoch, iter_, train_loss,
                             report_kl_loss / report_num_sents,
                             report_rec_loss / report_num_sents,
                             time.time() - start, kl_weight))

                    report_rec_loss = report_kl_loss = report_loss = 0
                    report_num_words = report_num_sents = 0

                iter_ += 1

            logging('kl weight %.4f' % kl_weight)
            logging('lr {}'.format(opt_dict["lr"]))

            # ---- end-of-epoch validation ---------------------------------
            vae.eval()
            with torch.no_grad():
                loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
                au, au_var = calc_au(vae, val_data_batch)
                logging("%d active units" % au)

            # Save per-epoch checkpoints for the first save_ckpt epochs.
            if args.save_ckpt > 0 and epoch <= args.save_ckpt:
                logging('save checkpoint')
                torch.save(
                    vae.state_dict(),
                    os.path.join(args.exp_dir, f'model_ckpt_{epoch}.pt'))

            if loss < best_loss:
                logging('update best loss')
                best_loss = loss
                best_nll = nll
                best_kl = kl
                best_ppl = ppl
                torch.save(vae.state_dict(), args.save_path)

            # Plateau handling: after decay_epoch non-improving epochs,
            # reload the best weights, decay the lr, and restart with SGD.
            # NOTE(review): decay_epoch, lr_decay and max_decay look like
            # module-level constants defined outside this view — confirm.
            if loss > opt_dict["best_loss"]:
                opt_dict["not_improved"] += 1
                if opt_dict["not_improved"] >= decay_epoch and epoch >= args.load_best_epoch:
                    opt_dict["best_loss"] = loss
                    opt_dict["not_improved"] = 0
                    opt_dict["lr"] = opt_dict["lr"] * lr_decay
                    vae.load_state_dict(torch.load(args.save_path))
                    logging('new lr: %f' % opt_dict["lr"])
                    decay_cnt += 1
                    enc_optimizer = optim.SGD(vae.encoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)
                    dec_optimizer = optim.SGD(vae.decoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)
            else:
                opt_dict["not_improved"] = 0
                opt_dict["best_loss"] = loss

            if decay_cnt == max_decay:
                break

            if args.save_latent > 0 and epoch <= args.save_latent:
                visualize_latent(args, epoch, vae, "cuda", test_data)

            vae.train()

    except KeyboardInterrupt:
        logging('-' * 100)
        logging('Exiting from training early')

    # compute importance weighted estimate of log p(x)
    vae.load_state_dict(torch.load(args.save_path))
    vae.eval()
    with torch.no_grad():
        loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST", args)
        au, au_var = calc_au(vae, test_data_batch)
print("MIP relative gap:", gap) obj = model.solution.get_objective_value() print("Objective value:", obj) # visualize boundaries boundaries = utils.vis_cut(image, model) plt.imshow(boundaries) plt.show() # visualize segmentation segmentations = utils.vis_seg(image, model) plt.imshow(segmentations) plt.show() # visualize depth depth = utils.reconstruct(image, model) plt.imshow(depth) plt.show() cv2.imwrite('/home/bo/Desktop/sample/depth.png', (depth * 255).astype(np.uint8)) # visualize 3d input signal X = np.arange(depth.shape[1]) Y = np.arange(depth.shape[0]) X, Y = np.meshgrid(X, Y) fig = plt.figure() ax = fig.gca(projection='3d') surf = ax.plot_surface(X, Y, depth, cmap=cm.jet,
def main():
    """Run the hybrid-feature structure-from-motion evaluation pipeline.

    Loads pretrained per-feature encoders from a checkpoint, builds and
    translates a COLMAP database for the requested features, then performs
    matching, geometric verification and sparse reconstruction, writing the
    collected statistics (one JSON object per line) to a stats file.
    """
    # Set CUDA. Inference only, so gradients are globally disabled.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    torch.set_grad_enabled(False)

    # Load config json (settings for the pretrained encoders).
    with open('checkpoints-pretrained/config.json', 'r') as f:
        config = json.load(f)

    # Parse arguments. At least two feature types are required for a hybrid run.
    # NOTE(review): assert is stripped under `python -O` — consider raising.
    args = parse_args()
    assert(len(args.features) > 1)

    # All derived paths live under the dataset directory, keyed by exp_name.
    paths = types.SimpleNamespace()
    paths.database_path = os.path.join(
        args.dataset_path, '%s.db' % args.exp_name
    )
    paths.image_path = os.path.join(
        args.dataset_path, 'images'
    )
    paths.match_list_path = os.path.join(
        args.dataset_path, 'match-list-exh.txt'
    )
    paths.sparse_path = os.path.join(
        args.dataset_path, 'sparse-%s' % args.exp_name
    )
    paths.output_path = os.path.join(
        args.dataset_path, 'stats-%s.txt' % args.exp_name
    )

    # Copy reference database; refuse to overwrite an existing experiment db.
    if os.path.exists(paths.database_path):
        raise FileExistsError('Database file already exists.')
    shutil.copy(
        os.path.join(args.dataset_path, 'database.db'),
        paths.database_path
    )

    # Create networks: one encoder per requested feature, restored from the
    # matching (name, state_dict) entry in the checkpoint's 'encoders' list.
    checkpoint = torch.load(args.checkpoint)
    encoders = {}
    for feature in args.features:
        encoder, _ = create_network_for_feature(feature, config, use_cuda)
        state_dict = list(filter(lambda x: x[0] == feature,
                                 checkpoint['encoders']))[0]
        encoder.load_state_dict(state_dict[1])
        encoder.eval()
        encoders[feature] = encoder

    # Build and translate database.
    image_features = build_hybrid_database(
        args.features, args.dataset_path, paths.database_path
    )
    np.save(os.path.join(args.dataset_path, 'features.npy'), image_features)
    translate_database(
        image_features, paths.database_path, encoders, args.batch_size, device
    )

    # Matching + GV + reconstruction.
    match_features(
        args.colmap_path, paths.database_path, paths.image_path,
        paths.match_list_path
    )
    # Free GPU memory before the (CPU-side) COLMAP stages.
    torch.cuda.empty_cache()
    matching_stats = geometric_verification(
        args.colmap_path, paths.database_path, paths.match_list_path
    )
    largest_model_path, reconstruction_stats = reconstruct(
        args.colmap_path, paths.database_path, paths.image_path,
        paths.sparse_path
    )
    extra_stats = compute_extra_stats(image_features, largest_model_path)

    # Dump one JSON object per line: matching, reconstruction, extra stats.
    with open(paths.output_path, 'w') as f:
        f.write(json.dumps(matching_stats))
        f.write('\n')
        f.write(json.dumps(reconstruction_stats))
        f.write('\n')
        f.write(json.dumps(extra_stats))
        f.write('\n')
import numpy as np #%% # PREPARE DATA #stereo_to_mono('rawdata','groundtruth') #compress('groundtruth','training_samples') test_data, test_fs = sf.read('training_samples/Triviul-Dorothy.wav') orig_data, orig_fs = sf.read('groundtruth/Triviul-Dorothy.wav') #%% # LOAD MODEL model = load_model('SRCNN_2019-05-03 22_09_28_bestMix.h5') #%% # Reconstruct predict = reconstruct(test_data, test_fs, model) #%% # SPECTROGRAM ANALYSIS output_data, output_fs = sf.read('output_with_phase.wav') # Plot spectrogram of original data plt.figure(0) orig_f, orig_t, orig_spec = scipy.signal.stft(orig_data, orig_fs) plt.pcolormesh(orig_t, orig_f, 20 * np.log10(np.abs(orig_spec) + 0.0001)) plt.title('Spectrogram of original high-quality data') plt.xlabel('Time (s)') plt.ylabel('Freq (Hz)') # Plot spectrogram of test data (compressed) plt.figure(1)
# Driver script for the file-manipulation tasks implemented in utils.
import utils as Utils

# Read the source file and turn its contents into an array form.
raw_text = Utils.readFile("text.txt")
text_array = Utils.getFileAsArray(raw_text)

# Task 1: look for the word "is" inside sentence.txt.
Utils.findWordInFile("sentence.txt", "is")

# Task 2: write a sentence out to sentence.txt.
Utils.writeSentenceToFile("sentence.txt")

# Task 3: minify the array built from text.txt.
Utils.minifyData(text_array)

# Finally, reconstruct the original data from the minified form.
Utils.reconstruct()