def sr_generate(self, data):
    saver = tf.train.Saver()
    with tf.Session() as session:
        saver.restore(session, os.path.join(config.MODEL_DIR, "model" + str(config.BATCH_SIZE) + "_" + str(config.NUM_EPOCHS) + ".ckpt"))
        for file in data.filelist:
            data.process_img(file)
            batch = np.asarray(data.batch)
            feed_dict = {self.inputs: batch}
            patches = session.run(self.output, feed_dict=feed_dict)
            #utils.display_batch_patch(batch, patches)
            image = utils.stitch(patches)
            utils.reconstruct(image, file)
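A note on the helpers above: utils.stitch and utils.reconstruct are not shown. A minimal sketch of what a patch stitcher might look like, assuming the patches form a square grid in row-major order (the actual helper may handle overlap and ordering differently):

import numpy as np

def stitch(patches):
    # Assumed layout: len(patches) is a perfect square, produced row by row,
    # so each row is concatenated horizontally, then the rows stacked vertically.
    n = int(round(np.sqrt(len(patches))))
    rows = [np.concatenate(list(patches[i * n:(i + 1) * n]), axis=1)
            for i in range(n)]
    return np.concatenate(rows, axis=0)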
Example No. 2
def linear(x, output_size, scope, add_tanh=False, wd=None):
    with tf.variable_scope(scope):
        # the input may have rank > 2, so flatten it while keeping the last
        # dim, e.g. [N,M,JX,JQ,2d] => [N*M*JX*JQ,2d]
        keep = 1
        flat_x = flatten(x, keep)
        bias_start = 0.0
        if not isinstance(output_size, int):  # may be a Dimension from get_shape()[k]
            output_size = output_size.value

        W = tf.get_variable("W",
                            dtype="float",
                            initializer=tf.truncated_normal(
                                [flat_x.get_shape()[-1].value, output_size],
                                stddev=0.1))
        bias = tf.get_variable("b",
                               dtype="float",
                               initializer=tf.constant(bias_start,
                                                       shape=[output_size]))
        flat_out = tf.matmul(flat_x, W) + bias

        if add_tanh:
            flat_out = tf.tanh(flat_out, name="tanh")

        if wd is not None:
            add_wd(wd)

        out = reconstruct(flat_out, x, keep)
        return out
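The flatten/reconstruct pair used throughout these examples is not shown. A minimal sketch of the usual BiDAF-style helpers, assuming keep counts the trailing dims to preserve (the real implementations may differ in detail):

import tensorflow as tf

def flatten(tensor, keep):
    # Collapse all leading dims into one, keeping the last `keep` dims.
    fixed_shape = tensor.get_shape().as_list()
    start = len(fixed_shape) - keep
    left = 1
    for i in range(start):
        left *= fixed_shape[i] or tf.shape(tensor)[i]
    out_shape = [left] + [fixed_shape[i] or tf.shape(tensor)[i]
                          for i in range(start, len(fixed_shape))]
    return tf.reshape(tensor, out_shape)

def reconstruct(tensor, ref, keep):
    # Undo flatten: restore the leading dims of `ref` while keeping the
    # last `keep` dims of `tensor` (which may have a new feature size).
    ref_shape = ref.get_shape().as_list()
    tensor_shape = tensor.get_shape().as_list()
    ref_stop = len(ref_shape) - keep
    tensor_start = len(tensor_shape) - keep
    pre_shape = [ref_shape[i] or tf.shape(ref)[i] for i in range(ref_stop)]
    keep_shape = [tensor_shape[i] or tf.shape(tensor)[i]
                  for i in range(tensor_start, len(tensor_shape))]
    return tf.reshape(tensor, pre_shape + keep_shape)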
Example No. 3
def softmax(logits, scope=None):
    with tf.name_scope(scope or "softmax"):  # note: name_scope, not variable_scope
        flat_logits = flatten(logits, 1)
        flat_out = tf.nn.softmax(flat_logits)
        out = reconstruct(flat_out, logits, 1)
        return out
Example No. 4
    def _discriminator_output(self, generated_image, inputs_var, real=True):
        if real:
            # Set the middle region of the contour input before feeding
            # it to the discriminator
            contour = inputs_var[1]
            contour = utils.reconstruct(contour, generated_image, flag=1)

            return lasagne.layers.get_output(self.discriminator, contour)
        else:
            return lasagne.layers.get_output(self.discriminator, generated_image)
Example No. 5
def main(args):
    # Settings
    VERBOSE = args.verbose
    DIR = args.directory
    MODEL = args.model

    # Load Model
    model = tf.keras.models.load_model(MODEL, compile=False)

    # Create Data Pipeline
    pipe = utils.DataPipe(DIR)
    assert len(pipe.files) > 0, "No tif files found"
    for name, DS in pipe.image_as_DS():

        if VERBOSE:
            print(f"Size of DS is currently: {sys.getsizeof(DS)}")

        # Reconstruct I: gather all dataset elements into a single array
        I = np.concatenate([np.asarray(elem) for elem in DS], axis=0)

        # Predict
        P = model.predict(DS)

        # Reconstruct
        P = utils.reconstruct(P, name)
        I = utils.reconstruct(I, name)

        if VERBOSE:
            print(f"Size of P is currently: {sys.getsizeof(P)}")
            print(f"Size of I is currently: {sys.getsizeof(I)}")

        # Save
        savemat(name.replace('tfrecord.gz', 'mat'), {'I': I, 'P': P})
        if VERBOSE:
            print(f"Saving: {name.replace('tfrecord.gz','mat')}")
Example No. 6
def main():
    # Set CUDA.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    torch.set_grad_enabled(False)

    # Parse arguments.
    args = parse_args()

    paths = types.SimpleNamespace()
    paths.database_path = os.path.join(args.dataset_path,
                                       '%s.db' % args.exp_name)
    paths.image_path = os.path.join(args.dataset_path, 'images')
    paths.match_list_path = os.path.join(args.dataset_path,
                                         'match-list-exh.txt')
    paths.sparse_path = os.path.join(args.dataset_path,
                                     'sparse-%s' % args.exp_name)
    paths.output_path = os.path.join(args.dataset_path,
                                     'stats-%s.txt' % args.exp_name)

    # Copy reference database.
    if os.path.exists(paths.database_path):
        raise FileExistsError('Database file already exists.')
    shutil.copy(os.path.join(args.dataset_path, 'database.db'),
                paths.database_path)

    # Build and translate database.
    image_features = build_hybrid_database(args.features, args.dataset_path,
                                           paths.database_path)

    # Matching + GV + reconstruction.
    match_features_subset(args.feature, image_features, args.colmap_path,
                          paths.database_path, paths.image_path,
                          paths.match_list_path)
    torch.cuda.empty_cache()
    matching_stats = geometric_verification(args.colmap_path,
                                            paths.database_path,
                                            paths.match_list_path + '.aux')
    os.remove(paths.match_list_path + '.aux')
    largest_model_path, reconstruction_stats = reconstruct(
        args.colmap_path, paths.database_path, paths.image_path,
        paths.sparse_path)
    extra_stats = compute_extra_stats(image_features, largest_model_path)

    with open(paths.output_path, 'w') as f:
        f.write(json.dumps(matching_stats))
        f.write('\n')
        f.write(json.dumps(reconstruction_stats))
        f.write('\n')
        f.write(json.dumps(extra_stats))
        f.write('\n')
Example No. 7
def main(_):
  config = flags.FLAGS
  if config.mode == "train":
    assert config.dataset in ("mnist", "cifar10")
    config.in_shape = (config.batch_size, 32, 32, 3)
    config.block_list = [eval(x) for x in config.block_list]
    config.stride_list = [eval(x) for x in config.stride_list]
    config.channel_list = [eval(x) for x in config.channel_list]

    train(config)
  elif config.mode == "debug":
    config.train_steps = 1
    config.viz_steps = 1
    config.block_list = [2, 2, 2]
    config.channel_list = [3, 4, 5]
    config.stride_list = [1, 1, 2]
    config.in_shape = (config.batch_size, 28, 28, 1)
    train(config, debug=True)
  elif config.mode == "prepare":
    download_dataset(config)
  elif config.mode == "sn":
    test_spectral_norm()
  elif config.mode == "iresnet":
    test_iresnet()
  elif config.mode == "trace":
    test_trace_approximation()
  elif config.mode == "inverse":
    test_block_inversion()
  elif config.mode == "squeeze":
    test_squeeze()
  elif config.mode == "trace_sn":
    test_trace_sn()
  elif config.mode == "generate":
    generate(config)
  elif config.mode == "reconstruct":
    reconstruct(config)
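A side note on the flag parsing in the train branch above: eval on flag strings executes arbitrary code. ast.literal_eval parses the same literals safely and would be a drop-in replacement (a suggestion, not something the original does):

import ast

# Safely parse literal flag values such as "2" or "(1, 2)"
block_list = [ast.literal_eval(x) for x in ["2", "2", "2"]]
stride_list = [ast.literal_eval(x) for x in ["1", "1", "2"]]
print(block_list, stride_list)  # [2, 2, 2] [1, 1, 2]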
Example No. 8
def linear(x, output_size, scope, add_tanh=False, wd=None, bn=False, bias=False, is_train=None, ln=False):
	# bn -> batch norm, ln -> layer norm
	with tf.variable_scope(scope):
		# the input may have rank > 2, so flatten it while keeping the last
		# dim, e.g. [N,M,JX,JQ,2d] => [N*M*JX*JQ,2d]
		keep = 1
		flat_x = flatten(x, keep)
		bias_start = 0.0
		if not isinstance(output_size, int):  # may be a Dimension from get_shape()[k]
			output_size = output_size.value

		# add batch norm
		if bn:
			assert is_train is not None
			flat_x = batch_norm(flat_x, scope="bn", is_train=is_train)

		if ln:
			flat_x = layer_norm(flat_x, scope="ln")

		W = tf.get_variable("W", dtype="float", initializer=tf.truncated_normal([flat_x.get_shape()[-1].value, output_size], stddev=0.1))
		flat_out = tf.matmul(flat_x, W)

		if bias:
			b = tf.get_variable("b", dtype="float", initializer=tf.constant(bias_start, shape=[output_size]))
			flat_out += b

		if add_tanh:
			flat_out = tf.tanh(flat_out, name="tanh")

		if wd is not None:
			add_wd(wd)

		out = reconstruct(flat_out, x, keep)
		return out
Example No. 9
def eval():
    config = read_config()

    C, B = restore(config.M, config.K)

    base, _, query, gt = sift1m_read()

    D = base.shape[1]

    del config

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    quantized = reconstruct(C, B)

    with tf.Session(config=config) as sess:
        total_distortion = np.mean(np.sum(np.square(quantized - base), axis=1))
        print('total distortion:', total_distortion)

        recall = np.zeros([query.shape[0], 100], dtype=np.float32)

        database = tf.constant(quantized, dtype=tf.float32)
        query_gpu = tf.placeholder(tf.float32, shape=(1, D))
        print(database.get_shape().as_list())
        distance = tf.norm(query_gpu - database, ord='euclidean', axis=1)

        print(distance.get_shape().as_list())
        _, index = tf.nn.top_k(-distance, k=100, sorted=True)
        print(index.get_shape().as_list())
        sess.run(tf.global_variables_initializer())

        for i in trange(query.shape[0], ncols=50):
            q = query[i][None, :]
            top100 = sess.run(index, {query_gpu: q})
            gt1 = gt[i][0]
            recall[i] = np.cumsum(np.isin(top100, gt1))
    recall = np.mean(recall, 0)

    np.savetxt('./result/recall.txt', recall)
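reconstruct(C, B) above decodes the learned codebooks back into full vectors. One plausible reading, assuming C stacks M codebooks of K D-dimensional codewords and B holds per-point code indices, as in additive/product quantization (the real decoder may differ):

import numpy as np

def reconstruct(C, B):
    # C: [M, K, D] codebooks; B: [N, M] integer codes.
    # Each point is decoded as the sum of its selected codeword
    # from every codebook.
    N, M = B.shape
    return sum(C[m][B[:, m]] for m in range(M))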
Example No. 10
def my_run():

    img_root = './data/'
    imgs, feats, K = utils.build_img_info(img_root)
    F, pair, match = utils.build_F_pair_match(feats)
    # F, H, pair, match = utils.build_F_H_pair_match(feats)
    # points, edges, tracks, G = utils.extract_points_edges_tracks_G(pair, len(feats))
    img_index1, img_index2 = 0, 1
    pts1 = pair[(img_index1, img_index2)]['pts1']
    pts2 = pair[(img_index1, img_index2)]['pts2']
    K1 = K[img_index1]
    K2 = K[img_index2]
    E = utils.get_E_from_F(F, K1, K2)
    # init reconstruction
    R1 = np.eye(3, 3)
    T1 = np.zeros((3, 1))
    R2, T2 = utils.extract_R_T(E, K1, R1, T1, K2, pts1, pts2, 5)
    cloud3d = utils.reconstruct(K1, R1, T1, K2, R2, T2, pts1, pts2)
    utils.save_3d(cloud3d, './my_out/mycloud_3D_euclid.ply')
    # np.save('./cloud3d.npy', cloud3d)
    print(cloud3d.shape)
    #utils.draw_v1(cloud3d)
    utils.show3d('./my_out/mycloud_3D_euclid.ply', 'the result')
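utils.reconstruct(K1, R1, T1, K2, R2, T2, pts1, pts2) above triangulates matched points from two calibrated views. A minimal sketch with OpenCV, assuming pts1/pts2 are [N, 2] pixel coordinates and (R, T) are world-to-camera extrinsics (the repo's helper may differ):

import cv2
import numpy as np

def reconstruct(K1, R1, T1, K2, R2, T2, pts1, pts2):
    P1 = K1 @ np.hstack([R1, T1])  # 3x4 projection matrix for camera 1
    P2 = K2 @ np.hstack([R2, T2])  # 3x4 projection matrix for camera 2
    pts4d = cv2.triangulatePoints(P1, P2,
                                  np.asarray(pts1, dtype=float).T,
                                  np.asarray(pts2, dtype=float).T)
    return (pts4d[:3] / pts4d[3]).T  # de-homogenize -> [N, 3] point cloud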
Example No. 11
def q2(path):
    print("Reading data...")
    start = time.time()
    filepath = os.path.abspath(path)
    all_cars = {f: pd.read_csv(os.path.join(filepath, f)) for f in os.listdir(filepath)}
    print(len(all_cars))

    print('Preparing data for clustering...')
    # select signal time series data from cars behaving correctly
    # (ignoring those that don't) and append it.
    signal_values = []
    for k, v in all_cars.items():
        car_num = int(k.split('/')[-1].split('.')[0][-1])
        anomalous_cars = [3, 7]  # cars to exclude from training
        if car_num not in anomalous_cars:
            car_sig_value = np.array(v.sig_value)  # optionally slice, e.g. [100:100100], to cap values per car
            signal_values.extend(car_sig_value)
    print('Length signal_values:', len(signal_values))
    
    # split signal data into segments. 
    # setup the segment length and slide length parameters
    segment_len = 100  # try with 100
    slide_len = 10  # try with 10
    segments = utils.sliding_segments(signal_values, segment_len, slide_len)
    print("Produced %d signal values segments" % len(segments))   # 10763
    
    # apply a window function to the data, which forces the start and end to be zero
    window_rads = np.linspace(0, np.pi, segment_len)
    window = np.sin(window_rads)**2
    windowed_segments = utils.get_windowed_segments(segments,segment_len , window)
    #print(len(windowed_segments))

    # Apply k-means clustering on the segments
    print('Clustering...')
    k = 150 # Test different n_clusters
    clusterer = KMeans(n_clusters=k)  
    clusterer.fit(windowed_segments)

    # Reconstruct the data using the centroids learned by the clusterer
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    print('Reconstructing...')
    reconstructed = utils.reconstruct(signal_values, window, clusterer)
 
    # Anomaly detection:
    print('Anomaly Detection...') 
    for k, v in all_cars.items():
        car_num = int(k.split('/')[-1].split('.')[0][-1])
        car_signal_data = v.sig_value
        reconstructed = utils.reconstruct(car_signal_data, window, clusterer)
        error = reconstructed - car_signal_data
        error_98th_percentile = np.percentile(error, 98)
        # Observed 98th-percentile reconstruction errors per car:
        # Car 0: 103.7   Car 1: 106.5   Car 2: 110.6   Car 3: 340.8   Car 4: 103.2
        # Car 5: 73.0    Car 6: 110.0   Car 7: 318.9   Car 8: 103.0   Car 9: 113.7
        error_threshold = 150  # avg. 98th-percentile reconstruction error is ~105
        if error_98th_percentile > error_threshold:
            print("Car %d not performing appropriately due to potential damage. Needs revision." % car_num)


    end = time.time()
    print('Function took {:.3f} ms'.format((end-start)*1000.0))
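utils.reconstruct(signal, window, clusterer) is not shown. A minimal sketch of the usual windowed k-means reconstruction, assuming segments are windowed, snapped to the nearest centroid, and overlap-added at half-window stride (the real helper's stride and edge handling may differ):

import numpy as np

def reconstruct(data, window, clusterer):
    # Rebuild the signal by replacing each windowed segment with its
    # nearest k-means centroid and overlap-adding the results.
    data = np.asarray(data, dtype=float)
    window_len = len(window)
    slide_len = window_len // 2
    reconstructed = np.zeros(len(data))
    for pos in range(0, len(data) - window_len + 1, slide_len):
        segment = data[pos:pos + window_len] * window
        centroid_idx = clusterer.predict(segment.reshape(1, -1))[0]
        reconstructed[pos:pos + window_len] += clusterer.cluster_centers_[centroid_idx]
    return reconstructed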
Example No. 12
def main(args):
    global logging
    debug = (args.reconstruct_from != ""
             or args.eval)  # don't make exp dir for reconstruction or eval
    logging = create_exp_dir(args.exp_dir, scripts_to_save=None, debug=debug)

    if args.cuda:
        logging('using cuda')
    logging(str(args))

    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    train_data = MonoTextData(args.train_data, label=args.label)

    vocab = train_data.vocab
    vocab_size = len(vocab)

    val_data = MonoTextData(args.val_data, label=args.label, vocab=vocab)
    test_data = MonoTextData(args.test_data, label=args.label, vocab=vocab)

    logging('Train data: %d samples' % len(train_data))
    logging('finish reading datasets, vocab size is %d' % len(vocab))
    logging('dropped sentences: %d' % train_data.dropped)
    #sys.stdout.flush()

    log_niter = max((len(train_data) // args.batch_size) // 10, 1)

    model_init = uniform_initializer(0.01)
    emb_init = uniform_initializer(0.1)

    #device = torch.device("cuda" if args.cuda else "cpu")
    device = "cuda" if args.cuda else "cpu"
    args.device = device

    if args.enc_type == 'lstm':
        encoder = GaussianLSTMEncoder(args, vocab_size, model_init, emb_init)
        args.enc_nh = args.dec_nh
    else:
        raise ValueError("the specified encoder type is not supported")

    decoder = LSTMDecoder(args, vocab, model_init, emb_init)
    vae = VAE(encoder, decoder, args).to(device)

    if args.load_path:
        loaded_state_dict = torch.load(args.load_path)
        #curr_state_dict = vae.state_dict()
        #curr_state_dict.update(loaded_state_dict)
        vae.load_state_dict(loaded_state_dict)
        logging("%s loaded" % args.load_path)

        if args.reset_dec:
            vae.decoder.reset_parameters(model_init, emb_init)

    if args.eval:
        logging('begin evaluation')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(
                batch_size=args.batch_size, device=device, batch_first=True)

            test(vae, test_data_batch, "TEST", args)
            au, au_var = calc_au(vae, test_data_batch)
            logging("%d active units" % au)
            # print(au_var)

            test_data_batch = test_data.create_data_batch(batch_size=1,
                                                          device=device,
                                                          batch_first=True)

            nll, ppl = calc_iwnll(vae, test_data_batch, args)
            logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))

        return

    if args.reconstruct_from != "":
        print("begin decoding")
        sys.stdout.flush()

        vae.load_state_dict(torch.load(args.reconstruct_from))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(
                batch_size=args.batch_size, device=device, batch_first=True)
            # test(vae, test_data_batch, "TEST", args)
            reconstruct(vae, test_data_batch, vocab, args.decoding_strategy,
                        args.reconstruct_to)

        return

    if args.opt == "sgd":
        enc_optimizer = optim.SGD(vae.encoder.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum)
        dec_optimizer = optim.SGD(vae.decoder.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum)
        opt_dict['lr'] = args.lr
    elif args.opt == "adam":
        enc_optimizer = optim.Adam(vae.encoder.parameters(), lr=0.001)
        dec_optimizer = optim.Adam(vae.decoder.parameters(), lr=0.001)
        opt_dict['lr'] = 0.001
    else:
        raise ValueError("optimizer not supported")

    iter_ = decay_cnt = 0
    best_loss = 1e4
    best_kl = best_nll = best_ppl = 0
    pre_mi = 0
    vae.train()
    start = time.time()

    train_data_batch = train_data.create_data_batch(batch_size=args.batch_size,
                                                    device=device,
                                                    batch_first=True)

    val_data_batch = val_data.create_data_batch(batch_size=args.batch_size,
                                                device=device,
                                                batch_first=True)

    test_data_batch = test_data.create_data_batch(batch_size=args.batch_size,
                                                  device=device,
                                                  batch_first=True)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(args.epochs):
            report_kl_loss = report_rec_loss = report_loss = 0
            report_num_words = report_num_sents = 0

            for i in np.random.permutation(len(train_data_batch)):

                batch_data = train_data_batch[i]
                batch_size, sent_len = batch_data.size()

                # not predict start symbol
                report_num_words += (sent_len - 1) * batch_size
                report_num_sents += batch_size

                kl_weight = args.beta

                enc_optimizer.zero_grad()
                dec_optimizer.zero_grad()

                if args.iw_train_nsamples < 0:
                    loss, loss_rc, loss_kl = vae.loss(batch_data,
                                                      kl_weight,
                                                      nsamples=args.nsamples)
                else:
                    loss, loss_rc, loss_kl = vae.loss_iw(
                        batch_data,
                        kl_weight,
                        nsamples=args.iw_train_nsamples,
                        ns=ns)
                loss = loss.mean(dim=-1)

                loss.backward()
                torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

                loss_rc = loss_rc.sum()
                loss_kl = loss_kl.sum()

                enc_optimizer.step()
                dec_optimizer.step()

                report_rec_loss += loss_rc.item()
                report_kl_loss += loss_kl.item()
                report_loss += loss.item() * batch_size

                if iter_ % log_niter == 0:
                    #train_loss = (report_rec_loss  + report_kl_loss) / report_num_sents
                    train_loss = report_loss / report_num_sents
                    logging('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f, ' \
                           'time elapsed %.2fs, kl_weight %.4f' %
                           (epoch, iter_, train_loss, report_kl_loss / report_num_sents,
                           report_rec_loss / report_num_sents, time.time() - start, kl_weight))

                    #sys.stdout.flush()

                    report_rec_loss = report_kl_loss = report_loss = 0
                    report_num_words = report_num_sents = 0

                iter_ += 1

            logging('kl weight %.4f' % kl_weight)

            vae.eval()
            with torch.no_grad():
                loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
                au, au_var = calc_au(vae, val_data_batch)
                logging("%d active units" % au)
                # print(au_var)

            if args.save_ckpt > 0 and epoch <= args.save_ckpt:
                logging('save checkpoint')
                torch.save(
                    vae.state_dict(),
                    os.path.join(args.exp_dir, f'model_ckpt_{epoch}.pt'))

            if loss < best_loss:
                logging('update best loss')
                best_loss = loss
                best_nll = nll
                best_kl = kl
                best_ppl = ppl
                torch.save(vae.state_dict(), args.save_path)

            if loss > opt_dict["best_loss"]:
                opt_dict["not_improved"] += 1
                if (opt_dict["not_improved"] >= decay_epoch
                        and epoch >= args.load_best_epoch):
                    opt_dict["best_loss"] = loss
                    opt_dict["not_improved"] = 0
                    opt_dict["lr"] = opt_dict["lr"] * lr_decay
                    vae.load_state_dict(torch.load(args.save_path))
                    logging('new lr: %f' % opt_dict["lr"])
                    decay_cnt += 1
                    enc_optimizer = optim.SGD(vae.encoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)
                    dec_optimizer = optim.SGD(vae.decoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)

            else:
                opt_dict["not_improved"] = 0
                opt_dict["best_loss"] = loss

            if decay_cnt == max_decay:
                break

            if epoch % args.test_nepoch == 0:
                with torch.no_grad():
                    loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST",
                                                 args)

            if args.save_latent > 0 and epoch <= args.save_latent:
                visualize_latent(args, epoch, vae, "cuda", test_data)

            vae.train()

    except KeyboardInterrupt:
        logging('-' * 100)
        logging('Exiting from training early')

    # compute importance weighted estimate of log p(x)
    vae.load_state_dict(torch.load(args.save_path))

    vae.eval()
    with torch.no_grad():
        loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST", args)
        au, au_var = calc_au(vae, test_data_batch)
        logging("%d active units" % au)
        # print(au_var)

    test_data_batch = test_data.create_data_batch(batch_size=1,
                                                  device=device,
                                                  batch_first=True)
    with torch.no_grad():
        nll, ppl = calc_iwnll(vae, test_data_batch, args)
        logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))
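The reconstruct(...) call in the reconstruct_from branch above writes decoded sentences to a file. A minimal sketch of such a helper, assuming a hypothetical vae.reconstruct that returns token-id sequences per batch and a vocab.id2word lookup (both names are assumptions about this repo's API):

def reconstruct(vae, data_batch, vocab, strategy, fname):
    # Decode every batch with the given strategy and write one
    # reconstructed sentence per line.
    with open(fname, "w") as fout:
        for batch_data in data_batch:
            decoded = vae.reconstruct(batch_data, strategy)  # hypothetical API
            for sent in decoded:
                fout.write(" ".join(vocab.id2word(w) for w in sent) + "\n")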
Example No. 13
        mask = mask / 255.
        mask = np.expand_dims(mask, axis=2)

        # Load and normalize the five quarterly scenes, then stack them on the channel axis
        dates = ["2019-01-01", "2019-04-01", "2019-07-01", "2019-10-01", "2020-01-01"]
        imgs = [U.normalization(skimage.img_as_float64(imread(dataset_add + "GEE_mapbiomas/" + path + "_" + d + ".tif")),
                                mean=dataset_mean_ind, std=dataset_std_ind)
                for d in dates]

        imgcon = np.concatenate(imgs, axis=2)

        data   = U.forward_crop(imgcon, window=(32,32), channels=10, stride=4)
        labels = U.forward_crop(mask, (32,32), channels=1, stride=4)
        
        pred = model.predict(data, batch_size=8, verbose=100)
        pred = U.reconstruct(pred, mask.shape, window=(32,32), channels=1, stride=4)


        y_scores_i = pred.reshape(-1, 1)
        y_true_i = mask.reshape(-1, 1)

        # Binarize predictions and ground truth at 0.5
        y_scores_i = np.where(y_scores_i > 0.5, 1, 0)
        y_true_i = np.where(y_true_i > 0.5, 1, 0)

        if idx == 0:
            y_scores = y_scores_i
            y_true   = y_true_i
        else:
            overlap = y_scores_i * y_true_i  # element-wise logical AND of the binary masks
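U.forward_crop and U.reconstruct are not shown. A plausible inverse of a stride-4 sliding-window crop, pasting each predicted patch at its origin and averaging overlaps back into the full mask shape (assumed behavior, not the repo's confirmed implementation):

import numpy as np

def reconstruct(patches, out_shape, window=(32, 32), channels=1, stride=4):
    # Paste each patch at its stride-spaced origin and average the overlaps.
    H, W = out_shape[0], out_shape[1]
    out = np.zeros((H, W, channels), dtype=np.float64)
    count = np.zeros((H, W, 1), dtype=np.float64)
    idx = 0
    for y in range(0, H - window[0] + 1, stride):
        for x in range(0, W - window[1] + 1, stride):
            out[y:y + window[0], x:x + window[1]] += patches[idx]
            count[y:y + window[0], x:x + window[1]] += 1
            idx += 1
    return out / np.maximum(count, 1)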
Example No. 14
    def build_forward(self):
        config = self.config
        VW = self.VW
        VC = self.VC
        W = self.W
        N = self.N
        # dynamically determined dims, used for sequence lengths
        M = tf.shape(self.pis)[1]  # photo num
        JXA = tf.shape(self.at)[2]  # for album title, photo title
        JD = tf.shape(self.ad)[2]  # description length
        JT = tf.shape(self.when)[2]
        JG = tf.shape(self.where)[2]

        JI = tf.shape(self.pis)[2]  # used for photo_title, photo
        JXP = tf.shape(self.pts)[3]

        JQ = tf.shape(self.q)[1]
        JA = tf.shape(self.choices)[2]

        # embeding size
        cdim, wdim, cwdim = self.cd, self.wd, self.cwd  #cwd: char -> word output dimension
        # image feature dim
        idim = self.idim  # image_feat dimension

        # all input:
        #	at, ad, when, where,
        #	pts, pis
        #	q, choices

        # embedding
        with tf.variable_scope('emb'):
            # char stuff
            if config.use_char:
                with tf.variable_scope("var"):
                    char_emb = tf.get_variable("char_emb",
                                               shape=[VC, cdim],
                                               dtype="float")

                # the embedding for each character
                # [N,M,JXA,W]
                Aat_c = tf.nn.embedding_lookup(char_emb, self.at_c)
                # [N,M,JD,W]
                Aad_c = tf.nn.embedding_lookup(char_emb, self.ad_c)
                # [N,M,JT,W]
                Awhen_c = tf.nn.embedding_lookup(char_emb, self.when_c)
                # [N,M,JG,W]
                Awhere_c = tf.nn.embedding_lookup(char_emb, self.where_c)
                # [N,M,JI,JXP,W] -> [N,M,JI,JXP,W,cdim]
                Apts_c = tf.nn.embedding_lookup(char_emb, self.pts_c)

                # [N,JQ,W]
                Aq_c = tf.nn.embedding_lookup(char_emb, self.q_c)
                Achoices_c = tf.nn.embedding_lookup(char_emb, self.choices_c)

                # flatten for conv2d input like images
                Aat_c = tf.reshape(Aat_c, [-1, JXA, W, cdim])
                Aad_c = tf.reshape(Aad_c, [-1, JD, W, cdim])
                Awhen_c = tf.reshape(Awhen_c, [-1, JT, W, cdim])
                Awhere_c = tf.reshape(Awhere_c, [-1, JG, W, cdim])
                # [N*M*JI,JXP,W,cdim]
                Apts_c = tf.reshape(Apts_c, [-1, JXP, W, cdim])

                Aq_c = tf.reshape(Aq_c, [-1, JQ, W, cdim])
                # [N*4,]
                Achoices_c = tf.reshape(Achoices_c, [-1, JA, W, cdim])

                #char CNN
                filter_size = cwdim  # output size for each word
                filter_height = 5
                with tf.variable_scope("conv"):
                    xat = conv1d(Aat_c,
                                 filter_size,
                                 filter_height,
                                 config.keep_prob,
                                 self.is_train,
                                 wd=config.wd,
                                 scope="conv1d")
                    tf.get_variable_scope().reuse_variables()
                    xad = conv1d(Aad_c,
                                 filter_size,
                                 filter_height,
                                 config.keep_prob,
                                 self.is_train,
                                 wd=config.wd,
                                 scope="conv1d")
                    xwhen = conv1d(Awhen_c,
                                   filter_size,
                                   filter_height,
                                   config.keep_prob,
                                   self.is_train,
                                   wd=config.wd,
                                   scope="conv1d")
                    xwhere = conv1d(Awhere_c,
                                    filter_size,
                                    filter_height,
                                    config.keep_prob,
                                    self.is_train,
                                    wd=config.wd,
                                    scope="conv1d")
                    xpts = conv1d(Apts_c,
                                  filter_size,
                                  filter_height,
                                  config.keep_prob,
                                  self.is_train,
                                  wd=config.wd,
                                  scope="conv1d")
                    qq = conv1d(Aq_c,
                                filter_size,
                                filter_height,
                                config.keep_prob,
                                self.is_train,
                                wd=config.wd,
                                scope="conv1d")
                    qchoices = conv1d(Achoices_c,
                                      filter_size,
                                      filter_height,
                                      config.keep_prob,
                                      self.is_train,
                                      wd=config.wd,
                                      scope="conv1d")

                    # reshape them back
                    xat = tf.reshape(xat, [-1, M, JXA, cwdim])
                    xad = tf.reshape(xad, [-1, M, JD, cwdim])
                    xwhen = tf.reshape(xwhen, [-1, M, JT, cwdim])
                    xwhere = tf.reshape(xwhere, [-1, M, JG, cwdim])
                    xpts = tf.reshape(xpts, [-1, M, JI, JXP, cwdim])

                    qq = tf.reshape(qq, [-1, JQ, cwdim])
                    # [N,num_choice,JA,cwdim]
                    qchoices = tf.reshape(qchoices,
                                          [-1, self.num_choice, JA, cwdim])

            # word stuff
            with tf.variable_scope('word'):
                with tf.variable_scope("var"):
                    # get the word embedding for new words
                    if config.is_train:
                        # for new word
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype="float",
                            shape=[VW, wdim],
                            initializer=get_initializer(config.emb_mat)
                        )  # it's just random initialized
                    else:  # save time for loading the emb during test
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       dtype="float",
                                                       shape=[VW, wdim])
                    # concat with the pretrained vectors: indices 0..VW-1 are
                    # for new words, the rest map to the pretrained vectors,
                    # which stay fixed
                    word_emb_mat = tf.concat(
                        [word_emb_mat, self.existing_emb_mat], 0)

                #[N,M,JXA] -> [N,M,JXA,wdim]
                Aat = tf.nn.embedding_lookup(word_emb_mat, self.at)
                Aad = tf.nn.embedding_lookup(word_emb_mat, self.ad)
                Awhen = tf.nn.embedding_lookup(word_emb_mat, self.when)
                Awhere = tf.nn.embedding_lookup(word_emb_mat, self.where)
                Apts = tf.nn.embedding_lookup(word_emb_mat, self.pts)

                Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)
                Achoices = tf.nn.embedding_lookup(word_emb_mat, self.choices)

            # concat char and word
            if config.use_char:

                xat = tf.concat([xat, Aat], 3)
                xad = tf.concat([xad, Aad], 3)
                xwhen = tf.concat([xwhen, Awhen], 3)
                xwhere = tf.concat([xwhere, Awhere], 3)
                # [N,M,JI,JX,wdim+cwdim]
                xpts = tf.concat([xpts, Apts], 4)

                # [N,JQ,wdim+cwdim]
                qq = tf.concat([qq, Aq], 2)
                qchoices = tf.concat([qchoices, Achoices], 3)

            else:
                xat = Aat
                xad = Aad
                xwhen = Awhen
                xwhere = Awhere
                xpts = Apts

                qq = Aq
                qchoices = Achoices
                # all the above last dim is the same [wdim+cwdim] or just [wdim]

            # get the image feature
            with tf.variable_scope("image"):

                # [N,M,JI] -> [N,M,JI,idim]
                xpis = tf.nn.embedding_lookup(self.image_emb_mat, self.pis)

        d = config.hidden_size

        cell_text = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
        cell_img = tf.nn.rnn_cell.BasicLSTMCell(d, state_is_tuple=True)
        # add dropout
        keep_prob = tf.cond(self.is_train,
                            lambda: tf.constant(config.keep_prob),
                            lambda: tf.constant(1.0))

        cell_text = tf.nn.rnn_cell.DropoutWrapper(cell_text, keep_prob)

        cell_img = tf.nn.rnn_cell.DropoutWrapper(cell_img, keep_prob)

        # it is important to think about which LSTMs share weights with which

        # sequence length for each
        at_len = tf.reduce_sum(tf.cast(self.at_mask, "int32"),
                               2)  # [N,M] # each album's title length
        ad_len = tf.reduce_sum(tf.cast(self.ad_mask, "int32"), 2)
        when_len = tf.reduce_sum(tf.cast(self.when_mask, "int32"), 2)
        where_len = tf.reduce_sum(tf.cast(self.where_mask, "int32"),
                                  2)  # [N,M]

        pis_len = tf.reduce_sum(tf.cast(self.pis_mask, "int32"),
                                2)  #[N,M,JI] #[N,M]

        pts_len = tf.reduce_sum(tf.cast(self.pts_mask, "int32"),
                                3)  # [N,M,JI,JXP] -> [N,M,JI]

        q_len = tf.reduce_sum(tf.cast(self.q_mask, "int32"),
                              1)  # [N] # each question 's length

        choices_len = tf.reduce_sum(tf.cast(self.choices_mask, "int32"),
                                    2)  # [N,4]

        # xat -> [N,M,JXA,wdim+cwdim]
        # xad -> [N,M,JD,wdim+cwdim]
        # xwhen/xwhere -> [N,M,JT/JG,wdim+cwdim]
        # xpts -> [N,M,JI,JXP,wdim+cwdim]

        # xpis -> [N,M,JI,idim]

        # qq -> [N,JQ,wdim+cwdim]
        # qchoices -> [N,4,JA,wdim+cwdim]

        # roll the sentence into lstm for context and question
        # from [N,M,JI,JX] -> [N,M,2d]
        with tf.variable_scope("reader"):
            with tf.variable_scope("text"):
                (fw_hq, bw_hq), (fw_lq,
                                 bw_lq) = tf.nn.bidirectional_dynamic_rnn(
                                     cell_text,
                                     cell_text,
                                     qq,
                                     sequence_length=q_len,
                                     dtype="float",
                                     scope="utext")
                # concat the fw and backward lstm output
                hq = tf.concat([fw_hq, bw_hq], 2)
                lq = tf.concat([fw_lq.h, bw_lq.h], 1)  #LSTM CELL

                tf.get_variable_scope().reuse_variables()

                # flat all
                # choices
                flat_qchoices = flatten(qchoices,
                                        2)  # [N,4,JA,dim] -> [N*4,JA,dim]
                # album title
                flat_xat = flatten(xat, 2)  #[N,M,JXA,dim] -> [N*M,JXA,dim]
                flat_xad = flatten(xad, 2)
                flat_xwhen = flatten(xwhen, 2)
                flat_xwhere = flatten(xwhere, 2)

                #print "flat_xpis shape:%s"%(flat_xpis.get_shape())

                # photo tiles
                flat_xpts = flatten(xpts,
                                    2)  # [N,M,JI,JXP,dim] -> [N*M*JI,JXP,dim]
                #print "flat_xpts shape:%s"%(flat_xpts.get_shape())

                # get the sequence length, all one dim
                flat_qchoices_len = flatten(choices_len, 0)  # [N*4]
                flat_xat_len = flatten(at_len, 0)  # [N*M]
                flat_xad_len = flatten(ad_len, 0)  # [N*M]
                flat_xwhen_len = flatten(when_len, 0)  # [N*M]
                flat_xwhere_len = flatten(where_len, 0)  # [N*M]

                flat_xpts_len = flatten(pts_len, 0)  # [N*M*JI]

                # put all through LSTM
                # uncomment to use ALL LSTM output or LAST LSTM output

                # album title
                # [N*M,JXA,d]
                (fw_hat_flat,
                 bw_hat_flat), (fw_lat_flat,
                                bw_lat_flat) = tf.nn.bidirectional_dynamic_rnn(
                                    cell_text,
                                    cell_text,
                                    flat_xat,
                                    sequence_length=flat_xat_len,
                                    dtype="float",
                                    scope="utext")
                fw_hat = reconstruct(fw_hat_flat, xat, 2)  #
                bw_hat = reconstruct(bw_hat_flat, xat, 2)
                hat = tf.concat([fw_hat, bw_hat], 3)  # [N,M,JXA,2d]
                fw_lat = tf.reshape(fw_lat_flat.h,
                                    [N, M, d])  # [N*M,d] -> [N,M,d]
                bw_lat = tf.reshape(bw_lat_flat.h, [N, M, d])
                lat = tf.concat([fw_lat, bw_lat], 2)  # [N,M,2d]

                # album description
                # [N*M,JD,d]
                (fw_had_flat,
                 bw_had_flat), (fw_lad_flat,
                                bw_lad_flat) = tf.nn.bidirectional_dynamic_rnn(
                                    cell_text,
                                    cell_text,
                                    flat_xad,
                                    sequence_length=flat_xad_len,
                                    dtype="float",
                                    scope="utext")
                fw_had = reconstruct(fw_had_flat, xad, 2)  #
                bw_had = reconstruct(bw_had_flat, xad, 2)
                had = tf.concat([fw_had, bw_had], 3)  # [N,M,JD,2d]
                fw_lad = tf.reshape(fw_lad_flat.h,
                                    [N, M, d])  # [N*M,d] -> [N,M,d]
                bw_lad = tf.reshape(bw_lad_flat.h, [N, M, d])
                lad = tf.concat([fw_lad, bw_lad], 2)  # [N,M,2d]

                # when
                (fw_hwhen_flat, bw_hwhen_flat), (
                    fw_lwhen_flat,
                    bw_lwhen_flat) = tf.nn.bidirectional_dynamic_rnn(
                        cell_text,
                        cell_text,
                        flat_xwhen,
                        sequence_length=flat_xwhen_len,
                        dtype="float",
                        scope="utext")
                fw_hwhen = reconstruct(fw_hwhen_flat, xwhen, 2)  #
                bw_hwhen = reconstruct(bw_hwhen_flat, xwhen, 2)
                hwhen = tf.concat([fw_hwhen, bw_hwhen], 3)  # [N,M,JT,2d]
                # LSTM
                fw_lwhen = tf.reshape(fw_lwhen_flat.h,
                                      [N, M, d])  # [N*M,d] -> [N,M,d]
                bw_lwhen = tf.reshape(bw_lwhen_flat.h, [N, M, d])
                lwhen = tf.concat([fw_lwhen, bw_lwhen], 2)  # [N,M,2d]

                # where
                (fw_hwhere_flat, bw_hwhere_flat), (
                    fw_lwhere_flat,
                    bw_lwhere_flat) = tf.nn.bidirectional_dynamic_rnn(
                        cell_text,
                        cell_text,
                        flat_xwhere,
                        sequence_length=flat_xwhere_len,
                        dtype="float",
                        scope="utext")
                fw_hwhere = reconstruct(fw_hwhere_flat, xwhere, 2)  #
                bw_hwhere = reconstruct(bw_hwhere_flat, xwhere, 2)
                hwhere = tf.concat([fw_hwhere, bw_hwhere], 3)  # [N,M,JG,2d]
                fw_lwhere = tf.reshape(fw_lwhere_flat.h,
                                       [N, M, d])  # [N*M,d] -> [N,M,d]
                bw_lwhere = tf.reshape(bw_lwhere_flat.h, [N, M, d])
                lwhere = tf.concat([fw_lwhere, bw_lwhere], 2)  # [N,M,2d]

                # photo title
                # [N*M*JI,JXP,d]
                (fw_hpts_flat, bw_hpts_flat), (
                    fw_lpts_flat,
                    bw_lpts_flat) = tf.nn.bidirectional_dynamic_rnn(
                        cell_text,
                        cell_text,
                        flat_xpts,
                        sequence_length=flat_xpts_len,
                        dtype="float",
                        scope="utext")
                fw_hpts = reconstruct(fw_hpts_flat, xpts, 2)  #
                bw_hpts = reconstruct(bw_hpts_flat, xpts, 2)  # [N,M,JI,JXP,d]
                hpts = tf.concat([fw_hpts, bw_hpts], 4)  # [N,M,JI,JXP,2d]
                # LSTM
                fw_lpts = tf.reshape(fw_lpts_flat.h,
                                     [N, M, JI, d])  # [N*M*JI,d] -> [N,M,JI,d]
                bw_lpts = tf.reshape(bw_lpts_flat.h, [N, M, JI, d])
                lpts = tf.concat([fw_lpts, bw_lpts], 3)  # [N,M,JI,2d]

                # choices
                (fw_hchoices_flat, bw_hchoices_flat), (
                    fw_lchoices_flat,
                    bw_lchoices_flat) = tf.nn.bidirectional_dynamic_rnn(
                        cell_text,
                        cell_text,
                        flat_qchoices,
                        sequence_length=flat_qchoices_len,
                        dtype="float",
                        scope="utext")
                fw_hchoices = reconstruct(fw_hchoices_flat, qchoices, 2)  #
                bw_hchoices = reconstruct(bw_hchoices_flat, qchoices, 2)
                hchoices = tf.concat([fw_hchoices, bw_hchoices],
                                     3)  # [N,4,JA,2d]
                # LSTM
                fw_lchoices = tf.reshape(fw_lchoices_flat.h,
                                         [N, -1, d])  # [N*4,d] -> [N,4,d]
                bw_lchoices = tf.reshape(bw_lchoices_flat.h, [N, -1, d])
                lchoices = tf.concat([fw_lchoices, bw_lchoices], 2)  # [N,4,2d]

            with tf.variable_scope("image"):
                # photos
                flat_xpis = flatten(xpis, 2)  # [N,M,JI,idim] -> [N*M,JI,idim]
                flat_xpis_len = flatten(pis_len, 0)  # [N*M]

                # photo # use different LSTM
                # [N*M,JI,d]
                (fw_hpis_flat, bw_hpis_flat), (
                    fw_lpis_flat,
                    bw_lpis_flat) = tf.nn.bidirectional_dynamic_rnn(
                        cell_img,
                        cell_img,
                        flat_xpis,
                        sequence_length=flat_xpis_len,
                        dtype="float",
                        scope="uimage")
                fw_hpis = reconstruct(fw_hpis_flat, xpis, 2)  #
                bw_hpis = reconstruct(bw_hpis_flat, xpis, 2)  # [N,M,JI,d]
                hpis = tf.concat([fw_hpis, bw_hpis], 3)  # [N,M,JI,2d]
                # LSTM
                fw_lpis = tf.reshape(fw_lpis_flat.h,
                                     [N, M, d])  # [N*M,d] -> [N,M,d]
                bw_lpis = tf.reshape(bw_lpis_flat.h, [N, M, d])
                lpis = tf.concat([fw_lpis, bw_lpis], 2)  # [N,M,2d]

            if config.wd is not None:  # l2 weight decay for the reader
                add_wd(config.wd)

        # all rnn output
        # hq -> [N,JQ,2d]

        # hat -> [N,M,JXA,2d]
        # had -> [N,M,JD,2d]
        # hwhen -> [N,M,JT,2d]
        # hwhere -> [N,M,JG,2d]
        # hpts -> [N,M,JI,JXP,2d]
        # hpis -> [N,M,JI,2d]

        # hchoices -> [N,4,JA,2d]

        # last states:
        # lq -> [N,2d]

        # lat -> [N,M,2d]
        # lad -> [N,M,2d]
        # lwhen -> [N,M,2d]
        # lwhere -> [N,M,2d]
        # lpts -> [N,M,JI,2d]
        # lpis -> [N,M,2d]

        # lchoices -> [N,4,2d]

        # use last lstm output (last hidden state)
        # outputs_fw[k,X_len[k]-1] == states_fw.h[k]
        # at_len -> [N,M]
        g0at = lat  #[N,M,2d]
        g0ad = lad  # [N,M,2d]
        g0when = lwhen
        g0where = lwhere
        g0pts = tf.reduce_mean(lpts, 2)  #[N,M,JI,2d] -> [N,M,2d]
        g0pis = lpis
        # album level attention
        g1at = tf.reduce_mean(g0at, 1)  # [N,2d]
        g1ad = tf.reduce_mean(g0ad, 1)
        g1when = tf.reduce_mean(g0when, 1)
        g1where = tf.reduce_mean(g0where, 1)
        g1pts = tf.reduce_mean(g0pts, 1)
        g1pis = tf.reduce_mean(g0pis, 1)

        g1 = tf.stack([g1at, g1ad, g1when, g1where, g1pts, g1pis], axis=1)
        g1_all = tf.reduce_mean(g1, 1)

        with tf.variable_scope("choices_emb"):
            # embed the choices

            gchoices = lchoices  #[N,4,2d] # last LSTM state for each choice

        with tf.variable_scope("question_emb"):
            # hq -> [N,JQ,2d]
            # gp -> [N,2d]
            gq = lq  # this is the last hidden state of each question # [N,2d]

        # the modeling layer
        with tf.variable_scope("output"):

            # g1_all [N,2d] # this could be viewed as an answer representation
            # together with the choices_emb and question_emb,
            # we do a single layer multi class classification

            # tile g1_all [N,2d] -> [N,1,2d] to concat with gchoices
            # [N,4,2d]
            g1_a_t = tf.tile(tf.expand_dims(g1_all, 1),
                             [1, self.num_choice, 1])

            # tile gq for all choices
            gq = tf.tile(tf.expand_dims(gq, 1),
                         [1, self.num_choice, 1])  # [N,4,2d]

            # [N,4,2d] -> [N*4,1]
            # TODO: consider different similarity matrix

            logits = linear(tf.concat(
                [gq, g1_a_t, gchoices, g1_a_t * gchoices, gq * gchoices], 2),
                            output_size=1,
                            scope="choicelogits")

            logits = tf.squeeze(logits, 2)  # [N,4,1] -> [N,4]
            yp = tf.nn.softmax(logits)  # [N,4]

            # for loss and forward
            self.logits = logits
            self.yp = yp
Example No. 15
def main(args, args_model):
    global logging
    eval_mode = (args.reconstruct_from != "" or args.eval or args.eval_iw_elbo
                 or args.eval_valid_elbo or args.export_avg_loss_per_ts
                 or args.study_pooling
                 )  # don't make exp dir for reconstruction
    logging = create_exp_dir(args.exp_dir,
                             scripts_to_save=None,
                             debug=eval_mode)

    if args.cuda:
        logging('using cuda')
    logging(str(args))

    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    vocab = {}
    if getattr(args, 'vocab_file', None):
        with open(args.vocab_file, 'r', encoding='utf-8') as fvocab:
            for i, line in enumerate(fvocab):
                vocab[line.strip()] = i

        vocab = VocabEntry(vocab)

    train_data = MonoTextData(args.train_data, label=args.label, vocab=vocab)

    vocab = train_data.vocab
    vocab_size = len(vocab)

    val_data = MonoTextData(args.val_data, label=args.label, vocab=vocab)
    test_data = MonoTextData(args.test_data, label=args.label, vocab=vocab)

    logging('Train data: %d samples' % len(train_data))
    logging('finish reading datasets, vocab size is %d' % len(vocab))
    logging('dropped sentences: %d' % train_data.dropped)
    #sys.stdout.flush()

    log_niter = max((len(train_data) // args.batch_size) // 10, 1)

    device = torch.device("cuda" if args.cuda else "cpu")
    vae = create_model(vocab, args, args_model, logging, eval_mode)

    if args.eval:
        logging('begin evaluation')
        vae.eval()
        with torch.no_grad():
            test_data_batch = val_data.create_data_batch(batch_size=1,
                                                         device=device,
                                                         batch_first=True)
            nll, ppl = calc_iwnll(vae, test_data_batch, args, ns=250)
            logging('iw nll: %.4f, iw ppl: %.4f' % (nll, ppl))
        return

    if args.eval_iw_elbo:
        logging('begin evaluation')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            test_data_batch = test_data.create_data_batch(batch_size=1,
                                                          device=device,
                                                          batch_first=True)
            nll, ppl = calc_iw_elbo(vae, test_data_batch, args)
            logging('iw ELBo: %.4f, iw PPL*: %.4f' % (nll, ppl))
        return

    if args.eval_valid_elbo:
        logging('begin evaluation on validation set')
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        val_data_batch = val_data.create_data_batch(batch_size=args.batch_size,
                                                    device=device,
                                                    batch_first=True)

        with torch.no_grad():
            loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
            logging('nll: %.4f, ppl: %.4f' % (nll, ppl))
        return

    if args.study_pooling:
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            data_batch = train_data.create_data_batch(
                batch_size=args.batch_size, device=device, batch_first=True)
            model_dir = os.path.dirname(args.load_path)
            archive_npy = os.path.join(model_dir, 'pooling.npy')
            random.shuffle(data_batch)
            #logs = study_pooling(vae, data_batch, "TRAIN", args, min_doc_size=16)
            logs = study_pooling(vae, data_batch, args, min_doc_size=4)
            logs['exp_dir'] = model_dir
            np.save(archive_npy, logs)
        return

    if args.export_avg_loss_per_ts:
        print("MODEL")
        print(vae)
        export_avg_loss_per_ts(
            vae,
            train_data,
            device,
            args.batch_size,
            args.load_path,
            args.export_avg_loss_per_ts,
        )
        return

    if args.reconstruct_from != "":
        print("begin decoding")
        vae.load_state_dict(torch.load(args.reconstruct_from))
        vae.eval()
        with torch.no_grad():
            if args.reconstruct_add_labels_to_source:
                test_data_batch, test_labels_batch = test_data.create_data_batch_labels(
                    batch_size=args.reconstruct_batch_size,
                    device=device,
                    batch_first=True,
                    deterministic=True)
                c = list(zip(test_data_batch, test_labels_batch))
                #random.shuffle(c)
                test_data_batch, test_labels_batch = zip(*c)
            else:
                test_data_batch = test_data.create_data_batch(
                    batch_size=args.reconstruct_batch_size,
                    device=device,
                    batch_first=True)
                test_labels_batch = None
                #random.shuffle(test_data_batch)
            # test(vae, test_data_batch, "TEST", args)
            reconstruct(vae, test_data_batch, vocab, args.decoding_strategy,
                        args.reconstruct_to, test_labels_batch,
                        args.reconstruct_max_examples,
                        args.force_absolute_length, args.no_unk)

        return

    if args.freeze_encoder_exc:
        assert args.enc_type == 'lstm'
        enc_params = vae.encoder.linear.parameters()
    else:
        enc_params = vae.encoder.parameters()
    dec_params = vae.decoder.parameters()
    if args.opt == 'sgd':
        optimizer_fn = optim.SGD
    elif args.opt == 'adam':
        optimizer_fn = optim.Adam
    else:
        raise ValueError("optimizer not supported")

    def optimizer_fn_(params):
        if optimizer_fn is optim.SGD:
            return optimizer_fn(params, lr=args.lr, momentum=args.momentum)
        return optimizer_fn(params, lr=args.lr)  # Adam takes no momentum argument

    enc_optimizer = optimizer_fn_(enc_params)
    dec_optimizer = optimizer_fn_(dec_params)

    iter_ = decay_cnt = 0
    best_loss = 1e4
    best_kl = best_nll = best_ppl = 0
    vae.train()
    start = time.time()

    kl_weight = args.kl_start
    if args.warm_up > 0:
        anneal_rate = (1.0 -
                       args.kl_start) / (args.warm_up *
                                         (len(train_data) / args.batch_size))
    else:
        anneal_rate = 0

    dim_target_kl = args.target_kl / float(args.nz)

    train_data_batch = train_data.create_data_batch(batch_size=args.batch_size,
                                                    device=device,
                                                    batch_first=True)

    val_data_batch = val_data.create_data_batch(batch_size=args.batch_size,
                                                device=device,
                                                batch_first=True)

    test_data_batch = test_data.create_data_batch(batch_size=args.batch_size,
                                                  device=device,
                                                  batch_first=True)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(args.epochs):
            report_kl_loss = report_rec_loss = report_loss = 0
            report_num_words = report_num_sents = 0

            for i in np.random.permutation(len(train_data_batch)):

                batch_data = train_data_batch[i]
                batch_size, sent_len = batch_data.size()

                # not predict start symbol
                report_num_words += (sent_len - 1) * batch_size
                report_num_sents += batch_size

                kl_weight = min(1.0, kl_weight + anneal_rate)

                enc_optimizer.zero_grad()
                dec_optimizer.zero_grad()

                if args.fb == 0:
                    loss, loss_rc, loss_kl = vae.loss(batch_data,
                                                      kl_weight,
                                                      nsamples=args.nsamples)
                elif args.fb == 1:
                    loss, loss_rc, loss_kl = vae.loss(batch_data,
                                                      kl_weight,
                                                      nsamples=args.nsamples,
                                                      sum_over_len=False)
                    kl_mask = (loss_kl > args.target_kl).float()
                    loss_rc = loss_rc.sum(-1)
                    loss = loss_rc + kl_mask * kl_weight * loss_kl
                elif args.fb == 2:
                    mu, logvar = vae.encoder(batch_data)
                    z = vae.encoder.reparameterize(mu, logvar, args.nsamples)
                    loss_kl = 0.5 * (mu.pow(2) + logvar.exp() - logvar - 1)
                    kl_mask = (loss_kl > dim_target_kl).float()
                    fake_loss_kl = (kl_mask * loss_kl).sum(dim=1)
                    loss_rc = vae.decoder.reconstruct_error(batch_data,
                                                            z).mean(dim=1)
                    loss = loss_rc + kl_weight * fake_loss_kl
                loss = loss.mean(dim=-1)

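                # backpropagate, then clip gradients before the optimizer steps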
                loss.backward()
                torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

                loss_rc = loss_rc.sum()
                loss_kl = loss_kl.sum()

                if not args.freeze_encoder:
                    enc_optimizer.step()
                dec_optimizer.step()

                report_rec_loss += loss_rc.item()
                report_kl_loss += loss_kl.item()
                report_loss += loss_rc.item() + loss_kl.item()

                if iter_ % log_niter == 0:
                    train_loss = report_loss / report_num_sents

                    logging('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f, '
                            'time %.2fs, kl_weight %.4f' %
                            (epoch, iter_, train_loss, report_kl_loss / report_num_sents,
                             report_rec_loss / report_num_sents, time.time() - start, kl_weight))

                    report_rec_loss = report_kl_loss = report_loss = 0
                    report_num_words = report_num_sents = 0
                iter_ += 1

            logging('kl weight %.4f' % kl_weight)
            logging('lr {}'.format(opt_dict["lr"]))

            vae.eval()
            with torch.no_grad():
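                # mi = mutual information estimate I(x; z); au = number of active latent units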
                loss, nll, kl, ppl, mi = test(vae, val_data_batch, "VAL", args)
                au, au_var = calc_au(vae, val_data_batch)
                logging("%d active units" % au)

            if args.save_ckpt > 0 and epoch <= args.save_ckpt:
                logging('save checkpoint')
                torch.save(
                    vae.state_dict(),
                    os.path.join(args.exp_dir, f'model_ckpt_{epoch}.pt'))

            if loss < best_loss:
                logging('update best loss')
                best_loss = loss
                best_nll = nll
                best_kl = kl
                best_ppl = ppl
                torch.save(vae.state_dict(), args.save_path)

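            # on a validation plateau: reload the best checkpoint, decay the lr,
            # and rebuild the optimizers (training stops after max_decay decays)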
            if loss > opt_dict["best_loss"]:
                opt_dict["not_improved"] += 1
                if opt_dict["not_improved"] >= decay_epoch and epoch >= args.load_best_epoch:
                    opt_dict["best_loss"] = loss
                    opt_dict["not_improved"] = 0
                    opt_dict["lr"] = opt_dict["lr"] * lr_decay
                    vae.load_state_dict(torch.load(args.save_path))
                    logging('new lr: %f' % opt_dict["lr"])
                    decay_cnt += 1
                    enc_optimizer = optim.SGD(vae.encoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)
                    dec_optimizer = optim.SGD(vae.decoder.parameters(),
                                              lr=opt_dict["lr"],
                                              momentum=args.momentum)

            else:
                opt_dict["not_improved"] = 0
                opt_dict["best_loss"] = loss

            if decay_cnt == max_decay:
                break

            if args.save_latent > 0 and epoch <= args.save_latent:
                visualize_latent(args, epoch, vae, device, test_data)

            vae.train()

    except KeyboardInterrupt:
        logging('-' * 100)
        logging('Exiting from training early')

    # compute importance weighted estimate of log p(x)
    vae.load_state_dict(torch.load(args.save_path))

    vae.eval()
    with torch.no_grad():
        loss, nll, kl, ppl, _ = test(vae, test_data_batch, "TEST", args)
        au, au_var = calc_au(vae, test_data_batch)
Ejemplo n.º 16
0
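    # report solution quality from the CPLEX MIP model solved earlier in the script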
    print("MIP relative gap:", gap)
    obj = model.solution.get_objective_value()
    print("Objective value:", obj)

    # visualize boundaries
    boundaries = utils.vis_cut(image, model)
    plt.imshow(boundaries)
    plt.show()

    # visualize segmentation
    segmentations = utils.vis_seg(image, model)
    plt.imshow(segmentations)
    plt.show()

    # visualize depth
    depth = utils.reconstruct(image, model)
    plt.imshow(depth)
    plt.show()
    cv2.imwrite('/home/bo/Desktop/sample/depth.png',
                (depth * 255).astype(np.uint8))

    # visualize 3d input signal
    X = np.arange(depth.shape[1])
    Y = np.arange(depth.shape[0])
    X, Y = np.meshgrid(X, Y)
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X,
                           Y,
                           depth,
                           cmap=cm.jet)
    plt.show()
Ejemplo n.º 17
0
def main():
    # Set CUDA.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    torch.set_grad_enabled(False)

    #  Load config json.
    with open('checkpoints-pretrained/config.json', 'r') as f:
        config = json.load(f)

    # Parse arguments.
    args = parse_args()
    assert len(args.features) > 1

    paths = types.SimpleNamespace()
    paths.database_path = os.path.join(
        args.dataset_path, '%s.db' % args.exp_name
    )
    paths.image_path = os.path.join(
        args.dataset_path, 'images'
    )
    paths.match_list_path = os.path.join(
        args.dataset_path, 'match-list-exh.txt'
    )
    paths.sparse_path = os.path.join(
        args.dataset_path, 'sparse-%s' % args.exp_name
    )
    paths.output_path = os.path.join(
        args.dataset_path, 'stats-%s.txt' % args.exp_name
    )

    # Copy reference database.
    if os.path.exists(paths.database_path):
        raise FileExistsError('Database file already exists.')
    shutil.copy(
        os.path.join(args.dataset_path, 'database.db'),
        paths.database_path
    )

    # Create networks.
    checkpoint = torch.load(args.checkpoint)
    encoders = {}
    for feature in args.features:
        encoder, _ = create_network_for_feature(feature, config, use_cuda)
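        # checkpoint['encoders'] holds (feature_name, state_dict) pairs; pick this feature's weights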
        state_dict = list(filter(lambda x: x[0] == feature, checkpoint['encoders']))[0]
        encoder.load_state_dict(state_dict[1])
        encoder.eval()
        encoders[feature] = encoder

    # Build and translate database.
    image_features = build_hybrid_database(
        args.features,
        args.dataset_path,
        paths.database_path
    )
    np.save(os.path.join(args.dataset_path, 'features.npy'), image_features)
    translate_database(
        image_features,
        paths.database_path,
        encoders, args.batch_size, device
    )

    # Matching + GV + reconstruction.
    match_features(
        args.colmap_path,
        paths.database_path, paths.image_path, paths.match_list_path
    )
    torch.cuda.empty_cache()
    matching_stats = geometric_verification(
        args.colmap_path,
        paths.database_path, paths.match_list_path
    )
    largest_model_path, reconstruction_stats = reconstruct(
        args.colmap_path,
        paths.database_path, paths.image_path, paths.sparse_path
    )
    extra_stats = compute_extra_stats(image_features, largest_model_path)

    with open(paths.output_path, 'w') as f:
        f.write(json.dumps(matching_stats))
        f.write('\n')
        f.write(json.dumps(reconstruction_stats))
        f.write('\n')
        f.write(json.dumps(extra_stats))
        f.write('\n')
Ejemplo n.º 18
0
import numpy as np
import scipy.signal
import soundfile as sf
import matplotlib.pyplot as plt
from keras.models import load_model  # assuming Keras here, given the .h5 checkpoint below

#%%
# PREPARE DATA
#stereo_to_mono('rawdata','groundtruth')
#compress('groundtruth','training_samples')
test_data, test_fs = sf.read('training_samples/Triviul-Dorothy.wav')
orig_data, orig_fs = sf.read('groundtruth/Triviul-Dorothy.wav')

#%%
# LOAD MODEL
model = load_model('SRCNN_2019-05-03 22_09_28_bestMix.h5')

#%%
# Reconstruct
predict = reconstruct(test_data, test_fs, model)

#%%
# SPECTROGRAM ANALYSIS
output_data, output_fs = sf.read('output_with_phase.wav')

# Plot spectrogram of original data
plt.figure(0)
orig_f, orig_t, orig_spec = scipy.signal.stft(orig_data, orig_fs)
plt.pcolormesh(orig_t, orig_f, 20 * np.log10(np.abs(orig_spec) + 0.0001))
plt.title('Spectrogram of original high-quality data')
plt.xlabel('Time (s)')
plt.ylabel('Freq (Hz)')

# Plot spectrogram of test data (compressed)
plt.figure(1)
Ejemplo n.º 19
0
import utils as Utils

data = Utils.readFile("text.txt")
array = Utils.getFileAsArray(data)

# Task 1
Utils.findWordInFile("sentence.txt", "is")

# Task 2
Utils.writeSentenceToFile("sentence.txt")

# Task 3
Utils.minifyData(array)
Utils.reconstruct()