コード例 #1
0
    def train_classifier(self, agent: AutoEncoder):
        """Train an MLP probe on the autoencoder's latent representations.

        Each step samples a labelled batch from ``self.dataset``, encodes it
        with ``agent``, trains the MLP on the latent codes and logs the
        post-training accuracy to TensorBoard.

        Args:
            agent: autoencoder whose ``encode`` output feeds the probe.

        Returns:
            The trained MLP classifier.
        """
        mlp = MLP(30).to(self.dev)
        agent.to(self.dev)

        # Hoist loop-invariant config conversions out of the hot loop.
        n_steps = int(self.cfg.nsteps)
        batch_size = int(self.cfg.bsize)

        for i in range(n_steps):
            X, y = map(
                lambda t: t.to(self.dev),
                self.dataset.sample_with_label(batch_size),
            )
            latent = agent.encode(X)
            mlp.train(latent, y)
            acc = mlp.compute_acc(latent, y)
            self.tb.add_scalar("Accuracy-Post", acc, global_step=i)
        return mlp
コード例 #2
0
# assume the input dimension is input_d
# the network is like input_d -> 4 -> 2 -> 4 -> input_d
autoencoder = AutoEncoder()

# Train the stacked autoencoder layer-by-layer, without fine-tuning.
# (print() calls instead of Python-2-only print statements so the snippet
# runs under both Python 2 and Python 3.)
print("\ntrain autoencoder without fine-tuning ==========\n")
autoencoder.fit([4, 2],
                iterator,
                stacked=True,
                learning_rate=0.02,
                max_epoch=5000,
                tied=True,
                activation="tanh")

# Encode data with the pre-trained (not fine-tuned) weights.
encoded_datas = autoencoder.encode(datas)
print("encoder (without fine-tuning) ================")
print(encoded_datas)

# Fine-tune the whole network end-to-end, supervised.
print("\ntrain autoencoder with fine-tuning ==========\n")
autoencoder.fine_tune(fine_tuning_iterator,
                      supervised=True,
                      learning_rate=0.02,
                      max_epoch=10000,
                      tied=True)
# NOTE: for unsupervised fine-tuning instead, pass supervised=False and
# max_epoch=6000 to fine_tune().

# Encode data again with the fine-tuned weights.
tuned_encoded_datas = autoencoder.encode(datas)
print("encoder (with fine-tuning)================")
コード例 #3
0
def evaluate_experiment(path_dti: str, path_mtm: str, data_x: Tensor,
                        data_y: Tensor) -> pd.DataFrame:
    """Evaluate the latent representations of several trained autoencoders.

    Walks every run directory under ``path_dti`` and ``path_mtm``, loads the
    stored checkpoints, encodes ``data_x`` and scores the representations
    against ``data_y`` with ``evaluate_representations``.

    Args:
        path_dti: directory containing one sub-directory per DTI run.
        path_mtm: directory containing one sub-directory per MTM run.
        data_x: input samples to encode.
        data_y: labels for the downstream evaluation.

    Returns:
        One DataFrame concatenating the results of every (run, method, agent).
    """

    def _make_ae() -> AutoEncoder:
        # All evaluated agents share the same architecture/hyper-parameters.
        return AutoEncoder(30, False, False, 0.001, "bruh", pre_latent_dim=49)

    def _score(ae: AutoEncoder, method: str, agent_idx: int) -> pd.DataFrame:
        # Detach so downstream evaluation never back-propagates into the AE.
        repres = ae.encode(data_x).detach()
        results = evaluate_representations(repres, data_y, 10, (30, ), args,
                                           method)
        results["Agent"] = agent_idx
        return results

    result_df_container = []

    # Use a distinct loop variable: the original shadowed the ``path_dti``
    # parameter, which silently broke the glob pattern after one iteration.
    for run_dir in glob.glob(f"{path_dti}/*"):
        # Randomly initialised (untrained) baselines.
        for i in range(2):
            result_df_container.append(
                _score(_make_ae(), "Random features", i))

        # Plain autoencoder baselines loaded from disk.
        for i in range(2):
            ae = _make_ae()
            ae.load_state_dict(
                torch.load(run_dir +
                           ("/baseline.pt" if i == 0 else "/baseline_2.pt")))
            result_df_container.append(_score(ae, "AE", i))

        # DTI agents stored as A.pt, B.pt, C.pt.
        for i in range(3):
            ae = _make_ae()
            ae.load_state_dict(
                torch.load(run_dir + f"/{string.ascii_uppercase[i]}.pt"))
            result_df_container.append(_score(ae, "DTI", i))

    for run_dir in glob.glob(f"{path_mtm}/*"):
        # AE+MTM agents stored as A.pt, B.pt, C.pt.
        for i in range(3):
            ae = _make_ae()
            ae.load_state_dict(
                torch.load(run_dir + f"/{string.ascii_uppercase[i]}.pt"))
            result_df_container.append(_score(ae, "AE+MTM", i))

    # BUG FIX: the original concatenated and returned *inside* the MTM loop,
    # so only the first MTM run directory was ever processed.
    return pd.concat(result_df_container)
コード例 #4
0
    # Load the default MNIST dataset wrapper.
    mnist_dataset = MnistWrapper.load_default()

    # ----------normal relu pretraining----------
    # Python 2 print statement: this snippet targets Python 2.
    print 'Training model with normal relu'
    folder = 'test/mnist_ae_relu_inf'
    # None in the RELU_MAX slot -> unbounded ReLU during pretraining.
    ae = AutoEncoder(mnist_dataset, encode, decode, None, folder)
    ae.build_models()

    num_epoch = 30
    # Decaying learning-rate schedule starting at 0.1.
    lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    ae.train(128, num_epoch, lr_schedule)  # batch size 128
    ae.save_models()
    ae.test_models(utils.vis_mnist)
    ae.log()

    # NOTE(review): encode() is passed the wrapper *class*, not the loaded
    # dataset instance -- presumably the API accepts the class; confirm.
    encoded_dataset = ae.encode(MnistWrapper)
    encoded_dataset.dump_to_h5(os.path.join(folder, 'encoded_mnist.h5'))
    encoded_dataset.plot_data_dist(os.path.join(folder, 'encoded_plot.png'))

    # ----------truncate relu and fine-tune----------
    print 'Training model with relu-%d' % RELU_MAX
    new_folder = 'test/mnist_ae_relu_%d' % RELU_MAX
    # Rebuild with ReLU capped at RELU_MAX and fine-tune from the pretrained
    # weights in ``folder``.
    ae = AutoEncoder(mnist_dataset, encode, decode, RELU_MAX, new_folder)
    ae.build_models(folder) # load previously trained ae

    mnist_dataset.plot_data_dist(os.path.join(new_folder, 'original_mnist_plot.png'))

    # num_epoch = 2
    # lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    # Fine-tune reusing the epoch count and schedule from pretraining above.
    ae.train(128, num_epoch, lr_schedule)
    ae.save_models()
コード例 #5
0
# train autoencoder
# assume the input dimension is input_d
# the network is like input_d -> 4 -> 2 -> 4 -> input_d
autoencoder = AutoEncoder()
autoencoder.fit([4, 2],
                iterator,
                stacked=True,
                learning_rate=0.1,
                max_epoch=5000)
# Unsupervised end-to-end fine-tuning after the stacked pretraining.
autoencoder.fine_tune(iterator, learning_rate=0.1, supervised=False)

# after training

# encode data
# (print() calls instead of Python-2-only print statements so the snippet
# runs under both Python 2 and Python 3.)
encoded_datas = autoencoder.encode(datas)
print("encoder ================")
print(encoded_datas)

# decode data
decoded_datas = autoencoder.decode(encoded_datas)
print("decoder ================")
print(decoded_datas)

# reconstruct data (encode and decode data)
reconstructed_datas = autoencoder.reconstruct(datas)
print("reconstruct ================")
print(reconstructed_datas)

# Release the underlying session/resources.
autoencoder.close()
コード例 #6
0
def train_autoencoder_and_log(
        autoencoder: AutoEncoder,
        train_loader: DataLoader,
        test_loader: DataLoader,
        ) -> None:
    """Train the autoencoder and log data to disc.

    Recreates ``config.LOG_DIR`` from scratch, then for each of
    ``config.STEPS`` training steps records the train/test loss, the
    encodings of the whole training set, and the reconstructions of a fixed
    batch of sample digits.  Everything is written out as ``.npy`` files.
    """
    if os.path.exists(config.LOG_DIR):
        shutil.rmtree(config.LOG_DIR)
    os.mkdir(config.LOG_DIR)

    train_losses = []
    test_losses = []
    test_images = _get_sample_digits(test_loader)
    all_encodings = []
    all_reconstructions = []
    for i in tqdm(range(config.STEPS)):
        # Train for a single step.
        autoencoder.train_step(steps=1)

        # Collect train and test losses.
        train_losses.append(autoencoder.evaluate(train_loader))
        test_losses.append(autoencoder.evaluate(test_loader))

        # Collect the train-set encodings.  Accumulate per-batch results in
        # a list and concatenate once -- calling torch.cat inside the loop,
        # as the original did, is quadratic in the dataset size.
        batch_encodings = [
            autoencoder.encode(images) for images, _ in train_loader
        ]
        all_encodings.append(torch.cat(batch_encodings, 0))

        # Collect the sample reconstructions.
        all_reconstructions.append(autoencoder.autoencode(test_images))

    def save(filename, obj):
        # ``obj`` rather than shadowing the builtin ``object``.
        np.save(os.path.join(config.LOG_DIR, filename), obj)

    # Format and save data.
    save("train_losses.npy", np.array(train_losses))
    save("test_losses.npy", np.array(test_losses))

    # Images are assumed to be in [-1, 1]; rescale to [0, 1] for viewing.
    save("test_images.npy", (test_images.numpy() + 1) / 2)

    save("encodings.npy", np.array([x.numpy() for x in all_encodings]))

    all_reconstructions = np.array([x.numpy() for x in all_reconstructions])
    save("reconstructions.npy", (all_reconstructions + 1) / 2)

    # np.concatenate handles a ragged final batch, which would break the
    # original np.array(...).flatten() with a dtype=object failure.
    labels = np.concatenate([y.numpy() for _, y in train_loader]).astype(int)
    save("labels.npy", labels)
コード例 #7
0
from dataset_wrapper import Cifar10Wrapper
import keras.backend as K


def compare_dataset():
    """Load the two encoded CIFAR-10 datasets to be compared."""
    paths = ('prod/test_relu6/encoded_cifar10.h5',
             'prod/cifar10_ae2_relu_6/encoded_cifar10.h5')
    first, second = (Cifar10Wrapper.load_from_h5(p) for p in paths)
    return first, second


if __name__ == '__main__':
    # Bind a fresh TF session to Keras before building any models.
    K.set_session(utils.create_session())
    cifar10_dataset = Cifar10Wrapper.load_default()

    folder = 'prod/test_relu6'
    # RELU_MAX caps the ReLU activations of this autoencoder variant.
    ae = AutoEncoder(cifar10_dataset, encode, decode, RELU_MAX, folder)
    ae.build_models(folder)  # load previously trained ae

    # Training/visualisation steps kept for reference; this run only encodes.
    # num_epoch = 2
    # lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    # ae.train(128, num_epoch, lr_schedule)
    # ae.save_models()
    # ae.test_models(utils.vis_cifar10)
    # ae.log()

    # NOTE(review): encode() receives the wrapper *class*, not the loaded
    # dataset instance -- confirm that is the intended API.
    encoded_dataset = ae.encode(Cifar10Wrapper)
    # encoded_dataset.dump_to_h5(os.path.join(folder, 'encoded_cifar10.h5'))
    # encoded_dataset.plot_data_dist(os.path.join(folder, 'encoded_plot.png'))
コード例 #8
0
                            drop_last=True)
    # Train an autoencoder on the 21-dimensional item feature vectors,
    # compressing them to a 5-dimensional latent space.
    model = AutoEncoder(input_dim=21, latent_dim=5)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
    AutoEncoder.fit(model, num_epochs, dataloader, criterion, optimizer)

    # Impute ratings for cold-start (CCS) items from similar rated items.
    for ccs_item in tqdm(mv.ccs_items()):
        print('ccs item:', ccs_item)
        # Keep appending synthetic ratings until the item leaves CCS status.
        while mv.is_ccs(ccs_item):
            u = mv.pick_random_user()
            print('user:', u)
            rated_ncs_items_by_u = mv.rated_ncs_items(u)
            # Latent codes of every non-cold-start item the user has rated.
            u_rated_latents = [
                model.encode(mv.features(m)) for m in rated_ncs_items_by_u
            ]
            ccs_latent = model.encode(mv.features(ccs_item))

            # NOTE(review): ``cosine`` appears to be a cosine *distance*
            # (smaller = more similar), so argsort below puts the most
            # similar items first -- confirm against its definition.
            cosine_sims = [
                cosine(r_latent, ccs_latent) for r_latent in u_rated_latents
            ]

            # NOTE(review): fancy indexing requires rated_ncs_items_by_u to
            # be a numpy array rather than a plain list -- verify upstream.
            sorted_idxs = np.argsort(cosine_sims)
            top10_ncs_items = rated_ncs_items_by_u[sorted_idxs][:10]
            top10_ncs_ratings = [mv.rating(u, m) for m in top10_ncs_items]
            # Impute the CCS rating as the mean over the 10 nearest items.
            mean_top10 = np.mean(top10_ncs_ratings)

            mv.append_rating(user=u, item=ccs_item, rating=mean_top10)

            # print(f"\nccs_item:{ccs_item} | user:{u} | rating:{mean_top10} | rcount:{mv.rating_count(ccs_item)}")
コード例 #9
0
File: stl_ae.py  Project: hengyuan-hu/dem
    # Load the STL-10 dataset from its HDF5 dump.
    stl10_dataset = STL10Wrapper.load_from_h5('data/stl10.h5')

    # ----------normal relu pretraining----------
    # Python 2 print statement: this snippet targets Python 2.
    print 'Training model with normal relu'
    folder = 'prod/stl10_ae_%d_inf' % LATENT_DIM
    # None in the RELU_MAX slot -> unbounded ReLU during pretraining.
    ae = AutoEncoder(stl10_dataset, encode, decode, None, folder)
    ae.build_models()

    num_epoch = 150
    # Decaying learning-rate schedule starting at 0.1.
    lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    ae.train(128, num_epoch, lr_schedule)  # batch size 128
    ae.save_models()
    ae.test_models(utils.vis_stl10)
    ae.log()

    # NOTE(review): encode() is given the wrapper *class*, not the dataset
    # instance -- presumably the API accepts the class; confirm.
    encoded_dataset = ae.encode(STL10Wrapper)
    encoded_dataset.dump_to_h5(os.path.join(folder, 'encoded_stl10.h5'))
    # encoded_dataset.plot_data_dist(os.path.join(folder, 'encoded_plot.png'))

    # ----------truncate relu and fine-tune----------
    print 'Training model with relu-%d' % RELU_MAX
    new_folder = 'prod/stl10_ae_%d_relu%d' % (LATENT_DIM, RELU_MAX)
    # Rebuild with ReLU capped at RELU_MAX; load the pretrained weights.
    ae = AutoEncoder(stl10_dataset, encode, decode, RELU_MAX, new_folder)
    ae.build_models(folder)  # load previously trained ae

    # Smaller batch (96) for fine-tuning; reuses the pretraining schedule.
    ae.train(96, num_epoch, lr_schedule)
    ae.save_models()
    ae.log()
    ae.test_models(utils.vis_stl10)

    encoded_dataset = ae.encode(STL10Wrapper)
コード例 #10
0
    # Continues an ``if data_name == ...`` chain that starts above this view.
    X_raw, y_raw = load_eeg_raw("{}data/raw/".format(base_path))
elif data_name == "syn":
    X_raw, y_raw = load_one_syn_raw("{}data/raw/".format(base_path), data_idx)
elif data_name == "har":
    X_raw, y_raw = load_har_raw("{}data/raw/".format(base_path))
else:
    # Unknown dataset name: fail loudly.  NOTE(review): ``assert`` is
    # stripped under ``python -O``; an explicit raise would be safer.
    assert(False)
print("y_raw: {}".format(y_raw.shape))
# Slice the raw signal into windows of args["window_size"] samples.
X_sliding = sliding_window(X_raw, args["window_size"])
X_variable = Variable(torch.Tensor(X_sliding), requires_grad=False).to(device)
auto_encoder = AutoEncoder(input_dim=X_sliding.shape[1],
                           hidden_sizes=args["hidden_sizes"],
                           latent_dim=args["latent_dim"],
                           ).to(device)
# Restore trained weights; map_location keeps CPU-only machines working.
auto_encoder.load_state_dict(torch.load(checkpoint_path, map_location=device))
# NOTE(review): .numpy() requires a CPU tensor; this fails if device=cuda.
z = auto_encoder.encode(X_variable).detach().numpy()
print(z.shape)


def find_peaks(z):
    dists = np.sqrt(np.sum(np.diff(z, axis=0) ** 2, axis=1))
    print(dists.shape)

    def mean(xs):
        return sum(xs) * 1. / len(xs)

    # inspect width, i.e. for t we inspect [t-d, t+d]
    d = 50
    indices = [
        i
        for i in range(d, dists.shape[0] - d)