Ejemplo n.º 1
0
def main(args):
    """Encode every frame under ``<data_path>/goal*/camera*`` with a
    pretrained VAE and dump the per-directory latent vectors to ``<dir>.txt``
    as comma-separated rows.

    Args:
        args: namespace with ``z_dim``, ``model_load_path`` and ``data_path``.
    """
    transform = transforms.Compose([
        transforms.Resize([64, 64], 1),
        transforms.ToTensor(),
        transforms.Normalize([.5], [.5])
    ])

    vae = VAE(img_size=[3, 64, 64], z_dim=args.z_dim)
    pt_file = load_model(args.model_load_path, "*.pt")
    vae.load_state_dict(torch.load(pt_file))
    vae.eval()

    dirs = sorted(d
                  for d in glob.glob(os.path.join(args.data_path,
                                                  "goal*/camera*"))
                  if os.path.isdir(d))

    for d in dirs:
        files = sorted(glob.glob(os.path.join(d, "*.png")))
        # os.path.basename is portable, unlike splitting on "/".
        print(os.path.basename(d))
        data = []
        # Inference only — skip autograd bookkeeping.
        with torch.no_grad():
            for f in files:
                # Force 3 channels: PNGs may be palette/RGBA, but the VAE
                # was built with img_size=[3, 64, 64].
                img = transform(Image.open(f).convert("RGB"))
                data.append(vae.reparam(*vae.encoder(img[None, :, :, :])))
        data = torch.cat(data).cpu().numpy()
        print(d + ".txt")
        np.savetxt(d + ".txt", data, delimiter=",")
Ejemplo n.º 2
0
def main(training: bool = False):
    """Build (or load) an MNIST VAE, plot its latent clusters, then find the
    training digit whose latent code is nearest to that of ``x_test[0]``.

    Args:
        training: when True, train a fresh VAE and save it; otherwise load
            the previously saved models from disk.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Pool train and test into one corpus, scaled to [0, 1] with a channel axis.
    digits = np.concatenate([x_train, x_test], axis=0)
    digit_labels = np.concatenate([y_train, y_test], axis=0)
    digits = np.expand_dims(digits, -1).astype("float32") / 255

    if training:
        vae = VAE(data_shape=(28, 28, 1),
                  latent_dim=2,
                  epochs=20,
                  batch_size=128,
                  optimizer=tf.keras.optimizers.Adam)
        vae.train_vae(digits, save_model=True)
    else:
        vae = VAE(data_shape=(28, 28, 1), latent_dim=2)
        for attr, path in (("full_model", 'vae'),
                           ("encoder", 'vae_encoder'),
                           ("decoder", 'vae_decoder')):
            setattr(vae, attr, tf.keras.models.load_model(path))

    plot_label_clusters(vae.encoder, vae.decoder, digits, digit_labels)

    # Encoder predicts (mean, log-var, sample); keep only the sampled codes.
    _, _, latent_codes = vae.encoder.predict(digits)

    neigh = NearestNeighbors(n_neighbors=5)
    neigh.fit(latent_codes)

    query = x_test[0]
    plt.imshow(query)
    plt.show()

    start = time()
    query = np.expand_dims(query, 0)
    query = np.expand_dims(query, -1).astype("float32") / 255

    _, _, query_latent = vae.encoder.predict(query)
    closest = neigh.kneighbors(query_latent, 1, False)
    print(time() - start)

    plt.imshow(digits[closest[0]][0, :, :, 0])
    plt.show()
Ejemplo n.º 3
0
class NNModel(object):
    """Inference wrapper around a pretrained LSTMPB / VAE / CVAE stack.

    Loads normalization constants and model weights from disk, clusters the
    known PB (parametric-bias) vectors with KMeans, and exposes step-wise
    prediction (``on_predict`` / ``off_predict``), goal generation
    (``gen_goal``) and prediction-error regression of the PB unit (``pem``).
    """

    def __init__(self, args):
        self.log_path = args.log_path
        self.device = torch.device("cuda:0" if args.cuda else "cpu")
        self.img_size = args.img_size
        self.sample_num = args.sample_num
        # Incoming frames are HWC arrays; scale to [-1, 1] at 64x64.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([64, 64], 1),
            transforms.ToTensor(),
            transforms.Normalize([.5], [.5])
        ])
        self.pil_transform = transforms.ToPILImage(mode="RGB")

        # Per-dimension affine constants; [None] adds a broadcastable batch axis.
        self.norm_scale = np.loadtxt(os.path.join(args.config_path,
                                                  "norm_scale.txt"),
                                     dtype=np.float32,
                                     delimiter=",")[None]
        self.norm_min = np.loadtxt(os.path.join(args.config_path,
                                                "norm_min.txt"),
                                   dtype=np.float32,
                                   delimiter=",")[None]
        self.pb_list = torch.from_numpy(
            np.loadtxt(os.path.join(args.config_path, "pb_list.txt"),
                       dtype=np.float32,
                       delimiter=","))

        # Two clusters over the known PB vectors; used to label generated goals.
        self.kmeans = KMeans(n_clusters=2)
        self.kmeans.fit(self.pb_list)

        print("=" * 5, "Init LSTMPB", "=" * 5)
        self.rnn = LSTMPB(args, pb_unit=self.pb_list[5][None])
        pt_file = load_model(args.model_load_path, "*/*LSTMPB*.pt")
        self.rnn.load_state_dict(torch.load(pt_file))

        print("=" * 5, "Init VAE", "=" * 5)
        self.vae = VAE(img_size=args.img_size, z_dim=args.vae_z_dims)
        pt_file = load_model(args.model_load_path, "*/VAE*.pt")
        self.vae.load_state_dict(torch.load(pt_file))
        self.vae.eval()

        print("=" * 5, "Init CVAE", "=" * 5)
        self.cvae = CVAE(img_size=args.img_size, z_dim=args.cvae_z_dims)
        pt_file = load_model(args.model_load_path, "*/*CVAE*.pt")
        self.cvae.load_state_dict(torch.load(pt_file))
        self.cvae.eval()

        # Index groups into the state vector: joint dims then visual latents.
        self.norm_mode = {
            "joint": [0, 1, 2, 3, 4],
            "visual": [5, 6, 7, 8, 9, 10, 11]
        }
        self.norm_mode[
            "all"] = self.norm_mode["joint"] + self.norm_mode["visual"]

        self.global_step = 0
        self.his_log = HistoryWindow(maxlen=args.window_size)

        # Visualize the current goal decoded from the PB unit.
        _, goal = self.vae.decoder(self.denorm(self.goal, "visual"))
        # BUGFIX: pixel values lie in [0, 255]; int8 overflows above 127.
        goal = ((goal[0] * .5 + .5) * 255).to(torch.uint8)
        self.goal_img = self.pil_transform(goal)

    def on_predict(self, cur_joint, cur_img, state=None):
        """Run one online RNN step from a raw joint vector and camera frame.

        Returns (normalized outputs, new state, denormalized outputs).
        """
        cur_joint = torch.Tensor(cur_joint)[None]
        # ::-1 flips BGR camera frames to RGB before the transform.
        cur_img = self.transform(cur_img[:, :, ::-1])[None]
        utils.save_image(cur_img[0],
                         "./result/visual_{:0>6d}.png".format(
                             self.global_step),
                         normalize=True,
                         range=(-1, 1))

        img_feature = self.vae.reparam(*self.vae.encoder(cur_img))
        inputs = torch.cat([cur_joint, img_feature], axis=-1).detach()
        inputs = self.norm(inputs).to(torch.float32)

        outputs, state = self.rnn.step(inputs, state)
        outputs, state = outputs.detach().cpu(), \
                         (state[0].detach().cpu(), state[1].detach().cpu())
        self.global_step += 1
        return outputs, state, self.denorm(outputs).to(torch.float32)

    def off_predict(self, cur_joint, img_feature, state=None):
        """Run one offline RNN step from precomputed (already normalized)
        joint and image-feature vectors; logs the step into the history window.

        Returns (normalized outputs, new state, denormalized outputs).
        """
        assert isinstance(cur_joint, (list, np.ndarray))
        assert isinstance(img_feature, (list, np.ndarray))

        cur_joint = torch.Tensor(cur_joint).to(torch.float32)[None]
        img_feature = torch.Tensor(img_feature).to(torch.float32)[None]

        inputs = torch.cat([cur_joint, img_feature], axis=-1)
        outputs, state = self.rnn.step(inputs, state)
        outputs, state = outputs.detach().cpu(), \
                         (state[0].detach().cpu(), state[1].detach().cpu())

        self.his_log.put([outputs, inputs, state])
        return outputs, state, self.denorm(outputs).to(torch.float32)

    def gen_goal(self, visual_img):
        """Sample goal images from the CVAE conditioned on the current view,
        average their VAE latents per KMeans cluster, and return
        (list of goal PIL images, normalized PB candidates)."""
        visual_img = self.transform(visual_img)[None].repeat(
            self.sample_num, 1, 1, 1)
        sampled_z = torch.randn(self.sample_num, self.cvae.z_dim)
        _, gen_goals = self.cvae.decoder(z=sampled_z, cond=visual_img)
        pb_list = self.vae.reparam(*self.vae.encoder(gen_goals)).detach().cpu()

        pb_label = self.kmeans.predict(pb_list.numpy())
        print(pb_label)
        # One averaged PB per cluster.
        pb_list = torch.stack(
            [pb_list[pb_label == 0].mean(0), pb_list[pb_label == 1].mean(0)])
        _, goal_list = self.vae.decoder(pb_list)
        pb_list = self.norm(pb_list, "visual")
        # BUGFIX: pixel values lie in [0, 255]; int8 overflows above 127.
        goal_list = ((goal_list * .5 + .5) * 255).to(torch.uint8)
        goal_list = [self.pil_transform(goal) for goal in goal_list]
        return goal_list, pb_list

    def pem(self):
        """Prediction-error minimization: freeze the LSTM weights and optimize
        only the PB unit so the re-played closed-loop prediction matches the
        recorded trajectory."""
        assert len(self.his_log), "the history window is empty!"
        for param in self.rnn.parameters():
            param.requires_grad = False
        self.rnn.pb_unit = nn.Parameter(self.rnn.pb_unit, requires_grad=True)
        optim_param = [
            param for param in self.rnn.parameters() if param.requires_grad
        ]
        optim = torch.optim.Adam(optim_param, lr=0.01)
        mse_loss = nn.MSELoss()

        pred_his, actual_his, state_his = self.his_log.get()
        pb_log = []
        for i in range(80):
            # BUGFIX: clear gradients every iteration; they previously
            # accumulated across all 80 optimization steps.
            optim.zero_grad()
            log = []
            # Seed with predicted joints + actual visual features.
            cur_input = torch.cat([pred_his[:, 0, :5], actual_his[:, 0, 5:]],
                                  dim=-1)
            state = state_his[0]
            for step in range(1, len(state_his)):
                cur_input, state = self.rnn.step(cur_input, state)
                log.append(cur_input)
            log = torch.stack(log, dim=1)
            # Visual prediction error plus a pull toward the known PB vectors.
            loss = mse_loss(log[0, :, 5:], actual_his[0, 1:, 5:]) + \
                   (self.rnn.pb_unit - self.pb_list).pow(2).mean()
            pb_log.append(self.rnn.pb_unit.data.clone())
            loss.backward()
            optim.step()
            print("PEM loss, step {}, loss: {}".format(i, loss.item()))

    @property
    def goal(self):
        """Current goal = the RNN's parametric-bias unit."""
        return self.rnn.pb_unit

    @goal.setter
    def goal(self, pb):
        if pb.ndim == 1:
            pb = torch.unsqueeze(pb, 0)
        self.rnn.pb_unit = pb

    def norm(self, inputs, mode="all"):
        """Affine-map normalized values back to raw range: x * scale + min."""
        assert mode in ["joint", "visual", "all"]
        i_slice = self.norm_mode[mode]
        return inputs * self.norm_scale[:, i_slice] + self.norm_min[:, i_slice]

    def denorm(self, outputs, mode="all"):
        """Inverse of :meth:`norm`: (x - min) / scale."""
        assert mode in ["joint", "visual", "all"]
        i_slice = self.norm_mode[mode]
        return (outputs - self.norm_min[:, i_slice]) / self.norm_scale[:,
                                                                       i_slice]
Ejemplo n.º 4
0
def main(args):
    """Either dump per-directory VAE latents to text files (``args.extract_z``)
    or visualize the latent space in 3D after PCA (``args.vis_z``).

    Args:
        args: namespace with ``z_dim``, ``model_load_path``, ``data_path``,
            ``extract_z`` and ``vis_z``.
    """
    transform = transforms.Compose([
        transforms.Resize([64, 64], 1),
        transforms.ToTensor(),
        transforms.Normalize([.5], [.5])
    ])

    vae = VAE(img_size=[3, 64, 64], z_dim=args.z_dim)
    pt_file = load_model(args.model_load_path, "*200.pt")
    vae.load_state_dict(torch.load(pt_file))
    vae.eval()

    if args.extract_z:
        dirs = sorted(glob.glob(os.path.join(args.data_path, "*")))

        for d in dirs:
            files = sorted(glob.glob(os.path.join(d, "*.png")))
            # os.path.basename is portable, unlike splitting on "/".
            print(os.path.basename(d))
            data = []
            # Inference only — skip autograd bookkeeping.
            with torch.no_grad():
                for f in files:
                    # Force 3 channels: the VAE expects img_size=[3, 64, 64].
                    img = transform(Image.open(f).convert("RGB"))
                    data.append(
                        vae.reparam(*vae.encoder(img[None, :, :, :])))
            data = torch.cat(data).cpu().numpy()
            np.savetxt(os.path.basename(d) + ".txt", data, delimiter=",")

    if args.vis_z:
        dataset = datasets.ImageFolder(root=args.data_path,
                                       transform=transform)
        data = torch.stack([img for img, _ in dataset])
        label = torch.tensor([lbl for _, lbl in dataset])

        with torch.no_grad():
            mu, log_var = vae.encoder(data)
            z = vae.reparam(mu, log_var)
        z = z.detach().numpy()

        # Project to 3D only when the latent space is larger than 3D.
        pca = PCA(n_components=3)
        comp_z = pca.fit_transform(z) if z.shape[-1] > 3 else z

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1, projection="3d")
        cmap = plt.get_cmap("tab20")
        for i in range(label.max() + 1):
            ax.scatter(comp_z[label == i, 0],
                       comp_z[label == i, 1],
                       comp_z[label == i, 2],
                       marker="${}$".format(i),
                       color=cmap(i),
                       label=dataset.classes[i])
        # BUGFIX: build the legend once, not once per scatter call.
        plt.legend(loc="best")
        plt.show()
Ejemplo n.º 5
0
class CVAE():
    """Collaborative VAE recommender: alternates between optimizing the item
    side-information VAE (E-step) and closed-form updates of the user/item
    latent factors U and V (M-step).
    """

    def __init__(self,
                 num_features=50,
                 max_epoch=50,
                 max_iter=5,
                 a=1,
                 b=0.01,
                 lambda_u=0.1,
                 lambda_v=10,
                 lambda_r=10,
                 vae_pre_training=None):
        # a / b weight observed vs. unobserved interactions;
        # lambda_* are the regularization strengths.
        self.num_features = num_features
        self.max_epoch = max_epoch
        self.max_iter = max_iter
        self.a = a
        self.b = b
        self.lambda_u = lambda_u
        self.lambda_v = lambda_v
        self.lambda_r = lambda_r
        self.vae_pre_training = vae_pre_training

    def initialize(self, train_users, train_items, item_side_info):
        """Allocate U/V/V_theta, build the VAE (optionally warm-started from a
        pretrained checkpoint) and its optimizer; return the side-info tensor."""
        self.num_users = len(train_users)
        self.num_items = len(train_items)
        self.U = 0.1 * np.random.randn(self.num_users, self.num_features)
        self.V = 0.1 * np.random.randn(self.num_items, self.num_features)
        self.V_theta = 0.1 * np.random.randn(self.num_items, self.num_features)
        self.vae = VAE([item_side_info.shape[1], 200, 100], self.num_features)
        side_input = torch.tensor(item_side_info, dtype=torch.float)
        if self.vae_pre_training is not None:
            self.vae.load_state_dict(torch.load(self.vae_pre_training))
            self.V_theta[:] = self.vae.encoder(
                side_input).clone().detach().numpy()
            self.V[:] = self.V_theta
        self.optimizer = optim.Adam(self.vae.parameters(),
                                    lr=0.001,
                                    weight_decay=2e-4)  # weight_decay is L2 regularization
        return side_input

    def fit(self, train_users, test_users, train_items, item_side_info):
        """Run max_epoch rounds of alternating E-step / M-step optimization."""
        side_input = self.initialize(train_users, train_items, item_side_info)
        for epoch in range(self.max_epoch):
            loss, side_latent = self.e_step(side_input)
            recall = self.m_step(train_users, train_items, test_users)
            print("Epoch:{}, Loss:{}, Recall:{}".format(epoch, loss, recall))

    # fix U,V  update V_theta
    def e_step(self, side_input):
        """Optimize the VAE so its latent codes track V (and reconstruct the
        side information); returns the final loss and latent codes."""
        loss = 0.
        for it in range(self.max_iter):
            self.optimizer.zero_grad()
            side_latent = self.vae.encoder(side_input)
            # NOTE(review): decoder() takes no args here — presumably the VAE
            # caches the encoder state internally; confirm against VAE impl.
            side_output = self.reg_tensor(self.vae.decoder())
            # BUGFIX: binary cross-entropy is x*log(x') + (1-x)*log(1-x');
            # the second term previously used (1-x')*log(x').  The upper
            # clamp guards log(0) when sigmoid saturates at 1.
            gen_loss = -torch.mean(
                (side_input * torch.log(side_output) +
                 (1 - side_input) *
                 torch.log((1 - side_output).clamp(min=1e-10))).sum(dim=1))
            latent_loss = self.vae.latent_loss()
            v_loss = self.lambda_r * \
                torch.mean(
                    ((side_latent-torch.tensor(self.V, dtype=torch.float))**2).sum(dim=1))
            loss = gen_loss + latent_loss + v_loss
            loss.backward()
            self.optimizer.step()
            print(
                "E_Step:Iter:{}, Loss:{:.5f}, gen_loss:{:0.5f}, latent_loss:{:0.5f}, v_loss:{:0.5f}"
                .format(it, loss, gen_loss, latent_loss, v_loss))
        return loss, side_latent

    # fix V_theta  update U,V
    def m_step(self, train_users, train_items, test_users):
        """Closed-form ridge updates of U then V (weighted ALS), reporting
        recall each iteration; returns the final Recall@50.

        NOTE: requires max_iter >= 1, otherwise `recall` is never bound.
        """
        for it in range(self.max_iter):
            # update U: solve (V'CV + lambda_u*I) u_i = V'C r_i per user.
            items_ids = np.array([len(x) for x in train_items]) > 0
            v = self.V[items_ids]
            vTv = np.dot(v.T, v) * self.b
            for i in range(self.num_users):
                ui_items = train_users[i]
                if len(ui_items) > 0:
                    fs_part = vTv + \
                        np.dot(self.V[ui_items, :].T,
                               self.V[ui_items, :])*(self.a-self.b)
                    fs_part += self.lambda_u * np.eye(self.num_features)
                    sec_part = np.sum(self.V[ui_items, :], axis=0) * self.a
                    try:
                        self.U[i, :] = scipy.linalg.solve(fs_part, sec_part)
                    except AttributeError:
                        # if module 'scipy' has no attribute 'linalg'
                        self.U[i, :] = np.dot(np.mat(fs_part).I, sec_part)
            # update V: same form, anchored to V_theta from the VAE.
            users_ids = np.array([len(x) for x in train_users]) > 0
            u = self.U[users_ids]
            uTu = np.dot(u.T, u) * self.b
            for j in range(self.num_items):
                vj_users = train_items[j]
                if len(vj_users) > 0:
                    fs_part = uTu + \
                        np.dot(self.U[vj_users, :].T,
                               self.U[vj_users, :])*(self.a-self.b)
                    fs_part += self.lambda_v * np.eye(self.num_features)
                    sec_part = np.sum(
                        self.U[vj_users, :],
                        axis=0) * self.a + self.lambda_v * self.V_theta[j, :]
                else:
                    # Cold items fall back to the side-information prior.
                    fs_part = uTu + self.lambda_v * np.eye(self.num_features)
                    sec_part = self.lambda_v * self.V_theta[j, :]
                try:
                    self.V[j, :] = scipy.linalg.solve(fs_part, sec_part)
                except AttributeError:
                    # if module 'scipy' has no attribute 'linalg'
                    self.V[j, :] = np.dot(np.mat(fs_part).I, sec_part)
            recall = self.evalute_recall(train_users, test_users,
                                         [50, 100, 150])
            print(
                "M_Step:Iter:{}, Recall@50:{:.5f}, Recall@100:{:.5f},Recall@150:{:.5f}"
                .format(it, recall[0], recall[1], recall[2]))
        return recall[0]

    def reg_tensor(self, ts):
        """Sigmoid with a 1e-10 floor so downstream log() stays finite."""
        return torch.max(torch.sigmoid(ts),
                         torch.tensor(1e-10, dtype=torch.float))

    def evalute_recall(self, train_users, test_users, recall_M):
        """Mean Recall@M over users with test interactions, excluding items
        already seen in training from the ranked list."""
        res = []
        score = np.dot(self.U, self.V.T)
        ind_rec = np.argsort(score, axis=1)[:, ::-1]
        for m in recall_M:
            recalls = []
            for i in range(self.num_users):
                if len(test_users[i]) > 0:
                    m_rec = []
                    recall = 0.
                    for j in ind_rec[i]:
                        if j not in train_users[i]:
                            m_rec.append(j)
                            if j in test_users[i]:
                                recall += 1.
                        if len(m_rec) == m:
                            break
                    recalls.append(recall / len(test_users[i]))
            res.append(np.mean(recalls))
        return res

    def save_model(self, file_path):
        # Not implemented yet.
        pass

    def load_model(self, file_path):
        # Not implemented yet.
        pass
Ejemplo n.º 6
0
    # NOTE(review): this span is the interior of a function whose `def` line
    # falls outside this chunk; data_dim, use_cuda, args, train_data_dir and
    # test_data_dir are presumably bound earlier in that function — confirm.
    # Mixture output head: 10 components, each with a mean and a scale,
    # plus one mixture weight per component.
    nr_mix = 10
    # mean and scale for each components and weighting bt components (10+2*10)
    probs_size = (2 * nr_mix) + nr_mix
    dout = data_dim * probs_size
    latent_size = 64

    encoder = Encoder(data_dim, latent_size)
    decoder = Decoder(latent_size, dout)
    vae = VAE(encoder, decoder, use_cuda)
    # square error is not the correct loss - for ordered input,
    # should use softmax for unordered input ( like mine )

    if use_cuda:
        print("using gpu")
        # Move the wrapper and both sub-modules to the GPU.
        vae = vae.cuda()
        vae.encoder = vae.encoder.cuda()
        vae.decoder = vae.decoder.cuda()
    opt = torch.optim.Adam(vae.parameters(), lr=1e-4)
    epoch = 0
    # Training loader honors an optional cap on the number of samples.
    data_train_loader = DataLoader(FroggerDataset(
        train_data_dir,
        transform=transforms.ToTensor(),
        limit=args.num_train_limit),
                                   batch_size=64,
                                   shuffle=True)
    data_test_loader = DataLoader(FroggerDataset(
        test_data_dir, transform=transforms.ToTensor()),
                                  batch_size=32,
                                  shuffle=True)
    test_data = data_test_loader