def main(args):
    """Encode every PNG under data_path/goal*/camera* with a trained VAE and
    dump the per-directory latent sequences as CSV text files."""
    preprocess = transforms.Compose([
        transforms.Resize([64, 64], 1),
        transforms.ToTensor(),
        transforms.Normalize([.5], [.5]),
    ])
    vae = VAE(img_size=[3, 64, 64], z_dim=args.z_dim)
    checkpoint = load_model(args.model_load_path, "*.pt")
    vae.load_state_dict(torch.load(checkpoint))
    vae.eval()
    # Keep only real directories matching the goal*/camera* layout, in sorted order.
    cam_dirs = sorted(
        p for p in glob.glob(os.path.join(args.data_path, "goal*/camera*"))
        if os.path.isdir(p))
    for cam_dir in cam_dirs:
        print(cam_dir.split("/")[-1])
        # One latent vector per frame, in filename order.
        latents = [
            vae.reparam(*vae.encoder(preprocess(Image.open(png))[None, :, :, :])).detach()
            for png in sorted(glob.glob(os.path.join(cam_dir, "*.png")))
        ]
        stacked = torch.cat(latents).cpu().numpy()
        print(cam_dir + ".txt")
        np.savetxt(cam_dir + ".txt", stacked, delimiter=",")
def main(training: bool = False):
    """Train or load an MNIST VAE, then display the dataset digit whose latent
    code is nearest to the first test digit's latent code.

    Args:
        training: when True, fit a fresh VAE on MNIST and save it to disk;
            otherwise load the previously saved 'vae'/'vae_encoder'/'vae_decoder'
            SavedModel directories.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    mnist_digits = np.concatenate([x_train, x_test], axis=0)
    labels = np.concatenate([y_train, y_test], axis=0)
    # Add a channel axis and scale pixels into [0, 1].
    mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
    if training:
        vae = VAE(data_shape=(28, 28, 1), latent_dim=2, epochs=20,
                  batch_size=128, optimizer=tf.keras.optimizers.Adam)
        vae.train_vae(mnist_digits, save_model=True)
    else:
        vae = VAE(data_shape=(28, 28, 1), latent_dim=2)
        vae.full_model = tf.keras.models.load_model('vae')
        vae.encoder = tf.keras.models.load_model('vae_encoder')
        vae.decoder = tf.keras.models.load_model('vae_decoder')
    plot_label_clusters(vae.encoder, vae.decoder, mnist_digits, labels)
    # Encoder predict returns (mean, log_var, sampled z); index the sampled
    # codes of the whole dataset for nearest-neighbour lookup.
    _, _, latent_codes = vae.encoder.predict(mnist_digits)
    neigh = NearestNeighbors(n_neighbors=5)
    neigh.fit(latent_codes)
    test = x_test[0]
    plt.imshow(test)
    plt.show()
    start = time()
    test = np.expand_dims(test, 0)
    test = np.expand_dims(test, -1).astype("float32") / 255
    _, _, latent = vae.encoder.predict(test)
    # Indices only (return_distance=False) of the single closest neighbour.
    closest = neigh.kneighbors(latent, 1, False)
    print(time() - start)
    plt.imshow(mnist_digits[closest[0]][0, :, :, 0])
    plt.show()
class NNModel(object):
    """Runtime wrapper bundling a trained LSTMPB controller with VAE/CVAE image
    models: normalizes joint+visual features, steps the RNN online or offline,
    generates candidate goal images, and tunes the PB unit via prediction-error
    minimization (PEM)."""

    def __init__(self, args):
        self.log_path = args.log_path
        self.device = torch.device("cuda:0" if args.cuda else "cpu")
        self.img_size = args.img_size
        self.sample_num = args.sample_num
        # Raw camera arrays -> normalized [-1, 1] tensors of size 64x64.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([64, 64], 1),
            transforms.ToTensor(),
            transforms.Normalize([.5], [.5])
        ])
        self.pil_transform = transforms.ToPILImage(mode="RGB")
        # Per-dimension affine normalization constants; [None] makes them (1, D)
        # so they broadcast over a batch axis.
        self.norm_scale = np.loadtxt(os.path.join(args.config_path,
                                                  "norm_scale.txt"),
                                     dtype=np.float32, delimiter=",")[None]
        self.norm_min = np.loadtxt(os.path.join(args.config_path,
                                                "norm_min.txt"),
                                   dtype=np.float32, delimiter=",")[None]
        self.pb_list = torch.from_numpy(
            np.loadtxt(os.path.join(args.config_path, "pb_list.txt"),
                       dtype=np.float32, delimiter=","))
        # Two clusters over the known PB vectors; gen_goal() uses them to
        # average the generated goal latents per cluster.
        self.kmeans = KMeans(n_clusters=2)
        self.kmeans.fit(self.pb_list)
        print("=" * 5, "Init LSTMPB", "=" * 5)
        self.rnn = LSTMPB(args, pb_unit=self.pb_list[5][None])
        pt_file = load_model(args.model_load_path, "*/*LSTMPB*.pt")
        self.rnn.load_state_dict(torch.load(pt_file))
        print("=" * 5, "Init VAE", "=" * 5)
        self.vae = VAE(img_size=args.img_size, z_dim=args.vae_z_dims)
        pt_file = load_model(args.model_load_path, "*/VAE*.pt")
        self.vae.load_state_dict(torch.load(pt_file))
        self.vae.eval()
        print("=" * 5, "Init CVAE", "=" * 5)
        self.cvae = CVAE(img_size=args.img_size, z_dim=args.cvae_z_dims)
        pt_file = load_model(args.model_load_path, "*/*CVAE*.pt")
        self.cvae.load_state_dict(torch.load(pt_file))
        self.cvae.eval()
        # Feature layout: dims 0-4 are joint values, dims 5-11 visual latents.
        self.norm_mode = {
            "joint": [0, 1, 2, 3, 4],
            "visual": [5, 6, 7, 8, 9, 10, 11]
        }
        self.norm_mode[
            "all"] = self.norm_mode["joint"] + self.norm_mode["visual"]
        self.global_step = 0
        self.his_log = HistoryWindow(maxlen=args.window_size)
        # Visualize the current goal by decoding the (denormalized) PB unit.
        _, goal = self.vae.decoder(self.denorm(self.goal, "visual"))
        # NOTE(review): pixel values reach 255, which overflows int8 (max 127);
        # torch.uint8 looks intended here — confirm before changing.
        goal = ((goal[0] * .5 + .5) * 255).to(torch.int8)
        self.goal_img = self.pil_transform(goal)

    def on_predict(self, cur_joint, cur_img, state=None):
        """One online RNN step from a raw BGR camera frame.

        Returns (normalized outputs, detached RNN state, denormalized outputs).
        """
        cur_joint = torch.Tensor(cur_joint)[None]
        # [:, :, ::-1] flips BGR -> RGB before the PIL-based transform.
        cur_img = self.transform(cur_img[:, :, ::-1])[None]
        utils.save_image(cur_img[0],
                         "./result/visual_{:0>6d}.png".format(
                             self.global_step),
                         normalize=True,
                         range=(-1, 1))
        img_feature = self.vae.reparam(*self.vae.encoder(cur_img))
        inputs = torch.cat([cur_joint, img_feature], axis=-1).detach()
        inputs = self.norm(inputs).to(torch.float32)
        outputs, state = self.rnn.step(inputs, state)
        outputs, state = outputs.detach().cpu(), \
            (state[0].detach().cpu(), state[1].detach().cpu())
        self.global_step += 1
        return outputs, state, self.denorm(outputs).to(torch.float32)

    def off_predict(self, cur_joint, img_feature, state=None):
        """One offline RNN step from pre-extracted (already normalized) joint
        and image features; also records the step in the history window so
        pem() can replay it."""
        assert isinstance(cur_joint, (list, np.ndarray))
        assert isinstance(img_feature, (list, np.ndarray))
        cur_joint = torch.Tensor(cur_joint).to(torch.float32)[None]
        img_feature = torch.Tensor(img_feature).to(torch.float32)[None]
        inputs = torch.cat([cur_joint, img_feature], axis=-1)
        outputs, state = self.rnn.step(inputs, state)
        outputs, state = outputs.detach().cpu(), \
            (state[0].detach().cpu(), state[1].detach().cpu())
        self.his_log.put([outputs, inputs, state])
        return outputs, state, self.denorm(outputs).to(torch.float32)

    def gen_goal(self, visual_img):
        """Sample candidate goal images from the CVAE conditioned on the
        current view, cluster their VAE latents with the fitted k-means, and
        return (goal PIL images, normalized per-cluster mean latents)."""
        visual_img = self.transform(visual_img)[None].repeat(
            self.sample_num, 1, 1, 1)
        sampled_z = torch.randn(self.sample_num, self.cvae.z_dim)
        _, gen_goals = self.cvae.decoder(z=sampled_z, cond=visual_img)
        pb_list = self.vae.reparam(*self.vae.encoder(gen_goals)).detach().cpu()
        pb_label = self.kmeans.predict(pb_list.numpy())
        print(pb_label)
        # Average the sampled latents within each of the two clusters.
        pb_list = torch.stack(
            [pb_list[pb_label == 0].mean(0), pb_list[pb_label == 1].mean(0)])
        _, goal_list = self.vae.decoder(pb_list)
        pb_list = self.norm(pb_list, "visual")
        # NOTE(review): same int8-overflow concern as in __init__.
        goal_list = ((goal_list * .5 + .5) * 255).to(torch.int8)
        goal_list = [self.pil_transform(goal) for goal in goal_list]
        return goal_list, pb_list

    def pem(self):
        """Prediction-error minimization: freeze every RNN weight and optimize
        only the PB unit so the closed-loop re-prediction of the logged window
        matches the actually observed visual features."""
        assert len(self.his_log), "the history window is empty!"
        for param in self.rnn.parameters():
            param.requires_grad = False
        # Re-register the PB unit as the only trainable parameter.
        self.rnn.pb_unit = nn.Parameter(self.rnn.pb_unit, requires_grad=True)
        optim_param = [
            param for param in self.rnn.parameters() if param.requires_grad
        ]
        optim = torch.optim.Adam(optim_param, lr=0.01)
        mse_loss = nn.MSELoss()
        pred_his, actual_his, state_his = self.his_log.get()
        pb_log = []  # PB trajectory across optimization steps, kept for inspection
        for i in range(80):
            log = []
            # Seed the rollout with predicted joints + observed visual features.
            cur_input = torch.cat([pred_his[:, 0, :5], actual_his[:, 0, 5:]],
                                  dim=-1)
            state = state_his[0]
            for step in range(1, len(state_his)):
                cur_input, state = self.rnn.step(cur_input, state)
                log.append(cur_input)
            log = torch.stack(log, dim=1)
            # Visual prediction error plus a pull toward the known PB vectors.
            loss = mse_loss(log[0, :, 5:], actual_his[0, 1:, 5:]) + \
                (self.rnn.pb_unit - self.pb_list).pow(2).mean()
            pb_log.append(self.rnn.pb_unit.data.clone())
            # BUG FIX: without zero_grad() the gradients accumulate across all
            # 80 iterations, so every Adam step acted on the summed gradient.
            optim.zero_grad()
            loss.backward()
            optim.step()
            print("PEM loss, step {}, loss: {}".format(i, loss.item()))

    @property
    def goal(self):
        """Current goal = the RNN's PB unit (normalized visual latent)."""
        return self.rnn.pb_unit

    @goal.setter
    def goal(self, pb):
        # Accept both (D,) and (1, D) shaped PB vectors.
        if pb.ndim == 1:
            pb = torch.unsqueeze(pb, 0)
        self.rnn.pb_unit = pb

    def norm(self, inputs, mode="all"):
        """Map raw features to normalized space: x * scale + min for the
        dimensions selected by mode ('joint', 'visual' or 'all')."""
        assert mode in ["joint", "visual", "all"]
        i_slice = self.norm_mode[mode]
        return inputs * self.norm_scale[:, i_slice] + self.norm_min[:, i_slice]

    def denorm(self, outputs, mode="all"):
        """Exact inverse of norm(): (y - min) / scale on the selected dims."""
        assert mode in ["joint", "visual", "all"]
        i_slice = self.norm_mode[mode]
        return (outputs - self.norm_min[:, i_slice]) / self.norm_scale[:, i_slice]
def main(args):
    """Load a trained VAE checkpoint, then depending on flags extract latent
    codes to text files (args.extract_z) and/or visualize the latent space in
    3-D (args.vis_z)."""
    transform = transforms.Compose([
        transforms.Resize([64, 64], 1),
        transforms.ToTensor(),
        transforms.Normalize([.5], [.5])
    ])
    vae = VAE(img_size=[3, 64, 64], z_dim=args.z_dim)
    pt_file = load_model(args.model_load_path, "*200.pt")
    vae.load_state_dict(torch.load(pt_file))
    vae.eval()
    if args.extract_z:
        _extract_z(vae, transform, args.data_path)
    if args.vis_z:
        _vis_z(vae, transform, args.data_path)


def _extract_z(vae, transform, data_path):
    """Encode every PNG in each sub-directory of data_path and write one CSV of
    per-frame latents per directory (named after the directory)."""
    for d in sorted(glob.glob(os.path.join(data_path, "*"))):
        print(d.split("/")[-1])
        data = []
        for f in sorted(glob.glob(os.path.join(d, "*.png"))):
            img = transform(Image.open(f))
            data.append(
                vae.reparam(*vae.encoder(img[None, :, :, :])).detach())
        data = torch.cat(data).cpu().numpy()
        np.savetxt(d.split("/")[-1] + ".txt", data, delimiter=",")


def _vis_z(vae, transform, data_path):
    """3-D scatter plot of the latent codes, one color/marker per class folder;
    latents with more than 3 dims are compressed with PCA first."""
    dataset = datasets.ImageFolder(root=data_path, transform=transform)
    data, label = [], []
    for img, cls in dataset:
        data.append(img)
        label.append(cls)
    data = torch.stack(data)
    label = torch.tensor(label)
    mu, log_var = vae.encoder(data)
    z = vae.reparam(mu, log_var).detach().numpy()
    if z.shape[-1] > 3:
        comp_z = PCA(n_components=3).fit_transform(z)
    else:
        comp_z = z
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1, projection="3d")
    cmap = plt.get_cmap("tab20")
    for i in range(label.max() + 1):
        ax.scatter(comp_z[label == i, 0],
                   comp_z[label == i, 1],
                   comp_z[label == i, 2],
                   marker="${}$".format(i),
                   color=cmap(i),
                   label=dataset.classes[i])
    plt.legend(loc="best")
    plt.show()
class CVAE():
    """Collaborative VAE recommender.

    Alternates an E-step (gradient updates of the item-content VAE toward the
    current item factors V) with an M-step (closed-form ALS updates of the user
    factors U and item factors V, the latter anchored to the VAE latents
    V_theta).
    """

    def __init__(self,
                 num_features=50,
                 max_epoch=50,
                 max_iter=5,
                 a=1,
                 b=0.01,
                 lambda_u=0.1,
                 lambda_v=10,
                 lambda_r=10,
                 vae_pre_training=None):
        # a / b: confidence weights for observed / unobserved interactions.
        # lambda_u / lambda_v: L2 strengths on U / V; lambda_r ties the VAE
        # latents to V during the E-step. vae_pre_training: optional path to a
        # pre-trained VAE state_dict.
        self.num_features = num_features
        self.max_epoch = max_epoch
        self.max_iter = max_iter
        self.a = a
        self.b = b
        self.lambda_u = lambda_u
        self.lambda_v = lambda_v
        self.lambda_r = lambda_r
        self.vae_pre_training = vae_pre_training

    def initialize(self, train_users, train_items, item_side_info):
        """Allocate factor matrices, build the item-content VAE (optionally
        from a pre-trained checkpoint) and its optimizer.

        Returns the item side information as a float tensor (the VAE input).
        """
        self.num_users = len(train_users)
        self.num_items = len(train_items)
        self.U = 0.1 * np.random.randn(self.num_users, self.num_features)
        self.V = 0.1 * np.random.randn(self.num_items, self.num_features)
        self.V_theta = 0.1 * np.random.randn(self.num_items,
                                             self.num_features)
        self.vae = VAE([item_side_info.shape[1], 200, 100], self.num_features)
        side_input = torch.tensor(item_side_info, dtype=torch.float)
        if self.vae_pre_training is not None:
            self.vae.load_state_dict(torch.load(self.vae_pre_training))
            self.V_theta[:] = self.vae.encoder(
                side_input).clone().detach().numpy()
            self.V[:] = self.V_theta
        # weight_decay acts as L2 regularization on the VAE parameters.
        self.optimizer = optim.Adam(self.vae.parameters(),
                                    lr=0.001,
                                    weight_decay=2e-4)
        return side_input

    def fit(self, train_users, test_users, train_items, item_side_info):
        """Run max_epoch rounds of E-step / M-step and report loss + recall."""
        side_input = self.initialize(train_users, train_items, item_side_info)
        for epoch in range(self.max_epoch):
            loss, side_latent = self.e_step(side_input)
            recall = self.m_step(train_users, train_items, test_users)
            print("Epoch:{}, Loss:{}, Recall:{}".format(epoch, loss, recall))

    @staticmethod
    def _bernoulli_nll(target, output):
        """Mean (over rows) Bernoulli negative log-likelihood of `target`
        under probabilities `output`, with both log terms clamped away from
        log(0)."""
        eps = torch.tensor(1e-10, dtype=torch.float)
        log_p = torch.log(torch.max(output, eps))
        log_not_p = torch.log(torch.max(1 - output, eps))
        return -torch.mean((target * log_p +
                            (1 - target) * log_not_p).sum(dim=1))

    # fix U,V update V_theta
    def e_step(self, side_input):
        """Gradient-train the VAE so its latent matches V (ridge term) while
        reconstructing the item side information (Bernoulli likelihood)."""
        loss = 0.
        for it in range(self.max_iter):
            self.optimizer.zero_grad()
            side_latent = self.vae.encoder(side_input)
            side_output = self.reg_tensor(self.vae.decoder())
            # BUG FIX: original loss paired (1 - side_output) with
            # log(side_output); correct cross-entropy pairs (1 - target) with
            # log(1 - output).
            gen_loss = self._bernoulli_nll(side_input, side_output)
            latent_loss = self.vae.latent_loss()
            v_loss = self.lambda_r * \
                torch.mean(
                    ((side_latent - torch.tensor(self.V, dtype=torch.float))**2).sum(dim=1))
            loss = gen_loss + latent_loss + v_loss
            loss.backward()
            self.optimizer.step()
            print(
                "E_Step:Iter:{}, Loss:{:.5f}, gen_loss:{:0.5f}, latent_loss:{:0.5f}, v_loss:{:0.5f}"
                .format(it, loss, gen_loss, latent_loss, v_loss))
        return loss, side_latent

    # fix V_theta update U,V
    def m_step(self, train_users, train_items, test_users):
        """Closed-form alternating least squares on U then V (V anchored to
        V_theta); returns Recall@50 of the final iteration."""
        for it in range(self.max_iter):
            # --- update U ---
            items_ids = np.array([len(x) for x in train_items]) > 0
            v = self.V[items_ids]
            vTv = np.dot(v.T, v) * self.b
            for i in range(self.num_users):
                ui_items = train_users[i]
                if len(ui_items) > 0:
                    fs_part = vTv + \
                        np.dot(self.V[ui_items, :].T,
                               self.V[ui_items, :]) * (self.a - self.b)
                    fs_part += self.lambda_u * np.eye(self.num_features)
                    sec_part = np.sum(self.V[ui_items, :], axis=0) * self.a
                    try:
                        self.U[i, :] = scipy.linalg.solve(fs_part, sec_part)
                    except AttributeError:
                        # if module 'scipy' has no attribute 'linalg'
                        self.U[i, :] = np.dot(np.mat(fs_part).I, sec_part)
            # --- update V ---
            users_ids = np.array([len(x) for x in train_users]) > 0
            u = self.U[users_ids]
            uTu = np.dot(u.T, u) * self.b
            for j in range(self.num_items):
                vj_users = train_items[j]
                if len(vj_users) > 0:
                    fs_part = uTu + \
                        np.dot(self.U[vj_users, :].T,
                               self.U[vj_users, :]) * (self.a - self.b)
                    fs_part += self.lambda_v * np.eye(self.num_features)
                    sec_part = np.sum(
                        self.U[vj_users, :],
                        axis=0) * self.a + self.lambda_v * self.V_theta[j, :]
                else:
                    # Items with no interactions fall back to the VAE prior.
                    fs_part = uTu + self.lambda_v * np.eye(self.num_features)
                    sec_part = self.lambda_v * self.V_theta[j, :]
                try:
                    self.V[j, :] = scipy.linalg.solve(fs_part, sec_part)
                except AttributeError:
                    # if module 'scipy' has no attribute 'linalg'
                    self.V[j, :] = np.dot(np.mat(fs_part).I, sec_part)
            recall = self.evalute_recall(train_users, test_users,
                                         [50, 100, 150])
            print(
                "M_Step:Iter:{}, Recall@50:{:.5f}, Recall@100:{:.5f},Recall@150:{:.5f}"
                .format(it, recall[0], recall[1], recall[2]))
        return recall[0]

    def reg_tensor(self, ts):
        """Sigmoid squashing with a 1e-10 floor, so later log() never sees 0."""
        return torch.max(torch.sigmoid(ts),
                         torch.tensor(1e-10, dtype=torch.float))

    def evalute_recall(self, train_users, test_users, recall_M):
        """Recall@M over users with non-empty test sets, ranking by U.V^T and
        skipping already-seen training items. (Name typo kept for callers.)"""
        res = []
        score = np.dot(self.U, self.V.T)
        ind_rec = np.argsort(score, axis=1)[:, ::-1]
        for m in recall_M:
            recalls = []
            for i in range(self.num_users):
                if len(test_users[i]) > 0:
                    m_rec = []
                    recall = 0.
                    for j in ind_rec[i]:
                        if j not in train_users[i]:
                            m_rec.append(j)
                            if j in test_users[i]:
                                recall += 1.
                            if len(m_rec) == m:
                                break
                    recalls.append(recall / len(test_users[i]))
            res.append(np.mean(recalls))
        return res

    def save_model(self, file_path):
        # TODO: persistence not implemented yet.
        pass

    def load_model(self, file_path):
        # TODO: persistence not implemented yet.
        pass
nr_mix = 10 # mean and scale for each components and weighting bt components (10+2*10) probs_size = (2 * nr_mix) + nr_mix dout = data_dim * probs_size latent_size = 64 encoder = Encoder(data_dim, latent_size) decoder = Decoder(latent_size, dout) vae = VAE(encoder, decoder, use_cuda) # square error is not the correct loss - for ordered input, # should use softmax for unordered input ( like mine ) if use_cuda: print("using gpu") vae = vae.cuda() vae.encoder = vae.encoder.cuda() vae.decoder = vae.decoder.cuda() opt = torch.optim.Adam(vae.parameters(), lr=1e-4) epoch = 0 data_train_loader = DataLoader(FroggerDataset( train_data_dir, transform=transforms.ToTensor(), limit=args.num_train_limit), batch_size=64, shuffle=True) data_test_loader = DataLoader(FroggerDataset( test_data_dir, transform=transforms.ToTensor()), batch_size=32, shuffle=True) test_data = data_test_loader