def process(jpg_data, box, texts):
    """Assemble one (inputs, target) pair for next-word prediction.

    A caption is drawn at random from ``texts`` and converted to word
    indices.  A random split position is chosen: the (at most ``MAX_WORDS``)
    indices just before it form the word input, and the index at the split
    position is the prediction target.

    Args:
        jpg_data: image data handed to ``util.decode_jpg`` (presumably
            encoded JPEG bytes -- confirm against ``util``).
        box: box passed to ``util.decode_jpg``; replaced by the box that the
            second decode call returns before the context is computed.
        texts: non-empty sequence of candidate captions.

    Returns:
        ``([x_global, x_local, x_words, x_ctx], y)`` where ``y`` is
        ``util.onehot`` of the target word index.
    """
    # Cropped view first, then the full view (same call order as before).
    local_view = util.decode_jpg(jpg_data, crop_to_box=box)
    # hack: scale the box down
    global_view, box = util.decode_jpg(jpg_data, box)

    caption = random.choice(texts)
    word_ids = words.indices(util.strip(caption))

    # Everything before ``target_pos`` is history; the word at it is the label.
    target_pos = np.random.randint(0, len(word_ids))
    history = word_ids[:target_pos]
    padded_history = util.left_pad(history[-MAX_WORDS:])
    target = util.onehot(word_ids[target_pos])
    context = img_ctx(box)

    return [global_view, local_view, padded_history, context], target
def __getitem__(self, index):
    """Return sample ``index``, optionally blended with other random samples.

    Up to ``self.num_mix`` blending rounds are attempted.  A round is skipped
    when ``self.beta <= 0`` or when a uniform draw exceeds ``self.prob``.
    Otherwise a weight ``lam`` is drawn from ``Beta(beta, beta)`` and both
    the image and the one-hot label are replaced by the convex combination
    ``lam * current + (1 - lam) * other``.

    Returns:
        ``(image, soft_label)``.
    """
    image, label = self.dataset[index]
    soft_label = onehot(self.num_class, label)

    for _ in range(self.num_mix):
        draw = np.random.rand(1)
        skip_round = self.beta <= 0 or draw > self.prob
        if not skip_round:
            # generate mixed sample
            lam = np.random.beta(self.beta, self.beta)
            other_index = random.choice(range(len(self)))
            other_image, other_label = self.dataset[other_index]
            other_soft_label = onehot(self.num_class, other_label)

            image = image * lam + other_image * (1 - lam)
            soft_label = soft_label * lam + other_soft_label * (1. - lam)

    return image, soft_label
def cross_entropy_loss(logits, labels):
    """Return the mean cross entropy between ``logits`` and integer ``labels``.

    Args:
      logits: array of shape (batch_size, n_classes).
      labels: integer array of shape (batch_size).

    Returns:
      The summed cross entropy divided by ``labels.size``.
    """
    n_classes = logits.shape[1]
    log_probs = jax.nn.log_softmax(logits)
    targets = util.onehot(labels, n_classes)
    total = -jnp.sum(targets * log_probs)
    return total / labels.size
def __getitem__(self, index):
    """Return sample ``index``, optionally with patches pasted from others.

    Up to ``self.num_mix`` rounds are attempted.  A round is skipped when
    ``self.beta <= 0`` or when a uniform draw exceeds ``self.prob``.
    Otherwise a box is obtained from ``rand_bbox`` and the matching region
    of a randomly chosen second image is written into the current image
    *in place*.  The label weight is then recomputed from the real area of
    the box relative to the last two image dimensions.

    Returns:
        ``(image, soft_label)``.
    """
    image, label = self.dataset[index]
    soft_label = onehot(self.num_class, label)

    for _ in range(self.num_mix):
        draw = np.random.rand(1)
        skip_round = self.beta <= 0 or draw > self.prob
        if not skip_round:
            # generate mixed sample
            lam = np.random.beta(self.beta, self.beta)
            other_index = random.choice(range(len(self)))
            other_image, other_label = self.dataset[other_index]
            other_soft_label = onehot(self.num_class, other_label)

            x1, y1, x2, y2 = rand_bbox(image.size(), lam)
            image[:, x1:x2, y1:y2] = other_image[:, x1:x2, y1:y2]

            # Re-derive the weight from the box that was actually pasted.
            patch_area = (x2 - x1) * (y2 - y1)
            full_area = image.size()[-1] * image.size()[-2]
            lam = 1 - (patch_area / full_area)
            soft_label = soft_label * lam + other_soft_label * (1. - lam)

    return image, soft_label
def plot_fig(G, G_mask, n, t):
    """Save a 1x3 figure: input image, predicted mask, generated image.

    Args:
        G: model whose ``predict([image_batch, target])`` output is shown in
            the right panel.
        G_mask: model whose ``predict`` output is shown in the middle panel.
        n: index of the image (third value returned by ``load_mnist``).
        t: target digit, one-hot encoded over 10 classes.

    The figure is written to ``./tmp/fig_{n}_{t}``.
    """
    _, _, imgs, digits = load_mnist()

    target = onehot(np.full((1, 1), t), 10)
    source = imgs[n][np.newaxis, ...]
    model_inputs = [source, target]
    generated = G.predict(model_inputs)
    mask = G_mask.predict([source, target])

    fig, axs = plt.subplots(1, 3, figsize=(4, 3))
    # Left to right: input, mask, generated.
    for axis, panel in zip(axs, (source, mask, generated)):
        axis.imshow(panel[0, :, :, 0], cmap='gray')
        axis.axis('off')

    out_path = os.path.join(f'./tmp/fig_{n}_{t}')
    fig.savefig(out_path)
def process(jpg_data, box, texts, **params):
    """Assemble one (inputs, target, box) triple for next-word prediction.

    A caption is drawn at random from ``texts`` and converted to word
    indices.  A split position of at least 1 is drawn; the indices before it
    (truncated to the last ``max_words`` when longer) form the word input,
    and the index at the split position is the target.

    Args:
        jpg_data: image data handed to ``util.decode_jpg``.
        box: box passed to ``util.decode_jpg``; replaced by the box that the
            second decode call returns.
        texts: non-empty sequence of candidate captions.
        **params: must contain ``max_words``; the whole mapping is also
            forwarded to ``util.left_pad``.

    Returns:
        ``([x_global, x_local, x_indices, x_ctx], y, box)``.
    """
    word_limit = params['max_words']

    # Cropped view first, then the full view (same call order as before).
    local_view = util.decode_jpg(jpg_data, crop_to_box=box)
    # hack: scale the box down
    global_view, box = util.decode_jpg(jpg_data, box)

    caption = random.choice(texts)
    word_ids = words.indices(util.strip(caption))

    target_pos = np.random.randint(1, len(word_ids))
    history = word_ids[:target_pos]
    too_long = len(history) > word_limit
    history = history[-word_limit:] if too_long else history
    history = util.left_pad(history, **params)

    target = util.onehot(word_ids[target_pos])
    context = img_ctx(box)
    return [global_view, local_view, history, context], target, box
def plot_table(G, D, name, random=True, save=True):
    """Plot a 10x10 grid of generated digits and optionally save it.

    Row ``i`` feeds an MNIST image of digit ``i`` to the generator; column
    ``j`` asks it for target digit ``j``.

    Args:
        G: generator; ``G.predict([image_batch, onehot_target])`` must return
            a batch indexable as ``[0, :, :, 0]``.
        D: unused here; kept so existing callers keep working.
        name: output path handed to ``fig.savefig``.
        random: when true, pick a random image of each digit, otherwise the
            first one.  (This parameter shadows the ``random`` module inside
            the function.)
        save: when true (the default, matching the previous behaviour) the
            figure is written to ``name``.  Accepting this keyword also lets
            callers that pass ``save=True`` work instead of raising
            ``TypeError``.
    """
    _, _, imgs, digits = load_mnist()

    # One source image per digit 0..9.
    input_imgs = []
    for digit in range(10):
        candidates = imgs[np.where(digits == digit)[0]]
        if random:
            pick = np.random.randint(0, candidates.shape[0])
        else:
            pick = 0
        input_imgs.append(candidates[pick])

    fig, axs = plt.subplots(10, 10)
    fig.set_size_inches(50, 50)
    for i, source in enumerate(input_imgs):  # input image (row)
        source_batch = np.expand_dims(source, axis=0)
        for j in range(10):  # target digit (column)
            gen_img = G.predict([source_batch, onehot(np.full((1,), j), 10)])
            axs[i, j].imshow(gen_img[0, :, :, 0], cmap='gray')
            axs[i, j].axis('off')

    plt.show()
    if save:
        fig.savefig(name, dpi=150)
def compute_eval_metrics(logits, labels, eval_top_5):
    """Compute summed evaluation metrics for one batch.

    The metrics are the loss sum, the error count and the sample count, plus
    the top-5 error count when ``eval_top_5`` is true.  Samples labelled
    ``-1`` are masked out of every count.

    Args:
      logits: array of shape (batch_size, n_classes).
      labels: integer array of shape (batch_size).
      eval_top_5: if True, also compute the top-5 error count.

    Returns:
      The metrics dict after it has been passed through ``util.psum``.
    """
    valid = (labels != -1).astype(jnp.float32)

    # Per the original author's note: `onehot` yields all zeros for samples
    # labelled -1, so the loss needs no explicit multiplication by the mask.
    log_probs = jax.nn.log_softmax(logits)
    targets = util.onehot(labels, logits.shape[1])
    loss_sum = -jnp.sum(targets * log_probs)

    top1_wrong = jnp.argmax(logits, -1) != labels
    error_count = (top1_wrong * valid).sum(-1)

    metrics = {
        'loss_sum': loss_sum,
        'error_count': error_count,
        'sample_count': valid.sum(-1),
    }

    if eval_top_5:
        # The five highest-scoring classes are the last five of an ascending sort.
        best_five = jnp.argsort(logits, axis=-1)[..., -5:]
        in_best_five = (best_five == labels[..., None]).any(axis=-1)
        top5_wrong = (~in_best_five).astype(jnp.float32) * valid
        metrics['top5_error_count'] = top5_wrong.sum(-1)

    return util.psum(metrics)
def sample_imgs(self, itr, img_dir):
    """Save a preview figure for the first five test images.

    Each row shows the test image, the mask from ``self.G_mask`` and the
    image from ``self.G``, all for target digit 4.  The discriminator output
    for the real image is written next to the first column and the output
    for the generated image next to the third.

    Args:
        itr: iteration number; the figure is saved as ``{itr}.png``.
        img_dir: output directory, created if it does not exist.
    """
    n = 5
    targets = onehot(np.full((n, 1), 4), 10)
    test_imgs = self.test_imgs[:n]

    gen_imgs = self.G.predict([test_imgs, targets])
    masks = self.G_mask.predict([test_imgs, targets])
    D_pred_T = self.D.predict([test_imgs, targets])
    D_pred_F = self.D.predict([gen_imgs, targets])

    fig, axs = plt.subplots(n, 3, figsize=(8, 6))
    fig.tight_layout()

    columns = (test_imgs, masks, gen_imgs)
    for row in range(n):
        for col, batch in enumerate(columns):
            cell = axs[row, col]
            cell.imshow(batch[row, :, :, 0], cmap='gray')
            cell.axis('off')
            if col == 0:
                cell.text(-20, -2, f'D_pred_T: {D_pred_T[row]}')
            elif col == 2:
                cell.text(-20, -2, f'D_pred_F: {D_pred_F[row]}')

    dir_missing = not os.path.isdir(img_dir)
    if dir_missing:
        os.makedirs(img_dir)
    fig.savefig(os.path.join(img_dir, f'{itr}.png'))
    plt.close()
def train(self,
          iterations,
          batch_size=128,
          sample_interval=100,
          save_model_interval=100,
          train_D_iters=1,
          train_G_iters=1,
          img_dir='./',
          model_dir='./'):
    """Alternate discriminator and generator updates for ``iterations`` steps.

    Each iteration runs ``train_D_iters`` discriminator updates (one batch of
    real images labelled with their digit out of 11 classes, one batch of
    generated images labelled with the extra 11th class) followed by
    ``train_G_iters`` updates of ``self.combined``.  It then prints the
    latest losses, and periodically plots a sample table and saves the
    models.

    Args:
        iterations: number of training iterations (counted from 1).
        batch_size: number of images per training batch.
        sample_interval: plot a sample table every this many iterations;
            values <= 0 disable plotting.
        save_model_interval: save ``D``, ``G`` and ``G_mask`` every this many
            iterations; values <= 0 disable saving.
        train_D_iters: discriminator updates per iteration.
        train_G_iters: generator updates per iteration.
        img_dir: directory for sample images (created if missing).
        model_dir: directory for saved models (created if missing).

    Note:
        The loss printout reads the values left by the last inner update, so
        both ``train_D_iters`` and ``train_G_iters`` must be at least 1.
    """
    imgs, digits = self.imgs, self.digits
    for out_dir in (img_dir, model_dir):
        os.makedirs(out_dir, exist_ok=True)

    for itr in range(1, iterations + 1):
        # ---------------------
        #  Train D_realness
        # ---------------------
        for _ in range(train_D_iters):
            # Independent random batches for the real and the fake half.
            real_pick = np.random.randint(0, imgs.shape[0], batch_size)
            fake_pick = np.random.randint(0, imgs.shape[0], batch_size)

            real_batch = imgs[real_pick]
            real_labels = onehot(digits[real_pick], 11)  # 11 classes for D's output

            # Random target digits for G's input.
            requested = onehot(np.random.randint(0, 10, batch_size), 10)
            fake_batch = self.G.predict([imgs[fake_pick], requested])
            fake_labels = np.zeros((batch_size, 11))
            fake_labels[:, 10] = 1  # only the 11th class is 1

            d_loss_real = self.D.train_on_batch(real_batch, real_labels)
            d_loss_fake = self.D.train_on_batch(fake_batch, fake_labels)

        # ---------------------
        #  Train Generator
        # ---------------------
        for _ in range(train_G_iters):
            # Condition on labels
            source_pick = np.random.randint(0, imgs.shape[0], batch_size)
            wanted_digits = np.random.randint(0, 10, batch_size)
            g_condition = onehot(wanted_digits, 10)
            g_expected = onehot(wanted_digits, 11)
            g_loss = self.combined.train_on_batch(
                [imgs[source_pick], g_condition], g_expected)

        print(f'--------\nEPOCH {itr}\n--------')
        d_report = pd.DataFrame({
            'D': self.D.metrics_names,
            'real': d_loss_real,
            'fake': d_loss_fake
        })
        print(d_report.to_string(index=False))
        print()
        g_report = pd.DataFrame({
            'G': self.combined.metrics_names,
            'value': g_loss,
        })
        print(g_report.to_string(index=False))
        print()

        # If at save interval => save generated image samples
        time_to_sample = sample_interval > 0 and itr % sample_interval == 0
        if time_to_sample:
            # self.sample_imgs(itr, img_dir)
            table_path = os.path.join(img_dir, f'{itr}.png')
            plot_table(self.G, self.D, table_path, save=True)

        time_to_save = (save_model_interval > 0
                        and itr % save_model_interval == 0)
        if time_to_save:
            self.D.save(os.path.join(model_dir, f'D{itr}.hdf5'))
            self.G.save(os.path.join(model_dir, f'G{itr}.hdf5'))
            self.G_mask.save(os.path.join(model_dir, f'G_mask{itr}.hdf5'))
N = 80 Dc = 5 Dd = 4 L = 5 K = 3 #X_c,X_b,s = dist.generate_mixture(K,N,Dc,Dd) #transformer_c = StandardScaler().fit(X_c) #standard normalization for real data #transformer_c.transform(X_c) X_b,X_d,X_c = read_dataset.get_eb2_data() X_c = np.log(X_c+1) X_b = X_b.reshape((len(X_b),1)) X_d = X_d.reshape((len(X_d),1)) X_d=util.onehot(X_d,int(np.max(X_d))) #one_hot encoding of categorical data transformer_c = StandardScaler().fit(X_c) #standard normalization for real data transformer_c.transform(X_c) # ---------- Diagonal Gaussian toy data ----------- #X_c = dist.toy_gaussian_mixture(N,Dc,K) # N x D # ---------- Diagonal Gaussian toy data ----------- ''' X_c = read_dataset.get_seizure() X_c = np.log(X_c)+1 # ---------- Categorical toy data -----------
def evaluate(batch_size, checknum, mode, discriminative):
    """Restore a ``depvox_gan`` checkpoint and run one evaluation mode.

    The network is rebuilt from ``cfg`` settings and its weights are restored
    from ``<checkpoint dir>/checkpoint<checknum>``.  What happens next
    depends on ``mode``:

    * ``'recons'``: runs the test set through the network in full batches,
      dumps the inputs and predictions as raw ``.bin`` files under
      ``cfg.DIR.EVAL_PATH``, optionally (``discriminative``) decodes random
      latent codes, and writes IoU values to ``IoU.csv``.
    * ``'interpolate'``: loads ``decode_z.npy`` and decodes linear
      interpolations between every ordered pair of latent codes.
    * ``'noise'``: loads ``decode_z.npy`` and decodes copies of each latent
      code with Gaussian noise added at one spatial location at a time.

    Args:
        batch_size: batch size the graph is built with and data is fed in.
        checknum: checkpoint number appended to the checkpoint path.
        mode: ``'recons'``, ``'interpolate'`` or ``'noise'``; any other value
            only restores the weights.
        discriminative: when ``True`` the ``'-d'`` checkpoint directory is
            used and the sampling graph is built.

    Notes:
        * The sampling tensors (``*_tf_sample``) are only created when
          ``discriminative is True``, but the ``'interpolate'`` and
          ``'noise'`` modes use them unconditionally.
        * ``decode_z.npy`` is only written by ``'recons'`` with
          ``discriminative`` set, so that mode has to run first.
        * This function uses Python 2 ``print`` statements.
    """
    # Network / volume configuration.
    n_vox = cfg.CONST.N_VOX
    dim = cfg.NET.DIM
    vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]]
    complete_shape = [n_vox[0], n_vox[1], n_vox[2], 2]
    dim_z = cfg.NET.DIM_Z
    start_vox_size = cfg.NET.START_VOX
    kernel = cfg.NET.KERNEL
    stride = cfg.NET.STRIDE
    dilations = cfg.NET.DILATIONS
    freq = cfg.CHECK_FREQ  # not used below

    save_path = cfg.DIR.EVAL_PATH
    if discriminative is True:
        model_path = cfg.DIR.CHECK_POINT_PATH + '-d'
    else:
        model_path = cfg.DIR.CHECK_POINT_PATH
    chckpt_path = model_path + '/checkpoint' + str(checknum)

    depvox_gan_model = depvox_gan(batch_size=batch_size, vox_shape=vox_shape, complete_shape=complete_shape, dim_z=dim_z, dim=dim, start_vox_size=start_vox_size, kernel=kernel, stride=stride, dilations=dilations, discriminative=discriminative, is_train=False)

    # Only a subset of the tensors returned by build_model() is used below.
    Z_tf, z_enc_tf, surf_tf, full_tf, full_gen_tf, surf_dec_tf, full_dec_tf,\
    gen_loss_tf, discrim_loss_tf, recons_ssc_loss_tf, recons_com_loss_tf, recons_sem_loss_tf, encode_loss_tf, refine_loss_tf, summary_tf,\
    part_tf, part_dec_tf, complete_gt_tf, complete_gen_tf, complete_dec_tf, ssc_tf, scores_tf = depvox_gan_model.build_model()
    if discriminative is True:
        Z_tf_sample, comp_tf_sample, surf_tf_sample, full_tf_sample, part_tf_sample, scores_tf_sample = depvox_gan_model.samples_generator(
            visual_size=batch_size)
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    # Restore variables from disk.
    saver.restore(sess, chckpt_path)

    print("...Weights restored.")

    if mode == 'recons':
        # evaluation for reconstruction
        voxel_test, surf_test, part_test, num, data_paths = scene_model_id_pair_test(
            dataset_portion=cfg.TRAIN.DATASET_PORTION)

        # Evaluation masks
        if cfg.TYPE_TASK == 'scene':
            # occluded region
            """
            space_effective = np.where(voxel_test > -1, 1, 0) * np.where(
                part_test > -1, 1, 0)
            voxel_test *= space_effective
            part_test *= space_effective
            """
            # occluded region
            part_test[part_test < -1] = 0
            surf_test[surf_test < 0] = 0
            voxel_test[voxel_test < 0] = 0

        num = voxel_test.shape[0]
        print("test voxels loaded")
        # Only full batches are evaluated; a trailing partial batch is skipped.
        # NOTE(review): the ground-truth arrays used for the IoU below keep
        # all ``num`` samples -- confirm num is a multiple of batch_size.
        for i in np.arange(int(num / batch_size)):
            batch_tsdf = part_test[i * batch_size:i * batch_size + batch_size]
            batch_surf = surf_test[i * batch_size:i * batch_size + batch_size]
            batch_voxel = voxel_test[i * batch_size:i * batch_size + batch_size]

            batch_pred_surf, batch_pred_full, batch_pred_part, batch_part_enc_Z, batch_complete_gt, batch_pred_complete, batch_ssc = sess.run(
                [
                    surf_dec_tf, full_dec_tf, part_dec_tf, z_enc_tf,
                    complete_gt_tf, complete_dec_tf, ssc_tf
                ],
                feed_dict={
                    part_tf: batch_tsdf,
                    surf_tf: batch_surf,
                    full_tf: batch_voxel
                })

            # Accumulate the per-batch outputs along the sample axis.
            if i == 0:
                pred_part = batch_pred_part
                pred_surf = batch_pred_surf
                pred_full = batch_pred_full
                pred_ssc = batch_ssc
                part_enc_Z = batch_part_enc_Z
                complete_gt = batch_complete_gt
                pred_complete = batch_pred_complete
            else:
                pred_part = np.concatenate((pred_part, batch_pred_part), axis=0)
                pred_surf = np.concatenate((pred_surf, batch_pred_surf), axis=0)
                pred_full = np.concatenate((pred_full, batch_pred_full), axis=0)
                pred_ssc = np.concatenate((pred_ssc, batch_ssc), axis=0)
                part_enc_Z = np.concatenate((part_enc_Z, batch_part_enc_Z), axis=0)
                complete_gt = np.concatenate((complete_gt, batch_complete_gt), axis=0)
                pred_complete = np.concatenate(
                    (pred_complete, batch_pred_complete), axis=0)

        print("forwarded")

        # For visualization
        bin_file = np.uint8(voxel_test)
        bin_file.tofile(save_path + '/scene.bin')

        surface = np.array(part_test)
        if cfg.TYPE_TASK == 'scene':
            # Absolute value scaled by 10 before the uint8 cast below.
            surface = np.abs(surface)
            surface *= 10
            pred_part = np.abs(pred_part)
            pred_part *= 10
        elif cfg.TYPE_TASK == 'object':
            surface = np.clip(surface, 0, 1)
            pred_part = np.clip(pred_part, 0, 1)
        surface.astype('uint8').tofile(save_path + '/surface.bin')
        pred_part.astype('uint8').tofile(save_path + '/dec_part.bin')

        # Ground-truth labels multiplied by the [0, 1]-clipped input volume.
        depsem_gt = np.multiply(voxel_test, np.clip(surface, 0, 1))
        if cfg.TYPE_TASK == 'scene':
            depsem_gt[depsem_gt < 0] = 0
        depsem_gt.astype('uint8').tofile(save_path + '/depth_seg_scene.bin')

        # decoded
        # Each "error" volume is (prediction clipped to {0, 1}) plus
        # 2 * (argmax of complete_gt).
        np.argmax(pred_ssc, axis=4).astype('uint8').tofile(save_path + '/dec_ssc.bin')
        error = np.array(
            np.clip(np.argmax(pred_ssc, axis=4), 0, 1) +
            np.argmax(complete_gt, axis=4) * 2)
        error.astype('uint8').tofile(save_path + '/dec_ssc_error.bin')
        np.argmax(pred_surf, axis=4).astype('uint8').tofile(save_path + '/dec_surf.bin')
        error = np.array(
            np.clip(np.argmax(pred_surf, axis=4), 0, 1) +
            np.argmax(complete_gt, axis=4) * 2)
        error.astype('uint8').tofile(save_path + '/dec_surf_error.bin')
        np.argmax(pred_full, axis=4).astype('uint8').tofile(save_path + '/dec_full.bin')
        error = np.array(
            np.clip(np.argmax(pred_full, axis=4), 0, 1) +
            np.argmax(complete_gt, axis=4) * 2)
        error.astype('uint8').tofile(save_path + '/dec_full_error.bin')
        np.argmax(pred_complete, axis=4).astype('uint8').tofile(save_path + '/dec_complete.bin')
        np.argmax(complete_gt, axis=4).astype('uint8').tofile(save_path + '/complete_gt.bin')

        # reconstruction and generation from normal distribution evaluation
        # generator from random distribution
        if discriminative is True:
            # Saved for the 'interpolate' and 'noise' modes, which load it.
            np.save(save_path + '/decode_z.npy', part_enc_Z)
            sample_times = 10
            for j in np.arange(sample_times):
                Z_var_np_sample = np.random.normal(
                    size=(batch_size, start_vox_size[0], start_vox_size[1],
                          start_vox_size[2], dim_z)).astype(np.float32)

                z_comp_rand, z_surf_rand, z_full_rand, z_part_rand, scores_sample = sess.run(
                    [
                        comp_tf_sample, surf_tf_sample, full_tf_sample,
                        part_tf_sample, scores_tf_sample
                    ],
                    feed_dict={Z_tf_sample: Z_var_np_sample})
                if j == 0:
                    z_comp_rand_all = z_comp_rand
                    z_part_rand_all = z_part_rand
                    z_surf_rand_all = z_surf_rand
                    z_full_rand_all = z_full_rand
                else:
                    z_comp_rand_all = np.concatenate(
                        [z_comp_rand_all, z_comp_rand], axis=0)
                    z_part_rand_all = np.concatenate(
                        [z_part_rand_all, z_part_rand], axis=0)
                    z_surf_rand_all = np.concatenate(
                        [z_surf_rand_all, z_surf_rand], axis=0)
                    z_full_rand_all = np.concatenate(
                        [z_full_rand_all, z_full_rand], axis=0)
                print(scores_sample)
            # Only the latent sample of the last round is written out.
            Z_var_np_sample.astype('float32').tofile(save_path + '/sample_z.bin')
            np.argmax(z_comp_rand_all, axis=4).astype('uint8').tofile(save_path + '/gen_comp.bin')
            np.argmax(z_surf_rand_all, axis=4).astype('uint8').tofile(save_path + '/gen_surf.bin')
            np.argmax(z_full_rand_all, axis=4).astype('uint8').tofile(save_path + '/gen_full.bin')
            if cfg.TYPE_TASK == 'scene':
                z_part_rand_all = np.abs(z_part_rand_all)
                z_part_rand_all *= 10
            elif cfg.TYPE_TASK == 'object':
                # Binarise at 0.4.
                z_part_rand_all[z_part_rand_all <= 0.4] = 0
                z_part_rand_all[z_part_rand_all > 0.4] = 1
                z_part_rand = np.squeeze(z_part_rand)
            z_part_rand_all.astype('uint8').tofile(save_path + '/gen_part.bin')

            # Disabled experiment: decode principal components of the codes.
            eigen_shape = False
            if eigen_shape:
                z_U, z_V = pca(np.reshape(part_enc_Z, [
                    200, start_vox_size[0] * start_vox_size[1] *
                    start_vox_size[2] * dim_z
                ]),
                               dim_remain=200)
                z_V = np.reshape(np.transpose(z_V[:, 0:8]), [
                    8, start_vox_size[0], start_vox_size[1],
                    start_vox_size[2], dim_z
                ])
                z_surf_rand, z_full_rand, z_part_rand = sess.run(
                    [surf_tf_sample, full_tf_sample, part_tf_sample],
                    feed_dict={Z_tf_sample: z_V})
                np.argmax(z_surf_rand, axis=4).astype('uint8').tofile(save_path + '/gen_surf.bin')
                if cfg.TYPE_TASK == 'scene':
                    z_part_rand = np.abs(z_part_rand)
                    z_part_rand *= 10
                elif cfg.TYPE_TASK == 'object':
                    z_part_rand[z_part_rand <= 0.4] = 0
                    z_part_rand[z_part_rand > 0.4] = 1
                    z_part_rand = np.squeeze(z_part_rand)
                z_part_rand.astype('uint8').tofile(save_path + '/gen_sdf.bin')

        print("voxels saved")

        # numerical evaluation

        # calc_IoU
        # completion
        on_complete_gt = complete_gt
        complete_gen = np.argmax(pred_complete, axis=4)
        on_complete_gen = onehot(complete_gen, 2)
        IoU_comp = np.zeros([2 + 1])
        AP_comp = np.zeros([2 + 1])  # not used below
        print(colored("Completion", 'cyan'))
        IoU_comp = IoU(on_complete_gt, on_complete_gen, IoU_comp,
                       [vox_shape[0], vox_shape[1], vox_shape[2], 2])

        # depth segmentation
        # Predictions are multiplied by the [0, 1]-clipped input volume.
        on_depsem_gt = onehot(depsem_gt, vox_shape[3])
        on_depsem_ssc = np.multiply(
            onehot(np.argmax(pred_ssc, axis=4), vox_shape[3]),
            np.expand_dims(np.clip(surface, 0, 1), -1))
        on_depsem_dec = np.multiply(
            onehot(np.argmax(pred_full, axis=4), vox_shape[3]),
            np.expand_dims(np.clip(surface, 0, 1), -1))
        print(colored("Geometric segmentation", 'cyan'))
        IoU_class = np.zeros([vox_shape[3] + 1])
        IoU_class = IoU(on_depsem_gt, on_depsem_ssc, IoU_class, vox_shape)
        IoU_all = np.expand_dims(IoU_class, axis=1)
        print(colored("Generative segmentation", 'cyan'))
        IoU_class = np.zeros([vox_shape[3] + 1])
        IoU_class = IoU(on_depsem_gt, on_depsem_dec, IoU_class, vox_shape)
        # NOTE(review): this assignment replaces the "Geometric segmentation"
        # column instead of appending to it -- confirm that is intended.
        IoU_all = np.expand_dims(IoU_class, axis=1)

        # volume segmentation
        on_surf_gt = onehot(surf_test, vox_shape[3])  # not used below
        on_full_gt = onehot(voxel_test, vox_shape[3])
        print(colored("Geometric semantic completion", 'cyan'))
        on_pred = onehot(np.argmax(pred_ssc, axis=4), vox_shape[3])
        IoU_class = IoU(on_full_gt, on_pred, IoU_class, vox_shape)
        IoU_all = np.concatenate((IoU_all, np.expand_dims(IoU_class, axis=1)), axis=1)
        print(colored("Generative semantic completion", 'cyan'))
        on_pred = onehot(np.argmax(pred_surf, axis=4), vox_shape[3])
        IoU_class = IoU(on_full_gt, on_pred, IoU_class, vox_shape)
        IoU_all = np.concatenate((IoU_all, np.expand_dims(IoU_class, axis=1)), axis=1)
        print(colored("Solid generative semantic completion", 'cyan'))
        on_pred = onehot(np.argmax(pred_full, axis=4), vox_shape[3])
        IoU_class = IoU(on_full_gt, on_pred, IoU_class, vox_shape)
        IoU_all = np.concatenate((IoU_all, np.expand_dims(IoU_class, axis=1)), axis=1)

        # Row 0 is dropped and values are scaled to percent; the " & "
        # delimiter looks meant for pasting into a LaTeX table.
        np.savetxt(save_path + '/IoU.csv',
                   np.transpose(IoU_all[1:] * 100),
                   delimiter=" & ",
                   fmt='%2.1f')

    # interpolation evaluation
    if mode == 'interpolate':
        interpolate_num = 8
        # interpolation latent vectors
        decode_z = np.load(save_path + '/decode_z.npy')
        print(save_path)
        decode_z = decode_z[20:20 + batch_size]
        for l in np.arange(batch_size):
            for r in np.arange(batch_size):
                if l != r:
                    print l, r
                    base_num_left = l
                    base_num_right = r
                    left = np.reshape(decode_z[base_num_left], [
                        1, start_vox_size[0], start_vox_size[1],
                        start_vox_size[2], dim_z
                    ])
                    right = np.reshape(decode_z[base_num_right], [
                        1, start_vox_size[0], start_vox_size[1],
                        start_vox_size[2], dim_z
                    ])

                    # Step added per interpolation stage.
                    duration = (right - left) / (interpolate_num - 1)
                    # left is the reference sample and Z_np_sample is the remaining samples
                    if base_num_left == 0:
                        Z_np_sample = decode_z[1:]
                    elif base_num_left == batch_size - 1:
                        Z_np_sample = decode_z[:batch_size - 1]
                    else:
                        Z_np_sample_before = np.reshape(
                            decode_z[:base_num_left], [
                                base_num_left, start_vox_size[0],
                                start_vox_size[1], start_vox_size[2], dim_z
                            ])
                        Z_np_sample_after = np.reshape(
                            decode_z[base_num_left + 1:], [
                                batch_size - base_num_left - 1,
                                start_vox_size[0], start_vox_size[1],
                                start_vox_size[2], dim_z
                            ])
                        Z_np_sample = np.concatenate(
                            [Z_np_sample_before, Z_np_sample_after], axis=0)
                    for i in np.arange(interpolate_num):
                        if i == 0:
                            Z = copy.copy(left)
                            interpolate_z = copy.copy(Z)
                        else:
                            Z = Z + duration
                            interpolate_z = np.concatenate([interpolate_z, Z],
                                                           axis=0)

                        # Z_np_sample is used to fill up the batch
                        Z_var_np_sample = np.concatenate([Z, Z_np_sample],
                                                         axis=0)
                        pred_full_rand, pred_part_rand = sess.run(
                            [full_tf_sample, part_tf_sample],
                            feed_dict={Z_tf_sample: Z_var_np_sample})
                        # Only element 0 (the interpolated code) is kept.
                        interpolate_vox = np.reshape(pred_full_rand[0], [
                            1, vox_shape[0], vox_shape[1], vox_shape[2],
                            vox_shape[3]
                        ])
                        interpolate_part = np.reshape(pred_part_rand[0], [
                            1, vox_shape[0], vox_shape[1], vox_shape[2],
                            complete_shape[3]
                        ])

                        if i == 0:
                            pred_full = interpolate_vox
                            pred_part = interpolate_part
                        else:
                            pred_full = np.concatenate(
                                [pred_full, interpolate_vox], axis=0)
                            pred_part = np.concatenate(
                                [pred_part, interpolate_part], axis=0)
                    # NOTE(review): the latent codes are cast to uint8 before
                    # being written -- confirm this loss of precision is wanted.
                    interpolate_z.astype('uint8').tofile(
                        save_path + '/interpolate/interpolation_z' + str(l) +
                        '-' + str(r) + '.bin')

                    full_models_cat = np.argmax(pred_full, axis=4)
                    full_models_cat.astype('uint8').tofile(
                        save_path + '/interpolate/interpolation_f' + str(l) +
                        '-' + str(r) + '.bin')
                    if cfg.TYPE_TASK == 'scene':
                        # Binarise the absolute value at 0.2.
                        pred_part = np.abs(pred_part)
                        pred_part[pred_part < 0.2] = 0
                        pred_part[pred_part >= 0.2] = 1
                    elif cfg.TYPE_TASK == 'object':
                        pred_part = np.argmax(pred_part, axis=4)
                    pred_part.astype('uint8').tofile(
                        save_path + '/interpolate/interpolation_p' + str(l) +
                        '-' + str(r) + '.bin')
        print("voxels saved")

    # add noise evaluation
    if mode == 'noise':
        decode_z = np.load(save_path + '/decode_z.npy')
        decode_z = decode_z[:batch_size]
        noise_num = 10
        for base_num in np.arange(batch_size):
            print base_num
            base = np.reshape(decode_z[base_num], [
                1, start_vox_size[0], start_vox_size[1], start_vox_size[2],
                dim_z
            ])
            # One noise vector per perturbed copy, drawn once per base code
            # and reused at every spatial location.
            eps = np.random.normal(size=(noise_num - 1,
                                         dim_z)).astype(np.float32)

            # Z_np_sample holds the other codes and fills up the batch.
            if base_num == 0:
                Z_np_sample = decode_z[1:]
            elif base_num == batch_size - 1:
                Z_np_sample = decode_z[:batch_size - 1]
            else:
                Z_np_sample_before = np.reshape(decode_z[:base_num], [
                    base_num, start_vox_size[0], start_vox_size[1],
                    start_vox_size[2], dim_z
                ])
                Z_np_sample_after = np.reshape(decode_z[base_num + 1:], [
                    batch_size - base_num - 1, start_vox_size[0],
                    start_vox_size[1], start_vox_size[2], dim_z
                ])
                Z_np_sample = np.concatenate(
                    [Z_np_sample_before, Z_np_sample_after], axis=0)
            for c in np.arange(start_vox_size[0]):
                for l in np.arange(start_vox_size[1]):
                    for d in np.arange(start_vox_size[2]):

                        for i in np.arange(noise_num):
                            if i == 0:
                                # First entry is the unperturbed code.
                                Z = copy.copy(base)
                                noise_z = copy.copy(Z)
                            else:
                                Z = copy.copy(base)
                                Z[0, c, l, d, :] += eps[i - 1]
                                noise_z = np.concatenate([noise_z, Z], axis=0)

                            Z_var_np_sample = np.concatenate([Z, Z_np_sample],
                                                             axis=0)
                            pred_full_rand = sess.run(
                                full_tf_sample,
                                feed_dict={Z_tf_sample: Z_var_np_sample})
                            """
                            refined_voxs_rand = sess.run(
                                sample_refine_full_tf,
                                feed_dict={
                                    sample_full_tf: pred_full_rand
                                })
                            """
                            # Only element 0 (the perturbed code) is kept.
                            noise_vox = np.reshape(pred_full_rand[0], [
                                1, vox_shape[0], vox_shape[1], vox_shape[2],
                                vox_shape[3]
                            ])
                            if i == 0:
                                pred_full = noise_vox
                            else:
                                pred_full = np.concatenate(
                                    [pred_full, noise_vox], axis=0)

                        np.save(
                            save_path + '/noise_z' + str(base_num) + '_' +
                            str(c) + str(l) + str(d) + '.npy', noise_z)

                        full_models_cat = np.argmax(pred_full, axis=4)
                        np.save(
                            save_path + '/noise' + str(base_num) + '_' +
                            str(c) + str(l) + str(d) + '.npy', full_models_cat)

        print("voxels saved")
# Build the GRU network from the hyper-parameters defined earlier in the script.
rnn = gru.gru(in_size, rnn_size, out_size, rnn_layers, dropout=dropout, adadelta_params=adadelta_params, alpha=alpha)
for e in range(epochs):
    p = 0  # start offset of the current training window in `data`
    costs = []
    # Slide a window of `seq_length` items over `data`; the target sequence
    # is the input sequence shifted by one position.
    while p + seq_length + 1 < len(data):
        seq_x = data[p:p + seq_length]
        seq_y = data[p + 1:p + seq_length + 1]
        seq_x_oh = util.onehot(seq_x, char_to_ix)
        seq_y_oh = util.onehot(seq_y, char_to_ix)
        cost = rnn.train(seq_x_oh, seq_y_oh)
        costs.append(cost)
        p += seq_length  # windows do not overlap
    print('epoch', e, 'cost: ', np.mean(costs))
    # Save the model, reload it with is_train=0 and generate a sample.
    util.save_model(model_save_file, rnn, char_to_ix, ix_to_char)
    rnn_test = util.load_model(model_save_file, is_train=0)
    seq_test = random.choice(chars)  # random seed item
    for n in range(args.test_length):
        # The whole sequence so far is re-encoded and fed in at every step;
        # the last prediction is appended to it.
        seq_test_oh = util.onehot(seq_test, char_to_ix)
        prediction = rnn_test.predict(seq_test_oh)
        seq_test += ix_to_char[prediction[-1]]
    print(e, seq_test)
def evaluate(batch_size, checknum, mode, discriminative): n_vox = cfg.CONST.N_VOX dim = cfg.NET.DIM vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]] com_shape = [n_vox[0], n_vox[1], n_vox[2], 2] dim_z = cfg.NET.DIM_Z start_vox_size = cfg.NET.START_VOX kernel = cfg.NET.KERNEL stride = cfg.NET.STRIDE dilations = cfg.NET.DILATIONS freq = cfg.CHECK_FREQ save_path = cfg.DIR.EVAL_PATH if discriminative is True: model_path = cfg.DIR.CHECK_POINT_PATH + '-d' else: model_path = cfg.DIR.CHECK_POINT_PATH chckpt_path = model_path + '/checkpoint' + str(checknum) depvox_gan_model = depvox_gan(batch_size=batch_size, vox_shape=vox_shape, com_shape=com_shape, dim_z=dim_z, dim=dim, start_vox_size=start_vox_size, kernel=kernel, stride=stride, dilations=dilations, discriminative=discriminative, is_train=False) Z_tf, z_enc_tf, surf_tf, full_tf, full_gen_tf, surf_dec_tf, full_dec_tf,\ gen_loss_tf, discrim_loss_tf, recons_ssc_loss_tf, recons_com_loss_tf, recons_sem_loss_tf, encode_loss_tf, refine_loss_tf, summary_tf,\ part_tf, part_dec_tf, comp_gt_tf, comp_gen_tf, comp_dec_tf, ssc_tf, scores_tf = depvox_gan_model.build_model() if discriminative is True: Z_tf_samp, comp_tf_samp, surf_tf_samp, full_tf_samp, part_tf_samp, scores_tf_samp = depvox_gan_model.samples_generator( visual_size=batch_size) sess = tf.InteractiveSession() saver = tf.train.Saver() # Restore variables from disk. 
saver.restore(sess, chckpt_path) print("...Weights restored.") if mode == 'recons': # evaluation for reconstruction voxel_test, surf_test, part_test, num, data_paths = scene_model_id_pair_test( dataset_portion=cfg.TRAIN.DATASET_PORTION) # Evaluation masks if cfg.TYPE_TASK == 'scene': # occluded region """ space_effective = np.where(voxel_test > -1, 1, 0) * np.where( part_test > -1, 1, 0) voxel_test *= space_effective part_test *= space_effective """ # occluded region part_test[part_test < -1] = 0 surf_test[surf_test < 0] = 0 voxel_test[voxel_test < 0] = 0 num = voxel_test.shape[0] print("test voxels loaded") from progressbar import ProgressBar pbar = ProgressBar() for i in pbar(np.arange(int(num / batch_size))): bth_tsdf = part_test[i * batch_size:i * batch_size + batch_size] bth_surf = surf_test[i * batch_size:i * batch_size + batch_size] bth_voxel = voxel_test[i * batch_size:i * batch_size + batch_size] bth_pd_surf, bth_pd_full, bth_pd_part, bth_part_enc_Z, bth_comp_gt, bth_pd_comp, bth_ssc = sess.run( [ surf_dec_tf, full_dec_tf, part_dec_tf, z_enc_tf, comp_gt_tf, comp_dec_tf, ssc_tf ], feed_dict={ part_tf: bth_tsdf, surf_tf: bth_surf, full_tf: bth_voxel }) if i == 0: pd_part = bth_pd_part pd_surf = bth_pd_surf pd_full = bth_pd_full pd_ssc = bth_ssc part_enc_Z = bth_part_enc_Z comp_gt = bth_comp_gt pd_comp = bth_pd_comp else: pd_part = np.concatenate((pd_part, bth_pd_part), axis=0) pd_surf = np.concatenate((pd_surf, bth_pd_surf), axis=0) pd_full = np.concatenate((pd_full, bth_pd_full), axis=0) pd_ssc = np.concatenate((pd_ssc, bth_ssc), axis=0) part_enc_Z = np.concatenate((part_enc_Z, bth_part_enc_Z), axis=0) comp_gt = np.concatenate((comp_gt, bth_comp_gt), axis=0) pd_comp = np.concatenate((pd_comp, bth_pd_comp), axis=0) print("forwarded") # For visualization bin_file = np.uint8(voxel_test) bin_file.tofile(save_path + '/scene.bin') sdf_volume = np.round(10 * np.abs(np.array(part_test))) observed = np.array(part_test) if cfg.TYPE_TASK == 'scene': observed = 
np.abs(observed) observed *= 10 observed -= 7 observed = np.round(observed) pd_part = np.abs(pd_part) pd_part *= 10 pd_part -= 7 elif cfg.TYPE_TASK == 'object': observed = np.clip(observed, 0, 1) pd_part = np.clip(pd_part, 0, 1) sdf_volume.astype('uint8').tofile(save_path + '/surface.bin') pd_part.astype('uint8').tofile(save_path + '/dec_part.bin') depsem_gt = np.multiply(voxel_test, np.clip(observed, 0, 1)) if cfg.TYPE_TASK == 'scene': depsem_gt[depsem_gt < 0] = 0 depsem_gt.astype('uint8').tofile(save_path + '/depth_seg_scene.bin') # decoded do_save_pcd = True if do_save_pcd is True: results_pcds = np.argmax(pd_ssc, axis=4) for i in range(np.shape(results_pcds)[0]): pcd_idx = np.where(results_pcds[i] > 0) pts_coord = np.float32(np.transpose(pcd_idx)) / 80 - 0.5 pts_color = matplotlib.cm.Paired( np.float32(results_pcds[i][pcd_idx]) / 11 - 0.5 / 11) output_name = os.path.join('results_pcds', '%s.pcd' % data_paths[i][1][:-4]) output_pcds = np.concatenate((pts_coord, pts_color[:, 0:3]), -1) save_pcd(output_name, output_pcds) np.argmax(pd_ssc, axis=4).astype('uint8').tofile(save_path + '/dec_ssc.bin') error = np.array( np.clip(np.argmax(pd_ssc, axis=4), 0, 1) + np.argmax(comp_gt, axis=4) * 2) error.astype('uint8').tofile(save_path + '/dec_ssc_error.bin') np.argmax(pd_surf, axis=4).astype('uint8').tofile(save_path + '/dec_surf.bin') error = np.array( np.clip(np.argmax(pd_surf, axis=4), 0, 1) + np.argmax(comp_gt, axis=4) * 2) error.astype('uint8').tofile(save_path + '/dec_surf_error.bin') np.argmax(pd_full, axis=4).astype('uint8').tofile(save_path + '/dec_full.bin') error = np.array( np.clip(np.argmax(pd_full, axis=4), 0, 1) + np.argmax(comp_gt, axis=4) * 2) error.astype('uint8').tofile(save_path + '/dec_full_error.bin') np.argmax(pd_comp, axis=4).astype('uint8').tofile(save_path + '/dec_complete.bin') np.argmax(comp_gt, axis=4).astype('uint8').tofile(save_path + '/complete_gt.bin') # reconstruction and generation from normal distribution evaluation # generator from 
random distribution if discriminative is True: np.save(save_path + '/decode_z.npy', part_enc_Z) sample_times = 10 for j in np.arange(sample_times): gaussian_samp = np.random.normal( size=(batch_size, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z)).astype(np.float32) z_comp_rnd, z_surf_rnd, z_full_rnd, z_part_rnd, scores_samp = sess.run( [ comp_tf_samp, surf_tf_samp, full_tf_samp, part_tf_samp, scores_tf_samp ], feed_dict={Z_tf_samp: gaussian_samp}) if j == 0: z_comp_rnd_all = z_comp_rnd z_part_rnd_all = z_part_rnd z_surf_rnd_all = z_surf_rnd z_full_rnd_all = z_full_rnd else: z_comp_rnd_all = np.concatenate( [z_comp_rnd_all, z_comp_rnd], axis=0) z_part_rnd_all = np.concatenate( [z_part_rnd_all, z_part_rnd], axis=0) z_surf_rnd_all = np.concatenate( [z_surf_rnd_all, z_surf_rnd], axis=0) z_full_rnd_all = np.concatenate( [z_full_rnd_all, z_full_rnd], axis=0) print('Discrim score: ' + colored(np.mean(scores_samp), 'blue')) gaussian_samp.astype('float32').tofile(save_path + '/sample_z.bin') np.argmax(z_comp_rnd_all, axis=4).astype('uint8').tofile(save_path + '/gen_comp.bin') np.argmax(z_surf_rnd_all, axis=4).astype('uint8').tofile(save_path + '/gen_surf.bin') np.argmax(z_full_rnd_all, axis=4).astype('uint8').tofile(save_path + '/gen_full.bin') if cfg.TYPE_TASK == 'scene': z_part_rnd_all = np.abs(z_part_rnd_all) z_part_rnd_all *= 10 z_part_rnd_all -= 7 elif cfg.TYPE_TASK == 'object': z_part_rnd_all[z_part_rnd_all <= 0.4] = 0 z_part_rnd_all[z_part_rnd_all > 0.4] = 1 z_part_rnd = np.squeeze(z_part_rnd) z_part_rnd_all.astype('uint8').tofile(save_path + '/gen_part.bin') print("voxels saved") # numerical evalutation iou_eval = True if iou_eval is True: # completion print(colored("Completion:", 'red')) on_gt = comp_gt pd_max = np.argmax(pd_comp, axis=4) on_pd = onehot(pd_max, 2) IoU_comp = np.zeros([2 + 1]) AP_comp = np.zeros([2 + 1]) IoU_comp = IoU(on_gt, on_pd, [vox_shape[0], vox_shape[1], vox_shape[2], 2]) # depth segmentation print(colored("Segmentation:", 
'red')) print(colored("encoded", 'cyan')) on_gt = onehot(depsem_gt, vox_shape[3]) on_pd = np.multiply( onehot(np.argmax(pd_ssc, axis=4), vox_shape[3]), np.expand_dims(np.clip(observed, 0, 1), -1)) # IoUs = np.zeros([vox_shape[3] + 1]) IoU_temp = IoU(on_gt, on_pd, vox_shape) IoU_all = np.expand_dims(IoU_temp, axis=1) print(colored("decoded", 'cyan')) on_pd = np.multiply( onehot(np.argmax(pd_surf, axis=4), vox_shape[3]), np.expand_dims(np.clip(observed, 0, 1), -1)) IoU_temp = IoU(on_gt, on_pd, vox_shape, IoU_compared=IoU_all[:, -1]) IoU_all = np.concatenate( (IoU_all, np.expand_dims(IoU_temp, axis=1)), axis=1) print(colored("solidly decoded", 'cyan')) on_pd = np.multiply( onehot(np.argmax(pd_full, axis=4), vox_shape[3]), np.expand_dims(np.clip(observed, 0, 1), -1)) IoU_temp = IoU(on_gt, on_pd, vox_shape, IoU_compared=IoU_all[:, -1]) IoU_all = np.concatenate( (IoU_all, np.expand_dims(IoU_temp, axis=1)), axis=1) # volume segmentation print(colored("Semantic Completion:", 'red')) on_surf_gt = onehot(surf_test, vox_shape[3]) on_gt = onehot(voxel_test, vox_shape[3]) print(colored("encoded", 'cyan')) on_pd = onehot(np.argmax(pd_ssc, axis=4), vox_shape[3]) IoU_temp = IoU(on_gt, on_pd, vox_shape) IoU_all = np.concatenate( (IoU_all, np.expand_dims(IoU_temp, axis=1)), axis=1) print(colored("decoded", 'cyan')) on_pd = onehot(np.argmax(pd_surf, axis=4), vox_shape[3]) IoU_temp = IoU(on_gt, on_pd, vox_shape, IoU_compared=IoU_all[:, -1]) IoU_all = np.concatenate( (IoU_all, np.expand_dims(IoU_temp, axis=1)), axis=1) print(colored("solidly decoded", 'cyan')) on_pd = onehot(np.argmax(pd_full, axis=4), vox_shape[3]) IoU_temp = IoU(on_gt, on_pd, vox_shape, IoU_compared=IoU_all[:, -1]) IoU_all = np.concatenate( (IoU_all, np.expand_dims(IoU_temp, axis=1)), axis=1) np.savetxt(save_path + '/IoU.csv', np.transpose(IoU_all[1:] * 100), delimiter=" & ", fmt='%2.1f') # interpolation evaluation if mode == 'interpolate': interpolate_num = 8 #interpolatioin latent vectores decode_z = 
np.load(save_path + '/decode_z.npy') print(save_path) decode_z = decode_z[20:20 + batch_size] for l in np.arange(batch_size): for r in np.arange(batch_size): if l != r: print l, r base_num_left = l base_num_right = r left = np.reshape(decode_z[base_num_left], [ 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) right = np.reshape(decode_z[base_num_right], [ 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) duration = (right - left) / (interpolate_num - 1) # left is the reference sample and Z_np_samp is the remaining samples if base_num_left == 0: Z_np_samp = decode_z[1:] elif base_num_left == batch_size - 1: Z_np_samp = decode_z[:batch_size - 1] else: Z_np_samp_before = np.reshape( decode_z[:base_num_left], [ base_num_left, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) Z_np_samp_after = np.reshape( decode_z[base_num_left + 1:], [ batch_size - base_num_left - 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) Z_np_samp = np.concatenate( [Z_np_samp_before, Z_np_samp_after], axis=0) for i in np.arange(interpolate_num): if i == 0: Z = copy.copy(left) interpolate_z = copy.copy(Z) else: Z = Z + duration interpolate_z = np.concatenate([interpolate_z, Z], axis=0) # Z_np_samp is used to fill up the batch gaussian_samp = np.concatenate([Z, Z_np_samp], axis=0) pd_full_rnd, pd_part_rnd = sess.run( [full_tf_samp, part_tf_samp], feed_dict={Z_tf_samp: gaussian_samp}) interpolate_vox = np.reshape(pd_full_rnd[0], [ 1, vox_shape[0], vox_shape[1], vox_shape[2], vox_shape[3] ]) interpolate_part = np.reshape(pd_part_rnd[0], [ 1, vox_shape[0], vox_shape[1], vox_shape[2], com_shape[3] ]) if i == 0: pd_full = interpolate_vox pd_part = interpolate_part else: pd_full = np.concatenate( [pd_full, interpolate_vox], axis=0) pd_part = np.concatenate( [pd_part, interpolate_part], axis=0) interpolate_z.astype('uint8').tofile( save_path + '/interpolate/interpolation_z' + str(l) + '-' + str(r) + '.bin') full_models_cat = 
np.argmax(pd_full, axis=4) full_models_cat.astype('uint8').tofile( save_path + '/interpolate/interpolation_f' + str(l) + '-' + str(r) + '.bin') if cfg.TYPE_TASK == 'scene': pd_part = np.abs(pd_part) pd_part *= 10 pd_part -= 7 elif cfg.TYPE_TASK == 'object': pd_part = np.argmax(pd_part, axis=4) pd_part.astype('uint8').tofile( save_path + '/interpolate/interpolation_p' + str(l) + '-' + str(r) + '.bin') print("voxels saved") # add noise evaluation if mode == 'noise': decode_z = np.load(save_path + '/decode_z.npy') decode_z = decode_z[:batch_size] noise_num = 10 for base_num in np.arange(batch_size): print base_num base = np.reshape(decode_z[base_num], [ 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) eps = np.random.normal(size=(noise_num - 1, dim_z)).astype(np.float32) if base_num == 0: Z_np_samp = decode_z[1:] elif base_num == batch_size - 1: Z_np_samp = decode_z[:batch_size - 1] else: Z_np_samp_before = np.reshape(decode_z[:base_num], [ base_num, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) Z_np_samp_after = np.reshape(decode_z[base_num + 1:], [ batch_size - base_num - 1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z ]) Z_np_samp = np.concatenate([Z_np_samp_before, Z_np_samp_after], axis=0) for c in np.arange(start_vox_size[0]): for l in np.arange(start_vox_size[1]): for d in np.arange(start_vox_size[2]): for i in np.arange(noise_num): if i == 0: Z = copy.copy(base) noise_z = copy.copy(Z) else: Z = copy.copy(base) Z[0, c, l, d, :] += eps[i - 1] noise_z = np.concatenate([noise_z, Z], axis=0) gaussian_samp = np.concatenate([Z, Z_np_samp], axis=0) pd_full_rnd = sess.run( full_tf_samp, feed_dict={Z_tf_samp: gaussian_samp}) """ refined_voxs_rnd = sess.run( sample_refine_full_tf, feed_dict={ sample_full_tf: pd_full_rnd }) """ noise_vox = np.reshape(pd_full_rnd[0], [ 1, vox_shape[0], vox_shape[1], vox_shape[2], vox_shape[3] ]) if i == 0: pd_full = noise_vox else: pd_full = np.concatenate([pd_full, noise_vox], 
axis=0) np.save( save_path + '/noise_z' + str(base_num) + '_' + str(c) + str(l) + str(d) + '.npy', noise_z) full_models_cat = np.argmax(pd_full, axis=4) np.save( save_path + '/noise' + str(base_num) + '_' + str(c) + str(l) + str(d) + '.npy', full_models_cat) print("voxels saved")
def train(self, iterations, batch_size=128, sample_interval=100, save_model_interval=100,
          train_D_iters=1, train_G_iters=1, img_dir='./', model_dir='./'):
    """Alternate discriminator and generator updates for ``iterations`` rounds.

    Each round runs ``train_D_iters`` updates of ``self.D`` on three kinds of
    batches (real image + its own digit, generated image + requested digit,
    real image + a digit produced by ``exclude``), then ``train_G_iters``
    updates of ``self.combined``. Losses are pushed to ``self.tb`` and
    printed; sample tables and model snapshots are written at the given
    intervals (an interval <= 0 disables the corresponding output).

    Args:
        iterations: number of outer training rounds (numbered from 1).
        batch_size: samples drawn per update.
        sample_interval: write a sample table every this many rounds.
        save_model_interval: save D, G and G_mask every this many rounds.
        train_D_iters: discriminator updates per round.
        train_G_iters: generator updates per round.
        img_dir: directory receiving ``<itr>.png`` sample tables.
        model_dir: directory receiving ``*.hdf5`` snapshots.
    """
    images, labels = self.imgs, self.digits
    n_images = images.shape[0]
    real_target = np.ones((batch_size, 1))
    fake_target = np.zeros((batch_size, 1))

    for out_dir in (img_dir, model_dir):
        os.makedirs(out_dir, exist_ok=True)

    for itr in range(1, iterations + 1):
        # ---- discriminator -------------------------------------------
        for _ in range(train_D_iters):
            # Draw order matters for reproducibility under a fixed seed.
            real_idx = np.random.randint(0, n_images, batch_size)
            fake_idx = np.random.randint(0, n_images, batch_size)
            requested_digits = onehot(np.random.randint(0, 10, batch_size), 10)
            wrong_digits = onehot(exclude(labels[real_idx]), 10)

            real_batch = images[real_idx]
            true_digits = onehot(labels[real_idx], 10)
            generated_batch = self.G.predict([images[fake_idx], requested_digits])

            # real image paired with its own digit -> target 1
            d_loss_real = self.D.train_on_batch([real_batch, true_digits], real_target)
            # generated image paired with the requested digit -> target 0
            d_loss_fake = self.D.train_on_batch([generated_batch, requested_digits], fake_target)
            # real image paired with a mismatching digit -> target 0
            d_loss_fake2 = self.D.train_on_batch([real_batch, wrong_digits], fake_target)

        self.tb.on_epoch_end(itr, {
            'D_loss_real': d_loss_real[0],
            'D_loss_fake': d_loss_fake[0],
            'D_loss_fake2': d_loss_fake2[0],
        })

        # ---- generator -----------------------------------------------
        for _ in range(train_G_iters):
            source_idx = np.random.randint(0, n_images, batch_size)
            requested_digits = onehot(np.random.randint(0, 10, batch_size), 10)
            g_loss = self.combined.train_on_batch(
                [images[source_idx], requested_digits], real_target)

        self.tb.on_epoch_end(itr, {'G_loss': g_loss[0]})

        # ---- periodic outputs ----------------------------------------
        sample_due = sample_interval > 0 and itr % sample_interval == 0
        if sample_due:
            plot_table(self.G, self.D, os.path.join(img_dir, f'{itr}.png'), save=True)

        snapshot_due = save_model_interval > 0 and itr % save_model_interval == 0
        if snapshot_due:
            os.makedirs(model_dir, exist_ok=True)
            for attr in ('D', 'G', 'G_mask'):
                getattr(self, attr).save(os.path.join(model_dir, f'{attr}{itr}.hdf5'))

        # ---- progress report -----------------------------------------
        print(f'{itr} [G loss: {g_loss[0]} | acc: {g_loss[1]}]')
        print(f'{itr} [D real: {d_loss_real[0]} | acc: {d_loss_real[1]}]')
        print(f'{itr} [D fake: {d_loss_fake[0]} | acc: {d_loss_fake[1]}]')
        print(f'{itr} [D fake2: {d_loss_fake2[0]} | acc: {d_loss_fake2[1]}]')
        print()

    self.tb.on_train_end(None)
def train(iterations=10000,
          batch_size=128,
          sample_interval=5,
          save_model_interval=100,
          train_D_iters=1,
          train_G_iters=1,
          D_lr=0.0001,
          G_lr=0.00001,
          img_dir='./imgs',
          model_dir='./models'):
    """Train the conditional generator/discriminator pair on MNIST.

    Every outer iteration runs ``train_D_iters`` full passes over the training
    loader updating the discriminator, then ``train_G_iters`` passes updating
    the generator, prints the averaged statistics, and periodically writes a
    figure of generated samples and a pickled generator.

    Args:
        iterations: number of outer iterations.
        batch_size: batch size of both data loaders.
        sample_interval: save a sample figure every this many iterations
            (values <= 0 disable sampling).
        save_model_interval: save the generator every this many iterations
            (values <= 0 disable saving).
        train_D_iters: discriminator passes per outer iteration.
        train_G_iters: generator passes per outer iteration.
        D_lr: Adam learning rate of the discriminator.
        G_lr: Adam learning rate of the generator.
        img_dir: output directory for ``<iter>.png`` figures.
        model_dir: output directory for ``<iter>.pkl`` generators.
    """

    def to_channels_first(batch):
        # Same two axis swaps the loop bodies applied inline; presumably
        # turns (N, H, W, C) batches into (N, C, H, W) -- TODO confirm the
        # layout produced by Dataset.
        batch = torch.transpose(batch, 1, 3)
        return torch.transpose(batch, 2, 3)

    imgs, digits, test_img, test_digits = load_mnist()
    dataset = Dataset(imgs, digits)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    dataset = Dataset(test_img, test_digits)
    test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # is_available is a function: without the call the bare attribute is
    # always truthy and CPU-only hosts crash in current_device().
    if torch.cuda.is_available():
        print(f"Using GPU {torch.cuda.current_device()}")
        device = "cuda"
    else:
        print("Using CPU...")
        device = "cpu"

    generator = Generator()
    generator = generator.float().to(device)
    discriminator = Discriminator()
    discriminator = discriminator.float().to(device)

    optimizer_G = torch.optim.Adam(generator.parameters(), lr=G_lr)
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=D_lr)
    loss_fn = nn.BCELoss()

    for iters in range(iterations):
        #======#
        # Dis  #
        #======#
        # Accumulators live outside the D-iteration loop so that dividing by
        # len(loader) * train_D_iters really averages over every batch seen.
        D_loss = 0
        D_real_acc = torch.tensor([0, 0]).float()
        D_fake_acc = torch.tensor([0, 0]).float()
        for _ in range(train_D_iters):
            discriminator.train()
            generator.eval()
            for batch_x, batch_y in loader:
                batch_x = to_channels_first(batch_x)
                match_c = onehot(batch_y, 10)
                unmatch_c = onehot(batch_y, 10, exclusive=True)
                noise = torch.randn(batch_x.size(0), 8, device=device)
                # Detach: only the discriminator is updated here, so there is
                # no need to back-propagate into the generator.
                fake_x = generator(batch_x.float().to(device),
                                   unmatch_c.float().to(device), noise).detach()
                real_x = batch_x
                valid_real, valid_num, valid, fake = target_generator(
                    batch_x.size(0), device)

                optimizer_D.zero_grad()
                real_match_pred = discriminator(real_x.float().to(device),
                                                match_c.float().to(device))
                real_unmatch_pred = discriminator(real_x.float().to(device),
                                                  unmatch_c.float().to(device))
                fake_match_pred = discriminator(fake_x.float().to(device),
                                                unmatch_c.float().to(device))
                fake_unmatch_pred = discriminator(fake_x.float().to(device),
                                                  match_c.float().to(device))

                loss1 = loss_fn(real_match_pred, valid)
                loss2 = loss_fn(real_unmatch_pred, valid_real)
                loss3 = loss_fn(fake_match_pred, valid_num)
                loss4 = loss_fn(fake_unmatch_pred, fake)
                loss = loss1 * 0.75 + loss2 * 0.75 + loss3 * 0.5 + loss4 * 1
                loss.backward()
                optimizer_D.step()

                D_real_acc += real_match_pred.cpu().detach().mean(dim=0)
                D_fake_acc += fake_unmatch_pred.cpu().detach().mean(dim=0)
                D_loss += loss.item()

        # max(..., 1) keeps the report printable when no batch was processed.
        d_steps = max(len(loader) * train_D_iters, 1)
        D_real_acc /= d_steps
        D_fake_acc /= d_steps
        D_loss /= d_steps
        print(
            f"iter: {iters} | D_loss: {D_loss} | real_acc: {D_real_acc.numpy()} | fake_acc: {D_fake_acc.numpy()}"
        )

        #======#
        # Gen  #
        #======#
        G_loss = 0
        G_acc = torch.tensor([0, 0]).float()
        for G_iters in range(train_G_iters):
            generator.train()
            discriminator.eval()
            for batch_x, batch_y in loader:
                optimizer_G.zero_grad()
                unmatch_c = onehot(batch_y, 10, exclusive=True)
                batch_x = to_channels_first(batch_x)
                valid_real, valid_num, valid, fake = target_generator(
                    batch_x.size(0), device)
                noise = torch.randn(batch_x.size(0), 8, device=device)
                fake_x = generator(batch_x.float().to(device),
                                   unmatch_c.float().to(device), noise)
                pred = discriminator(fake_x.float().to(device),
                                     unmatch_c.float().to(device))
                loss = loss_fn(pred, valid)
                loss.backward()
                optimizer_G.step()

                pred = pred.cpu().detach()
                G_loss += loss.item()
                G_acc += pred.float().mean(dim=0)

        g_steps = max(len(loader) * train_G_iters, 1)
        G_loss /= g_steps
        G_acc /= g_steps
        print(
            f"iter: {iters} | G_loss: {G_loss} | G_accuracy: {G_acc.detach().numpy()}"
        )

        # Interval guards mirror the other train() variants: <= 0 disables.
        if sample_interval > 0 and iters % sample_interval == 0:
            discriminator.eval()
            generator.eval()
            pick_list = []
            # Inference only: no autograd graph needed, and only the first
            # five batches are ever plotted, so stop after five picks.
            with torch.no_grad():
                for batch_x, batch_y in test_loader:
                    if len(pick_list) == 5:
                        break
                    batch_x = to_channels_first(batch_x)
                    unmatch_c = onehot(batch_y, 10, exclusive=True)
                    unmatch_digits = reverse_onehot(unmatch_c)
                    noise = torch.randn(batch_x.size(0), 8, device=device)
                    test_x = generator(batch_x.float().to(device),
                                       unmatch_c.float().to(device), noise)
                    n = random.randint(0, batch_x.size(0) - 1)
                    pick_list.append([
                        batch_x[n].cpu().detach().numpy(),
                        unmatch_digits[n].cpu().detach().numpy(),
                        test_x[n].cpu().detach().numpy()
                    ])

            fig, axs = plt.subplots(5, 2, figsize=(8, 6))
            fig.tight_layout()
            for no, (origin, ans, generated) in enumerate(pick_list):
                axs[no, 0].text(-20, -2, f'Answer: {ans}')
                axs[no, 0].imshow(origin[0, :, :], cmap='gray')
                axs[no, 0].axis('off')
                axs[no, 1].imshow(generated[0, :, :], cmap='gray')
                axs[no, 1].axis('off')
            os.makedirs(img_dir, exist_ok=True)
            fig.savefig(os.path.join(img_dir, f'{iters}.png'))
            plt.close(fig)

        if save_model_interval > 0 and iters % save_model_interval == 0:
            os.makedirs(model_dir, exist_ok=True)
            torch.save(generator, os.path.join(model_dir, f'{iters}.pkl'))
# argparse is used below but was missing from this import block.
import argparse
import random

import numpy as np
import theano
from theano import tensor as T

from models import gru
from models import lstm
import util

# Command line: path of the saved model, number of characters to generate,
# and an optional seed string to start the sequence from.
parser = argparse.ArgumentParser()
parser.add_argument('load', default='')
parser.add_argument('-tl', '--test_length', type=int, default=50)
parser.add_argument('-s', '--seed', default='')
args = parser.parse_args()

# The loaded model carries its own character <-> index vocabularies.
rnn_test = util.load_model(args.load)
char_to_ix = rnn_test.char_to_ix
ix_to_char = rnn_test.ix_to_char
chars = list(char_to_ix.keys())

# Without a seed, start from one random character of the vocabulary.
seq_test = args.seed
if seq_test == '':
    seq_test = random.choice(chars)

# Grow the sequence one character at a time: feed everything generated so
# far and append the character for the last prediction.
for _ in range(args.test_length):
    seq_test_oh = util.onehot(seq_test, char_to_ix)
    prediction = rnn_test.predict(seq_test_oh)
    seq_test += ix_to_char[prediction[-1]]

print(seq_test)
from model import build_discriminator_digit
from util import onehot, load_mnist
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam

# load_mnist() yields four arrays; the two label arrays are passed through
# onehot(..., 10) (presumably 10-way one-hot targets -- confirm in util).
xtr, ytr, xte, yte = load_mnist()
ytr, yte = [onehot(labels, 10) for labels in (ytr, yte)]

model = build_discriminator_digit()
model.compile(
    optimizer=Adam(lr=0.0002),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for 10 epochs; the
# checkpoint callback writes the model to outputs/D_digit.hdf5.
earlystopping = EarlyStopping(patience=10, mode='max', monitor='val_accuracy')
checkpoint = ModelCheckpoint('outputs/D_digit.hdf5', monitor='val_accuracy')

# 20% of the training data is held out for validation.
model.fit(
    xtr,
    ytr,
    validation_split=0.2,
    batch_size=128,
    epochs=2000,
    callbacks=[earlystopping, checkpoint],
)
def __getitem__(self, idx):
    """Build the sample dictionary for annotation ``idx``.

    The result always holds the url, id, subject/object descriptions
    (name, phrase embedding, normalised bbox, spatial vector ``t``), the
    one-hot predicate and its name. ``label`` is removed for the ``test``
    split. When ``self.load_image`` is set, three image tensors are added:
    ``bbox_mask``, ``bbox_img`` and ``full_img``.

    Bounding boxes are normalised as (b[0]/height, b[1]/height, b[2]/width,
    b[3]/width), i.e. the layout appears to be [y0, y1, x0, x1] -- the same
    layout as the full-image box ``[0, ih, 0, iw]`` used below.
    """
    args = self.args
    annot = self.annotations[idx]
    subj, obj = annot['subject'], annot['object']

    t_s = self._getT(subj['bbox'], obj['bbox'])
    t_o = self._getT(obj['bbox'], subj['bbox'])

    def describe(entity, t_vec):
        # First pair is scaled by the image height, second pair by the width.
        h_lo, h_hi, w_lo, w_hi = entity['bbox']
        return {
            'name': entity['name'],
            'embedding': phrase2vec(entity['name'], self.args.max_phrase_len, 300),
            'bbox': np.asarray([h_lo / annot['height'], h_hi / annot['height'],
                                w_lo / annot['width'], w_hi / annot['width']],
                               dtype=np.float32),
            't': np.asarray(t_vec, dtype=np.float32),
        }

    datum = {
        'url': annot['url'],
        '_id': annot['_id'],
        'subject': describe(subj, t_s),
        'object': describe(obj, t_o),
        'label': np.asarray([[annot['label']]], dtype=np.float32),
        'predicate': onehot(args.predicate_categories.index(annot['predicate']), 9),
        'predicate_name': annot['predicate'],
    }
    if self.split == 'test':
        del datum['label']

    if not self.load_image:
        return datum

    is_train = 'train' in self.split
    img = read_img(annot['url'], self.args.imagepath)
    ih, iw = img.shape[:2]

    # --- 2-channel box mask (subject, object), 32x32 -------------------
    mask_crop = (transforms.RandomResizedCrop(32, scale=(0.75, 0.85))
                 if is_train else transforms.CenterCrop(32))
    t_bbox = transforms.Compose([
        transforms.ToPILImage('RGB'),
        transforms.Pad(4, padding_mode='edge'),
        mask_crop,
        transforms.ToTensor(),
    ])
    mask_planes = [
        self._getDualMask(ih, iw, subj['bbox'], 32).astype(np.uint8),
        self._getDualMask(ih, iw, obj['bbox'], 32).astype(np.uint8),
        # third plane is a zero filler so the stack can pass through the
        # 'RGB' PIL conversion; it is dropped again by the [:2] slice
        np.zeros((32, 32), dtype=np.uint8),
    ]
    datum['bbox_mask'] = t_bbox(np.stack(mask_planes, 2))[:2].float() / 255.

    # --- crop around the enlarged union of both boxes, 224x224 ---------
    union_bbox = self.enlarge(
        self._getUnionBBox(subj['bbox'], obj['bbox'], ih, iw), 1.25, ih, iw)
    if is_train:
        crop_steps = [
            transforms.RandomResizedCrop(224, scale=(0.75, 0.85)),
            transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
        ]
    else:
        crop_steps = [transforms.CenterCrop(224)]
    t_bboximg = transforms.Compose(
        [transforms.ToPILImage('RGB')] + crop_steps + [transforms.ToTensor()])
    datum['bbox_img'] = t_bboximg(self._getAppr(img, union_bbox))

    # --- whole image, resized according to the model -------------------
    jitter = [transforms.ColorJitter(0.1, 0.1, 0.1, 0.05)] if is_train else []
    t_fullimg = transforms.Compose(
        [transforms.ToPILImage('RGB')] + jitter + [transforms.ToTensor()])
    img_size = {'vipcnn': 400, 'pprfcn': 720}.get(self.args.model, 224)
    datum['full_img'] = t_fullimg(self._getAppr(img, [0, ih, 0, iw], img_size))

    return datum
def __getitem__(self, idx):
    """Return the dictionary describing annotation ``idx``.

    Keys: ``url``, ``_id``, ``subject``, ``object``, ``label`` (dropped when
    ``self.split == "test"``), ``predicate`` and ``predicate_name``. With
    ``self.load_image`` set, ``bbox_mask``, ``bbox_img`` and ``full_img``
    tensors are added; random crops / colour jitter are used only when the
    split name contains ``"train"``.
    """
    args = self.args
    annot = self.annotations[idx]
    subject, target = annot["subject"], annot["object"]

    t_s = self._getT(subject["bbox"], target["bbox"])
    t_o = self._getT(target["bbox"], subject["bbox"])

    datum = {"url": annot["url"], "_id": annot["_id"]}
    for role, entity, t_vec in (("subject", subject, t_s), ("object", target, t_o)):
        # y-extent is normalised by the image height, x-extent by the width
        y_lo, y_hi, x_lo, x_hi = entity["bbox"]
        normalised = [
            y_lo / annot["height"],
            y_hi / annot["height"],
            x_lo / annot["width"],
            x_hi / annot["width"],
        ]
        datum[role] = {
            "name": entity["name"],
            "embedding": phrase2vec(entity["name"], self.args.max_phrase_len, 300),
            "bbox": np.asarray(normalised, dtype=np.float32),
            "t": np.asarray(t_vec, dtype=np.float32),
        }
    datum["label"] = np.asarray([[annot["label"]]], dtype=np.float32)
    datum["predicate"] = onehot(args.predicate_categories.index(annot["predicate"]), 9)
    datum["predicate_name"] = annot["predicate"]

    if self.split == "test":
        del datum["label"]

    if self.load_image:
        training = "train" in self.split
        img = read_img(annot["url"], self.args.imagepath)
        ih, iw = img.shape[:2]

        # Box masks: subject plane, object plane, plus a zero plane so the
        # array has three channels for the "RGB" PIL conversion.
        mask_pipeline = [
            transforms.ToPILImage("RGB"),
            transforms.Pad(4, padding_mode="edge"),
        ]
        if training:
            mask_pipeline.append(transforms.RandomResizedCrop(32, scale=(0.75, 0.85)))
        else:
            mask_pipeline.append(transforms.CenterCrop(32))
        mask_pipeline.append(transforms.ToTensor())
        t_bbox = transforms.Compose(mask_pipeline)

        subject_mask = self._getDualMask(ih, iw, subject["bbox"], 32).astype(np.uint8)
        object_mask = self._getDualMask(ih, iw, target["bbox"], 32).astype(np.uint8)
        filler = np.zeros((32, 32), dtype=np.uint8)
        bbox_mask = np.stack([subject_mask, object_mask, filler], 2)
        # keep only the two real planes after the transform
        bbox_mask = t_bbox(bbox_mask)[:2].float() / 255.0
        datum["bbox_mask"] = bbox_mask

        # Appearance crop around the union box enlarged by a factor of 1.25.
        raw_union = self._getUnionBBox(subject["bbox"], target["bbox"], ih, iw)
        union_bbox = self.enlarge(raw_union, 1.25, ih, iw)

        crop_pipeline = [transforms.ToPILImage("RGB")]
        if training:
            crop_pipeline.append(transforms.RandomResizedCrop(224, scale=(0.75, 0.85)))
            crop_pipeline.append(transforms.ColorJitter(0.1, 0.1, 0.1, 0.05))
        else:
            crop_pipeline.append(transforms.CenterCrop(224))
        crop_pipeline.append(transforms.ToTensor())
        t_bboximg = transforms.Compose(crop_pipeline)
        datum["bbox_img"] = t_bboximg(self._getAppr(img, union_bbox))

        # Full image; only the training split gets colour jitter.
        full_pipeline = [transforms.ToPILImage("RGB")]
        if training:
            full_pipeline.append(transforms.ColorJitter(0.1, 0.1, 0.1, 0.05))
        full_pipeline.append(transforms.ToTensor())
        t_fullimg = transforms.Compose(full_pipeline)

        model_name = self.args.model
        img_size = 400 if model_name == "vipcnn" else (720 if model_name == "pprfcn" else 224)
        datum["full_img"] = t_fullimg(self._getAppr(img, [0, ih, 0, iw], img_size))

    return datum
def train(self, iterations, batch_size=128, sample_interval=100, save_model_interval=100,
          train_D_iters=1, train_G_iters=1, img_dir='./', model_dir='./'):
    """Alternate ``self.D_realness`` and ``self.combined`` updates.

    Each round performs ``train_D_iters`` updates of the realness
    discriminator (real batch -> ones, generated batch -> zeros) followed by
    ``train_G_iters`` updates of the combined model, whose targets are the
    "valid" labels and the requested digits. The last losses of the round
    are printed as tables; samples and model snapshots are written at the
    configured intervals (an interval <= 0 disables that output).

    Args:
        iterations: number of rounds (numbered from 1).
        batch_size: samples drawn per update.
        sample_interval: write a sample table every this many rounds.
        save_model_interval: save D, G and G_mask every this many rounds.
        train_D_iters: discriminator updates per round.
        train_G_iters: generator updates per round.
        img_dir: directory for ``<itr>.png`` sample tables.
        model_dir: directory for ``*.hdf5`` snapshots.
    """
    images, labels = self.imgs, self.digits
    n_images = images.shape[0]
    ones = np.ones((batch_size, 1))
    zeros = np.zeros((batch_size, 1))

    for out_dir in (img_dir, model_dir):
        os.makedirs(out_dir, exist_ok=True)

    for itr in range(1, iterations + 1):
        # ---- D_realness ----------------------------------------------
        for _ in range(train_D_iters):
            real_idx = np.random.randint(0, n_images, batch_size)
            fake_idx = np.random.randint(0, n_images, batch_size)
            requested_digits = onehot(np.random.randint(0, 10, batch_size), 10)
            # The next two values are not consumed below; the calls are
            # retained so the original call sequence is unchanged.
            unmatch_digits = onehot(exclude(labels[real_idx]), 10)
            real_batch = images[real_idx]
            real_digits = onehot(labels[real_idx], 10)

            generated_batch = self.G.predict([images[fake_idx], requested_digits])

            d_loss_real = self.D_realness.train_on_batch(real_batch, ones)
            d_loss_fake = self.D_realness.train_on_batch(generated_batch, zeros)

        # ---- generator -----------------------------------------------
        for _ in range(train_G_iters):
            source_idx = np.random.randint(0, n_images, batch_size)
            requested_digits = onehot(np.random.randint(0, 10, batch_size), 10)
            g_loss = self.combined.train_on_batch(
                [images[source_idx], requested_digits],
                [ones, requested_digits])

        # ---- report --------------------------------------------------
        print(f'--------\nEPOCH {itr}\n--------')
        d_table = pd.DataFrame({
            'D_realness': self.D_realness.metrics_names,
            'real': d_loss_real,
            'fake': d_loss_fake
        })
        print(d_table.to_string(index=False))
        g_table = pd.DataFrame({
            'combined': self.combined.metrics_names,
            'value': g_loss,
        })
        print(g_table.to_string(index=False))
        print()

        # ---- periodic outputs ----------------------------------------
        sample_due = sample_interval > 0 and itr % sample_interval == 0
        if sample_due:
            plot_table(self.G, self.D, os.path.join(img_dir, f'{itr}.png'), save=True)

        snapshot_due = save_model_interval > 0 and itr % save_model_interval == 0
        if snapshot_due:
            for attr in ('D', 'G', 'G_mask'):
                getattr(self, attr).save(os.path.join(model_dir, f'{attr}{itr}.hdf5'))

    self.tb.on_train_end(None)
def evaluate(batch_size, checknum, mode):
    """Restore a checkpoint of the FCR_aGAN model and run one evaluation mode.

    Args:
        batch_size: batch size the graph is built with; also the number of
            latent codes used in the 'interpolate' and 'noise' modes.
        checknum: suffix appended to ``cfg_test.DIR.CHECK_PT_PATH`` to form
            the checkpoint path.
        mode: 'recons' (random generation, reconstruction of the test set,
            IoU / AP tables), 'interpolate' (latent interpolation between
            pairs of stored codes) or 'noise' (perturbation of single latent
            cells). Any other value only restores the weights.

    All outputs are written below ``cfg_test.DIR.EVAL_PATH``. The
    'interpolate' and 'noise' modes read ``decode_z.npy``, which the
    'recons' mode writes. Note: this function uses Python 2 print statements.
    """

    # Network / data configuration.
    n_vox = cfg_test.CONST.N_VOX
    dim = cfg_test.NET.DIM
    vox_shape = [n_vox[0], n_vox[1], n_vox[2], dim[4]]
    dim_z = cfg_test.NET.DIM_Z
    start_vox_size = cfg_test.NET.START_VOX
    kernel = cfg_test.NET.KERNEL
    stride = cfg_test.NET.STRIDE
    freq = cfg_test.CHECK_FREQ  # only referenced by the commented-out suffix below
    refine_ch = cfg_test.NET.REFINE_CH
    refine_kernel = cfg_test.NET.REFINE_KERNEL

    save_path = cfg_test.DIR.EVAL_PATH
    chckpt_path = cfg_test.DIR.CHECK_PT_PATH + str(
        checknum)  #+ '-' + str(checknum * freq)

    fcr_agan_model = FCR_aGAN(
        batch_size=batch_size,
        vox_shape=vox_shape,
        dim_z=dim_z,
        dim=dim,
        start_vox_size=start_vox_size,
        kernel=kernel,
        stride=stride,
        refine_ch=refine_ch,
        refine_kernel=refine_kernel,
    )

    # build_model() returns 22 tensors; only z_enc_tf, vox_gen_decode_tf and
    # tsdf_tf are used in this function.
    Z_tf, z_enc_tf, vox_tf, vox_gen_tf, vox_gen_decode_tf, vox_refine_dec_tf, vox_refine_gen_tf,\
    recons_loss_tf, code_encode_loss_tf, gen_loss_tf, discrim_loss_tf, recons_loss_refine_tfs, gen_loss_refine_tf, discrim_loss_refine_tf,\
    cost_enc_tf, cost_code_tf, cost_gen_tf, cost_discrim_tf, cost_gen_ref_tf, cost_discrim_ref_tf, summary_tf,\
    tsdf_tf = fcr_agan_model.build_model()
    # The string below is disabled code kept by the original author.
    """ z_enc_dep_tf, dep_tf, vox_gen_decode_dep_tf,\ recons_dep_loss_tf, code_encode_dep_loss_tf, gen_dep_loss_tf, discrim_dep_loss_tf,\ cost_enc_dep_tf, cost_code_dep_tf, cost_gen_dep_tf, cost_discrim_dep_tf, cost_code_compare_tf,\ """
    # Sampling graph (latent -> voxels) and refinement graph (voxels -> refined voxels).
    Z_tf_sample, vox_tf_sample = fcr_agan_model.samples_generator(
        visual_size=batch_size)
    sample_vox_tf, sample_refine_vox_tf = fcr_agan_model.refine_generator(
        visual_size=batch_size)
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    # Restore variables from disk.
    saver.restore(sess, chckpt_path)

    print("...Weights restored.")

    if mode == 'recons':
        # Reconstruction and generation-from-normal-distribution evaluation.
        # Generate from random latent codes: draw batch_size codes one by one
        # and stack them along axis 0.
        for i in np.arange(batch_size):
            Z_np_sample = np.random.normal(size=(1, start_vox_size[0], start_vox_size[1], start_vox_size[2], dim_z)).astype(np.float32)
            if i == 0:
                Z_var_np_sample = Z_np_sample
            else:
                Z_var_np_sample = np.concatenate(
                    (Z_var_np_sample, Z_np_sample), axis=0)
        np.save(save_path + '/sample_z.npy', Z_var_np_sample)

        generated_voxs_fromrand = sess.run(
            vox_tf_sample, feed_dict={Z_tf_sample: Z_var_np_sample})
        # argmax over axis 4 turns per-class scores into class indices.
        vox_models_cat = np.argmax(generated_voxs_fromrand, axis=4)
        np.save(save_path + '/generate.npy', vox_models_cat)

        refined_voxs_fromrand = sess.run(
            sample_refine_vox_tf,
            feed_dict={sample_vox_tf: generated_voxs_fromrand})
        vox_models_cat = np.argmax(refined_voxs_fromrand, axis=4)
        np.save(save_path + '/generate_refine.npy', vox_models_cat)

        # Evaluation of reconstruction on the test set.
        voxel_test, tsdf_test, num = scene_model_id_pair_test(
            dataset_portion=cfg_test.TRAIN.DATASET_PORTION)
        # The returned count is overridden by the actual number of test samples.
        num = voxel_test.shape[0]
        print("test voxels loaded")
        # Only full batches are forwarded.
        # NOTE(review): when num is not a multiple of batch_size the outputs
        # hold fewer than num samples, while the metric loops below index up
        # to num -- confirm the test set size is always a multiple.
        for i in np.arange(int(num / batch_size)):
            batch_voxel_test = voxel_test[i * batch_size:i * batch_size +
                                          batch_size]
            # depth--start
            """ batch_depth_test = depth_test[i*batch_size:i*batch_size+batch_size] """
            # depth--end
            batch_tsdf_test = tsdf_test[i * batch_size:i * batch_size +
                                        batch_size]

            # Encode the TSDF batch and decode it back to voxels.
            batch_generated_voxs, batch_enc_Z = sess.run(
                [vox_gen_decode_tf, z_enc_tf],
                feed_dict={tsdf_tf: batch_tsdf_test})
            # depth--start
            """ batch_dep_generated_voxs, batch_enc_dep_Z = sess.run( [vox_gen_decode_dep_tf, z_enc_dep_tf], feed_dict={dep_tf:batch_depth_test}) """
            # depth--end
            batch_refined_vox = sess.run(
                sample_refine_vox_tf,
                feed_dict={sample_vox_tf: batch_generated_voxs})

            # Accumulate batch results along axis 0.
            if i == 0:
                generated_voxs = batch_generated_voxs
                # generated_deps = batch_dep_generated_voxs
                refined_voxs = batch_refined_vox
                enc_Z = batch_enc_Z
            else:
                generated_voxs = np.concatenate(
                    (generated_voxs, batch_generated_voxs), axis=0)
                # generated_deps = np.concatenate((generated_deps, batch_dep_generated_voxs), axis=0)
                refined_voxs = np.concatenate(
                    (refined_voxs, batch_refined_vox), axis=0)
                enc_Z = np.concatenate((enc_Z, batch_enc_Z), axis=0)

        print("forwarded")

        # Ground truth.
        vox_models_cat = voxel_test
        np.save(save_path + '/real.npy', vox_models_cat)
        tsdf_models_cat = tsdf_test
        np.save(save_path + '/tsdf.npy', tsdf_models_cat)

        # Decoded (reconstructed) voxels.
        vox_models_cat = np.argmax(generated_voxs, axis=4)
        np.save(save_path + '/recons.npy', vox_models_cat)
        """ vox_models_cat = np.argmax(generated_deps, axis=4) np.save(save_path + '/gens_dep.npy', vox_models_cat) """
        vox_models_cat = np.argmax(refined_voxs, axis=4)
        np.save(save_path + '/recons_refine.npy', vox_models_cat)
        # Encoded latent codes; the 'interpolate' and 'noise' modes load this file.
        np.save(save_path + '/decode_z.npy', enc_Z)

        print("voxels saved")

        # Numerical evaluation.
        on_real = onehot(voxel_test, vox_shape[3])
        on_recons = onehot(np.argmax(generated_voxs, axis=4), vox_shape[3])
        # on_gens_dep = onehot(np.argmax(generated_deps, axis=4),vox_shape[3])

        # calc_IoU: one entry per class plus a final entry over classes 1..end.
        # NOTE(review): 'mother' sums prediction + ground truth, so voxels in
        # the intersection are counted twice; that is |A| + |B|, not the
        # union -- confirm this is the intended score.
        # NOTE(review): if a class never occurs, count stays 0 and the
        # division below fails; under Python 2 integer inputs would also be
        # floor-divided -- depends on the dtype onehot() returns.
        IoU_class = np.zeros([vox_shape[3] + 1])
        for class_n in np.arange(vox_shape[3]):
            on_recons_ = on_recons[:, :, :, :, class_n]
            on_real_ = on_real[:, :, :, :, class_n]
            mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3))
            child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3))
            count = 0
            IoU_element = 0
            for i in np.arange(num):
                # Samples where the class is absent from both volumes are skipped.
                if mother[i] != 0:
                    IoU_element += child[i] / mother[i]
                    count += 1
            IoU_calc = np.round(IoU_element / count, 3)
            IoU_class[class_n] = IoU_calc
            print 'IoU class ' + str(class_n) + '=' + str(IoU_calc)

        # Same score over all classes except class 0.
        on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]]
        on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
        mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4))
        child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4))
        count = 0
        IoU_element = 0
        for i in np.arange(num):
            if mother[i] != 0:
                IoU_element += child[i] / mother[i]
                count += 1
        IoU_calc = np.round(IoU_element / count, 3)
        IoU_class[vox_shape[3]] = IoU_calc
        print 'IoU all =' + str(IoU_calc)
        np.savetxt(save_path + '/IoU.csv', IoU_class, delimiter=",")

        # calc_AP: average precision of the raw network scores per class.
        # NOTE(review): samples without positives are skipped but the sum is
        # still divided by num -- confirm this averaging is intended.
        AP_class = np.zeros([vox_shape[3] + 1])
        for class_n in np.arange(vox_shape[3]):
            on_recons_ = generated_voxs[:, :, :, :, class_n]
            on_real_ = on_real[:, :, :, :, class_n]

            AP = 0.
            for i in np.arange(num):
                y_true = np.reshape(on_real_[i], [-1])
                y_scores = np.reshape(on_recons_[i], [-1])
                if np.sum(y_true) > 0.:
                    AP += average_precision_score(y_true, y_scores)
            AP = np.round(AP / num, 3)
            AP_class[class_n] = AP
            print 'AP class ' + str(class_n) + '=' + str(AP)

        # AP over all classes except class 0, flattened together.
        on_recons_ = generated_voxs[:, :, :, :, 1:vox_shape[3]]
        on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
        AP = 0.
        for i in np.arange(num):
            y_true = np.reshape(on_real_[i], [-1])
            y_scores = np.reshape(on_recons_[i], [-1])
            if np.sum(y_true) > 0.:
                AP += average_precision_score(y_true, y_scores)
        AP = np.round(AP / num, 3)
        AP_class[vox_shape[3]] = AP
        print 'AP all =' + str(AP)
        np.savetxt(save_path + '/AP.csv', AP_class, delimiter=",")

        # Refine: the same two tables computed on the refined voxels.
        # calc_IoU
        on_recons = onehot(np.argmax(refined_voxs, axis=4), vox_shape[3])

        IoU_class = np.zeros([vox_shape[3] + 1])
        for class_n in np.arange(vox_shape[3]):
            on_recons_ = on_recons[:, :, :, :, class_n]
            on_real_ = on_real[:, :, :, :, class_n]
            mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3))
            child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3))
            count = 0
            IoU_element = 0
            for i in np.arange(num):
                if mother[i] != 0:
                    IoU_element += child[i] / mother[i]
                    count += 1
            IoU_calc = np.round(IoU_element / count, 3)
            IoU_class[class_n] = IoU_calc
            print 'IoU class ' + str(class_n) + '=' + str(IoU_calc)

        on_recons_ = on_recons[:, :, :, :, 1:vox_shape[3]]
        on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
        mother = np.sum(np.add(on_recons_, on_real_), (1, 2, 3, 4))
        child = np.sum(np.multiply(on_recons_, on_real_), (1, 2, 3, 4))
        count = 0
        IoU_element = 0
        for i in np.arange(num):
            if mother[i] != 0:
                IoU_element += child[i] / mother[i]
                count += 1
        IoU_calc = np.round(IoU_element / count, 3)
        IoU_class[vox_shape[3]] = IoU_calc
        print 'IoU all =' + str(IoU_calc)
        np.savetxt(save_path + '/IoU_refine.csv', IoU_class, delimiter=",")

        # calc_AP
        AP_class = np.zeros([vox_shape[3] + 1])
        for class_n in np.arange(vox_shape[3]):
            on_recons_ = refined_voxs[:, :, :, :, class_n]
            on_real_ = on_real[:, :, :, :, class_n]

            AP = 0.
            for i in np.arange(num):
                y_true = np.reshape(on_real_[i], [-1])
                y_scores = np.reshape(on_recons_[i], [-1])
                if np.sum(y_true) > 0.:
                    AP += average_precision_score(y_true, y_scores)
            AP = np.round(AP / num, 3)
            AP_class[class_n] = AP
            print 'AP class ' + str(class_n) + '=' + str(AP)

        on_recons_ = refined_voxs[:, :, :, :, 1:vox_shape[3]]
        on_real_ = on_real[:, :, :, :, 1:vox_shape[3]]
        AP = 0.
        for i in np.arange(num):
            y_true = np.reshape(on_real_[i], [-1])
            y_scores = np.reshape(on_recons_[i], [-1])
            if np.sum(y_true) > 0.:
                AP += average_precision_score(y_true, y_scores)
        AP = np.round(AP / num, 3)
        AP_class[vox_shape[3]] = AP
        print 'AP all =' + str(AP)
        np.savetxt(save_path + '/AP_refine.csv', AP_class, delimiter=",")

    # Interpolation evaluation.
    if mode == 'interpolate':
        interpolate_num = 30
        # Latent vectors to interpolate between: the first batch_size codes
        # stored by the 'recons' mode.
        decode_z = np.load(save_path + '/decode_z.npy')
        decode_z = decode_z[:batch_size]
        for l in np.arange(batch_size):
            for r in np.arange(batch_size):
                if l != r:
                    print l, r
                    base_num_left = l
                    base_num_right = r
                    left = np.reshape(decode_z[base_num_left], [
                        1, start_vox_size[0], start_vox_size[1],
                        start_vox_size[2], dim_z
                    ])
                    right = np.reshape(decode_z[base_num_right], [
                        1, start_vox_size[0], start_vox_size[1],
                        start_vox_size[2], dim_z
                    ])

                    # Step added per interpolation index so that the last
                    # step lands on `right`.
                    duration = (right - left) / (interpolate_num - 1)
                    # All codes except the left one; they pad the batch so
                    # that it has batch_size entries.
                    if base_num_left == 0:
                        Z_np_sample = decode_z[1:]
                    elif base_num_left == batch_size - 1:
                        Z_np_sample = decode_z[:batch_size - 1]
                    else:
                        Z_np_sample_before = np.reshape(
                            decode_z[:base_num_left], [
                                base_num_left, start_vox_size[0],
                                start_vox_size[1], start_vox_size[2], dim_z
                            ])
                        Z_np_sample_after = np.reshape(
                            decode_z[base_num_left + 1:], [
                                batch_size - base_num_left - 1,
                                start_vox_size[0], start_vox_size[1],
                                start_vox_size[2], dim_z
                            ])
                        Z_np_sample = np.concatenate(
                            [Z_np_sample_before, Z_np_sample_after], axis=0)
                    for i in np.arange(interpolate_num):
                        if i == 0:
                            Z = copy.copy(left)
                            interpolate_z = copy.copy(Z)
                        else:
                            Z = Z + duration
                            interpolate_z = np.concatenate([interpolate_z, Z],
                                                           axis=0)
                        # The interpolated code sits at index 0 of the batch.
                        Z_var_np_sample = np.concatenate([Z, Z_np_sample],
                                                         axis=0)
                        generated_voxs_fromrand = sess.run(
                            vox_tf_sample,
                            feed_dict={Z_tf_sample: Z_var_np_sample})
                        refined_voxs_fromrand = sess.run(
                            sample_refine_vox_tf,
                            feed_dict={sample_vox_tf: generated_voxs_fromrand})
                        # Keep only the result for the interpolated code.
                        interpolate_vox = np.reshape(
                            refined_voxs_fromrand[0], [
                                1, vox_shape[0], vox_shape[1], vox_shape[2],
                                vox_shape[3]
                            ])
                        if i == 0:
                            generated_voxs = interpolate_vox
                        else:
                            generated_voxs = np.concatenate(
                                [generated_voxs, interpolate_vox], axis=0)

                    np.save(
                        save_path + '/interpolation_z' + str(l) + '-' + str(r)
                        + '.npy', interpolate_z)

                    vox_models_cat = np.argmax(generated_voxs, axis=4)
                    np.save(
                        save_path + '/interpolation' + str(l) + '-' + str(r) +
                        '.npy', vox_models_cat)
        print("voxels saved")

    # Noise evaluation.
    if mode == 'noise':
        decode_z = np.load(save_path + '/decode_z.npy')
        decode_z = decode_z[:batch_size]
        noise_num = 10
        for base_num in np.arange(batch_size):
            print base_num
            base = np.reshape(decode_z[base_num], [
                1, start_vox_size[0], start_vox_size[1], start_vox_size[2],
                dim_z
            ])
            # noise_num - 1 perturbation vectors; index 0 of each series is
            # the unperturbed base code.
            eps = np.random.normal(size=(noise_num - 1,
                                         dim_z)).astype(np.float32)

            # All codes except the base one, used to fill up the batch.
            if base_num == 0:
                Z_np_sample = decode_z[1:]
            elif base_num == batch_size - 1:
                Z_np_sample = decode_z[:batch_size - 1]
            else:
                Z_np_sample_before = np.reshape(decode_z[:base_num], [
                    base_num, start_vox_size[0], start_vox_size[1],
                    start_vox_size[2], dim_z
                ])
                Z_np_sample_after = np.reshape(decode_z[base_num + 1:], [
                    batch_size - base_num - 1, start_vox_size[0],
                    start_vox_size[1], start_vox_size[2], dim_z
                ])
                Z_np_sample = np.concatenate(
                    [Z_np_sample_before, Z_np_sample_after], axis=0)
            # Perturb one latent cell (c, l, d) at a time.
            for c in np.arange(start_vox_size[0]):
                for l in np.arange(start_vox_size[1]):
                    for d in np.arange(start_vox_size[2]):

                        for i in np.arange(noise_num):
                            if i == 0:
                                Z = copy.copy(base)
                                noise_z = copy.copy(Z)
                            else:
                                Z = copy.copy(base)
                                Z[0, c, l, d, :] += eps[i - 1]
                                noise_z = np.concatenate([noise_z, Z], axis=0)
                            Z_var_np_sample = np.concatenate([Z, Z_np_sample],
                                                             axis=0)
                            generated_voxs_fromrand = sess.run(
                                vox_tf_sample,
                                feed_dict={Z_tf_sample: Z_var_np_sample})
                            refined_voxs_fromrand = sess.run(
                                sample_refine_vox_tf,
                                feed_dict={
                                    sample_vox_tf: generated_voxs_fromrand
                                })
                            # Keep only the result for the perturbed code.
                            noise_vox = np.reshape(refined_voxs_fromrand[0], [
                                1, vox_shape[0], vox_shape[1], vox_shape[2],
                                vox_shape[3]
                            ])
                            if i == 0:
                                generated_voxs = noise_vox
                            else:
                                generated_voxs = np.concatenate(
                                    [generated_voxs, noise_vox], axis=0)

                        # NOTE(review): c, l and d are concatenated without a
                        # separator, so file names can collide once any of
                        # them reaches 10 -- confirm the start_vox_size range.
                        np.save(
                            save_path + '/noise_z' + str(base_num) + '_' +
                            str(c) + str(l) + str(d) + '.npy', noise_z)

                        vox_models_cat = np.argmax(generated_voxs, axis=4)
                        np.save(
                            save_path + '/noise' + str(base_num) + '_' + str(c)
                            + str(l) + str(d) + '.npy', vox_models_cat)

        print("voxels saved")