def train_gen(self, number_of_objects, number_of_trees):
    """
    Generates cluster programs to be drawn in one image.

    :param number_of_objects: Total number of objects to draw in one image
    :param number_of_trees: total number of cluster to draw in one image
        (currently unused by this implementation)
    :return: list of clusters; each cluster is a dict mapping a substring
        index to its program expression
    """
    num_objs = 0
    programs = []
    while num_objs < number_of_objects:
        index = np.random.choice(len(self.train_substrings))
        if num_objs + len(
                self.train_substrings[index].keys()) > number_of_objects:
            # This cluster would overshoot the budget: keep only as many
            # of its objects as still fit in the image.
            required_indices = sorted(
                self.train_substrings[index].keys())[0:number_of_objects -
                                                     num_objs]
            cluster = {}
            for r in required_indices:
                p = self.train_substrings[index][r]
                image = image_from_expressions([p],
                                               stack_size=9,
                                               canvas_shape=[64, 64])
                # Makes sure that the object created doesn't have disjoint parts,
                # don't include the program, because it makes the analysis difficult.
                # (nlabels == 2 means background + exactly one component.)
                nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                    np.array(image[0], dtype=np.uint8))
                if nlabels > 2:
                    continue
                cluster[r] = self.train_substrings[index][r]
            if cluster:
                programs.append(cluster)
                # BUG FIX: the object count was incremented twice here,
                # over-counting the drawn objects and ending the loop with
                # fewer objects than requested.
                num_objs += len(cluster.keys())
        else:
            # The whole cluster fits: keep every connected (single-part)
            # object in it.
            cluster = {}
            for k, p in self.train_substrings[index].items():
                image = image_from_expressions([p],
                                               stack_size=9,
                                               canvas_shape=[64, 64])
                nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                    np.array(image[0], dtype=np.uint8))
                if nlabels > 2:
                    continue
                cluster[k] = p
            if cluster:
                programs.append(cluster)
                num_objs += len(cluster.keys())
    return programs
def get_cd(imitate_net, data, one_hot_labels, program_len):
    """
    Beam-search programs for a batch and return the summed chamfer distance,
    keeping the best (minimum-CD) beam per sample.

    :param imitate_net: model exposing beam_search([input, one_hot], width, len)
    :param data: batch tensor; batch size is read from data.shape[1]
    :param one_hot_labels: one-hot start tokens for the decoder
    :param program_len: number of decoding steps / tokens per expression
    :return: sum over the batch of each sample's best beam chamfer distance

    Relies on module-level names: beams_parser, unique_draw, parser,
    image_from_expressions, chamfer — and, apparently unintentionally, data_.
    """
    batch_size = data.shape[1]
    beam_width = 10
    all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
        [data, one_hot_labels], beam_width, program_len)
    beam_labels = beams_parser(all_beams, batch_size, beam_width=beam_width)
    # Flatten per-sample beams into one (batch*beam, len) label matrix.
    beam_labels_numpy = np.zeros((batch_size * beam_width, program_len),
                                 dtype=np.int32)
    for i in range(batch_size):
        beam_labels_numpy[i * beam_width:(i + 1) * beam_width, :] = beam_labels[i]
    # find expression from these predicted beam labels
    expressions = [""] * batch_size * beam_width
    for i in range(batch_size * beam_width):
        for j in range(program_len):
            expressions[i] += unique_draw[beam_labels_numpy[i, j]]
    # "$" is the stop symbol; drop everything after it.
    for index, prog in enumerate(expressions):
        expressions[index] = prog.split("$")[0]
    predicted_images = image_from_expressions(parser, expressions)
    # NOTE(review): `data_` is not defined in this function — it reads a
    # module-level global. Presumably the targets should be derived from the
    # `data` parameter instead (as sibling functions do); confirm with caller.
    target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
    # Repeat each target once per beam so shapes align with predictions.
    target_images_new = np.repeat(target_images, axis=0, repeats=beam_width)
    beam_CD = chamfer(target_images_new, predicted_images)
    CD = np.zeros((batch_size, 1))
    for r in range(batch_size):
        CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])
    return np.sum(CD)
def place_on_canvas(self, programs):
    """
    Places objects from programs one by one on a bigger canvas randomly such
    that there is no intersection between objects.

    Each 64x64 object is assigned a distinct cell of a 3x3 grid with an 80px
    pitch and jittered by up to 15px, so objects can never overlap.

    :param programs: list of program expressions to render (at most 9)
    :return: 240x240 boolean canvas with the rendered objects placed
    """
    canvas = np.zeros((240, 240), dtype=bool)
    # Random per-object jitter range inside a grid cell (80 - 64 - 1 = 15 max).
    grid = np.arange(0, 16)
    images = image_from_expressions(programs,
                                    stack_size=9,
                                    canvas_shape=[64, 64])
    # Cell coordinates of the 3x3 layout, flattened to 9 (row, col) pairs.
    xi, yj = np.meshgrid(np.arange(3), np.arange(3))
    xi = np.reshape(xi, 9)
    yj = np.reshape(yj, 9)
    # Sample distinct cells so no two objects share a cell.
    random_index = np.random.choice(np.arange(9), len(programs), replace=False)
    for index in range(len(programs)):
        x, y = np.random.choice(grid, 2)
        canvas[xi[random_index[index]] * 80 + x:xi[random_index[index]] * 80 +
               x + 64, yj[random_index[index]] * 80 +
               y:yj[random_index[index]] * 80 + y + 64] = images[index]
    return canvas
def infer_programs(imitate_net, self_training=False, ab=None):
    """
    Beam-search CSG programs for the CAD test set, report the mean chamfer
    distance, and save per-sample visualization grids.

    :param imitate_net: trained model exposing beam_search and eval()
    :param self_training: unused in this variant
    :param ab: unused in this variant
    :return: mean (per-batch averaged) best-beam chamfer distance over the
        test set

    Relies on module-level names: max_len, beam_width, device, read_config,
    ParseModelOutput, Generator, prepare_input_op, beams_parser,
    image_from_expressions, chamfer, plt, torch, np.
    """
    config = read_config.Config("config_cad.yml")
    # Load the terminals symbols of the grammar
    with open("terminals.txt", "r") as file:
        unique_draw = file.readlines()
    # Strip the trailing newline from each terminal symbol.
    for index, e in enumerate(unique_draw):
        unique_draw[index] = e[0:-1]
    config.train_size = 10000
    config.test_size = 3000
    imitate_net.eval()
    imitate_net.epsilon = 0  # disable exploration during inference
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len,
                              config.canvas_shape)
    generator = Generator()
    test_gen = generator.test_gen(batch_size=config.batch_size,
                                  path="data/cad/cad.h5",
                                  if_augment=False)
    pred_expressions = []
    Rs = 0  # NOTE(review): accumulated nowhere in this variant (IoU disabled)
    CDs = 0
    Target_images = []
    for batch_idx in range(config.test_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring test cad batch: {batch_idx}")
            data_ = next(test_gen)
            # Dummy all-zero labels: only used to build start-token one-hots.
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)
            all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)
            beam_labels = beams_parser(all_beams,
                                       data_.shape[1],
                                       beam_width=beam_width)
            # Flatten per-sample beams into one (batch*beam, len) matrix.
            beam_labels_numpy = np.zeros(
                (config.batch_size * beam_width, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            for i in range(data_.shape[1]):
                beam_labels_numpy[i * beam_width:(i + 1) *
                                  beam_width, :] = beam_labels[i]
            # find expression from these predicted beam labels
            expressions = [""] * config.batch_size * beam_width
            for i in range(config.batch_size * beam_width):
                for j in range(max_len):
                    expressions[i] += unique_draw[beam_labels_numpy[i, j]]
            # "$" is the stop symbol; drop everything after it.
            for index, prog in enumerate(expressions):
                expressions[index] = prog.split("$")[0]
            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            # Repeat each target once per beam so shapes align.
            target_images_new = np.repeat(target_images,
                                          axis=0,
                                          repeats=beam_width)
            beam_CD = chamfer(target_images_new, predicted_images)
            # Best (minimum) chamfer distance across the beams of each sample.
            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])
            CDs += np.mean(CD)
            # Save a target-vs-all-beams image grid for every sample.
            for j in range(0, config.batch_size):
                f, a = plt.subplots(1, beam_width + 1, figsize=(30, 3))
                a[0].imshow(data_[-1, j, 0, :, :], cmap="Greys_r")
                a[0].axis("off")
                a[0].set_title("target")
                for i in range(1, beam_width + 1):
                    a[i].imshow(predicted_images[j * beam_width + i - 1],
                                cmap="Greys_r")
                    a[i].set_title("{}".format(i))
                    a[i].axis("off")
                # NOTE(review): "best_lest/" looks like a typo for
                # "best_test/"; the directory must exist beforehand — confirm.
                plt.savefig("best_lest/" +
                            "{}.png".format(batch_idx * config.batch_size + j),
                            transparent=0)
                plt.close("all")
            # with open("best_st_expressions.txt", "w") as file:
            #     for e in pred_expressions:
            #         file.write(f"{e}\n")
            # break
    return CDs / (config.test_size // config.batch_size)
def infer_programs(imitate_net, path, self_training=False, ab=None):
    """
    Two-phase inference for wake-sleep training on the CAD dataset.

    Phase 1 (train split): beam-search programs for every training batch,
    keep the best beam(s) by chamfer distance, and save the selected label
    sequences (and, for self-training, the target images) under `path`.
    Phase 2 (test split): report the best-beam chamfer distance.

    :param imitate_net: trained model exposing beam_search and eval()
    :param path: output root; images/, results/ and labels/ are created in it
    :param self_training: if True, also save the target images paired with
        the inferred labels for later self-supervised training
    :param ab: if not None, keep the `ab` best beams per sample instead of 1
    :return: None (results are printed and written to disk)

    Relies on module-level names: max_len, beam_width, device, read_config,
    ParseModelOutput, Generator, prepare_input_op, beams_parser,
    image_from_expressions, chamfer, plt, torch, np, os, json, time.
    """
    save_viz = False  # flip to True to dump beam visualizations for one batch
    config = read_config.Config("config_cad.yml")
    # Load the terminals symbols of the grammar
    with open("terminals.txt", "r") as file:
        unique_draw = file.readlines()
    # Strip the trailing newline from each terminal symbol.
    for index, e in enumerate(unique_draw):
        unique_draw[index] = e[0:-1]
    config.train_size = 10000
    config.test_size = 3000
    imitate_net.eval()
    imitate_net.epsilon = 0  # disable exploration during inference
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len,
                              config.canvas_shape)
    pred_expressions = []
    # One label row per kept beam: ab beams per sample when ab is given.
    if ab is not None:
        pred_labels = np.zeros((config.train_size * ab, max_len))
    else:
        pred_labels = np.zeros((config.train_size, max_len))
    image_path = f"{path}/images/"
    results_path = f"{path}/results/"
    labels_path = f"{path}/labels/"
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    os.makedirs(os.path.dirname(labels_path), exist_ok=True)
    os.makedirs(os.path.dirname(labels_path + "val/"), exist_ok=True)
    generator = Generator()
    train_gen = generator.train_gen(batch_size=config.batch_size,
                                    path="data/cad/cad.h5",
                                    if_augment=False)
    # NOTE(review): val_gen is created but never consumed in this function.
    val_gen = generator.val_gen(batch_size=config.batch_size,
                                path="data/cad/cad.h5",
                                if_augment=False)
    Rs = 0  # IoU accumulator; the IoU computation below is commented out
    CDs = 0
    Target_images = []
    start = time.time()
    pred_images = np.zeros((config.train_size, 64, 64))
    # ---- Phase 1: infer programs for the training split ----
    for batch_idx in range(config.train_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring cad batch: {batch_idx}")
            data_ = next(train_gen)
            # Dummy all-zero labels: only used to build start-token one-hots.
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)
            all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)
            beam_labels = beams_parser(all_beams,
                                       data_.shape[1],
                                       beam_width=beam_width)
            # Flatten per-sample beams into one (batch*beam, len) matrix.
            beam_labels_numpy = np.zeros(
                (config.batch_size * beam_width, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            for i in range(data_.shape[1]):
                beam_labels_numpy[i * beam_width:(i + 1) *
                                  beam_width, :] = beam_labels[i]
            # find expression from these predicted beam labels
            expressions = [""] * config.batch_size * beam_width
            for i in range(config.batch_size * beam_width):
                for j in range(max_len):
                    expressions[i] += unique_draw[beam_labels_numpy[i, j]]
            # "$" is the stop symbol; drop everything after it.
            for index, prog in enumerate(expressions):
                expressions[index] = prog.split("$")[0]
            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            # Repeat each target once per beam so shapes align.
            target_images_new = np.repeat(target_images,
                                          axis=0,
                                          repeats=beam_width)
            # beam_R = np.sum(np.logical_and(target_images_new, predicted_images),
            #                 (1, 2)) / np.sum(np.logical_or(target_images_new, predicted_images), (1, 2))
            #
            # R = np.zeros((config.batch_size, 1))
            # for r in range(config.batch_size):
            #     R[r, 0] = max(beam_R[r * beam_width:(r + 1) * beam_width])
            #
            # Rs += np.mean(R)
            beam_CD = chamfer(target_images_new, predicted_images)
            # select best expression by chamfer distance
            if ab is None:
                best_labels = np.zeros((config.batch_size, max_len))
                for r in range(config.batch_size):
                    idx = np.argmin(beam_CD[r * beam_width:(r + 1) *
                                            beam_width])
                    best_labels[r] = beam_labels[r][idx]
                pred_labels[batch_idx * config.batch_size:batch_idx *
                            config.batch_size +
                            config.batch_size] = best_labels
            else:
                # Keep the `ab` lowest-CD beams per sample, in CD order.
                best_labels = np.zeros((config.batch_size * ab, max_len))
                for r in range(config.batch_size):
                    sorted_idx = np.argsort(beam_CD[r * beam_width:(r + 1) *
                                                    beam_width])[:ab]
                    best_labels[r * ab:r * ab + ab] = beam_labels[r][sorted_idx]
                pred_labels[batch_idx * config.batch_size * ab:batch_idx *
                            config.batch_size * ab +
                            config.batch_size * ab] = best_labels
            # Track per-sample best CD and the corresponding rendered image.
            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])
                pred_images[batch_idx * config.batch_size +
                            r] = predicted_images[r * beam_width + np.argmin(
                                beam_CD[r * beam_width:(r + 1) * beam_width])]
            CDs += np.mean(CD)
            if save_viz:
                # Save a target-vs-all-beams image grid for every sample.
                for j in range(0, config.batch_size):
                    f, a = plt.subplots(1, beam_width + 1, figsize=(30, 3))
                    a[0].imshow(data_[-1, j, 0, :, :], cmap="Greys_r")
                    a[0].axis("off")
                    a[0].set_title("target")
                    for i in range(1, beam_width + 1):
                        a[i].imshow(predicted_images[j * beam_width + i - 1],
                                    cmap="Greys_r")
                        a[i].set_title("{}".format(i))
                        a[i].axis("off")
                    plt.savefig(
                        image_path +
                        "{}.png".format(batch_idx * config.batch_size + j),
                        transparent=0)
                    plt.close("all")
                # Only visualize one batch per run.
                # NOTE(review): original indentation is ambiguous here; since
                # save_viz starts False, placement does not change behavior.
                save_viz = False
            # Running average over all planned batches (partial until done).
            print("Inferring cad average chamfer distance: {}".format(
                CDs / (config.train_size // config.batch_size)),
                  flush=True)
    Rs = Rs / (config.train_size // config.batch_size)
    CDs = CDs / (config.train_size // config.batch_size)
    print(Rs, CDs)
    results = {"iou": Rs, "chamferdistance": CDs}
    with open(results_path + "results_beam_width_{}.org".format(beam_width),
              'w') as outfile:
        json.dump(results, outfile)
    torch.save(pred_labels, labels_path + "labels.pt")
    # torch.save(pred_images, labels_path + "images.pt")
    if self_training:
        # Pair each saved label row with its target image (repeated ab times
        # when multiple beams per sample were kept).
        if ab is None:
            torch.save(np.concatenate(Target_images, axis=0),
                       labels_path + "images.pt")
        else:
            torch.save(
                np.repeat(np.concatenate(Target_images, axis=0), ab, axis=0),
                labels_path + "images.pt")
    # ---- Phase 2: evaluate chamfer distance on the test split ----
    test_gen = generator.test_gen(batch_size=config.batch_size,
                                  path="data/cad/cad.h5",
                                  if_augment=False)
    pred_expressions = []
    Rs = 0
    CDs = 0
    Target_images = []
    for batch_idx in range(config.test_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring test cad batch: {batch_idx}")
            data_ = next(test_gen)
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)
            all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)
            beam_labels = beams_parser(all_beams,
                                       data_.shape[1],
                                       beam_width=beam_width)
            beam_labels_numpy = np.zeros(
                (config.batch_size * beam_width, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            for i in range(data_.shape[1]):
                beam_labels_numpy[i * beam_width:(i + 1) *
                                  beam_width, :] = beam_labels[i]
            # find expression from these predicted beam labels
            expressions = [""] * config.batch_size * beam_width
            for i in range(config.batch_size * beam_width):
                for j in range(max_len):
                    expressions[i] += unique_draw[beam_labels_numpy[i, j]]
            for index, prog in enumerate(expressions):
                expressions[index] = prog.split("$")[0]
            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            target_images_new = np.repeat(target_images,
                                          axis=0,
                                          repeats=beam_width)
            beam_CD = chamfer(target_images_new, predicted_images)
            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])
            CDs += np.mean(CD)
    print(f"TEST CD: {CDs / (config.test_size // config.batch_size)}")
    end = time.time()
    print(f"Inference time: {end-start}")
# Load the terminals symbols of the grammar canvas_shape = [64, 64] max_len = 13 with open("terminals.txt", "r") as file: unique_draw = file.readlines() for index, e in enumerate(unique_draw): unique_draw[index] = e[0:-1] # Fill the expressions that you want to render expressions = [ "c(32,32,28)c(32,32,24)-s(32,32,28)s(32,32,20)-+t(32,32,20)+", "c(32,32,28)c(32,32,24)-" ] parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, canvas_shape) predicted_images = image_from_expressions(parser, expressions) plt.imshow(predicted_images[0], cmap="Greys") plt.grid("off") plt.axis("off") plt.show() config = read_config.Config("config_synthetic.yml") config.batch_size = len(predicted_images) evaluator = Evaluator(config) pred_images, expressions2 = evaluator.test2(predicted_images, parser, max_len) cd, iou, cos = compute_batch_metrics(predicted_images, pred_images) show_pair(predicted_images, pred_images, expressions, expressions2, cd, iou, cos, (lambda cd, iou, cos, e1, e2: cd > 0.0))
# Script: load saved target and predicted programs for a trained network and
# display each target/prediction image pair side by side.
# Relies on module-level names: args, unique_draw, max_len, canvas_shape,
# ParseModelOutput, image_from_expressions, plt, json.
with open('trained_models/results/{}/tar_prog.org'.format(args.network),
          'r') as f:
    target_data = json.load(f)['true']
with open('trained_models/results/{}/pred_prog.org'.format(args.network),
          'r') as f:
    prediction_data = json.load(f)['true']
parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len,
                          canvas_shape)
data_num = len(target_data)
for i in range(data_num):
    # Optionally skip short programs (< 50 characters).
    if args.show_only_long:
        if len(target_data[i]) < 50:
            continue
    target_images = image_from_expressions(parser, [target_data[i]])
    prediction_images = image_from_expressions(parser, [prediction_data[i]])
    plt.subplot(121)
    plt.imshow(target_images[0], cmap='Greys')
    plt.grid('off')
    plt.axis('off')
    plt.title('target')
    plt.subplot(122)
    plt.imshow(prediction_images[0], cmap='Greys')
    plt.grid('off')
    plt.axis('off')
    plt.title('prediction')
    plt.show()
def train_generator(generator_net, load_path, save_path, max_epochs=None):
    """
    Train a VAE-style program generator on inferred labels, then sample new
    program token sequences from its prior and render them to images.

    :param generator_net: generator model exposing __call__, loss_function,
        decode, state_dict/load_state_dict
    :param load_path: directory containing labels/labels.pt (inferred programs)
    :param save_path: output directory for checkpoints, sampled labels/images
    :param max_epochs: optional cap on training epochs (default 500)
    :return: number of epochs actually run (epoch + 1)

    Relies on module-level names: device, optim, inference_train_size,
    generator_latent_dim, max_len, unique_draw, ParseModelOutput,
    image_from_expressions, torch, np, os, time.
    """
    if max_epochs is None:
        epochs = 500
    else:
        epochs = max_epochs
    labels = torch.load(f"{load_path}/labels/labels.pt", map_location=device)
    # pad with a start and stop token (token id 399)
    labels = np.pad(labels, ((0, 0), (1, 1)), constant_values=399)
    batch_size = 100
    optimizer = optim.Adam(generator_net.parameters(), lr=1e-3)
    generator_net.train()
    best_train_loss = 1e20
    patience = 20
    num_worse = 0
    # BUG FIX: torch.save() returns None, so `best_gen_dict = torch.save(...)`
    # assigned a meaningless value; the checkpoint on disk is the real state.
    torch.save(generator_net.state_dict(), f"{save_path}/best_gen_dict.pth")
    for epoch in range(epochs):
        start = time.time()
        train_loss = 0
        ce_loss = 0
        kl_loss = 0
        acc = 0
        np.random.shuffle(labels)
        for i in range(0, len(labels), batch_size):
            batch = torch.from_numpy(labels[i:i +
                                            batch_size]).long().to(device)
            optimizer.zero_grad()
            recon_batch, mu, logvar = generator_net(batch)
            ce, kld = generator_net.loss_function(recon_batch, batch, mu,
                                                  logvar)
            # beta-weighted KL term (beta = 0.1)
            loss = ce + 0.1 * kld
            loss.backward()
            # Normalize running metrics by total predicted tokens.
            train_loss += loss.item() / (len(labels) * (labels.shape[1] - 1))
            ce_loss += ce.item() / (len(labels) * (labels.shape[1] - 1))
            kl_loss += kld.item() / (len(labels) * (labels.shape[1] - 1))
            # Token-level reconstruction accuracy against the shifted targets.
            acc += (recon_batch.permute(1, 2, 0).max(dim=1)[1] ==
                    batch[:, 1:]).float().sum() / (len(labels) *
                                                   (labels.shape[1] - 1))
            optimizer.step()
        print(
            f"generator epoch: {epoch}, loss: {train_loss}, accuracy: {acc}, ce: {ce_loss}, kld: {kl_loss}"
        )
        # Early stopping on training loss with patience; checkpoint on improve.
        if train_loss >= best_train_loss:
            num_worse += 1
        else:
            num_worse = 0
            best_train_loss = train_loss
            torch.save(generator_net.state_dict(),
                       f"{save_path}/best_gen_dict.pth")
        if num_worse >= patience:
            # load the best model and stop training
            generator_net.load_state_dict(
                torch.load(f"{save_path}/best_gen_dict.pth"))
            break
        end = time.time()
        print(f'gen epoch time {end-start}')
    # Sample new training programs from the latent prior, batch by batch.
    train_tokens = torch.zeros((inference_train_size, max_len))
    for i in range(0, inference_train_size, batch_size):
        batch_latents = torch.randn(1, batch_size,
                                    generator_latent_dim).to(device)
        batch_tokens = generator_net.decode(batch_latents,
                                            timesteps=labels.shape[1] - 1)
        # Greedy-decode token ids and drop the trailing stop token.
        batch_tokens = batch_tokens.permute(1, 0, 2).max(dim=2)[1][:, :-1]
        train_tokens[i:i + batch_size] = batch_tokens
    os.makedirs(os.path.dirname(f"{save_path}/"), exist_ok=True)
    torch.save(train_tokens, f"{save_path}/labels.pt")
    # find expression from labels
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, [64, 64])
    expressions = [""] * inference_train_size
    for i in range(inference_train_size):
        for j in range(max_len):
            expressions[i] += unique_draw[int(train_tokens[i, j])]
    # "$" is the stop symbol; drop everything after it.
    for index, prog in enumerate(expressions):
        expressions[index] = prog.split("$")[0]
    pred_images = image_from_expressions(parser,
                                         expressions).astype(np.float32)
    torch.save(pred_images, f"{save_path}/images.pt")
    return epoch + 1