def add_training_data(current_label_dict):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('pro_classifier', all_models_dir)
    classifications_dir = utils.check_or_create_local_path('classifications', model_dir)
    pro_dir = utils.check_or_create_local_path('pro', classifications_dir)
    notpro_dir = utils.check_or_create_local_path('notpro', classifications_dir)

    notpro_worlds = os.listdir(notpro_dir)
    pro_worlds = os.listdir(pro_dir)

    orig_dict = utils.load_label_dict(res_dir, current_label_dict)
    new_dict = utils.load_label_dict(res_dir, current_label_dict)

    # Worlds sorted into notpro/ are added only if they were not labeled
    # before; anything in pro/ is labeled 1 even if it had a previous label
    for notpro_world in notpro_worlds:
        notpro_id = utils.get_world_id(notpro_world)
        if notpro_id not in orig_dict:
            new_dict[notpro_id] = 0

    for pro_world in pro_worlds:
        pro_id = utils.get_world_id(pro_world)
        new_dict[pro_id] = 1

    utils.save_label_dict(classifications_dir, 'test', new_dict)
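# For reference, minimal sketches of two utils helpers the functions in this
# file lean on. These are hypothetical reconstructions of the behavior assumed
# above, not the actual implementations in utils.
import os


def _get_world_id_sketch(filename):
    # 'ab12cd.png' or 'ab12cd.world' -> 'ab12cd'
    return os.path.splitext(os.path.basename(filename))[0]


def _check_or_create_local_path_sketch(name, parent):
    # Join name onto parent, create the directory if missing, return its path
    path = os.path.join(parent, name)
    os.makedirs(path, exist_ok=True)
    return path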
def predict(network_ver, dict_src_name):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('pro_classifier', all_models_dir)
    version_dir = utils.check_or_create_local_path(network_ver, model_dir)
    model_save_dir = utils.check_or_create_local_path('models', version_dir)
    classifications_dir = utils.check_or_create_local_path('classifications', model_dir)
    utils.delete_files_in_path(classifications_dir)

    pro_dir = utils.check_or_create_local_path('pro', classifications_dir)
    notpro_dir = utils.check_or_create_local_path('notpro', classifications_dir)

    print('Loading model...')
    classifier = load_model(f'{model_save_dir}\\latest.h5')

    print('Loading block images...')
    block_images = utils.load_block_images(res_dir)

    print('Loading encoding dictionaries...')
    block_forward, block_backward = utils.load_encoding_dict(res_dir, 'blocks_optimized')

    x_data, x_files = load_worlds_with_files(5000, f'{res_dir}\\worlds\\', (112, 112), block_forward)
    x_labeled = utils.load_label_dict(res_dir, dict_src_name)

    batch_size = 50
    batches = x_data.shape[0] // batch_size

    for batch_index in range(batches):
        x_batch = x_data[batch_index * batch_size:(batch_index + 1) * batch_size]
        y_batch = classifier.predict(x_batch)

        for world in range(batch_size):
            g_index = (batch_index * batch_size) + world
            world_file = x_files[g_index]
            world_id = utils.get_world_id(world_file)

            # Ignore worlds we've already labeled
            if world_id in x_labeled:
                continue

            prediction = y_batch[world]
            world_data = utils.load_world_data_ver3(f'{res_dir}\\worlds\\{world_id}.world')

            # Sort the preview into notpro/ or pro/ by the sigmoid score
            if prediction[0] < 0.5:
                utils.save_world_preview(block_images, world_data, f'{notpro_dir}\\{world_id}.png')
            else:
                utils.save_world_preview(block_images, world_data, f'{pro_dir}\\{world_id}.png')
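# Typical labeling loop tying the two functions above together (the version
# and dict names here are illustrative, not taken from a real run):
if __name__ == '__main__':
    predict('ver1', 'pro_labels_b')    # writes previews into pro/ and notpro/
    # ...manually move any mis-sorted previews between the two folders, then:
    add_training_data('pro_labels_b')  # saves the merged dict as 'test'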
def apply_train():
    label_dict = load_label_dict(caption_fn)
    best_fscore = -1
    for epoch in range(args.epoch):
        train(epoch, train_dir_fn, label_dict, fvec_dir)
        fscore = test(epoch, dev_dir_fn, label_dict, fvec_dir)
        if fscore > best_fscore:
            torch.save(mlp, args.model)
            best_fscore = fscore
    return 0
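# apply_train pickles the entire module with torch.save(mlp, args.model). A
# sketch of the same keep-the-best pattern that saves only the weights, which
# is more robust to code changes (function and argument names are illustrative):
import torch


def save_if_best(model, fscore, best_fscore, path):
    # Persist the state_dict whenever the dev F-score improves
    if fscore > best_fscore:
        torch.save(model.state_dict(), path)
        return fscore
    return best_fscore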
def save_current_labels(current_label_dict):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('pro_classifier', all_models_dir)
    notpro_dir = utils.check_or_create_local_path('notpro', model_dir)
    pro_dir = utils.check_or_create_local_path('pro', model_dir)

    print('Loading block images...')
    block_images = utils.load_block_images(res_dir)

    print('Loading label dict...')
    x_labeled = utils.load_label_dict(res_dir, current_label_dict)

    saved = 0
    for x_world in x_labeled:
        label = x_labeled[x_world]

        # Skip worlds whose previews already exist
        if os.path.exists(f'{pro_dir}\\{x_world}.png') or os.path.exists(f'{notpro_dir}\\{x_world}.png'):
            saved += 1
            continue

        world_file = f'{res_dir}\\worlds\\{x_world}.world'
        world_data = utils.load_world_data_ver3(world_file)
        if label == 1:
            utils.save_world_preview(block_images, world_data, f'{pro_dir}\\{x_world}.png')
        else:
            utils.save_world_preview(block_images, world_data, f'{notpro_dir}\\{x_world}.png')

        saved += 1

    print(f'Saved {saved} of {len(x_labeled)} world previews')
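# The label dicts used throughout map world id -> 0/1. One plausible on-disk
# format, as a sketch only (hypothetical; the real reader and writer live in
# utils and may use a different format entirely):
def _load_label_dict_sketch(directory, name):
    labels = {}
    with open(os.path.join(directory, f'{name}.txt')) as f:
        for line in f:
            world_id, label = line.split()
            labels[world_id] = int(label)
    return labels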
best_model = model
for ep, lr in sorted(results):
    valid_acc = results[(ep, lr)]
    print('# epochs : %d lr : %e valid accuracy : %f' % (ep, lr, valid_acc))
print('best validation accuracy achieved: %f' % best_acc)

# Evaluate best model on test data
test_x, test_y = test_data
pred, prob = best_model.eval(test_x)
test_acc = accuracy(pred, test_y)
print('test accuracy of best model : %f' % test_acc)

# Plot prediction of the best model
if show_plot and DATA_NAME == 'Fashion_mnist':
    num_test = len(test_x)
    test_x = test_x[:, 1:]
    test_x = test_x.reshape(num_test, 28, 28)
    random_idx = np.random.choice(num_test, 5)
    sample_data = test_x[random_idx]
    sample_label = test_y[random_idx]
    sample_prob = prob[random_idx]
    label_dict = load_label_dict(DATA_NAME)
    display_image_predictions(sample_data, sample_label, sample_prob, label_dict)
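# The accuracy helper used above is assumed to be a plain match rate; a
# minimal NumPy sketch of that assumption (name suffixed to avoid shadowing):
import numpy as np


def _accuracy_sketch(pred, labels):
    # Fraction of predictions that equal the ground-truth labels
    return float(np.mean(np.asarray(pred) == np.asarray(labels)))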
                    default=256,
                    help="BEST results: same value as when training the Model")
args = parser.parse_args()

EVALUATE_PREDICATES = utils.get_bool_value(args.eval_preds)
device, USE_CUDA = utils.get_torch_device()
file_has_gold = utils.get_bool_value(args.gold_labels)
SEQ_MAX_LEN = int(args.seq_max_len)
BATCH_SIZE = int(args.batch_size)

# Load Saved Model
model, tokenizer = utils.load_model(BertForTokenClassification, BertTokenizer,
                                    f"{args.model_dir}/EPOCH_{args.epoch}")
label2index = utils.load_label_dict(f"{args.model_dir}/label2index.json")
# Note: str.strip removes a set of characters from both ends, not a "B-"
# prefix; it works for labels like "B-ARG" but is not a true prefix strip
index2label = {v: k.strip("B-") for k, v in label2index.items()}

# Load File for Predictions
_, prediction_inputs, prediction_masks, gold_labels, seq_lens, gold_predicates = utils.load_srl_dataset(
    args.test_path,
    tokenizer,
    include_labels=True,
    max_len=SEQ_MAX_LEN,
    label2index=label2index)

# Create the DataLoader.
prediction_data = TensorDataset(prediction_inputs, prediction_masks, gold_labels, seq_lens, gold_predicates)
prediction_sampler = SequentialSampler(prediction_data)
# The original call was truncated here; sampler and batch size below are
# assumed from the setup above
prediction_dataloader = DataLoader(prediction_data,
                                   sampler=prediction_sampler,
                                   batch_size=BATCH_SIZE)
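# A minimal sketch of the inference pass this DataLoader feeds, assuming a
# transformers version whose models return a tuple with the logits first
# (the batch layout follows the TensorDataset construction above):
model.eval()
predictions = []
with torch.no_grad():
    for batch in prediction_dataloader:
        b_input_ids = batch[0].to(device)
        b_masks = batch[1].to(device)
        logits = model(b_input_ids, attention_mask=b_masks)[0]
        # Greedy decoding: argmax over the label dimension per token
        predictions.extend(logits.argmax(dim=-1).cpu().tolist())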
def train(epochs, batch_size, world_count, dict_src_name, version_name=None, initial_epoch=0):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('pro_classifier', all_models_dir)
    utils.delete_empty_versions(model_dir, 1)

    no_version = version_name is None
    if no_version:
        latest = utils.get_latest_version(model_dir)
        version_name = f'ver{latest + 1}'

    version_dir = utils.check_or_create_local_path(version_name, model_dir)
    graph_dir = utils.check_or_create_local_path('graph', model_dir)
    graph_version_dir = utils.check_or_create_local_path(version_name, graph_dir)
    model_save_dir = utils.check_or_create_local_path('models', version_dir)

    print('Saving source...')
    utils.save_source_to_dir(version_dir)

    print('Loading encoding dictionaries...')
    block_forward, block_backward = utils.load_encoding_dict(res_dir, 'blocks_optimized')

    print('Building model from scratch...')
    c_optim = Adam(lr=0.0001)

    size = 64
    c = build_classifier(size)
    # c = build_resnet50(1)
    # c = build_wide_resnet(input_dim=(size, size, 10), nb_classes=1, N=2, k=1, dropout=0.1)
    c.summary()
    c.compile(loss='binary_crossentropy', optimizer=c_optim, metrics=['accuracy'])

    print('Loading labels...')
    label_dict = utils.load_label_dict(res_dir, dict_src_name)

    print('Loading worlds...')
    x, y_raw = load_worlds_with_labels(world_count, f'{res_dir}\\worlds\\', label_dict, (size, size), block_forward)
    y = utils.convert_labels_binary(y_raw, epsilon=0)

    # Create callback for automatically saving best model based on highest regular accuracy
    check_best_acc = keras.callbacks.ModelCheckpoint(f'{model_save_dir}\\best_acc.h5', monitor='acc', verbose=0,
                                                     save_best_only=True, save_weights_only=False, mode='max',
                                                     period=1)

    # Create callback for automatically saving latest model so training can be resumed. Saves every epoch
    latest_h5_callback = keras.callbacks.ModelCheckpoint(f'{model_save_dir}\\latest.h5', verbose=0,
                                                         save_best_only=False, save_weights_only=False,
                                                         mode='auto', period=1)

    # Create callback for automatically saving latest weights so training can be resumed. Saves every epoch
    latest_weights_callback = keras.callbacks.ModelCheckpoint(f'{model_save_dir}\\latest.weights', verbose=0,
                                                              save_best_only=False, save_weights_only=True,
                                                              mode='auto', period=1)

    # Create callback for tensorboard
    tb_callback = keras.callbacks.TensorBoard(log_dir=graph_version_dir, batch_size=batch_size,
                                              write_graph=False, write_grads=True)

    callback_list = [check_best_acc, latest_h5_callback, latest_weights_callback, tb_callback]

    # Train model
    c.fit(x, y, batch_size, epochs, initial_epoch=initial_epoch, callbacks=callback_list, validation_split=0.2)
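# build_classifier is defined elsewhere; a minimal Keras CNN honoring the same
# contract (input shape (size, size, 10) per the encoded worlds, one sigmoid
# unit to pair with binary_crossentropy) might look like this sketch:
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential


def build_classifier_sketch(size):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(size, size, 10)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    return model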
def predict_sample_matlab(network_ver, dict_src_name, cols, rows):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('pro_classifier', all_models_dir)
    version_dir = utils.check_or_create_local_path(network_ver, model_dir)
    model_save_dir = utils.check_or_create_local_path('models', version_dir)
    plots_dir = utils.check_or_create_local_path('plots', model_dir)
    utils.delete_files_in_path(plots_dir)

    print('Loading model...')
    classifier = load_model(f'{model_save_dir}\\latest.h5')

    print('Loading block images...')
    block_images = utils.load_block_images(res_dir)

    print('Loading encoding dictionaries...')
    block_forward, block_backward = utils.load_encoding_dict(res_dir, 'blocks_optimized')

    x_labeled = utils.load_label_dict(res_dir, dict_src_name)
    x_worlds = os.listdir(f'{res_dir}\\worlds\\')
    np.random.shuffle(x_worlds)

    world_size = classifier.input_shape[1]

    dpi = 96
    hpixels = 320 * cols
    hfigsize = hpixels / dpi
    vpixels = 330 * rows
    vfigsize = vpixels / dpi

    fig = plt.figure(figsize=(hfigsize, vfigsize), dpi=dpi)
    sample_num = 0
    pro_score_floor = 0
    pro_score_ceiling = 1.0 / (rows * cols)
    for world_filename in x_worlds:
        world_file = os.path.join(f'{res_dir}\\worlds\\', world_filename)
        world_id = utils.get_world_id(world_filename)
        if world_id not in x_labeled:

            # Load world and save preview
            encoded_regions = load_world(world_file, (world_size, world_size), block_forward)
            if len(encoded_regions) == 0:
                continue

            # Create prediction
            batch_input = np.empty((1, world_size, world_size, 10), dtype=np.int8)
            batch_input[0] = encoded_regions[0]
            batch_score = classifier.predict(batch_input)
            pro_score = batch_score[0][0]

            # Keep only worlds whose score falls in the current band
            if pro_score < pro_score_floor or pro_score > pro_score_ceiling:
                continue

            decoded_region = utils.decode_world_sigmoid(block_backward, encoded_regions[0])
            utils.save_world_preview(block_images, decoded_region, f'{plots_dir}\\preview{sample_num}.png')

            # Advance to the next score band so the grid spans the 0..1 range
            pro_score_floor += 1.0 / (rows * cols)
            pro_score_ceiling += 1.0 / (rows * cols)

            # Create plot
            img = mpimg.imread(f'{plots_dir}\\preview{sample_num}.png')
            subplt = fig.add_subplot(rows, cols, sample_num + 1)
            subplt.set_title(world_id)
            subplt.set_xlabel('P = %.2f%%' % (pro_score * 100))

            no_labels = 2  # how many labels to see on axis x
            step = (16 * world_size) / (no_labels - 1)  # step between consecutive labels
            positions = np.arange(0, (16 * world_size) + 1, step)  # pixel count at label position
            labels = positions // 16
            plt.xticks(positions, labels)
            plt.yticks(positions, labels)
            plt.imshow(img)

            print(f'Adding plot {sample_num + 1} of {rows * cols}')
            sample_num += 1
            if sample_num >= rows * cols:
                break

    print('Saving figure...')
    fig.tight_layout()
    fig.savefig(f'{plots_dir}\\plot.png', transparent=True)
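# Worked example of the tick math above: previews are rendered at 16 px per
# block, so with world_size = 112 and no_labels = 2 the step is
# (16 * 112) / 1 = 1792, positions = [0, 1792] px, and the axis labels read
# 0 and 112 blocks.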
def train(epochs, batch_size, world_count, latent_dim, version_name=None, initial_epoch=0):
    cur_dir = os.getcwd()
    res_dir = os.path.abspath(os.path.join(cur_dir, '..', 'res'))
    all_models_dir = os.path.abspath(os.path.join(cur_dir, '..', 'models'))
    model_dir = utils.check_or_create_local_path('gan', all_models_dir)
    utils.delete_empty_versions(model_dir, 1)

    no_version = version_name is None
    if no_version:
        latest = utils.get_latest_version(model_dir)
        version_name = f'ver{latest + 1}'

    version_dir = utils.check_or_create_local_path(version_name, model_dir)
    graph_dir = utils.check_or_create_local_path('graph', model_dir)
    graph_version_dir = utils.check_or_create_local_path(version_name, graph_dir)
    worlds_dir = utils.check_or_create_local_path('worlds', version_dir)
    previews_dir = utils.check_or_create_local_path('previews', version_dir)
    model_save_dir = utils.check_or_create_local_path('models', version_dir)

    print('Saving source...')
    utils.save_source_to_dir(version_dir)

    print('Loading block images...')
    block_images = utils.load_block_images(res_dir)

    print('Loading encoding dictionaries...')
    block_forward, block_backward = utils.load_encoding_dict(res_dir, 'blocks_optimized')

    # Load model and existing weights
    print('Loading model...')

    # Try to load full model, otherwise try to load weights
    size = 64
    cur_models = f'{model_save_dir}\\epoch{initial_epoch - 1}'
    if os.path.exists(f'{cur_models}\\discriminator.h5') and os.path.exists(f'{cur_models}\\generator.h5'):
        print('Building model from files...')
        d = load_model(f'{cur_models}\\discriminator.h5')
        g = load_model(f'{cur_models}\\generator.h5')

        if os.path.exists(f'{cur_models}\\d_g.h5'):
            d_on_g = load_model(f'{cur_models}\\d_g.h5')
        else:
            g_optim = Adam(lr=0.0001, beta_1=0.5)
            d_on_g = generator_containing_discriminator(g, d)
            d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)
    elif os.path.exists(f'{cur_models}\\discriminator.weights') and os.path.exists(
            f'{cur_models}\\generator.weights'):
        print('Building model with weights...')
        d_optim = Adam(lr=0.00001)
        d = build_discriminator(size)
        d.load_weights(f'{cur_models}\\discriminator.weights')
        d.compile(loss='binary_crossentropy', optimizer=d_optim, metrics=['accuracy'])

        # latent_dim passed here so the architecture matches the saved weights,
        # consistent with the from-scratch branch below
        g = build_generator(size, latent_dim)
        g.load_weights(f'{cur_models}\\generator.weights')

        g_optim = Adam(lr=0.0001, beta_1=0.5)
        d_on_g = generator_containing_discriminator(g, d)
        d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)
    else:
        print('Building model from scratch...')
        d_optim = Adam(lr=0.00001)
        g_optim = Adam(lr=0.0001, beta_1=0.5)

        d = build_discriminator(size)
        d.compile(loss='binary_crossentropy', optimizer=d_optim, metrics=['accuracy'])
        d.summary()

        g = build_generator(size, latent_dim)
        g.summary()

        d_on_g = generator_containing_discriminator(g, d)
        d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)

    if no_version:
        # Delete existing worlds and previews if any
        print('Checking for old generated data...')
        utils.delete_files_in_path(worlds_dir)
        utils.delete_files_in_path(previews_dir)

        print('Saving model images...')
        keras.utils.plot_model(d, to_file=f'{version_dir}\\discriminator.png', show_shapes=True,
                               show_layer_names=True)
        keras.utils.plot_model(g, to_file=f'{version_dir}\\generator.png', show_shapes=True,
                               show_layer_names=True)

    # Load Data
    print('Loading worlds...')
    label_dict = utils.load_label_dict(res_dir, 'pro_labels_b')
    x_train = load_worlds_with_label(world_count, f'{res_dir}\\worlds\\', label_dict, 1, (size, size),
                                     block_forward, overlap_x=0.1, overlap_y=0.1)

    world_count = x_train.shape[0]
    batch_cnt = (world_count - (world_count % batch_size)) // batch_size

    # Set up tensorboard
    print('Setting up tensorboard...')
    tb_manager = TensorboardManager(graph_version_dir, batch_cnt)

    preview_frequency_sec = 5 * 60.0
    for epoch in range(initial_epoch, epochs):

        # Create directories for current epoch
        cur_worlds_dir = utils.check_or_create_local_path(f'epoch{epoch}', worlds_dir)
        cur_previews_dir = utils.check_or_create_local_path(f'epoch{epoch}', previews_dir)
        cur_models_dir = utils.check_or_create_local_path(f'epoch{epoch}', model_save_dir)

        print('Shuffling data...')
        np.random.shuffle(x_train)

        last_save_time = time.time()
        for batch in range(batch_cnt):

            # Get real set of images
            real_worlds = x_train[batch * batch_size:(batch + 1) * batch_size]

            # Get fake set of images
            noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
            fake_worlds = g.predict(noise)

            real_labels = np.ones((batch_size, 1))   # np.random.uniform(0.9, 1.1, size=(batch_size,))
            fake_labels = np.zeros((batch_size, 1))  # np.random.uniform(-0.1, 0.1, size=(batch_size,))

            # Train discriminator on real worlds
            d.trainable = True
            d_loss = d.train_on_batch(real_worlds, real_labels)
            acc_real = d_loss[1]
            loss_real = d_loss[0]
            tb_manager.log_var('d_acc_real', epoch, batch, d_loss[1])
            tb_manager.log_var('d_loss_real', epoch, batch, d_loss[0])

            # Train discriminator on fake worlds
            d_loss = d.train_on_batch(fake_worlds, fake_labels)
            d.trainable = False
            acc_fake = d_loss[1]
            loss_fake = d_loss[0]
            tb_manager.log_var('d_acc_fake', epoch, batch, d_loss[1])
            tb_manager.log_var('d_loss_fake', epoch, batch, d_loss[0])

            # Training generator on X data, with Y labels
            # noise = np.random.normal(0, 1, (batch_size, 256))

            # Train generator to generate real
            g_loss = d_on_g.train_on_batch(noise, real_labels)
            tb_manager.log_var('g_loss', epoch, batch, g_loss)
            print(f'epoch [{epoch}/{epochs}] :: batch [{batch}/{batch_cnt}] :: fake_acc = {acc_fake} :: '
                  f'real_acc = {acc_real} :: fake_loss = {loss_fake} :: real_loss = {loss_real} :: '
                  f'gen_loss = {g_loss}')

            # Save models
            time_since_save = time.time() - last_save_time
            if time_since_save >= preview_frequency_sec or batch == batch_cnt - 1:
                print('Saving previews...')
                for i in range(batch_size):
                    generated_world = fake_worlds[i]
                    decoded_world = utils.decode_world_sigmoid(block_backward, generated_world)
                    utils.save_world_data(decoded_world, f'{cur_worlds_dir}\\world{i}.world')
                    utils.save_world_preview(block_images, decoded_world, f'{cur_previews_dir}\\preview{i}.png')

                print('Saving models...')
                try:
                    d.save(f'{cur_models_dir}\\discriminator.h5')
                    g.save(f'{cur_models_dir}\\generator.h5')
                    d_on_g.save(f'{cur_models_dir}\\d_g.h5')
                    d.save_weights(f'{cur_models_dir}\\discriminator.weights')
                    g.save_weights(f'{cur_models_dir}\\generator.weights')
                    d_on_g.save_weights(f'{cur_models_dir}\\d_g.weights')
                except ImportError:
                    print('Failed to save data.')

                last_save_time = time.time()
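# generator_containing_discriminator is the usual stacked-GAN helper; a
# minimal Keras sketch consistent with how it is used above (the real
# implementation is defined elsewhere in this project):
def generator_containing_discriminator_sketch(g, d):
    # Chain the discriminator after the generator with the discriminator
    # frozen, so training the stacked model updates only the generator
    model = Sequential()
    model.add(g)
    d.trainable = False
    model.add(d)
    return model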