def train(model, optimizer, criterion, objective_type, train_loader):
    device = get_torch_device(args)
    model.train()
    train_loss = 0
    for batch_idx, (x, y_voice_cmd, y_voice_cmd_lng) in enumerate(train_loader):
        x = x.to(device)
        y_voice_cmd = y_voice_cmd.to(device)
        y_voice_cmd_lng = y_voice_cmd_lng.to(device)

        optimizer.zero_grad()
        outputs = model(x)

        if objective_type == 'voice_cmd':
            logits_voice_cmd = outputs
            loss = criterion(logits_voice_cmd, y_voice_cmd)
        elif objective_type == 'voice_cmd__and__voice_cmd_lng':
            logits_voice_cmd, logits_voice_cmd_lng = outputs
            loss = (criterion(logits_voice_cmd, y_voice_cmd) +
                    criterion(logits_voice_cmd_lng, y_voice_cmd_lng)) / 2
        else:
            raise ValueError(f"Unknown objective type: {objective_type}")

        loss.backward()
        optimizer.step()
        train_loss += loss.item()
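# Illustrative sketch (not part of the original code): the train loop above assumes
# model(x) returns a single logits tensor for the 'voice_cmd' objective and a
# (voice_cmd_logits, voice_cmd_lng_logits) tuple for the joint objective. A minimal
# stand-in module showing that contract could look like the following; the class name
# and hidden size are hypothetical, only the head sizes (105 voice-command classes,
# 5 language classes) come from run_trials below.
import torch.nn as nn

class DualHeadSketch(nn.Module):
    def __init__(self, objective_type='voice_cmd',
                 voice_cmd_neuron_count=105, voice_cmd_lng_neuron_count=5):
        super().__init__()
        self.objective_type = objective_type
        self.backbone = nn.Sequential(nn.Flatten(), nn.LazyLinear(64), nn.ReLU())
        self.head_voice_cmd = nn.Linear(64, voice_cmd_neuron_count)
        self.head_voice_cmd_lng = nn.Linear(64, voice_cmd_lng_neuron_count)

    def forward(self, x):
        h = self.backbone(x)
        if self.objective_type == 'voice_cmd':
            return self.head_voice_cmd(h)
        # joint objective: return both heads so train()/test() can unpack them
        return self.head_voice_cmd(h), self.head_voice_cmd_lng(h)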
def run_trials(args):
    device = get_torch_device(args)
    trial_params = generate_trial_params(args)
    for p in trial_params:
        trial_name = f"{args.model_name}__" + "__".join(
            [f"{k}_{p[k]}" for k in sorted(p.keys())])
        logging.info(f"running trial: {trial_name}")
        if results_exist(trial_name, args):
            logging.info(f"skipping {trial_name}")
            continue

        model_class = getattr(models, args.model_name)
        model = model_class(input_channels=args.input_channels,
                            conv_dropout_p=p['c_dropout_p'],
                            fc_dropout_p=p['f_dropout_p'],
                            voice_cmd_neuron_count=105,
                            voice_cmd_lng_neuron_count=5,
                            objective_type=p['obj']).to(device)

        epochs_results = train_on_fold(model,
                                       fold_id=p['fold_id'],
                                       feature_name=p['feature'],
                                       objective_type=p['obj'],
                                       args=args)

        # results for only one fold
        trial_results = {
            'fold_index': p['fold_id'],
            'feature_name': p['feature'],
            'epochs': epochs_results
        }
        save_results(trial_name, trial_results, args)
        # write_epoch_test_logits(model_name, all_folds_results)
        del model
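# Assumption (not defined in this excerpt): generate_trial_params(args) is expected to
# return an iterable of dicts carrying the keys consumed above ('fold_id', 'feature',
# 'obj', 'c_dropout_p', 'f_dropout_p'). A grid-search style sketch, using hypothetical
# args attributes to hold the candidate values, might look like this:
import itertools

def generate_trial_params_sketch(args):
    grid = itertools.product(args.fold_ids, args.feature_names, args.objective_types,
                             args.conv_dropout_ps, args.fc_dropout_ps)
    return [{'fold_id': fold_id, 'feature': feature, 'obj': obj,
             'c_dropout_p': c_p, 'f_dropout_p': f_p}
            for fold_id, feature, obj, c_p, f_p in grid]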
def main(_):
    checkpoint = Checkpoint(FLAGS.checkpoint_dir)

    utils.exists_or_mkdir(FLAGS.sample_dir)
    utils.exists_or_mkdir(FLAGS.log_dir)
    summaryWriter = tensorboardX.SummaryWriter(log_dir=FLAGS.log_dir)  # torch.utils.tensorboard.SummaryWriter(log_dir=FLAGS.log_dir)

    logger.info('[Params] lr:%f, size:%d, dataset:%s, av_gen:%d, n_disc:%d' %
                (FLAGS.learning_rate, FLAGS.output_size, FLAGS.dataset,
                 int(FLAGS.use_averaged_gen), FLAGS.n_discriminator))

    # dataset
    z_shape = (FLAGS.z_dim,)
    image_size = (FLAGS.output_size, FLAGS.output_size)
    image_shape = (3,) + image_size

    ds = dataset.datasets.from_name(name=FLAGS.dataset,
                                    data_folder=FLAGS.data_folder,
                                    output_size=image_size)
    batch = batch_gen.BatchWithNoise(ds,
                                     batch_size=FLAGS.batch_size,
                                     z_shape=z_shape,
                                     num_workers=10)

    # initialize device
    device = utils.get_torch_device()

    # model
    nn_model = models.model_factory.create_model(FLAGS.model_name,
                                                 device=device,
                                                 image_shape=image_shape,
                                                 z_shape=z_shape,
                                                 use_av_gen=FLAGS.use_averaged_gen,
                                                 g_tanh=False)
    nn_model.register_checkpoint(checkpoint)

    loss = gan_loss.js_loss()
    # lambd = lambda_scheduler.Constant(0.1)
    lambd = lambda_scheduler.ThresholdAnnealing(1000.,
                                                threshold=loss.lambda_switch_level,
                                                min_switch_step=FLAGS.lambda_switch_steps,
                                                verbose=True)
    checkpoint.register('lambda', lambd, True)

    trainer = Trainer(model=nn_model,
                      batch=batch,
                      loss=loss,
                      lr=FLAGS.learning_rate,
                      reg='gp',
                      lambd=lambd)
    trainer.sub_batches = FLAGS.batch_per_update
    trainer.register_checkpoint(checkpoint)

    it_start = checkpoint.load(FLAGS.checkpoint_it_to_load)
    trainer.update_lr()

    ##========================= LOAD CONTEXT ================================##
    context_path = os.path.join(FLAGS.checkpoint_dir, 'context.npz')
    sample_seed = None
    if os.path.exists(context_path):
        sample_seed = np.load(context_path)['z']
        if sample_seed.shape[0] != FLAGS.sample_size or sample_seed.shape[1] != FLAGS.z_dim:
            sample_seed = None
            logger.info('Invalid sample seed')
        else:
            logger.info('Sample seed loaded')

    if sample_seed is None:
        sample_seed = batch.sample_z(FLAGS.sample_size).data.numpy()
        np.savez(context_path, z=sample_seed)

    ##========================= TRAIN MODELS ================================##
    batches_per_epoch = 10000
    total_time = 0
    bLambdaSwitched = (it_start == 0)
    n_too_good_d = []
    number_of_iterations = FLAGS.epoch * batches_per_epoch

    for it in range(number_of_iterations):
        start_time = time.time()
        iter_counter = it + it_start

        # updates the discriminator
        # if iter_counter < 25 or iter_counter % 500 == 0:
        #     d_iter = 20
        # else:
        #     d_iter = 5
        if bLambdaSwitched:
            # if lambda was switched we want to keep the discriminator optimal
            logger.info('[!] Warming up discriminator')
            d_iter = 25
        else:
            d_iter = FLAGS.n_discriminator

        errD, s, errG, b_too_good_D = trainer.update(d_iter, 1)

        summaryWriter.add_scalar('d_loss', errD, iter_counter)
        summaryWriter.add_scalar('slope', s, iter_counter)
        summaryWriter.add_scalar('g_loss', errG, iter_counter)
        summaryWriter.add_scalar('loss', errD + float(lambd) * s**2, iter_counter)
        summaryWriter.add_scalar('lambda', float(lambd), iter_counter)

        # updating lambda
        n_too_good_d.append(b_too_good_D)
        if len(n_too_good_d) > 20:
            del n_too_good_d[0]

        bLambdaSwitched = lambd.update(errD)
        if not bLambdaSwitched and sum(n_too_good_d) > 10:
            bLambdaSwitched = lambd.switch()

        end_time = time.time()
        iter_time = end_time - start_time
        total_time += iter_time

        logger.info("[%2d/%2d] time: %4.4f, d_loss: %.8f, s: %.4f, g_loss: %.8f" %
                    (iter_counter, it_start + number_of_iterations, iter_time, errD, s, errG))

        if np.mod(iter_counter, FLAGS.sample_step) == 0 and it > 0:
            n = int(np.sqrt(FLAGS.sample_size))
            img = trainer.sample(sample_seed)
            img = img.data.cpu()

            img_tb = utils.image_to_tensorboard(torchvision.utils.make_grid(img, n))
            summaryWriter.add_image('samples', img_tb, iter_counter)

            utils.save_images(img.data.cpu().numpy(), [n, n],
                              './{}/train_{:02d}.png'.format(FLAGS.sample_dir, iter_counter))

        if np.mod(iter_counter, FLAGS.save_step) == 0 and it > 0:
            checkpoint.save(iter_counter)

    checkpoint.save(iter_counter)
                          criterion=criterion,
                          optimizer=optimizer,
                          device=config.device,
                          epoch_log=epoch_log)
        writer.add_scalar('training loss', train_loss, epoch)
        if epoch % config.save_every == 0:
            save_checkpoint(model, epoch, optimizer, train_loss,
                            chk_path + "/Epoch-" + str(epoch) + ".chk")
            # torch.save(model.state_dict(), chk_path+template_log+"-epoch-"+str(epoch)+".pth")

from utils import get_torch_device
from global_var import *

if __name__ == "__main__":
    device = get_torch_device()
    config = Config(model=MODEL_ANOMALY_DET,
                    dataset=UCFCrime2LocalClips_DATASET,
                    device=device,
                    num_epoch=100000,
                    save_every=1000,
                    learning_rate=0.01,
                    train_batch=64,
                    bag_size=32)
    source = FEAT_EXT_C3D  # resnetxt, resnetxt+s3d
    enviroment_config = {"home": HOME_UBUNTU}
    if source == FEAT_EXT_RESNEXT:
        # features_path = "/Users/davidchoqueluqueroman/Documents/DATASETS_Local/UCFCrime2Local/features_input(dynamic-images)_frames(16)"
        # features_path = "/content/DATASETS/UCFCrime2Local/features_input(dynamic-images)_frames(16)"
        # features_path = "/Users/davidchoqueluqueroman/Documents/DATASETS_Local/RWF-2000/features2D-train"
        features_path = "/content/DATASETS/UCFCrime2Local/features_from(ucfcrime2localClips)_input(dynamic-images)_frames(10)_num_segments(32)"
    LABELS_FILENAME = f"{MODEL_DIR}/label2index.json"

    if not os.path.exists(args.save_model_dir):
        os.makedirs(args.save_model_dir)

    # =====================================================================================
    #                               LOGGING INFO ...
    # =====================================================================================
    console_hdlr = logging.StreamHandler(sys.stdout)
    file_hdlr = logging.FileHandler(filename=f"{MODEL_DIR}/BERT_TokenClassifier_{START_EPOCH}_{EPOCHS}.log")
    logging.basicConfig(level=logging.INFO, handlers=[console_hdlr, file_hdlr])
    logging.info("Start Logging")
    logging.info(args)

    # Initialize random seeds and validate if there's a GPU available...
    device, USE_CUDA = utils.get_torch_device()
    random.seed(SEED_VAL)
    np.random.seed(SEED_VAL)
    torch.manual_seed(SEED_VAL)
    torch.cuda.manual_seed_all(SEED_VAL)

    # ==========================================================================================
    #                               LOAD TRAIN & DEV DATASETS
    # ==========================================================================================
    # Initialize tokenizer
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME,
                                              do_lower_case=DO_LOWERCASE,
                                              do_basic_tokenize=False)

    # Load train dataset
    train_label2index, train_inputs, train_masks, train_labels, train_lens, train_preds = utils.load_srl_dataset(
        TRAIN_DATA_PATH,
        tokenizer,
        max_len=SEQ_MAX_LEN,
        include_labels=True,
args = parser.cmd_parser.parse_args()
params = vars(args)

experiment_directory, checkpoint_directory, logs_directory, outputs_directory = utils.setup_directories(
    project_directory=args.project_directory,
    experiment_name=args.experiment_name,
    starting_iteration=args.starting_iteration,
)

utils.set_deterministic(is_deterministic=args.deterministic)
utils.save_params(mode=args.mode,
                  experiment_directory=experiment_directory,
                  params=params)

device = utils.get_torch_device(args.device)

data_loader = utils.get_data_loader(
    data_path=args.training_data_directory if args.mode == utils.TRAINING else args.testing_data_directory,
    batch_size=args.batch_size,
)

model = network.VectorQuantizedVAE()
model = model.to(device)

if args.loss == "Baur":
    loss_function = losses.BaurLoss(lambda_reconstruction=args.reconstruction_lambda)
else:
    image_shape = iter(data_loader).__next__()["T1"][DATA].shape[1:]
def train_on_fold(model, fold_id, feature_name, objective_type, args):
    torch.manual_seed(0)
    device = get_torch_device(args)
    results = {}

    train_loader, test_loader, train_bias_category_labels, test_bias_category_labels = get_loaders_for_fold(
        fold_id, feature_name, args.batch_size, args)

    logging.info("Model Summary :\n" + summary(
        model,
        torch.zeros((10, args.max_sequence_length, model.input_channels)).to(device),
        show_input=False))
    logging.info(f"train_n: {len(train_loader.dataset)}")
    logging.info(f"test_n: {len(test_loader.dataset)}")

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss(reduction='sum')

    for epoch in range(1, args.epochs + 1):
        # train on training set
        train(model, optimizer, criterion, objective_type, train_loader)

        # test on training set
        train_n, train_average_loss, train_acc, train_acc_by_bias_category, train_acc_lng, train_acc_by_bias_category_lng = \
            test(model, criterion, objective_type, train_loader, train_bias_category_labels)

        # test on test set
        test_n, test_average_loss, test_acc, test_acc_by_bias_category, test_acc_lng, test_acc_by_bias_category_lng = \
            test(model, criterion, objective_type, test_loader, test_bias_category_labels)

        # persist
        if args.persistence_interval > 0 and epoch % args.persistence_interval == 0:
            models.save_model(model, optimizer, fold_id, feature_name, epoch, args)

        if epoch % 10 == 0:
            logging.info(
                f"Epoch: {epoch}. Train Loss: {train_average_loss:0.4}. Test Loss: {test_average_loss:0.4}. Train Acc: {train_acc:0.4}. Test Acc: {test_acc:0.4}"
            )
        else:
            logging.debug(
                f"Epoch: {epoch}. Train Loss: {train_average_loss:0.4}. Test Loss: {test_average_loss:0.4}. Train Acc: {train_acc:0.4}. Test Acc: {test_acc:0.4}"
            )

        results[epoch] = {
            'epoch': epoch,
            'train_n': train_n,
            'train_loss': train_average_loss,
            'train_acc': train_acc,
            'train_acc_lng': train_acc_lng,
            'test_n': test_n,
            'test_loss': test_average_loss,
            'test_acc': test_acc,
            'test_acc_lng': test_acc_lng
        }
        for c in train_acc_by_bias_category:
            results[epoch][f"train_acc_{c}"] = train_acc_by_bias_category[c]
            results[epoch][f"train_n_{c}"] = int(np.sum(train_bias_category_labels[c]))
        for c in train_acc_by_bias_category_lng:
            results[epoch][f"train_acc_lng_{c}"] = train_acc_by_bias_category_lng[c]
        for c in test_acc_by_bias_category:
            results[epoch][f"test_acc_{c}"] = test_acc_by_bias_category[c]
            results[epoch][f"test_n_{c}"] = int(np.sum(test_bias_category_labels[c]))
        for c in test_acc_by_bias_category_lng:
            results[epoch][f"test_acc_lng_{c}"] = test_acc_by_bias_category_lng[c]

    return results
def test(model, criterion, objective_type, loader, bias_category_labels):
    device = get_torch_device(args)
    model.eval()

    accumulated_loss = 0
    pred_classes = []
    true_classes = []
    pred_classes_lng = []
    true_classes_lng = []

    for batch_idx, (x, y_voice_cmd, y_voice_cmd_lng) in enumerate(loader):
        x = x.to(device)
        y_voice_cmd = y_voice_cmd.to(device)
        y_voice_cmd_lng = y_voice_cmd_lng.to(device)

        outputs = model(x)

        if objective_type == 'voice_cmd':
            logits_voice_cmd = outputs
            pred_classes.extend(
                get_predictions_for_logits(logits_voice_cmd).cpu().numpy())
            true_classes.extend(y_voice_cmd.cpu().numpy())
            loss = criterion(logits_voice_cmd, y_voice_cmd)
        elif objective_type == 'voice_cmd__and__voice_cmd_lng':
            logits_voice_cmd, logits_voice_cmd_lng = outputs
            pred_classes.extend(
                get_predictions_for_logits(logits_voice_cmd).cpu().numpy())
            true_classes.extend(y_voice_cmd.cpu().numpy())
            pred_classes_lng.extend(
                get_predictions_for_logits(logits_voice_cmd_lng).cpu().numpy())
            true_classes_lng.extend(y_voice_cmd_lng.cpu().numpy())
            loss = (criterion(logits_voice_cmd, y_voice_cmd) +
                    criterion(logits_voice_cmd_lng, y_voice_cmd_lng)) / 2
        else:
            raise ValueError(f"Unknown objective type: {objective_type}")

        accumulated_loss += loss.item()

    n = len(true_classes)
    average_loss = accumulated_loss / n
    acc = sklearn.metrics.accuracy_score(true_classes, pred_classes)
    acc_by_bias_category = {
        category: sklearn.metrics.accuracy_score(true_classes, pred_classes, sample_weight=sw)
        for category, sw in bias_category_labels.items()
    }

    if objective_type == 'voice_cmd__and__voice_cmd_lng':
        acc_lng = sklearn.metrics.accuracy_score(true_classes_lng, pred_classes_lng)
        acc_by_bias_category_lng = {
            category: sklearn.metrics.accuracy_score(true_classes_lng, pred_classes_lng, sample_weight=sw)
            for category, sw in bias_category_labels.items()
        }
    else:
        acc_lng = -1
        acc_by_bias_category_lng = {category: -1 for category in bias_category_labels}

    return n, average_loss, acc, acc_by_bias_category, acc_lng, acc_by_bias_category_lng
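# Illustrative note (the values below are made up): bias_category_labels maps each bias
# category to a 0/1 sample-weight vector aligned with the dataset order, so the
# per-category accuracies computed above are simply weighted accuracies over that subset.
import numpy as np
import sklearn.metrics

_true = [0, 1, 1, 0]
_pred = [0, 1, 0, 0]
_bias_category_labels = {'category_a': np.array([1, 1, 0, 0]),
                         'category_b': np.array([0, 0, 1, 1])}
_acc_by_category = {
    c: sklearn.metrics.accuracy_score(_true, _pred, sample_weight=sw)
    for c, sw in _bias_category_labels.items()
}
# -> {'category_a': 1.0, 'category_b': 0.5}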
                         'Whether to use the averaged generator for sampling')
utils.flags.DEFINE_integer('n_samples', 1, 'Number of batches')
utils.flags.DEFINE_integer("checkpoint_it_to_load", -1, "Iteration to restore [-1]")

FLAGS = utils.flags.FLAGS()

checkpoint = Checkpoint(FLAGS.checkpoint_dir)
utils.exists_or_mkdir(FLAGS.sample_dir)

z_shape = (FLAGS.z_dim,)
image_size = (FLAGS.output_size, FLAGS.output_size)
image_shape = (3,) + image_size

device = utils.get_torch_device()

nn_model = models.model_factory.create_model(FLAGS.model_name,
                                             device=device,
                                             image_shape=image_shape,
                                             z_shape=z_shape,
                                             use_av_gen=FLAGS.use_averaged_gen)
nn_model.register_checkpoint(checkpoint)

if not checkpoint.load(FLAGS.checkpoint_it_to_load):
    raise RuntimeError('Cannot load checkpoint')

now = datetime.datetime.now()

for i in range(FLAGS.n_samples):
    z = np.random.randn(FLAGS.sample_size, FLAGS.z_dim).astype(np.float32)
    z = torch.tensor(z, device=device)