def build_or_load(allow_load=True):
    """Build the model stack and optionally restore saved weights.

    Parameters
    ----------
    allow_load : bool
        When True, try to load weights for the first model from
        ``MODEL_FILE`` (a module-level path -- assumed defined elsewhere
        in this file; TODO confirm).

    Returns
    -------
    The models returned by ``model.build_models()``.
    """
    from model import build_models
    models = build_models()
    models[0].summary()
    if allow_load:
        try:
            models[0].load_weights(MODEL_FILE)
            print('Loaded model from file.')
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; keep the best-effort behavior but only for real errors.
        except Exception:
            print('Unable to load model from file.')
    return models
import os
from pathlib import Path

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23; prefer the standalone `joblib` package, fall back for old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

import model
import prep_data

# Run from the project root so the relative "sample"/"new_data" paths resolve.
os.chdir(os.path.join(os.getcwd(), "Speech-Recognition/project"))

# Collect every .wav sample and split each file per spoken digit.
# NOTE(review): `splits[0]` raises IndexError if no .wav files are found.
paths = Path(r"sample").glob("**/*.wav")
splits = list(map(prep_data.split_numbers, paths))
# Merge the list of per-file dicts into one dict-of-lists, keyed like the
# first split result.
splits_dict = {k: [d[k] for d in splits] for k in splits[0]}
prep_data.save_splits(splits_dict, "new_data")

# Augment each digit directory (new_data/0 .. new_data/9) and merge the
# resulting dicts into a single mapping.
augmented_data = list(
    map(prep_data.augment, ["new_data/" + str(i) for i in range(10)]))
augmented_dict = {k: v for item in augmented_data for k, v in item.items()}
prep_data.save_augments(augmented_dict, "new_data")

# Build the per-digit models and persist them for later use.
num_models = model.build_models("new_data/")
joblib.dump(num_models, "saved_num_models.pkl")
def main():
    """Build the C3D + dense-video-captioning models, restore their
    checkpoints, and run inference through ``run_video``.

    Depends on the module-level ``args`` namespace (CLI options) and on
    ``C3D``, ``PCA``, ``build_models`` and ``run_video`` defined elsewhere
    in the project.
    """
    global args
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    # Fall back to CPU when CUDA is not actually available, regardless of flag.
    if not torch.cuda.is_available():
        args.use_gpu = False
    print("USE GPU: {}".format(args.use_gpu))

    # Data load: the training JSON supplies the vocabulary and caption length.
    data_file = os.path.join(args.root, args.train_data)
    with open(data_file) as f:
        data = json.load(f)
    word_to_idx = data['word_to_idx']
    vocab_size = len(word_to_idx)
    idx_to_word = {i: w for w, i in word_to_idx.items()}
    cap_length = data['cap_length']
    # presumably a pre-fitted PCA for reducing C3D features -- TODO confirm
    pca = PCA(args.pca_file)

    # Build model: feature extractor plus attribute/proposal/caption modules.
    model_c3d = C3D()
    model_att, model_tep, model_sg, args.scale_ratios = build_models(
        in_c=args.feature_dim, num_class=args.num_class,
        voca_size=vocab_size, caps_length=cap_length,
        embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim,
        use_gpu=args.use_gpu)
    if args.use_gpu:
        model_c3d = model_c3d.cuda()
        model_att = model_att.cuda()
        model_tep = model_tep.cuda()
        model_sg = model_sg.cuda()

    # Load resume from a checkpoint. Note: the C3D checkpoint stores the raw
    # state dict, while the others wrap theirs under named keys.
    if args.resume_c3d:
        if os.path.isfile(args.resume_c3d):
            print("=> loading checkpoint "
                  "for C3D module '{}'".format(args.resume_c3d))
            checkpoint = torch.load(args.resume_c3d)
            model_c3d.load_state_dict(checkpoint)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_c3d))
    if args.resume_att:
        if os.path.isfile(args.resume_att):
            print("=> loading checkpoint "
                  "for attribute detector module '{}'".format(args.resume_att))
            checkpoint = torch.load(args.resume_att)
            model_att.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_att))
    if args.resume_dvc:
        if os.path.isfile(args.resume_dvc):
            print("=> loading checkpoint "
                  "for DVC with Cross Entropy module '{}'".format(
                      args.resume_dvc))
            checkpoint = torch.load(args.resume_dvc)
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_dvc))

    # Run eval
    run_video(model_c3d, model_att, model_tep, model_sg, pca, idx_to_word)
def main():
    """Train/validate the dense-video-captioning models on ActivityNet.

    Reads configuration from the module-level ``args`` namespace; builds the
    data loaders and models, optionally resumes from checkpoints, then runs
    the train/validate loop, checkpointing the best-scoring epoch.
    """
    global args
    # The pipeline below assumes one video per batch (see raise).
    if args.batch_size != 1:
        raise SizeError()
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    if not torch.cuda.is_available():
        args.use_gpu = False
    print("USE GPU: {}".format(args.use_gpu))

    # Data loader
    train_dataset = ActivityNet(args.root, args.train_data, args.train_ids,
                                args.feature_set, caps=args.train_vec)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.workers,
                              drop_last=True)
    # Validation uses 'eval' mode (metric evaluation) or caption vectors,
    # depending on whether --evaluate is set.
    if args.evaluate:
        val_loader = DataLoader(ActivityNet(args.root, args.val_data,
                                            args.val_ids, args.feature_set,
                                            mode='eval'),
                                batch_size=args.batch_size, shuffle=False,
                                num_workers=args.workers)
    else:
        val_loader = DataLoader(ActivityNet(args.root, args.val_data,
                                            args.val_ids, args.feature_set,
                                            caps=args.val_vec),
                                batch_size=args.batch_size, shuffle=False,
                                num_workers=args.workers)
    print("Train dataset : {} / Validation dataset: {}".format(
        len(train_loader.dataset), len(val_loader.dataset)))

    # Build model: attribute detector, temporal event proposal, sentence
    # generator (scale_ratios is stored back on args for later use).
    model_att, model_tep, model_sg, args.scale_ratios = build_models(
        in_c=args.feature_dim, num_class=args.num_class,
        voca_size=train_dataset.vocab_size,
        caps_length=train_dataset.cap_length,
        embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim,
        use_gpu=args.use_gpu)
    init_eval_metric()
    if args.use_gpu:
        model_att = model_att.cuda()
        model_tep = model_tep.cuda()
        model_sg = model_sg.cuda()

    # Define loss function and optimizer. Only TEP and SG are optimized;
    # the attribute detector stays frozen.
    criterion = DVCLoss(alpha=args.alpha, beta=args.beta,
                        alpha1=args.alpha1, alpha2=args.alpha2,
                        lambda1=args.lambda1, lambda2=args.lambda2,
                        use_gpu=args.use_gpu)
    params = list(model_tep.parameters()) + list(model_sg.parameters())
    if args.optim == 'adam':
        optimizer = optim.Adam(params, lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optim == 'rms':
        optimizer = optim.RMSprop(params, lr=args.lr)
    else:
        print("Incorrect optimizer")
        return

    # Print the full run configuration to the log file and stdout.
    text = "\nSave file name : {}\n" \
           "Resume Attribute Detector : {}\n" \
           "Resume Sentence Generator : {}\n" \
           "Resume Dense Video Captioning with Cross Entropy Loss : {}\n" \
           "Resume Dense Video Captioning : {}\n" \
           "Reinforcement learning : {}\n" \
           "Start epoch : {}\nMax epoch : {}\n" \
           "Batch size : {}\nOptimizer : {}\n" \
           "Learning rate : {}\nMomentum : {}\nWeight decay : {}\n" \
           "Feature dimension : {}\nNum class : {}\n" \
           "Embedding dimension : {}\nHidden dimension : {}\n" \
           "Threshold : {}\nAlpha : {}\nBeta : {}\n" \
           "Alpha1 : {}\nAlpha2 : {}\nLambda1 : {}\nLambda2 : {}\n" \
           "METEOR weight : {}\nCIDEr weight : {}\nBleu@4 weight : {}\n".format(
               args.file_name, args.resume_att, args.resume_sg,
               args.resume_dvc_xe, args.resume, args.rl_flag,
               args.start_epoch, args.epochs, args.batch_size, args.optim,
               args.lr, args.momentum, args.weight_decay, args.feature_dim,
               args.num_class, args.embedding_dim, args.hidden_dim,
               args.threshold, args.alpha, args.beta, args.alpha1,
               args.alpha2, args.lambda1, args.lambda2, args.meteor_weight,
               args.cider_weight, args.bleu_weight)
    text = '=' * 40 + text + '=' * 40 + '\n'
    with open('./log/' + args.file_name + '.txt', 'w') as f:
        print(text, file=f)
    print(text)

    # Load resume from a checkpoint. Only the full DVC checkpoint (args.resume)
    # also restores epoch, best score, and optimizer state.
    best_score = 0.0
    if args.resume_att:
        if os.path.isfile(args.resume_att):
            print("=> loading checkpoint "
                  "for attribute detector module '{}'".format(args.resume_att))
            checkpoint = torch.load(args.resume_att)
            model_att.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_att))
    if args.resume_sg:
        if os.path.isfile(args.resume_sg):
            print("=> loading checkpoint "
                  "for sentence generation module '{}'".format(args.resume_sg))
            checkpoint = torch.load(args.resume_sg)
            model_sg.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_sg))
    if args.resume_dvc_xe:
        if os.path.isfile(args.resume_dvc_xe):
            print("=> loading checkpoint "
                  "for DVC with "
                  "Cross Entropy module '{}'".format(args.resume_dvc_xe))
            checkpoint = torch.load(args.resume_dvc_xe)
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_dvc_xe))
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint "
                  "for DVC module '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}'\n"
                  "\t : epoch {}, best score {}".format(
                      args.resume, checkpoint['epoch'],
                      checkpoint['best_score']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Validation-only mode: run one pass and exit without training.
    if args.validation:
        if args.evaluate:
            _ = evaluate_gt_proposal(val_loader, model_att, model_tep,
                                     model_sg, train_dataset.idx_to_word)
        else:
            _ = validate(val_loader, model_att, model_tep, model_sg)
        return

    for epoch in range(args.start_epoch, args.epochs):
        print("Epoch:", epoch)
        # train for one epoch (only the aggregate loss is logged below)
        train_avg_loss, train_avg_loss_event, train_avg_loss_tcr, \
            train_avg_loss_des, train_avg_loss_self_reward = train(
                train_loader, model_att, model_tep, model_sg, criterion,
                optimizer, epoch)
        # validation for one epoch; with --evaluate the score is METEOR
        if args.evaluate:
            scores = evaluate_gt_proposal(val_loader, model_att, model_tep,
                                          model_sg,
                                          train_dataset.idx_to_word,
                                          epoch + 1)
            score = scores['METEOR']
        else:
            score = validate(val_loader, model_att, model_tep, model_sg)
        # remember best acc and save checkpoint
        is_best = score > best_score
        best_score = max(score, best_score)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'tep_state_dict': model_tep.state_dict(),
                'sg_state_dict': model_sg.state_dict(),
                'best_score': best_score,
                'optimizer': optimizer.state_dict(),
            },
            is_best, epoch + 1, filename=args.file_name,
            save_every=args.save_every)
        # log: append per-epoch summary to the same file opened above
        text = "{:04d} Epoch : Train loss ({:.4f}), " \
               "Validation score ({:.4f})\n".format(
                   epoch+1, train_avg_loss, score)
        with open('./log/' + args.file_name + '.txt', 'a') as f:
            print(text, file=f)
def train_main(opt=None, bucket_iter_train=None, bucket_iter_val=None,
               models=None):
    """Train the style/semantics disentanglement model.

    Alternates between (a) training a scene/style discriminator and (b)
    training the encoders + decoder against reconstruction, similarity and
    adversarial losses. If iterators/models are not supplied they are built
    from ``opt.dataset``. Returns ``(bucket_iter_train, bucket_iter_val,
    models)`` so a caller can resume.

    Depends on project helpers ``T`` (to-device wrapper), ``N`` (to-numpy),
    ``ContrastiveLoss``, ``get_options``, dataset builders, ``build_models``
    and ``eval_sample`` -- all defined elsewhere.
    """
    if not opt:
        opt = get_options(True)
    contrastiveLoss = T(ContrastiveLoss())
    nllloss_for_recon = T(
        torch.nn.NLLLoss(ignore_index=1))  # ignores the padding characters
    bce = T(torch.nn.BCELoss())
    bce2 = T(torch.nn.BCELoss())

    # --------- training funtions ------------------------------------
    def train(b, models, dont_optimize):
        # One optimization step for encoders + decoder on batch `b`.
        # NOTE(review): closes over optimizer_* variables defined further
        # down in train_main; must not be called before they exist.
        models.en_sty.zero_grad()
        models.en_sem.zero_grad()
        models.decoder.zero_grad()
        # h_sem0 = models.en_sem(b.sent_0)  # sent0 = b.sent_0 #sem A , style A
        h_sem1 = models.en_sem(b.sent_1)  # sent1 = b.sent_1 #sem A, style B
        h_semX = models.en_sem(b.sent_x)  # sentX = b.sent_x #semAorC , style A
        recon_target = b.sent_0_target  # one-hot

        ######### SIM LOSS #########
        # In practice, it does not help, and thus, usually ignored.
        sim_loss = contrastiveLoss(h_sem1, h_semX, T(torch.round(b.is_x_0)))

        ######### RECONSTRUCTION LOSS ######### note: quite slow
        # reconstruct sent0 from semantics of sent1 (==sem of sent0,
        # different style), and style of sent0.
        h_sty0 = models.en_sty(b.sent_0)
        merged = torch.cat([h_sem1, h_sty0], dim=1)
        merged.unsqueeze_(
            0)  # 32x25 -> 1x32x25 . 1 is for one hidden-layer (not-stacked)
        recon_sent0, _, _ = models.decoder(
            inputs=recon_target,  # pass not None for teacher focring (batch, seq_len, input_size)
            encoder_hidden=merged,  # (num_layers * num_directions, batch_size, hidden_size)
            encoder_outputs=None,  # pass not None for attention
            teacher_forcing_ratio=1,
            function=F.log_softmax
            # in(0, 1-random.random()* epoch * 0.1) #range 0..1 , must pass
            # inputs if >0. as epochs increase, it's lower
        )
        # good reconstruction-loss need to:
        # ignore padding (it is easy to guess always <pad> as a result and to
        # be usually right. also called masking)
        # consider length of sentences. is a short 5 word sentnece weight the
        # same as long 40 words sentence?
        # 'elementwise_mean' means look at each word by itself. one can change
        # this to be on sentence level
        # we calculate it manually as sizes may not match in the returned
        # array using seq2seq library
        # in the end, we sum the loss per timestamp and divide by number of
        # timestamps
        acc_loss, norm_term = 0, 0
        for step, timestamp_output in enumerate(
                recon_sent0):  # list of 65 x [32, 2071]
            batch_size = recon_target.size(0)  # NOTE(review): unused local
            if step + 1 >= recon_target.size(
                    1):  # in beginning, model might output 200 steps,
                # later will converge to target
                # print ('breaking!!! at step',step)
                break
            gold = recon_target[:, step +
                                1]  # this is one timstamp across batches
            curr_loss = nllloss_for_recon(timestamp_output, gold)
            acc_loss += curr_loss
            norm_term += 1
        rec_loss = acc_loss / norm_term

        ######### ADV LOSS #########
        # Fool the style discriminator: push its output toward 0.5
        # (maximum uncertainty) on pairs of style encodings.
        h_sty1 = models.en_sty(b.sent_1)
        h_styX = models.en_sty(b.sent_x)
        adv_disc_p = models.adv_disc(torch.cat([h_sty1, h_styX], dim=1))
        adv_target = T(
            torch.FloatTensor(
                np.full(shape=(b.sent_0[0].shape[0], 1), fill_value=0.5)))
        # the loss below is a parabula with min at log(0.5)=0.693... see article
        adv_disc_loss = bce(adv_disc_p, adv_target) + np.log(
            0.5)  # np.log(0.5)=-0.693 ,
        # logger.debug(f'### adv_disc_loss {N(adv_disc_p.data[:5]).T} target={N(adv_target.data[:5]).T} bce={adv_disc_loss}')
        # logger.debug(f' sanity test: on first step, you expect adv_disc_loss to be near zero')

        ######### BACKWARD #########
        loss = rec_loss + sim_loss * opt.sem_sim_weight + opt.sd_weight * adv_disc_loss  # rec_loss + sim_loss + opt.sd_weight*adv_disc_loss
        if not dont_optimize:  # used in validation(eval mode) only
            loss.backward()
            optimizer_en_sem.step()
            optimizer_en_sty.step()
            optimizer_decoder.step()
        return N(sim_loss.data) * opt.sem_sim_weight, (
            rec_loss.data), N(adv_disc_loss.data) * opt.sd_weight  # N

    def train_scene_discriminator(b, models, dont_optimize):
        # One optimization step for the adversarial style discriminator:
        # it must predict whether two style encodings share semantics.
        models.adv_disc.zero_grad()
        h_sty0 = models.en_sty(T(b.sent_1))
        h_sty0or2 = models.en_sty(
            T(b.sent_x
              ))  # same style, same or different semantics with random chance
        merged = torch.cat([h_sty0, h_sty0or2], dim=1)
        out = models.adv_disc(merged).flatten()
        y = T(b.is_x_0)
        bce = bce2(out, y)  # NOTE(review): shadows the outer `bce` loss module
        if not dont_optimize:
            bce.backward()
            optimizer_adv_disc.step()
        acc = np.round(N(out.detach())) == np.round(N(y))
        logger.debug(f'adv_disc out {out.shape} is_x_0 {y.shape}')
        logger.debug(
            f'out {out.flatten()} y {y.flatten()} acc {acc} bce {bce.data}')
        acc = acc.reshape(-1)
        acc = acc.sum() / len(acc)
        return N(bce.data), N(acc)

    """ one epoc train, runs for epoch_size batches """

    def one_epoc(epoch, bucket_iter_train, models, dont_optimize):
        # One epoch: opt.epoch_size batches through both the discriminator
        # and the main model; logs averaged losses at the end.
        logger.debug('one_epoc starts')
        epoch_sim_loss, epoch_rec_loss, epoch_anti_disc_loss, epoch_sd_loss, epoch_sd_acc = 0, 0, 0, 0, 0
        training_batch_generator = None
        for i in range(opt.epoch_size):
            # take next batch from current iterator. If it finished,
            # create a new iterator
            b = None
            try:
                b = next(training_batch_generator)
            except:  # NOTE(review): bare except; also catches the initial TypeError on None
                logger.debug('creating new iterator')
                training_batch_generator = iter(
                    bucket_iter_train)  # only if no choice... it's 1.5 min
                b = next(training_batch_generator)
            # train scene discriminator
            logger.debug(f'b_sent_0 {b.sent_0[0].shape}{b.sent_0[1].shape}'
                         )  # TEXT.reverse(b.sent_0))
            sd_loss, sd_acc = train_scene_discriminator(
                b, models, dont_optimize)
            logger.debug('train_scene_discriminator done')
            epoch_sd_loss += sd_loss
            epoch_sd_acc += sd_acc
            # train main model
            sim_loss, rec_loss, anti_disc_loss = train(b, models,
                                                       dont_optimize)
            logger.debug('train done')
            epoch_sim_loss += sim_loss
            epoch_rec_loss += rec_loss
            epoch_anti_disc_loss += anti_disc_loss
        logger.setLevel(logging.INFO)
        logger.info(
            '[%02d] %s rec loss: %.4f | sim loss: %.4f | anti_disc_loss: %.4f || scene disc %.4f %.3f%% '
            % (epoch, "eval" if dont_optimize else "train",
               epoch_rec_loss / opt.epoch_size,
               epoch_sim_loss / opt.epoch_size,
               epoch_anti_disc_loss / opt.epoch_size,
               epoch_sd_loss / opt.epoch_size,
               100 * epoch_sd_acc / opt.epoch_size))

    def set_train(models):
        # Switch all sub-models to training mode.
        models.en_sty.train()  # and not eval() mode
        models.en_sem.train()
        models.decoder.train()
        models.adv_disc.train()

    def set_eval(models):
        # Switch all sub-models to evaluation mode.
        models.en_sty.eval()
        models.en_sem.eval()
        models.decoder.eval()
        models.adv_disc.eval()

    # --------- training loop ------------------------------------
    logger = logging.getLogger()
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.DEBUG,
                        datefmt='%I:%M:%S')
    logger.setLevel(logging.INFO)  # not DEBUG
    print('running train with options:', opt)
    # Build data and models only when the caller supplied none of them.
    if (bucket_iter_train == None and bucket_iter_val == None
            and models == None):
        if opt.dataset == 'bible':
            bucket_iter_train, bucket_iter_val = build_bible_datasets(
                verbose=False)
        elif opt.dataset == 'quora':
            bucket_iter_train, bucket_iter_val = build_quora_dataset(
                verbose=False)
        else:
            raise ValueError(f'unkown dataset type {opt.dataset}')
        models = build_models(bucket_iter_train.dataset, opt)
    # NOTE(review): `optimizer` is only bound for 'adam'; any other value of
    # opt.optimizer raises NameError below.
    if opt.optimizer == 'adam':
        optimizer = torch.optim.Adam
    epoc_count = 0
    for lr in [opt.lr]:
        print('training with lr', lr)
        optimizer_en_sem = optimizer(models.en_sem.parameters(),
                                     lr,
                                     betas=(opt.beta1, 0.999))
        optimizer_en_sty = optimizer(models.en_sty.parameters(),
                                     lr,
                                     betas=(opt.beta1, 0.999))
        optimizer_decoder = optimizer(models.decoder.parameters(),
                                      lr,
                                      betas=(opt.beta1, 0.999))
        optimizer_adv_disc = torch.optim.SGD(
            models.adv_disc.parameters(),
            opt.adv_disc_lr)  # always using SGD for discriminator
        for epoch in range(0, opt.epocs):
            set_train(models)
            one_epoc(epoc_count, bucket_iter_train, models,
                     dont_optimize=False)
            if epoch % 10 == 0:
                # validations once every 10 epocs. done by running a full
                # epoch cylce on validation WITHOUT updating gradiants
                set_eval(models)
                one_epoc(epoc_count, bucket_iter_val, models,
                         dont_optimize=True)
                eval_sample(bucket_iter_val, models)
            epoc_count += 1
    print('training loop done')
    return bucket_iter_train, bucket_iter_val, models