Esempio n. 1
0
def build_or_load(allow_load=True):
    """Build the models and optionally restore saved weights.

    Args:
        allow_load: When True, try to load weights for the first model
            from the module-level ``MODEL_FILE``; on any failure the
            freshly initialized weights are kept (best-effort load).

    Returns:
        The list of models produced by ``model.build_models()``.
    """
    from model import build_models
    models = build_models()
    models[0].summary()
    if allow_load:
        try:
            models[0].load_weights(MODEL_FILE)
            print('Loaded model from file.')
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed during weight loading.
            print('Unable to load model from file.')
    return models
Esempio n. 2
0
def build_or_load(allow_load=True):
    """Build the models and optionally restore saved weights.

    Args:
        allow_load: When True, try to load weights for the first model
            from the module-level ``MODEL_FILE``; on any failure the
            freshly initialized weights are kept (best-effort load).

    Returns:
        The list of models produced by ``model.build_models()``.
    """
    from model import build_models
    models = build_models()
    models[0].summary()
    if allow_load:
        try:
            models[0].load_weights(MODEL_FILE)
            print('Loaded model from file.')
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed during weight loading.
            print('Unable to load model from file.')
    return models
# Pipeline script: split sample WAVs into per-digit clips, augment them,
# train per-digit models, and persist the trained models to disk.
import prep_data
import model
import os
from pathlib import Path

# `sklearn.externals.joblib` was removed in scikit-learn 0.23; prefer the
# standalone `joblib` package and fall back for legacy environments.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

os.chdir(os.path.join(os.getcwd(), "Speech-Recognition/project"))

# Split every sample .wav file into its per-digit chunks.
paths = Path(r"sample").glob("**/*.wav")
splits = [prep_data.split_numbers(p) for p in paths]
# Merge the per-file dicts into one dict of lists, keyed like the first
# result. NOTE(review): raises IndexError if no .wav files were found.
splits_dict = {k: [d[k] for d in splits] for k in splits[0]}

prep_data.save_splits(splits_dict, "new_data")

# Augment each digit directory 0-9 (replaces the hard-coded path list).
augmented_data = [prep_data.augment(f"new_data/{digit}") for digit in range(10)]

# Flatten the list of per-digit dicts into a single dict.
augmented_dict = {k: v for item in augmented_data for k, v in item.items()}

prep_data.save_augments(augmented_dict, "new_data")

num_models = model.build_models("new_data/")

joblib.dump(num_models, "saved_num_models.pkl")
Esempio n. 4
0
def main():
    """Evaluation entry point: load vocabulary/caption metadata, build the
    C3D + attribute/TEP/sentence-generator models, restore any requested
    checkpoints, and run dense video captioning via ``run_video``.

    Reads all configuration from the module-level ``args`` namespace and
    mutates ``args.use_gpu`` and ``args.scale_ratios`` as side effects.
    """
    global args

    # Seed for reproducibility and pin visible GPUs before any CUDA call.
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    if not torch.cuda.is_available():
        args.use_gpu = False
    print("USE GPU: {}".format(args.use_gpu))

    # Data load: the training JSON provides the word index and caption length.
    data_file = os.path.join(args.root, args.train_data)
    with open(data_file) as f:
        data = json.load(f)
    word_to_idx = data['word_to_idx']
    vocab_size = len(word_to_idx)
    # Inverse mapping used to decode generated caption indices back to words.
    idx_to_word = {i: w for w, i in word_to_idx.items()}
    cap_length = data['cap_length']
    # PCA projection for the features — presumably precomputed; confirm
    # the file format expected by the project's PCA class.
    pca = PCA(args.pca_file)

    # Build model. build_models also returns the anchor scale ratios,
    # which are stored back onto args for downstream use.
    model_c3d = C3D()
    model_att, model_tep, model_sg, args.scale_ratios = build_models(
        in_c=args.feature_dim,
        num_class=args.num_class,
        voca_size=vocab_size,
        caps_length=cap_length,
        embedding_dim=args.embedding_dim,
        hidden_dim=args.hidden_dim,
        use_gpu=args.use_gpu)
    if args.use_gpu:
        model_c3d = model_c3d.cuda()
        model_att = model_att.cuda()
        model_tep = model_tep.cuda()
        model_sg = model_sg.cuda()

    # Load resume from a checkpoint.
    # NOTE(review): the C3D checkpoint is loaded as a raw state dict while
    # the others are wrapped in dicts with named keys — presumably saved by
    # different training scripts; confirm against the checkpoint writers.
    if args.resume_c3d:
        if os.path.isfile(args.resume_c3d):
            print("=> loading checkpoint "
                  "for C3D module '{}'".format(args.resume_c3d))
            checkpoint = torch.load(args.resume_c3d)
            model_c3d.load_state_dict(checkpoint)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_c3d))
    if args.resume_att:
        if os.path.isfile(args.resume_att):
            print("=> loading checkpoint "
                  "for attribute detector module '{}'".format(args.resume_att))
            checkpoint = torch.load(args.resume_att)
            model_att.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_att))
    if args.resume_dvc:
        if os.path.isfile(args.resume_dvc):
            print("=> loading checkpoint "
                  "for DVC with Cross Entropy module '{}'".format(
                      args.resume_dvc))
            # A single DVC checkpoint holds both the TEP and SG state dicts.
            checkpoint = torch.load(args.resume_dvc)
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_dvc))

    # Run eval
    run_video(model_c3d, model_att, model_tep, model_sg, pca, idx_to_word)
Esempio n. 5
0
def main():
    """Training entry point for the dense video captioning pipeline.

    Builds the ActivityNet data loaders, the attribute/TEP/sentence-generator
    models, loss and optimizer; optionally resumes from checkpoints; then
    either runs a single validation pass (``args.validation``) or the full
    train/validate/checkpoint loop.

    Reads all configuration from the module-level ``args`` namespace and
    mutates ``args.use_gpu``, ``args.scale_ratios`` and ``args.start_epoch``
    as side effects. Raises ``SizeError`` unless ``args.batch_size == 1``.
    """
    global args

    # The pipeline only supports batch size 1 (SizeError is project-defined).
    if args.batch_size != 1:
        raise SizeError()

    # Seed for reproducibility and pin visible GPUs before any CUDA call.
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    if not torch.cuda.is_available():
        args.use_gpu = False
    print("USE GPU: {}".format(args.use_gpu))

    # Data loader
    train_dataset = ActivityNet(args.root,
                                args.train_data,
                                args.train_ids,
                                args.feature_set,
                                caps=args.train_vec)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              drop_last=True)
    # With --evaluate the validation set is built in 'eval' mode (metric
    # scoring); otherwise it carries caption vectors for loss-based scoring.
    if args.evaluate:
        val_loader = DataLoader(ActivityNet(args.root,
                                            args.val_data,
                                            args.val_ids,
                                            args.feature_set,
                                            mode='eval'),
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=args.workers)
    else:
        val_loader = DataLoader(ActivityNet(args.root,
                                            args.val_data,
                                            args.val_ids,
                                            args.feature_set,
                                            caps=args.val_vec),
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=args.workers)

    print("Train dataset : {} / Validation dataset: {}".format(
        len(train_loader.dataset), len(val_loader.dataset)))

    # Build model. build_models also returns the anchor scale ratios,
    # stored back onto args for downstream use.
    model_att, model_tep, model_sg, args.scale_ratios = build_models(
        in_c=args.feature_dim,
        num_class=args.num_class,
        voca_size=train_dataset.vocab_size,
        caps_length=train_dataset.cap_length,
        embedding_dim=args.embedding_dim,
        hidden_dim=args.hidden_dim,
        use_gpu=args.use_gpu)
    init_eval_metric()
    if args.use_gpu:
        model_att = model_att.cuda()
        model_tep = model_tep.cuda()
        model_sg = model_sg.cuda()

    # Define loss function and optimizer. Only TEP and SG parameters are
    # trained; the attribute detector is used frozen (not in `params`).
    criterion = DVCLoss(alpha=args.alpha,
                        beta=args.beta,
                        alpha1=args.alpha1,
                        alpha2=args.alpha2,
                        lambda1=args.lambda1,
                        lambda2=args.lambda2,
                        use_gpu=args.use_gpu)
    params = list(model_tep.parameters()) + list(model_sg.parameters())
    if args.optim == 'adam':
        optimizer = optim.Adam(params,
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(params,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optim == 'rms':
        optimizer = optim.RMSprop(params, lr=args.lr)
    else:
        print("Incorrect optimizer")
        return

    # Print a banner with the full run configuration to the log file and
    # to stdout, so every run is self-describing.
    text = "\nSave file name : {}\n" \
           "Resume Attribute Detector : {}\n" \
           "Resume Sentence Generator : {}\n" \
           "Resume Dense Video Captioning with Cross Entropy Loss : {}\n" \
           "Resume Dense Video Captioning : {}\n" \
           "Reinforcement learning : {}\n" \
           "Start epoch : {}\nMax epoch : {}\n" \
           "Batch size : {}\nOptimizer : {}\n" \
           "Learning rate : {}\nMomentum : {}\nWeight decay : {}\n" \
           "Feature dimension : {}\nNum class : {}\n" \
           "Embedding dimension : {}\nHidden dimension : {}\n" \
           "Threshold : {}\nAlpha : {}\nBeta : {}\n" \
           "Alpha1 : {}\nAlpha2 : {}\nLambda1 : {}\nLambda2 : {}\n" \
           "METEOR weight : {}\nCIDEr weight : {}\nBleu@4 weight : {}\n".format(
            args.file_name, args.resume_att, args.resume_sg, args.resume_dvc_xe,
            args.resume, args.rl_flag, args.start_epoch, args.epochs,
            args.batch_size, args.optim, args.lr, args.momentum, args.weight_decay,
            args.feature_dim, args.num_class, args.embedding_dim, args.hidden_dim,
            args.threshold, args.alpha, args.beta,
            args.alpha1, args.alpha2, args.lambda1, args.lambda2,
            args.meteor_weight, args.cider_weight, args.bleu_weight
            )
    text = '=' * 40 + text + '=' * 40 + '\n'
    # NOTE(review): assumes ./log/ exists — confirm it is created elsewhere.
    with open('./log/' + args.file_name + '.txt', 'w') as f:
        print(text, file=f)
    print(text)

    # Load resume from a checkpoint. The partial-module resumes (att/sg/
    # dvc_xe) restore weights only; the full DVC resume (`args.resume`)
    # also restores epoch, best score and optimizer state, so it must run
    # last to take precedence.
    best_score = 0.0
    if args.resume_att:
        if os.path.isfile(args.resume_att):
            print("=> loading checkpoint "
                  "for attribute detector module '{}'".format(args.resume_att))
            checkpoint = torch.load(args.resume_att)
            model_att.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_att))
    if args.resume_sg:
        if os.path.isfile(args.resume_sg):
            print("=> loading checkpoint "
                  "for sentence generation module '{}'".format(args.resume_sg))
            checkpoint = torch.load(args.resume_sg)
            model_sg.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_sg))
    if args.resume_dvc_xe:
        if os.path.isfile(args.resume_dvc_xe):
            print("=> loading checkpoint "
                  "for DVC with Cross Entropy module '{}'".format(
                      args.resume_dvc_xe))
            checkpoint = torch.load(args.resume_dvc_xe)
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume_dvc_xe))
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint "
                  "for DVC module '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model_tep.load_state_dict(checkpoint['tep_state_dict'])
            model_sg.load_state_dict(checkpoint['sg_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}'\n"
                  "\t : epoch {}, best score {}".format(
                      args.resume, checkpoint['epoch'],
                      checkpoint['best_score']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Validation-only mode: run one pass and exit without training.
    if args.validation:
        if args.evaluate:
            _ = evaluate_gt_proposal(val_loader, model_att, model_tep,
                                     model_sg, train_dataset.idx_to_word)
        else:
            _ = validate(val_loader, model_att, model_tep, model_sg)
        return

    for epoch in range(args.start_epoch, args.epochs):
        print("Epoch:", epoch)

        # train for one epoch
        train_avg_loss, train_avg_loss_event, train_avg_loss_tcr, \
            train_avg_loss_des, train_avg_loss_self_reward = train(
                train_loader, model_att, model_tep, model_sg, criterion, optimizer, epoch)

        # validation for one epoch; with --evaluate the METEOR metric is
        # used as the model-selection score, otherwise the validation loss.
        if args.evaluate:
            scores = evaluate_gt_proposal(val_loader, model_att, model_tep,
                                          model_sg, train_dataset.idx_to_word,
                                          epoch + 1)
            score = scores['METEOR']
        else:
            score = validate(val_loader, model_att, model_tep, model_sg)

        # remember best acc and save checkpoint
        is_best = score > best_score
        best_score = max(score, best_score)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'tep_state_dict': model_tep.state_dict(),
                'sg_state_dict': model_sg.state_dict(),
                'best_score': best_score,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            epoch + 1,
            filename=args.file_name,
            save_every=args.save_every)

        # log: append the per-epoch summary to the run's log file.
        text = "{:04d} Epoch : Train loss ({:.4f}), " \
               "Validation score ({:.4f})\n".format(
                epoch+1, train_avg_loss, score)
        with open('./log/' + args.file_name + '.txt', 'a') as f:
            print(text, file=f)
Esempio n. 6
0
def train_main(opt=None,
               bucket_iter_train=None,
               bucket_iter_val=None,
               models=None):
    """Train the style/semantics disentanglement model.

    Alternates between training the adversarial scene (style) discriminator
    and the main encoders/decoder, for ``opt.epocs`` epochs, validating
    every 10 epochs. Any of the iterators/models may be passed in; when all
    are None they are built from ``opt.dataset`` ('bible' or 'quora').

    Returns:
        (bucket_iter_train, bucket_iter_val, models) — the (possibly newly
        built) data iterators and trained model bundle.
    """
    if not opt:
        opt = get_options(True)

    # Loss functions. T(...) is a project helper (presumably moves the
    # module/tensor to the right device — confirm).
    contrastiveLoss = T(ContrastiveLoss())
    nllloss_for_recon = T(
        torch.nn.NLLLoss(ignore_index=1))  #ignores the padding characters
    bce = T(torch.nn.BCELoss())
    bce2 = T(torch.nn.BCELoss())

    # --------- training functions ------------------------------------
    def train(b, models, dont_optimize):
        # One optimization step of the main model (encoders + decoder) on
        # batch `b`; when dont_optimize is True only the losses are computed.
        # Uses optimizer_* variables bound later in the enclosing scope.

        models.en_sty.zero_grad()
        models.en_sem.zero_grad()
        models.decoder.zero_grad()

        #h_sem0 = models.en_sem(b.sent_0)  # sent0 = b.sent_0 #sem A , style A
        h_sem1 = models.en_sem(b.sent_1)  # sent1 = b.sent_1 #sem A, style B
        h_semX = models.en_sem(b.sent_x)  # sentX = b.sent_x #semAorC , style A
        recon_target = b.sent_0_target  # one-hot

        ######### SIM LOSS #########
        # In practice, it does not help, and thus, usually ignored.
        sim_loss = contrastiveLoss(h_sem1, h_semX, T(torch.round(b.is_x_0)))

        ######### RECONSTRUCTION LOSS ######### note: quite slow
        # reconstruct sent0 from semantics of sent1 (==sem of sent0, different style), and style of sent0.
        h_sty0 = models.en_sty(b.sent_0)
        merged = torch.cat([h_sem1, h_sty0], dim=1)
        merged.unsqueeze_(
            0)  # 32x25 -> 1x32x25 . 1 is for one hidden-layer (not-stacked)
        recon_sent0, _, _ = models.decoder(
            inputs=
            recon_target,  # pass not None for teacher forcing  (batch, seq_len, input_size)
            encoder_hidden=
            merged,  # (num_layers * num_directions, batch_size, hidden_size)
            encoder_outputs=None,  # pass not None for attention
            teacher_forcing_ratio=1,
            function=F.log_softmax
            # in(0, 1-random.random()* epoch * 0.1) #range 0..1 , must pass inputs if >0. as epochs increase, it's lower
        )

        # good reconstruction-loss need to:
        # ignore padding (it is easy to guess always <pad> as a result and to be usually right.also called masking)
        # consider length of sentences. is a short 5 word sentence weight the same as long 40 words sentence?
        #   'elementwise_mean' means look at each word by itself. one can change this to be on sentence level
        # we calculate it manually as sizes may not match in the returned array using seq2seq library
        # in the end , we sum the loss per timestamp and divide by number of timestamps
        acc_loss, norm_term = 0, 0
        for step, timestamp_output in enumerate(
                recon_sent0):  #list of 65 x [32, 2071]
            # NOTE(review): batch_size is assigned but never used.
            batch_size = recon_target.size(0)
            if step + 1 >= recon_target.size(
                    1
            ):  #in beginning, model might output 200 steps, later will converge to target
                # print ('breaking!!! at step',step)
                break
            gold = recon_target[:, step +
                                1]  # this is one timestamp across batches
            curr_loss = nllloss_for_recon(timestamp_output, gold)
            acc_loss += curr_loss
            norm_term += 1

        rec_loss = acc_loss / norm_term

        ######### ADV LOSS #########
        h_sty1 = models.en_sty(b.sent_1)
        h_styX = models.en_sty(b.sent_x)
        adv_disc_p = models.adv_disc(torch.cat([h_sty1, h_styX], dim=1))

        adv_target = T(
            torch.FloatTensor(
                np.full(shape=(b.sent_0[0].shape[0], 1), fill_value=0.5)))
        # the loss below is a parabola with min at log(0.5)=0.693... see article
        adv_disc_loss = bce(adv_disc_p, adv_target) + np.log(
            0.5)  # np.log(0.5)=-0.693 ,
        # logger.debug(f'### adv_disc_loss {N(adv_disc_p.data[:5]).T} target={N(adv_target.data[:5]).T} bce={adv_disc_loss}')
        # logger.debug(f'    sanity test: on first step, you expect adv_disc_loss to be near zero')

        ######### BACKWARD #########
        loss = rec_loss + sim_loss * opt.sem_sim_weight + opt.sd_weight * adv_disc_loss  # rec_loss + sim_loss + opt.sd_weight*adv_disc_loss

        if not dont_optimize:  # used in validation(eval mode) only
            loss.backward()
            optimizer_en_sem.step()
            optimizer_en_sty.step()
            optimizer_decoder.step()

        return N(sim_loss.data) * opt.sem_sim_weight, (
            rec_loss.data), N(adv_disc_loss.data) * opt.sd_weight  # N

    def train_scene_discriminator(b, models, dont_optimize):
        # One optimization step of the adversarial style discriminator:
        # predict whether the two style encodings come from the same pair.
        models.adv_disc.zero_grad()

        h_sty0 = models.en_sty(T(b.sent_1))
        h_sty0or2 = models.en_sty(
            T(b.sent_x
              ))  # same style, same or different semantics with random chance
        merged = torch.cat([h_sty0, h_sty0or2], dim=1)
        out = models.adv_disc(merged).flatten()

        y = T(b.is_x_0)
        # NOTE(review): this local `bce` shadows the outer BCELoss module of
        # the same name (it holds the loss *value* here) — confirm intended.
        bce = bce2(out, y)

        if not dont_optimize:
            bce.backward()
            optimizer_adv_disc.step()

        acc = np.round(N(out.detach())) == np.round(N(y))
        logger.debug(f'adv_disc out {out.shape} is_x_0 {y.shape}')
        logger.debug(
            f'out {out.flatten()} y {y.flatten()} acc {acc} bce {bce.data}')

        acc = acc.reshape(-1)
        acc = acc.sum() / len(acc)
        return N(bce.data), N(acc)

    """
        one epoc train, runs for epoch_size batches
    """

    def one_epoc(epoch, bucket_iter_train, models, dont_optimize):
        # Run `opt.epoch_size` batches: discriminator step then main-model
        # step per batch, accumulating and logging average losses.
        logger.debug('one_epoc starts')

        epoch_sim_loss, epoch_rec_loss, epoch_anti_disc_loss, epoch_sd_loss, epoch_sd_acc = 0, 0, 0, 0, 0

        training_batch_generator = None
        for i in range(opt.epoch_size):

            # take next batch from current iterator. If it finished, create a new iterator
            # NOTE(review): bare `except` also hides real errors from
            # next(); narrowing to (TypeError, StopIteration) would be safer.
            b = None
            try:
                b = next(training_batch_generator)
            except:
                logger.debug('creating new iterator')
                training_batch_generator = iter(
                    bucket_iter_train)  # only if no choice... it's 1.5 min
                b = next(training_batch_generator)

            # train scene discriminator
            logger.debug(f'b_sent_0 {b.sent_0[0].shape}{b.sent_0[1].shape}'
                         )  # TEXT.reverse(b.sent_0))

            sd_loss, sd_acc = train_scene_discriminator(
                b, models, dont_optimize)
            logger.debug('train_scene_discriminator done')

            epoch_sd_loss += sd_loss
            epoch_sd_acc += sd_acc

            # train main model
            sim_loss, rec_loss, anti_disc_loss = train(b, models,
                                                       dont_optimize)
            logger.debug('train done')

            epoch_sim_loss += sim_loss
            epoch_rec_loss += rec_loss
            epoch_anti_disc_loss += anti_disc_loss

            # After the first batch, silence DEBUG output for the rest.
            logger.setLevel(logging.INFO)
        logger.info(
            '[%02d] %s rec loss: %.4f | sim loss: %.4f | anti_disc_loss: %.4f || scene disc %.4f %.3f%% '
            % (epoch, "eval" if dont_optimize else "train", epoch_rec_loss /
               opt.epoch_size, epoch_sim_loss / opt.epoch_size,
               epoch_anti_disc_loss / opt.epoch_size, epoch_sd_loss /
               opt.epoch_size, 100 * epoch_sd_acc / opt.epoch_size))

    def set_train(models):
        # Switch all sub-modules to training mode.
        models.en_sty.train()  # and not eval() mode
        models.en_sem.train()
        models.decoder.train()
        models.adv_disc.train()

    def set_eval(models):
        # Switch all sub-modules to evaluation mode.
        models.en_sty.eval()
        models.en_sem.eval()
        models.decoder.eval()
        models.adv_disc.eval()

    # --------- training loop ------------------------------------
    logger = logging.getLogger()
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.DEBUG,
                        datefmt='%I:%M:%S')
    logger.setLevel(logging.INFO)  # not DEBUG

    print('running train with options:', opt)
    # Build datasets/models only when none were injected by the caller.
    if (bucket_iter_train == None and bucket_iter_val == None
            and models == None):
        if opt.dataset == 'bible':
            bucket_iter_train, bucket_iter_val = build_bible_datasets(
                verbose=False)
        elif opt.dataset == 'quora':
            bucket_iter_train, bucket_iter_val = build_quora_dataset(
                verbose=False)
        else:
            raise ValueError(f'unkown dataset type {opt.dataset}')
        models = build_models(bucket_iter_train.dataset, opt)

    # NOTE(review): if opt.optimizer != 'adam', `optimizer` is never bound
    # and the loop below raises NameError — confirm intended/guarded upstream.
    if opt.optimizer == 'adam':
        optimizer = torch.optim.Adam

    epoc_count = 0
    for lr in [opt.lr]:
        print('training with lr', lr)
        # These optimizers are captured by the nested train functions above.
        optimizer_en_sem = optimizer(models.en_sem.parameters(),
                                     lr,
                                     betas=(opt.beta1, 0.999))
        optimizer_en_sty = optimizer(models.en_sty.parameters(),
                                     lr,
                                     betas=(opt.beta1, 0.999))
        optimizer_decoder = optimizer(models.decoder.parameters(),
                                      lr,
                                      betas=(opt.beta1, 0.999))
        optimizer_adv_disc = torch.optim.SGD(
            models.adv_disc.parameters(),
            opt.adv_disc_lr)  # always using SGD for discriminator

        for epoch in range(0, opt.epocs):
            set_train(models)
            one_epoc(epoc_count,
                     bucket_iter_train,
                     models,
                     dont_optimize=False)

            if epoch % 10 == 0:
                # validation once every 10 epochs: run a full epoch cycle on the validation set WITHOUT updating gradients
                set_eval(models)
                one_epoc(epoc_count,
                         bucket_iter_val,
                         models,
                         dont_optimize=True)
                eval_sample(bucket_iter_val, models)

            epoc_count += 1

    print('training loop done')

    return bucket_iter_train, bucket_iter_val, models