Example #1
def train(args, model, train_loader, epoch, optimizer, scaler, run_avg):
    model.train()
    device = torch.device('cuda')
    criterion = nn.CrossEntropyLoss(reduction='mean')

    for batch_index, input_tensor in enumerate(train_loader):
        input_data, target = input_tensor

        if args.cuda:
            input_data = input_data.to(device)
            target = target.to(device)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(input_data)
            loss = criterion(output, target)

        run_avg.update_train_loss_avg(loss.item(), args.batch_size)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        accuracy = acc(output, target)
        run_avg.update_train_acc_avg(accuracy, args.batch_size)

        if batch_index % 10 == 9:
            print('epoch =', epoch, ' train_loss = ',
                  run_avg.train_loss_run_avg, ' accuracy =',
                  run_avg.train_acc_run_avg)
        wandb.log({
            'epoch': epoch,
            'train_avg_loss': run_avg.train_loss_run_avg,
            'train_accuracy': run_avg.train_acc_run_avg
        })
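
The acc(output, target) helper called in this example (and in several later ones) is not shown on this page. Below is a minimal sketch of what such a top-1 accuracy helper might look like for logits and integer class targets; this is an assumption for illustration, not the repository's actual implementation.

import torch


def acc(output: torch.Tensor, target: torch.Tensor) -> float:
    # Hypothetical sketch, not the original helper: top-1 accuracy for logits
    # of shape [batch, num_classes] against integer class targets.
    preds = output.argmax(dim=1)
    return (preds == target).float().mean().item()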
Example #2
def evaluation(args, model, valid_loader, epoch, run_avg):
    model.eval()
    device = args.device
    model.to(device)
    criterion = nn.CrossEntropyLoss(reduction='mean')

    with torch.no_grad():
        for batch_index, input_tensor in enumerate(valid_loader):
            input_data, target = input_tensor
            input_data, target = input_data.to(device), target.to(device)

            with torch.cuda.amp.autocast():
                output = model(input_data)
                valid_loss = criterion(output, target)

            run_avg.update_val_loss_avg(valid_loss.item(), args.batch_size)
            accuracy = acc(output, target)
            run_avg.update_val_acc_avg(accuracy, args.batch_size)

            if batch_index % 10 == 9:
                print('epoch', epoch, 'val_loss = ', run_avg.val_loss_run_avg,
                      ' accuracy =', run_avg.val_acc_run_avg)
            wandb.log({
                'epoch': epoch,
                'valid_avg_loss': run_avg.val_loss_run_avg,
                'val_accuracy': run_avg.val_acc_run_avg
            })

        return run_avg.val_acc_run_avg
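
The run_avg object in Examples #1 and #2 tracks batch-size-weighted running averages of loss and accuracy. Its implementation is not shown; the following is a minimal sketch consistent with the calls above, i.e. an assumption about the interface rather than the original class.

class RunningAverages:
    # Hypothetical sketch inferred from the calls above, not the original class:
    # batch-size-weighted running averages of train/validation loss and accuracy.
    def __init__(self):
        self.train_loss_run_avg = 0.0
        self.train_acc_run_avg = 0.0
        self.val_loss_run_avg = 0.0
        self.val_acc_run_avg = 0.0
        self._counts = {'train_loss': 0, 'train_acc': 0,
                        'val_loss': 0, 'val_acc': 0}

    def _update(self, key, attr, value, n):
        total = self._counts[key] + n
        setattr(self, attr,
                (getattr(self, attr) * self._counts[key] + value * n) / total)
        self._counts[key] = total

    def update_train_loss_avg(self, value, n):
        self._update('train_loss', 'train_loss_run_avg', value, n)

    def update_train_acc_avg(self, value, n):
        self._update('train_acc', 'train_acc_run_avg', value, n)

    def update_val_loss_avg(self, value, n):
        self._update('val_loss', 'val_loss_run_avg', value, n)

    def update_val_acc_avg(self, value, n):
        self._update('val_acc', 'val_acc_run_avg', value, n)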
Example #3
def stats(predict_dir):
    """Calculates prediction and uncertainty statistics."""

    bp = np.load(predict_dir + "/bayesian/bayesian_pred.npy").squeeze()
    bu = np.load(predict_dir + "/bayesian/bayesian_unc.npy").squeeze()
    dp = np.load(predict_dir + "/dropout/dropout_pred.npy").squeeze()
    du = np.load(predict_dir + "/dropout/dropout_unc.npy").squeeze()
    y = np.load(predict_dir + "/test_targets.npy").squeeze()

    with open(predict_dir + "/stats.csv", "w") as csvfile:
        w = csv.writer(csvfile, delimiter=" ")
        w.writerow(["Category", "Dropout", "Bayesian"])
        w.writerow(["Pred_Acc", acc(dp, y), acc(bp, y)])
        w.writerow(["Unc_Mean", du.mean(), bu.mean()])
        w.writerow(["Unc_Var", du.var(), bu.var()])
        w.writerow(["Unc_Max", du.max(), bu.max()])
        w.writerow(["Unc_Min", du.min(), bu.min()])
Example #4
    def compute_output(self,
                       X,
                       Y,
                       keep_prob=cfg.keep_prob,
                       regularization_scale=cfg.regularization_scale):

        print("Size of input:")
        print(X.get_shape())

        # 1. Convolve the input image up to the digit capsules.
        digit_caps = self._image_to_digitcaps(X)

        # 2. Get the margin loss
        margin_loss = u.margin_loss(digit_caps, Y)

        # 3. Reconstruct the images
        reconstructed_image, reconstruction_1, reconstruction_2 = self._digitcaps_to_image(
            digit_caps, Y)

        # 4. Get the reconstruction loss
        reconstruction_loss = u.reconstruction_loss(reconstructed_image, X)

        # 5. Get the total loss
        total_loss = margin_loss + regularization_scale * reconstruction_loss

        # 6. Get the batch accuracy
        batch_accuracy = u.acc(digit_caps, Y)

        # 7. Reconstruct all possible images
        memo = self._digitcaps_to_memo(X, digit_caps)

        # 8. Get the memo capsules
        memo_caps = self._memo_to_digitcaps(memo, keep_prob=keep_prob)

        # 9. Get the memo margin loss
        memo_margin_loss = u.margin_loss(memo_caps, Y)

        # 10. Get the memo accuracy
        memo_accuracy = u.acc(memo_caps, Y)

        # 11. Return all of the losses and reconstructions
        return (total_loss, margin_loss, reconstruction_loss,
                reconstructed_image, reconstruction_1, reconstruction_2,
                batch_accuracy, memo, memo_margin_loss, memo_accuracy)
Example #5
def train(model, training_data, dev_data, learning_rate, batch_size,
          max_epoch):
    X_train, Y_train = training_data['X'], training_data['Y']
    X_dev, Y_dev = dev_data['X'], dev_data['Y']
    for i in range(max_epoch):
        for X, Y in data_loader(X_train,
                                Y_train,
                                batch_size=batch_size,
                                shuffle=True):
            training_loss, grad_Ws, grad_bs = model.compute_gradients(X, Y)
            model.update(grad_Ws, grad_bs, learning_rate)
        dev_acc = acc(model.predict(X_dev), Y_dev)
        print("Epoch {: >3d}/{}\tloss:{:.5f}\tdev_acc:{:.5f}".format(
            i + 1, max_epoch, training_loss, dev_acc))
    return model
Example #6
def operate(phase):
    if phase == 'train':
        model.train()
        loader = trainloader
    else:
        model.eval()
        loader = valloader
    for i, (data, target) in enumerate(loader):
        start = time.time()
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        loss = lossf(output, target)
        if phase == 'train':
            # only backpropagate and update weights in the training phase
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        acc = U.acc(output, target)
        print(
            f'{e}/{epoch}:{i}/{len(loader)}, loss:{loss:.2f}, acc:{acc:.2f}, time:{time.time()-start:.4f}'
        )
        Co.addvalue(writer, 'loss', loss.item(), e)
        Co.addvalue(writer, 'acc', acc.item(), e)
Example #7
def train_1pass(model,
                training_data,
                dev_data,
                learning_rate,
                batch_size,
                print_every=100,
                plot_every=10):
    X_train, Y_train = training_data['X'], training_data['Y']
    X_dev, Y_dev = dev_data['X'], dev_data['Y']

    num_samples = 0
    print_loss_total = 0
    plot_loss_total = 0

    plot_losses = []
    plot_num_samples = []
    for idx, (X, Y) in enumerate(
            data_loader(X_train, Y_train, batch_size=batch_size, shuffle=True),
            1):
        training_loss, grad_Ws, grad_bs = model.compute_gradients(X, Y)
        model.update(grad_Ws, grad_bs, learning_rate)
        num_samples += Y.shape[1]
        print_loss_total += training_loss
        plot_loss_total += training_loss

        if idx % print_every == 0:
            dev_acc = acc(model.predict(X_dev), Y_dev)
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print("#Samples {: >5d}\tloss:{:.5f}\tdev_acc:{:.5f}".format(
                num_samples, print_loss_avg, dev_acc))
        if idx % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_loss_total = 0
            plot_losses.append(plot_loss_avg)
            plot_num_samples.append(num_samples)

    return model, {"losses": plot_losses, "num_samples": plot_num_samples}
Example #8
def estimate_effects(model_dat):
    """nonlinear estimation of linearized structural model
    using theoretical direct effects as starting values""" # ToDo: reintroduce # yyyy

    if model_dat["alpha"] is None:
        if model_dat["dof"] is not None:
            raise ValueError("dof is determined together with alpha.")

        # alpha_min (with posdef hessian) and alpha_max to search over
        alpha_min, alpha_max = alpha_min_max(model_dat)

        # optimal alpha with minimal out-of-sample sse
        alpha, dof = estimate_alpha(alpha_min, alpha_max, model_dat)
        model_dat["alpha"] = alpha
        model_dat["dof"] = dof
    else:
        if model_dat["dof"] is None:
            raise ValueError("dof must be given together with alpha.")
        print("\ngiven alpha: {:10f}, dof: {:10f}".format(
            model_dat["alpha"], model_dat["dof"]))

    # final estimation given optimal alpha
    # algebraic Hessian
    (check, hessian_hat, direct_hat, sse_hat, mx_hat, my_hat, ex_hat,
     ey_hat) = check_estimate_effects(model_dat)
    # automatic Hessian
    hessian = utils.sse_hess(mx_hat, my_hat, model_dat)
    # numeric Hessian
    hessian_num = sse_hess_num(mx_hat, my_hat, model_dat)

    print(
        "\nAlgebraic and numeric   Hessian allclose: {} with accuracy {:10f}.".
        format(allclose(hessian_hat, hessian_num),
               utils.acc(hessian_hat, hessian_num)))
    print("Automatic and numeric   Hessian allclose: {} with accuracy {:10f}.".
          format(allclose(hessian, hessian_num),
                 utils.acc(hessian, hessian_num)))
    print("Automatic and algebraic Hessian allclose: {} with accuracy {:10f}.".
          format(allclose(hessian, hessian_hat),
                 utils.acc(hessian, hessian_hat)))

    assert check, "Hessian not well conditioned."
    cov_direct_hat = compute_cov_direct(sse_hat, hessian_hat, model_dat)

    # compute estimated direct, total and mediation effects and standard deviations
    mx_hat_std, my_hat_std = utils.compute_direct_std(cov_direct_hat,
                                                      model_dat)
    ex_hat_std, ey_hat_std = utils.total_effects_std(direct_hat,
                                                     cov_direct_hat, model_dat)
    exj_hat, eyj_hat, eyx_hat, eyy_hat = utils.compute_mediation_effects(
        mx_hat, my_hat, ex_hat, ey_hat, model_dat["yvars"],
        model_dat["final_var"])
    (exj_hat_std, eyj_hat_std, eyx_hat_std,
     eyy_hat_std) = utils.compute_mediation_std(ex_hat_std, ey_hat_std,
                                                eyx_hat, eyy_hat,
                                                model_dat["yvars"],
                                                model_dat["final_var"])

    estimate_dat = {
        "direct_hat": direct_hat,
        "sse_hat": sse_hat,
        "hessian_hat": hessian_hat,
        "cov_direct_hat": cov_direct_hat,
        "mx_hat": mx_hat,
        "my_hat": my_hat,
        "mx_hat_std": mx_hat_std,
        "my_hat_std": my_hat_std,
        "ex_hat": ex_hat,
        "ey_hat": ey_hat,
        "ex_hat_std": ex_hat_std,
        "ey_hat_std": ey_hat_std,
        "exj_hat": exj_hat,
        "eyj_hat": eyj_hat,
        "eyx_hat": eyx_hat,
        "eyy_hat": eyy_hat,
        "exj_hat_std": exj_hat_std,
        "eyj_hat_std": eyj_hat_std,
        "eyx_hat_std": eyx_hat_std,
        "eyy_hat_std": eyy_hat_std,
    }

    return estimate_dat
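
In Example #8, utils.acc measures numerical agreement between two Hessian approximations rather than classification accuracy; it is printed next to allclose as an "accuracy" figure. One plausible definition of such an agreement measure is sketched below; this is purely an assumption, since the package's actual utils.acc is not shown.

import numpy as np


def acc(a, b):
    # Hypothetical sketch: digits of agreement, i.e. -log10 of the largest
    # absolute deviation between the two arrays. Not the package's utils.acc.
    dev = np.max(np.abs(np.asarray(a) - np.asarray(b)))
    return np.inf if dev == 0 else -np.log10(dev)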
Example #9
    train_loss = 0
    train_acc = 0

    model.train()
    for step, batch in tqdm(enumerate(train), desc='steps', total=len(train)):

        x_train, y_train = map(lambda x: x.to(device), batch)

        optimizer.zero_grad()
        y_hat = model(x_train)
        loss = loss_fn(y_hat, y_train)
        loss.backward()
        clip_grad_norm_(model.parameters(), config.max_grad_norm)
        optimizer.step()

        with torch.no_grad():
            batch_acc = utils.acc(y_hat, y_train)

        train_loss += loss.item()
        train_acc += batch_acc.item()

        # evaluation
        if (epoch * len(train) + step) % config.summary_step == 0:

            val_loss = 0
            val_acc = 0

            if model.training:
                model.eval()

            for val_step, batch in enumerate(dev):
Example #10
def test(testing_data_file, super_batch_size, tokenizer, mode, kw, p_key, \
        model1, device, model2, model3):
    '''Test the three models

    Evaluate the models through bundles

    Args:
        testing_data_file (list) : testing data json file, raw json file used to load the data
        super_batch_size (int) : how many samples will be loaded into memory at once
        tokenizer : SentencePiece tokenizer used to obtain the token ids
        mode (str) : mode of the passage format; could be a list (processed) or a long string (unprocessed)
        kw (str) : the key that maps to the passage in each data dictionary. Defaults to 'abstract'
        p_key (str) : the key used to look up the specific passage. Defaults to 'title'
        model1 (nn.DataParallel) : local dependency encoder
        device (torch.device) : The device which models and data are on.
        model2 (nn.Module) : global coherence encoder
        model3 (nn.Module) : attention decoder
        
    Returns:
        result_list (list) : a list with the per-sample results, each organized as a dictionary
        {taus, accs, pmrs, rouge-ws, pred, truth}
        over_all (dict) : the overall results; keys are the four metrics
    '''

    with torch.no_grad():
        print('test..............')

        valid_critic_dict = {
            'rouge-w': rouge_w,
            'acc': acc,
            'ken-tau': kendall_tau,
            'pmr': pmr
        }

        result_list = []
        over_all = {
            'Kendall-tau': None,
            'Accuracy': None,
            'ROUGE-w': None,
            'PMR': None
        }

        accs = []
        rouge_ws = []
        ken_taus = []
        pmrs = []

        for superbatch in load_superbatch(testing_data_file, super_batch_size):

            bundles = []

            for data in superbatch:
                try:
                    bundles.append(
                        convert_passage_to_samples_bundle(
                            tokenizer, data, mode, kw, p_key))
                except:
                    traceback.print_exc()

            num_batch, valid_dataloader = homebrew_data_loader(bundles,
                                                               batch_size=1)

            valid_value = []
            for step, batch in enumerate(valid_dataloader):
                try:
                    batch = tuple(t for idx, t in enumerate(batch))
                    pointers_output, ground_truth \
                        = dev_test(batch, model1, model2, model3, device)
                    # valid_value.append(valid_critic_dict[valid_critic](pointers_output, ground_truth))

                except Exception as err:
                    traceback.print_exc()
                    continue  # skip samples whose decoding failed

                rouge_ws.append(rouge_w(pointers_output, ground_truth))
                accs.append(acc(pointers_output, ground_truth))
                ken_taus.append(kendall_tau(pointers_output, ground_truth))
                pmrs.append(pmr(pointers_output, ground_truth))

                result_list.append({
                    'Kendall-tau': ken_taus[-1],
                    'Accuracy': accs[-1],
                    'ROUGE-w': rouge_ws[-1],
                    'PMR': pmrs[-1],
                    'true': ground_truth,
                    'pred': pointers_output
                })

            print('finished {} samples.\n'.format(len(rouge_ws)))

        over_all['Kendall-tau'] = np.mean(ken_taus)
        over_all['Accuracy'] = np.mean(accs)
        over_all['ROUGE-w'] = np.mean(rouge_ws)
        over_all['PMR'] = np.mean(pmrs)

        print('Final scores:  kendall:{:.4f}, accuracy:{:.4f}, rouge-w:{:.4f}, pmr:{:.4f}\n'.format( \
            over_all['Kendall-tau'], over_all['Accuracy'], over_all['ROUGE-w'], over_all['PMR']))

    return result_list, over_all
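
Example #10 scores predicted sentence orderings with four metrics. Two of them are simple enough to sketch here as assumptions about their definitions (the imported implementations are not shown): acc as the fraction of positions predicted exactly right, and pmr as the perfect match ratio.

def acc(pred, truth):
    # Hypothetical sketch: fraction of sentences placed at their correct position.
    return sum(int(p == t) for p, t in zip(pred, truth)) / len(truth)


def pmr(pred, truth):
    # Hypothetical sketch: 1.0 if the whole predicted ordering is correct, else 0.0.
    return float(list(pred) == list(truth))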
Example #11
def train(training_data_file, valid_data_file, super_batch_size, tokenizer, mode, kw, p_key, model1, device, model2, model3, \
            batch_size, num_epoch, gradient_accumulation_steps, lr1, lr2, lambda_, valid_critic, early_stop):
    '''Train the three models

    Train the models through bundles

    Args:
        training_data_file (list) : training data json file, raw json file used to load the data
        valid_data_file (list) : validation data json file used for the early-stopping evaluation
        super_batch_size (int) : how many samples will be loaded into memory at once
        tokenizer : SentencePiece tokenizer used to obtain the token ids
        mode (str) : mode of the passage format; could be a list (processed) or a long string (unprocessed)
        kw (str) : the key that maps to the passage in each data dictionary. Defaults to 'abstract'
        p_key (str) : the key used to look up the specific passage. Defaults to 'title'
        model1 (nn.DataParallel) : local dependency encoder
        device (torch.device) : the device which models and data are on
        model2 (nn.Module) : global coherence encoder
        model3 (nn.Module) : attention decoder
        batch_size (int) : Defaults to 4.
        num_epoch (int) : Defaults to 1.
        gradient_accumulation_steps (int) : Defaults to 1.
        lr1, lr2 (float) : the starting learning rates. Default to 1e-4.
        lambda_ (float) : Defaults to 0.01. Balance factor for parameter normalization (weight decay).
        valid_critic (str) : which metric to use for the early-stopping evaluation
        early_stop (int) : the early-stopping patience in epochs. Defaults to 5.

    '''

    # Prepare optimizer for Sys1
    param_optimizer_bert = list(model1.named_parameters())
    param_optimizer_others = list(model2.named_parameters()) + list(
        model3.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # We tend to fix the embedding; for now we have not located the embedding layer
    optimizer_grouped_parameters_bert = [{
        'params': [
            p for n, p in param_optimizer_bert
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        lambda_
    }, {
        'params': [
            p for n, p in param_optimizer_bert
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]

    optimizer_grouped_parameters_others = [{
        'params': [
            p for n, p in param_optimizer_others
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        lambda_
    }, {
        'params': [
            p for n, p in param_optimizer_others
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    # We should add a module to count the number of parameters here
    critic = nn.NLLLoss(reduction='none')

    line_num = int(os.popen("wc -l " + training_data_file).read().split()[0])
    global_step = 0  # global step
    opt1 = BertAdam(optimizer_grouped_parameters_bert,
                    lr=lr1,
                    warmup=0.1,
                    t_total=line_num / batch_size * num_epoch)  # optimizer 1
    # opt = Adam(optimizer_grouped_parameter, lr=lr)
    opt2 = Adadelta(optimizer_grouped_parameters_others, lr=lr2, rho=0.95)
    model1.to(device)  #
    model1.train()  #
    model2.to(device)  #
    model2.train()  #
    model3.to(device)  #
    model3.train()  #
    warmed = True
    for epoch in trange(num_epoch, desc='Epoch'):

        smooth_mean = WindowMean()
        opt1.zero_grad()
        opt2.zero_grad()

        for superbatch, line_num in load_superbatch(training_data_file,
                                                    super_batch_size):
            bundles = []

            for data in superbatch:
                try:
                    bundles.append(
                        convert_passage_to_samples_bundle(
                            tokenizer, data, mode, kw, p_key))

                except:
                    print_exc()

            num_batch, dataloader = homebrew_data_loader(bundles,
                                                         batch_size=batch_size)

            tqdm_obj = tqdm(dataloader, total=num_batch)
            num_steps = line_num  #
            for step, batch in enumerate(tqdm_obj):
                try:
                    #batch[0] = batch[0].to(device)
                    #batch[1] = batch[1].to(device)
                    #batch[2] = batch[2].to(device)
                    batch = tuple(t for t in batch)
                    log_prob_loss, pointers_output, ground_truth = calculate_loss(
                        batch, model1, model2, model3, device, critic)
                    # here we need to add code to calculate rouge-w and acc
                    rouge_ws = []
                    accs = []
                    ken_taus = []
                    pmrs = []
                    for pred, true in zip(pointers_output, ground_truth):
                        rouge_ws.append(rouge_w(pred, true))
                        accs.append(acc(pred, true))
                        ken_taus.append(kendall_tau(pred, true))
                        pmrs.append(pmr(pred, true))

                    log_prob_loss.backward()

                    # ******** The following code will be edited to add early stopping ************

                    if (step + 1) % gradient_accumulation_steps == 0:
                        # modify learning rate with special warm up BERT uses. From BERT pytorch examples
                        lr_this_step = lr1 * warmup_linear(
                            global_step / num_steps, warmup=0.1)
                        for param_group in opt1.param_groups:
                            param_group['lr'] = lr_this_step
                        global_step += 1

                        opt2.step()
                        opt2.zero_grad()
                        smooth_mean_loss = smooth_mean.update(
                            log_prob_loss.item())
                        tqdm_obj.set_description(
                            '{}: {:.4f}, {}: {:.4f}, smooth_mean_loss: {:.4f}'.
                            format('accuracy', np.mean(accs), 'rouge-w',
                                   np.mean(rouge_ws), smooth_mean_loss))
                        # During warming period, model1 is frozen and model2 is trained to normal weights
                        if smooth_mean_loss < 1.0 and step > 100:  # ugly manual hyperparam
                            warmed = True
                        if warmed:
                            opt1.step()
                        opt1.zero_grad()
                        if step % 1000 == 0:
                            output_model_file = './models/bert-base-cased.bin.tmp'
                            saved_dict = {
                                'params1': model1.module.state_dict()
                            }
                            saved_dict['params2'] = model2.state_dict()
                            saved_dict['params3'] = model3.state_dict()
                            torch.save(saved_dict, output_model_file)

                except Exception as err:
                    traceback.print_exc()
                    exit()
                    # if mode == 'list':
                    #     print(batch._id)

        if epoch < 5:
            best_score = 0
            continue

        with torch.no_grad():
            print('valid..............')

            valid_critic_dict = {
                'rouge-w': rouge_w,
                'acc': acc,
                'ken-tau': kendall_tau,
                'pmr': pmr
            }

            for superbatch, _ in load_superbatch(valid_data_file,
                                                 super_batch_size):
                bundles = []

                for data in superbatch:
                    try:
                        bundles.append(
                            convert_passage_to_samples_bundle(
                                tokenizer, data, mode, kw, p_key))
                    except:
                        print_exc()

                num_batch, valid_dataloader = homebrew_data_loader(
                    bundles, batch_size=1)

                valid_value = []
                for step, batch in enumerate(valid_dataloader):
                    try:
                        batch = tuple(t for idx, t in enumerate(batch))
                        pointers_output, ground_truth \
                            = dev_test(batch, model1, model2, model3, device)
                        valid_value.append(valid_critic_dict[valid_critic](
                            pointers_output, ground_truth))

                    except Exception as err:
                        traceback.print_exc()
                        # if mode == 'list':
                        #     print(batch._id)

                score = np.mean(valid_value)
            print('epc:{}, {} : {:.2f} best : {:.2f}\n'.format(
                epoch, valid_critic, score, best_score))

            if score > best_score:
                best_score = score
                best_iter = epoch

                print('Saving model to {}'.format(
                    output_model_file))  # save model structure
                saved_dict = {
                    'params1': model1.module.state_dict()
                }  # save parameters
                saved_dict['params2'] = model2.state_dict()  # save parameters
                saved_dict['params3'] = model3.state_dict()
                torch.save(saved_dict, output_model_file)  #

                # print('save best model at epc={}'.format(epc))
                # checkpoint = {'model': model.state_dict(),
                #             'args': args,
                #             'loss': best_score}
                # torch.save(checkpoint, '{}/{}.best.pt'.format(args.model_path, args.model))

            if early_stop and (epoch - best_iter) >= early_stop:
                print('early stop at epc {}'.format(epoch))
                break
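
Example #11 smooths the training loss with a WindowMean helper before deciding when the warm-up phase has ended. A minimal sketch of such a windowed mean follows; this is an assumption, the original class is not shown.

from collections import deque


class WindowMean:
    # Hypothetical sketch: mean over the most recent window_size updates.
    def __init__(self, window_size=50):
        self.values = deque(maxlen=window_size)

    def update(self, value):
        self.values.append(value)
        return sum(self.values) / len(self.values)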
Example #12
def train(rank, args):
    if rank is None:
        is_distributed = False
        rank = 0
    else:
        is_distributed = True

    if is_distributed:
        utils.setuplogger()
        dist.init_process_group('nccl',
                                world_size=args.nGPU,
                                init_method='env://',
                                rank=rank)

    torch.cuda.set_device(rank)

    news, news_index, category_dict, subcategory_dict, word_dict = read_news(
        os.path.join(args.train_data_dir, 'news.tsv'), args, mode='train')

    news_title, news_category, news_subcategory = get_doc_input(
        news, news_index, category_dict, subcategory_dict, word_dict, args)
    news_combined = np.concatenate([
        x
        for x in [news_title, news_category, news_subcategory] if x is not None
    ],
                                   axis=-1)

    if rank == 0:
        logging.info('Initializing word embedding matrix...')

    embedding_matrix, have_word = utils.load_matrix(args.glove_embedding_path,
                                                    word_dict,
                                                    args.word_embedding_dim)
    if rank == 0:
        logging.info(f'Word dict length: {len(word_dict)}')
        logging.info(f'Have words: {len(have_word)}')
        logging.info(
            f'Missing rate: {(len(word_dict) - len(have_word)) / len(word_dict)}'
        )

    module = importlib.import_module(f'model.{args.model}')
    model = module.Model(args, embedding_matrix, len(category_dict),
                         len(subcategory_dict))

    if args.load_ckpt_name is not None:
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
        checkpoint = torch.load(ckpt_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        logging.info(f"Model loaded from {ckpt_path}.")

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.enable_gpu:
        model = model.cuda(rank)

    if is_distributed:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[rank])

    # if rank == 0:
    #     print(model)
    #     for name, param in model.named_parameters():
    #         print(name, param.requires_grad)

    data_file_path = os.path.join(args.train_data_dir,
                                  f'behaviors_np{args.npratio}_{rank}.tsv')

    dataset = DatasetTrain(data_file_path, news_index, news_combined, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size)

    logging.info('Training...')
    for ep in range(args.start_epoch, args.epochs):
        loss = 0.0
        accuracy = 0.0
        for cnt, (log_ids, log_mask, input_ids,
                  targets) in enumerate(dataloader):
            if args.enable_gpu:
                log_ids = log_ids.cuda(rank, non_blocking=True)
                log_mask = log_mask.cuda(rank, non_blocking=True)
                input_ids = input_ids.cuda(rank, non_blocking=True)
                targets = targets.cuda(rank, non_blocking=True)

            bz_loss, y_hat = model(log_ids, log_mask, input_ids, targets)
            loss += bz_loss.data.float()
            accuracy += utils.acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0 and cnt > 0:
                logging.info(
                    '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                        rank, cnt * args.batch_size, loss.data / cnt,
                        accuracy / cnt))

            if rank == 0 and cnt != 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir,
                                         f'epoch-{ep+1}-{cnt}.pt')
                torch.save(
                    {
                        'model_state_dict': {
                            '.'.join(k.split('.')[1:]): v
                            for k, v in model.state_dict().items()
                        } if is_distributed else model.state_dict(),
                        'category_dict': category_dict,
                        'word_dict': word_dict,
                        'subcategory_dict': subcategory_dict
                    }, ckpt_path)
                logging.info(f"Model saved to {ckpt_path}.")

        logging.info(f'Epoch {ep + 1} finished.')

        if rank == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
            torch.save(
                {
                    'model_state_dict': {
                        '.'.join(k.split('.')[1:]): v
                        for k, v in model.state_dict().items()
                    } if is_distributed else model.state_dict(),
                    'category_dict': category_dict,
                    'subcategory_dict': subcategory_dict,
                    'word_dict': word_dict,
                }, ckpt_path)
            logging.info(f"Model saved to {ckpt_path}.")
Example #13
#%% Testing the result; needs modification if a different input is used


def Yes_No(digit):
    if digit > 0.02:  # Setting the threshold
        return 1
    else:
        return 0


def test_model(testSet, model):
    _, n = testSet.size()
    n = n - 59
    pred = torch.zeros(n)

    for i in range(n):
        curr_win = testSet[1:9, i:i + 60]
        curr_win = curr_win.reshape(1, 1, 8, 60)
        pred[i] = Yes_No(model(curr_win))
        #print(i)
    return pred


test_result = test_model(torch.tensor(test, dtype=torch.float32), net)

#%%
from utils import acc

result = acc(test_result.numpy(), test[10, :], 100)
Beispiel #14
0
def train(**kwargs):
    model = kwargs['model']
    dataloader = kwargs['dataloader']
    epochs = kwargs['epochs']
    pth_file = kwargs['pth']
    root = kwargs['root']
    alpha = kwargs['alpha']

    # optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5)
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=1e-3,
                                 weight_decay=1e-5)
    mse_criterion = nn.MSELoss()
    kl_criterion = nn.KLDivLoss(reduction='mean')

    # ============K-means=======================================
    features = []
    y_true = []
    for x, y in dataloader:
        y_true.append(y.detach().cpu().numpy())
        x = x.cuda()
        f = model(x)['feature']
        features.append(f.detach().cpu().numpy())
    features = np.concatenate(features, axis=0)
    kmeans = KMeans(n_clusters=model.n_clusters, random_state=0).fit(features)
    cluster_centers = kmeans.cluster_centers_
    cluster_centers = torch.tensor(cluster_centers, dtype=torch.float).cuda()
    model.ClusteringLayer.centers = torch.nn.Parameter(cluster_centers)
    # =========================================================
    y_pred = kmeans.predict(features)
    y_true = np.concatenate(y_true, axis=0)
    accuracy = acc(y_true, y_pred)
    logger.info('Initial Accuracy: {}'.format(accuracy))

    best_acc = 0.0
    model.train()
    for epoch in range(1, epochs + 1):
        train_mse_loss = 0.0
        train_kl_loss = 0.0
        accuracy = 0.0
        for cnt, (x, y) in enumerate(dataloader):
            x = x.cuda()
            # ===================forward=====================
            output = model(x)
            x_hat = output['rec']
            rec_loss = mse_criterion(x_hat, x)
            train_mse_loss += rec_loss.item()

            source_ = output['source']
            # if the target is not detached, the model collapses
            target_ = model.target_distribute(source_).detach()
            kl_loss = kl_criterion(source_.log(), target_)
            train_kl_loss += kl_loss.item()

            y_pred = source_.argmax(1)
            accuracy += acc(y.cpu().numpy(), y_pred.cpu().numpy())
            if epoch % 10 == 0 and cnt == 0:
                visualize(epoch, output['feature'].detach().cpu().numpy(),
                          y.detach().cpu().numpy(), root)
                x = x[0]
                x_hat = x_hat[0]
                final = torch.cat([x, x_hat], dim=1).detach().cpu().numpy()
                final = np.transpose(final, (2, 1, 0))
                final = np.clip(final * 255.0, 0, 255).astype(np.uint8)
                cv2.imwrite(f"{root}/clustering.png", final)
            # ===================backward====================
            optimizer.zero_grad()
            total_loss = rec_loss + alpha * kl_loss
            total_loss.backward()
            optimizer.step()

        # ===================log========================
        train_mse_loss /= len(dataloader)
        train_kl_loss /= len(dataloader)
        accuracy /= len(dataloader)
        logger.info(
            'epoch [{}/{}], MSE_loss:{:.4f}, KL_loss:{:.4f}, Accuracy:{:.4f}'.
            format(epoch, epochs, train_mse_loss, train_kl_loss, accuracy))

        if best_acc < accuracy:
            best_acc = accuracy
            torch.save(model.state_dict(), pth_file)
Example #15
                loss = criterion(labels, outputs)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % 200 == 199:  # print every 200 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / 200))
                    running_loss = 0.0

        print('Finished Training')
        test_result = test_model(torch.tensor(test_new, dtype=torch.float32),
                                 net, thrs)
        try:
            result = acc(test_result.numpy(), test_new[10, :], 10)
            specificity = result[3]
            precision = result[1]
            TP = result[4]
        except (ZeroDivisionError):
            specificity = 0
            precision = 0
            TP = 0
            pass
        count = 0
        while (not (0.45 <= specificity <= 0.55) and count < 10000):
            if specificity > 0.55:
                thrs += 0.01
            if specificity < 0.45:
                thrs -= 0.01
            test_result = test_model(
Example #16
eval_batches, num_eval_batches, num_eval_samples = get_batch(hp.eval1,
                                                             hp.eval2,
                                                             100000,
                                                             100000,
                                                             hp.vocab,
                                                             hp.batch_size,
                                                             shuffle=False)
iter = tf.data.Iterator.from_structure(eval_batches.output_types,
                                       eval_batches.output_shapes)
xs, ys = iter.get_next()
decoder_inputs, y, y_seqlen, sents2 = ys
eval_init_op = iter.make_initializer(eval_batches)

logging.info("# Load model")
m = Transformer(hp)
y_mask = m.y_masks(y)
y_hat, eval_summaries = m.eval(xs, ys, y_mask)
saver = tf.train.Saver()

with tf.Session() as sess:
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    saver.restore(sess, ckpt)
    summary_writer = tf.summary.FileWriter(hp.logdir, sess.graph)
    sess.run(eval_init_op)
    _y_hat, _y = sess.run([y_hat, y])
    print(_y_hat)
    print(_y)
    print(acc(_y_hat, _y))
    #hypotheses = get_hypotheses(1, 128, sess, y_hat, m.idx2token)

#print(hypotheses)
Example #17
genreId = 1
genre_map = {}
genres = set()

clips = utils.get_clip_set()

with open('initial/genres.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    next(reader)
    with open('cleaned/genres_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        added = set()
        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                genre = row[1]
                l = utils.acc(genre)
                b = utils.lettres(l)
                if utils.diff_letters(l, b) < 2 and len(b) != 0 and b.lower(
                ) != 'null' and b.lower() != 'none':
                    if b not in genres:
                        new_row = (genreId, b)
                        if new_row not in added:
                            genres.add(b)
                            genre_map[b] = genreId
                            genreId += 1
                            wr.writerow(new_row)
                            added.add(new_row)
                    else:
                        new_row = (genre_map[b], b)
                        if new_row not in added:
                            wr.writerow(new_row)
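
In Example #17 (and the other CSV-cleaning snippets on this page), utils.acc is applied to a string, so here it appears to strip accents rather than compute an accuracy. A sketch of such a helper follows; this is an assumption, the real utils module is not shown.

import unicodedata


def acc(text: str) -> str:
    # Hypothetical sketch: remove combining accent marks from a string.
    normalized = unicodedata.normalize('NFKD', text)
    return ''.join(ch for ch in normalized if not unicodedata.combining(ch))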
Example #18
def train(args):
    # Only support title Turing now
    assert args.enable_hvd  # TODO
    if args.enable_hvd:
        import horovod.torch as hvd

    if args.load_ckpt_name is not None:
        #TODO: choose ckpt_path
        ckpt_path = utils.get_checkpoint(args.model_dir, args.load_ckpt_name)
    else:
        ckpt_path = utils.latest_checkpoint(args.model_dir)

    hvd_size, hvd_rank, hvd_local_rank = utils.init_hvd_cuda(
        args.enable_hvd, args.enable_gpu)

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    config = AutoConfig.from_pretrained("bert-base-uncased",
                                        output_hidden_states=True)
    bert_model = AutoModel.from_pretrained("bert-base-uncased", config=config)

    #bert_model.load_state_dict(torch.load('../bert_encoder_part.pkl'))
    # freeze parameters
    for name, param in bert_model.named_parameters():
        if name not in finetuneset:
            param.requires_grad = False

    news, news_index, category_dict, domain_dict, subcategory_dict = read_news_bert(
        os.path.join(args.root_data_dir,
                     f'{args.dataset}/{args.train_dir}/news.tsv'), args,
        tokenizer)

    news_title, news_title_type, news_title_attmask, \
    news_abstract, news_abstract_type, news_abstract_attmask, \
    news_body, news_body_type, news_body_attmask, \
    news_category, news_domain, news_subcategory = get_doc_input_bert(
        news, news_index, category_dict, domain_dict, subcategory_dict, args)

    news_combined = np.concatenate([
        x for x in
        [news_title, news_title_type, news_title_attmask, \
            news_abstract, news_abstract_type, news_abstract_attmask, \
            news_body, news_body_type, news_body_attmask, \
            news_category, news_domain, news_subcategory]
        if x is not None], axis=1)

    model = ModelBert(args, bert_model, len(category_dict), len(domain_dict),
                      len(subcategory_dict))
    word_dict = None

    if args.enable_gpu:
        model = model.cuda()

    lr_scaler = hvd.local_size()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    if args.enable_hvd:
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
        compression = hvd.Compression.none
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=compression,
            op=hvd.Average)

    dataloader = DataLoaderTrain(
        news_index=news_index,
        news_combined=news_combined,
        word_dict=word_dict,
        data_dir=os.path.join(args.root_data_dir,
                              f'{args.market}/{args.train_dir}'),
        filename_pat=args.filename_pat,
        args=args,
        world_size=hvd_size,
        worker_rank=hvd_rank,
        cuda_device_idx=hvd_local_rank,
        enable_prefetch=True,
        enable_shuffle=True,
        enable_gpu=args.enable_gpu,
    )

    logging.info('Training...')
    for ep in range(args.epochs):
        loss = 0.0
        accuracy = 0.0
        for cnt, (log_ids, log_mask, input_ids,
                  targets) in enumerate(dataloader):
            if cnt > args.max_steps_per_epoch:
                break

            if args.enable_gpu:
                log_ids = log_ids.cuda(non_blocking=True)
                log_mask = log_mask.cuda(non_blocking=True)
                input_ids = input_ids.cuda(non_blocking=True)
                targets = targets.cuda(non_blocking=True)

            bz_loss, y_hat = model(input_ids, log_ids, log_mask, targets)
            loss += bz_loss.data.float()
            accuracy += utils.acc(targets, y_hat)
            optimizer.zero_grad()
            bz_loss.backward()
            optimizer.step()

            if cnt % args.log_steps == 0 and cnt > 0:
                logging.info(
                    '[{}] Ed: {}, train_loss: {:.5f}, acc: {:.5f}'.format(
                        hvd_rank, cnt * args.batch_size, loss.data / cnt,
                        accuracy / cnt))

            # save model minibatch
            print(hvd_rank, cnt, args.save_steps, cnt % args.save_steps)
            if hvd_rank == 0 and cnt % args.save_steps == 0:
                ckpt_path = os.path.join(args.model_dir,
                                         f'epoch-{ep+1}-{cnt}.pt')
                torch.save(
                    {
                        'model_state_dict': model.state_dict(),
                        'category_dict': category_dict,
                        'word_dict': word_dict,
                        'domain_dict': domain_dict,
                        'subcategory_dict': subcategory_dict
                    }, ckpt_path)
                logging.info(f"Model saved to {ckpt_path}")

        loss /= cnt
        print(ep + 1, loss)

        # save model last of epoch
        if hvd_rank == 0:
            ckpt_path = os.path.join(args.model_dir, f'epoch-{ep+1}.pt')
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'category_dict': category_dict,
                    'word_dict': word_dict,
                    'domain_dict': domain_dict,
                    'subcategory_dict': subcategory_dict
                }, ckpt_path)
            logging.info(f"Model saved to {ckpt_path}")

    dataloader.join()
Example #19
import csv
import utils

with open('initial/release_dates.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)

    with open('cleaned/releasedates_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        country_map = utils.get_country_map()
        clips = utils.get_clip_set()
        next(reader)

        added = set()

        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                no_accents = utils.acc(row[1])
                only_letters = utils.lettres(no_accents).lstrip()
                if only_letters == 'Democratic Republic of Congo':
                    only_letters = 'Democratic Republic of the Congo'

                # Only keep the numbers and the letters in the "ReleaseDate" column
                only_numbers_letters = utils.alet(row[2])

                if only_letters in country_map:
                    countryId = country_map[only_letters]
                    new_row = (clipid, countryId)
                    if new_row not in added:
                        wr.writerow((clipid, countryId, only_numbers_letters))
                        added.add(new_row)
Example #20
languageId = 1
language_map = {}
languages = set()

clips = utils.get_clip_set()

with open('initial/languages.csv', encoding="utf8") as csvfile:
    reader = csv.reader(csvfile)
    next(reader)
    with open('cleaned/languages_cleaned.csv', 'w', encoding="utf8") as out:
        wr = csv.writer(out)
        added = set()
        for row in reader:
            if row[0] in clips:
                clipid = row[0]
                language = row[1]
                l = utils.acc(language)
                b = utils.lettres(l)
                if b == 'some dialogue with English subtitles some without':
                    b = 'English'

                if utils.diff_letters(l, b) < 2 and len(b) != 0 and b.lower() != 'null' and b.lower() != 'none':
                    if b not in languages:
                        new_row = (languageId, b)
                        if new_row not in added:
                            languages.add(b)
                            language_map[b] = languageId
                            languageId += 1
                            wr.writerow(new_row)
                            added.add(new_row)
                    else:
                        new_row = (language_map[b], b)