def main():
    """Train and evaluate a GCN over a joint word-document graph.

    Parses hyper-parameters from the command line, builds a graph from a
    word list plus one or more readability datasets, trains a two-headed
    GCN (word nodes and document nodes) for ``--epochs`` epochs while
    logging per-epoch losses/accuracies/correlations, evaluates on the
    dev and test splits, and finally dumps all collected stats as JSON
    to stdout (progress goes to stderr).
    """
    # TODO: Parse hyper-parameters from a json config file?
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='Enable CUDA training.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--epochs', type=int, default=200,
                        help='Number of epochs to train.')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='Initial learning rate.')
    parser.add_argument('--weight-decay', type=float, default=5e-4,
                        help='Weight decay (L2 loss on parameters).')
    parser.add_argument('--nhidden', type=int, nargs='*', default=[16],
                        help='Number of hidden units for each layer.')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='Dropout rate (1 - keep probability).')
    parser.add_argument('--alpha', type=float, default=0.5,
                        help='Mixing weight between word and document losses.')
    parser.add_argument('--word-features', type=str, nargs='*', default=[],
                        help='List of word features to use. If empty, uses identity matrix.')
    parser.add_argument('--doc-features', type=str, nargs='*', default=[],
                        help='List of doc features to use. If empty, uses identity matrix.')
    parser.add_argument('--activation', type=str, default='none',
                        choices=['none', 'relu', 'tanh'],
                        help='Add the specified activation function for each GCN layer.')
    parser.add_argument('--efcamdat-file-path', type=str, default=None,
                        help='Path to EFCamDat. '
                             'If not specified, the dataset will not be used.')
    parser.add_argument('--heads', type=str, default='twin',
                        choices=['single', 'twin'],
                        help='Use either single/same or different linear layer for both word and doc as a final layer.')
    parser.add_argument('--tfidf', action='store_true',
                        help='If specified, weight the adjacency matrix by tf.idf')
    parser.add_argument('--pmi-window-width', type=int, default=-1,
                        help='Window size for calculating PMI, which is disabled when -1')
    parser.add_argument('--conversion', type=str, default='max',
                        choices=['max', 'weighted_sum'],
                        # note the trailing space: the two string literals are
                        # concatenated, and without it the help text read
                        # "convertclassification".
                        help='If using correlation during evaluation, select whether to convert '
                             'classification to a real value by weighted sum or taking the max.')
    parser.add_argument('--mode', type=str, default='classification',
                        choices=['classification', 'regression'],
                        help='Use either classification or regression loss during training.')
    parser.add_argument('--training-portion', type=int, default=10,
                        # '%' must be doubled: argparse %-formats help strings,
                        # so a bare '%' makes `-h` raise ValueError.
                        help='Specify the amount of training data between 1 (10%%) and 10 (100%%).')
    args = parser.parse_args()

    # Seed both numpy and torch for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    words = list(read_cefrj_wordlist(training_portion=args.training_portion))
    datasets = [
        read_cambridge_readability_dataset(training_portion=args.training_portion),
        read_a1_passages(training_portion=args.training_portion)]
    if args.efcamdat_file_path:
        datasets.append(read_efcamdat_dataset(args.efcamdat_file_path))
    docs = list(itertools.chain(*datasets))

    # Initialize FeatureExtractor: map the CLI feature names back to enum members.
    doc_value2feat = {feat.value: feat for feat in DocFeature}
    doc_features = {doc_value2feat[value] for value in args.doc_features}
    word_value2feat = {feat.value: feat for feat in WordFeature}
    word_features = {word_value2feat[value] for value in args.word_features}
    feature_extractor = FeatureExtractor(word_features=word_features, doc_features=doc_features,
                                         cuda=args.cuda)

    graph = Graph(feature_extractor)
    graph.add_words(words)
    graph.add_documents(docs)
    num_labeled_docs = sum(1 for doc in docs if doc.label)
    max_word_freq = int(.1 * num_labeled_docs)     # it's too much if a word appears more than 10% of labeled docs
    graph.build_mapping(min_word_freq=3, max_word_freq=max_word_freq, min_document_len=5)
    graph.index()

    print(graph, file=sys.stderr)    # show graph stats

    adj = graph.get_adj(use_tfidf=args.tfidf,
                        pmi_window_width=args.pmi_window_width)

    x = graph.get_feature_matrix()

    type_masks, split_masks = graph.get_type_and_split_masks()
    labels = graph.get_labels()
    # Real-valued targets (one per label) used by the regression mode.
    labels_beta = torch.Tensor([cefr_to_beta(CEFR_LEVELS[i.item()]) for i in labels])

    # TODO: complete the training pipeline
    # Training
    nclass = 1 if args.mode == "regression" else len(CEFR_LEVELS)

    model = GCN(nfeat=x.shape[1],
                nhidden=args.nhidden,
                nclass=nclass,
                dropout=args.dropout,
                activation=args.activation,
                heads=args.heads)
    if args.cuda:
        # Move the model and every tensor it consumes to the GPU.
        adj = adj.cuda()
        model = model.cuda()
        x = x.cuda()
        labels = labels.cuda()
        labels_beta = labels_beta.cuda()
        for k, v in type_masks.items():
            type_masks[k] = v.cuda()
        for k, v in split_masks.items():
            split_masks[k] = v.cuda()

    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    stats = defaultdict(list)
    if not args.efcamdat_file_path:
        stats['num_unlabeled_docs'] = 0
    else:
        stats['num_unlabeled_docs'] = len(list(read_efcamdat_dataset(args.efcamdat_file_path)))
    stats['num_docs'] = graph.get_num_indexed_docs()

    for epoch in range(args.epochs):
        print('Epoch: {:04d}'.format(epoch + 1), file=sys.stderr)

        model.train()
        optimizer.zero_grad()
        # logit1: word-node head, logit2: doc-node head.
        logit1, logit2 = model(adj, x)
        if args.mode == "regression":
            loss1 = masked_mean_squared_error(
                logit1, labels_beta, type_masks[NodeType.WORD] * split_masks[DatasetSplit.TRAIN])
            loss2 = masked_mean_squared_error(
                logit2, labels_beta, type_masks[NodeType.DOC] * split_masks[DatasetSplit.TRAIN])
        else:
            loss1 = masked_cross_entropy(
                logit1, labels, type_masks[NodeType.WORD] * split_masks[DatasetSplit.TRAIN])
            loss2 = masked_cross_entropy(
                logit2, labels, type_masks[NodeType.DOC] * split_masks[DatasetSplit.TRAIN])

        # The factor 2 makes alpha=0.5 reduce to the plain sum loss1 + loss2.
        loss = (args.alpha * loss1 + (1. - args.alpha) * loss2) * 2
        loss.backward()
        optimizer.step()

        # compute and save loss
        with torch.no_grad():
            if args.mode == "regression":
                dev_loss1 = masked_mean_squared_error(
                    logit1, labels_beta, type_masks[NodeType.WORD] * split_masks[DatasetSplit.DEV])
                dev_loss2 = masked_mean_squared_error(
                    logit2, labels_beta, type_masks[NodeType.DOC] * split_masks[DatasetSplit.DEV])
            else:
                dev_loss1 = masked_cross_entropy(
                    logit1, labels, type_masks[NodeType.WORD] * split_masks[DatasetSplit.DEV])
                dev_loss2 = masked_cross_entropy(
                    logit2, labels, type_masks[NodeType.DOC] * split_masks[DatasetSplit.DEV])
            dev_loss = dev_loss1 + dev_loss2

        print('\tloss: {:.4f}, dev_loss: {:.4f}'.format(loss.item(), dev_loss.item()),
              file=sys.stderr)
        stats['train_loss'].append(loss.item())
        stats['dev_loss'].append(dev_loss.item())

        # compute and save accuracy for train and dev
        model.eval()
        for split in [DatasetSplit.TRAIN, DatasetSplit.DEV]:
            for node_type in [NodeType.WORD, NodeType.DOC]:
                if node_type == NodeType.WORD:
                    logit = logit1
                else:
                    logit = logit2
                acc = accuracy(logit, labels, type_masks[node_type] * split_masks[split],
                               mode=args.mode)

                corr = correlation(logit, labels, type_masks[node_type] * split_masks[split],
                                   mode=args.mode, conversion=args.conversion)

                stats_acc_key = '{}_acc_{}'.format(split.value, node_type.value)
                print('\t{}: {:.4f}'.format(stats_acc_key, acc), file=sys.stderr)
                stats[stats_acc_key].append(acc)

                stats_corr_key = '{}_corr_{}'.format(split.value, node_type.value)
                print('\t{}: {:.4f}'.format(stats_corr_key, corr), file=sys.stderr)
                stats[stats_corr_key].append(corr)

        # Macro-average the word and doc heads on the dev split.
        macro_avg_dev_acc = (stats['dev_acc_word'][-1] + stats['dev_acc_doc'][-1]) / 2
        stats['dev_acc_avr'].append(macro_avg_dev_acc)
        macro_avg_dev_corr = (stats['dev_corr_word'][-1] + stats['dev_corr_doc'][-1]) / 2
        stats['dev_corr_avr'].append(macro_avg_dev_corr)

    # Evaluation
    model.eval()  # turn off dropout (if we are using one)
    logit1, logit2 = model(adj, x)

    print('Evaluation', file=sys.stderr)
    for split in [DatasetSplit.DEV, DatasetSplit.TEST]:
        for node_type in NodeType:
            if node_type == NodeType.WORD:
                logit = logit1
            else:
                logit = logit2

            acc = accuracy(logit, labels, type_masks[node_type] * split_masks[split],
                           mode=args.mode)

            corr = correlation(logit, labels, type_masks[node_type] * split_masks[split],
                               mode=args.mode, conversion=args.conversion)

            stats_key_acc = 'eval_{}_acc_{}'.format(split.value, node_type.value)
            print('\t{}: {:.4f}'.format(stats_key_acc, acc), file=sys.stderr)
            stats[stats_key_acc].append(acc)

            stats_key_corr = 'eval_{}_corr_{}'.format(split.value, node_type.value)
            print('\t{}: {:.4f}'.format(stats_key_corr, corr), file=sys.stderr)
            stats[stats_key_corr].append(corr)

        macro_avg_acc = (stats[f"eval_{split.value}_acc_word"][-1] +
                         stats[f"eval_{split.value}_acc_doc"][-1]) / 2
        print('\teval_{}_acc_avr: {:.4f}'.format(split.value, macro_avg_acc), file=sys.stderr)
        stats[f'eval_{split.value}_acc_avr'].append(macro_avg_acc)

        macro_avg_corr = (stats[f"eval_{split.value}_corr_word"][-1] +
                          stats[f"eval_{split.value}_corr_doc"][-1]) / 2
        print('\teval_{}_corr_avr: {:.4f}'.format(split.value, macro_avg_corr), file=sys.stderr)
        stats[f'eval_{split.value}_corr_avr'].append(macro_avg_corr)

    # Dump stats (stdout, so it can be redirected/parsed separately from the logs)
    print(json.dumps(stats))
Example #2
0
for epoch in range(3):
    print('Start of epoch %d' % (epoch, ))

    for step, (batch_x, batch_y) in enumerate(train_data):
        # One optimization step on the current minibatch.
        run_optimization(batch_x,
                         batch_y,
                         step,
                         loss_type=use_loss,
                         use_vat=use_vat)

        # Only report metrics every `display_step` batches.
        if step % display_step != 0:
            continue

        embed, pred = conv_net(x_test)
        acc = utils.accuracy(pred, y_test)

        if use_loss == 'arcface':
            # embed_infer_loss wraps the arcface embedding loss plus the
            # plain inference cross-entropy.
            embed_loss, infer_loss = embed_infer_loss(embed, pred, y_test)
            print(
                "step: %i, embed_loss: %f, infer_loss: %f, accuracy: %f" %
                (step, embed_loss, infer_loss, acc))
        else:
            loss = utils.cross_entropy_loss(pred, y_test)
            print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
'''
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
Example #3
0
        batch_pointclass_preds = lasernet_seg(x=batch_rv)

        L_train = loss(batch_pointclass_preds, batch_labels)

        if torch.isnan(L_train):
            print("L_train had value of nan")
            break

        lasernet_seg.zero_grad()
        L_train.backward()
        optimizer.step()

        # save loss and accuracy
        losses_train.append(L_train.item())
        accs_train.append(
            accuracy(batch_pointclass_preds, batch_labels).item())

    with torch.no_grad():
        for batch_rv, batch_labels, _ in tqdm(val_dataloader):

            batch_pointclass_preds = lasernet_seg(x=batch_rv)

            L_val = loss(batch_pointclass_preds, batch_labels)

            if torch.isnan(L_val):
                print("L_val had value of nan")
                break

            losses_val.append(L_val.item())
            accs_val.append(
                accuracy(batch_pointclass_preds, batch_labels).item())
Example #4
0
def train(model, reglog, optimizer, loader, epoch):
    """
    Train the models on the dataset.

    The backbone ``model`` is kept in eval mode and run under
    ``torch.no_grad()``; only the ``reglog`` head receives gradients.
    Returns ``(epoch, avg_loss, avg_top1, avg_top5)``.
    """
    # timing meters
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # metric meters
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    end = time.perf_counter()

    model.eval()       # frozen feature extractor
    reglog.train()     # trainable classification head
    criterion = nn.CrossEntropyLoss().cuda()

    for batch_idx, (images, targets) in enumerate(loader):
        # time spent waiting on the data loader
        data_time.update(time.perf_counter() - end)

        # move to gpu
        images = images.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)

        # forward: frozen backbone, then the head
        with torch.no_grad():
            feats = model(images)
        scores = reglog(feats)

        # cross-entropy objective
        loss = criterion(scores, targets)

        # backward + step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # bookkeeping
        acc1, acc5 = accuracy(scores, targets, topk=(1, 5))
        n = images.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1[0], n)
        top5.update(acc5[0], n)

        batch_time.update(time.perf_counter() - end)
        end = time.perf_counter()

        # verbose (rank-0 only, every 50 iterations)
        if args.rank == 0 and batch_idx % 50 == 0:
            logger.info("Epoch[{0}] - Iter: [{1}/{2}]\t"
                        "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                        "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                        "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                        "Prec {top1.val:.3f} ({top1.avg:.3f})\t"
                        "LR {lr}".format(
                            epoch,
                            batch_idx,
                            len(loader),
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            top1=top1,
                            lr=optimizer.param_groups[0]["lr"],
                        ))

    return epoch, losses.avg, top1.avg.item(), top5.avg.item()
Example #5
0
def train(opt: argparse.Namespace):
    """Train a Hierarchical Attention Network on the readability datasets.

    Builds train/dev/test ``MyDataset`` loaders, trains with SGD and a
    class-weighted cross-entropy, evaluates on dev every
    ``opt.test_interval`` epochs with early stopping, and finally reports
    dev/test metrics for the best saved model.
    """
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    docs = list(
        itertools.chain(
            read_cambridge_readability_dataset(
                training_portion=opt.training_portion),
            read_a1_passages(training_portion=opt.training_portion)))
    max_word_length, max_sent_length = get_max_lengths(docs)
    training_set = MyDataset(docs=docs,
                             split=DatasetSplit.TRAIN,
                             max_length_word=max_word_length,
                             max_length_sentences=max_sent_length)
    training_generator = DataLoader(training_set, **training_params)
    dev_set = MyDataset(docs=docs,
                        split=DatasetSplit.DEV,
                        max_length_word=max_word_length,
                        max_length_sentences=max_sent_length)
    dev_generator = DataLoader(dev_set, **test_params)
    test_set = MyDataset(docs=docs,
                         split=DatasetSplit.TEST,
                         max_length_word=max_word_length,
                         max_length_sentences=max_sent_length)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.word2vec_path, max_sent_length, max_word_length)

    # Handling skewed dataset: weight each class inversely to its frequency.
    # NOTE(review): this counts docs from all splits, not just TRAIN — confirm
    # that is intended.
    label_dist = [0] * training_set.num_classes
    for doc in docs:
        label_id = CEFR2INT[doc.label]
        label_dist[label_id] += 1
    # max(count, 1) avoids ZeroDivisionError for a class with no documents;
    # the weight for such a class is never used anyway.
    weight = torch.FloatTensor([1 / max(count, 1) for count in label_dist])
    criterion = nn.CrossEntropyLoss(weight=weight)

    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=opt.lr,
                                momentum=opt.momentum)
    # Include "loss"/"corr" defaults so the final report never hits a KeyError
    # even when no epoch improves on the initial accuracy.
    best_metrics = {"accuracy": 0.0, "loss": float("inf"), "corr": 0.0}
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iteration, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opt.clip)
            optimizer.step()
            acc = accuracy(predictions,
                           label,
                           mask=torch.ones_like(label, dtype=torch.long),
                           mode=opt.mode)
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, iteration + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, acc))
        if epoch % opt.test_interval == 0:
            te_loss, test_metrics = evaluate(criterion, dev_set, model,
                                             dev_generator, opt.mode)
            print(
                "Epoch: {}/{}, Lr: {}, Dev Loss: {}, Dev Accuracy: {}, Dev Corr: {}"
                .format(
                    epoch + 1,
                    opt.num_epoches,
                    optimizer.param_groups[0]['lr'],
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["corr"],
                ),
                file=sys.stderr)
            model.train()
            # Checkpoint whenever dev accuracy improves.
            if test_metrics["accuracy"] > best_metrics["accuracy"]:
                best_metrics = test_metrics
                best_metrics["loss"] = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping (patience measured in epochs since last improvement)
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break

    # Reload the best checkpoint and report final dev/test metrics.
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    te_loss, test_metrics = evaluate(criterion, test_set, model,
                                     test_generator, opt.mode)
    print("Best Dev Loss: {}, Best Dev Accuracy: {}, Best Dev Corr: {}".format(
        best_metrics["loss"], best_metrics["accuracy"], best_metrics["corr"]),
          file=sys.stderr)
    print("Test Loss: {}, Test Accuracy: {}, Test Corr: {}".format(
        te_loss, test_metrics["accuracy"], test_metrics["corr"]),
          file=sys.stderr)
Example #6
0
def train(model, data_loader, optimizer, epoch, train_mloss, train_rloss,
          train_acc, learning_rate, lr_wr, output_tensor):
    """
    Train CapsuleNet model on training set

    Args:
        model: The CapsuleNet model.
        data_loader: An iterator over the dataset. It combines a dataset and a sampler.
        optimizer: Optimization algorithm.
        epoch: Current epoch.
        train_mloss: Writable handle that receives the per-step margin loss.
        train_rloss: Writable handle that receives the per-step reconstruction loss.
        train_acc: Writable handle that receives the per-step accuracy.
        learning_rate: Writable handle that receives the per-step learning rate.
        lr_wr: Warm-restart LR scheduler; only its ``update_lr`` is called.
        output_tensor: Unused here (only referenced by commented-out code).
    """
    print('===> Training mode')

    num_batches = len(data_loader)  # iteration per epoch. e.g: 469
    total_step = args.epochs * num_batches
    epoch_tot_acc = 0

    # Switch to train mode
    model.train()

    if args.cuda:
        # When we wrap a Module in DataParallel for multi-GPUs,
        # unwrap it so the custom forward signature below is callable.
        model = model.module

    start_time = timer()

    for batch_idx, (data, target) in enumerate(tqdm(data_loader,
                                                    unit='batch')):
        batch_size = data.size(0)
        # NOTE(review): this 0-based global step assumes `epoch` starts at 1 —
        # confirm against the caller.
        global_step = batch_idx + (epoch * num_batches) - num_batches

        # Keep the integer class labels; `target` becomes one-hot below.
        labels = target
        target_one_hot = utils.one_hot_encode(target, length=args.num_classes)
        assert target_one_hot.size() == torch.Size([batch_size, 10])

        data, target = Variable(data), Variable(target_one_hot)

        if args.cuda:
            data = data.to(args.device)
            target = target.to(args.device)
            labels = labels.to(args.device)

        # Train step - forward, backward and optimize
        optimizer.zero_grad()
        #utils.exponential_decay_LRR(optimizer, args.lr, global_step, args.decay_steps, args.decay_rate, args.staircase)
        # learning rate policies (mutually exclusive, selected by CLI flags)
        if args.find_lr:
            utils.find_lr(optimizer, global_step)

        elif args.exp_decay_lr:
            utils.exponential_decay_LRR(optimizer, args.lr, global_step,
                                        args.decay_steps, args.decay_rate,
                                        args.staircase)

        elif args.one_cycle_policy:
            utils.one_cycle_policy(optimizer, args.lr, global_step, total_step)

        elif args.warm_restarts:
            # lr_wr.update_lr(optimizer, num_batches)
            lr_wr.update_lr(optimizer)

        output, reconstruction = model(data, labels, True)
        # utils.write_tensor(output, output_tensor)
        loss, margin_loss, recon_loss = loss_func(output, target,
                                                  args.regularization_scale,
                                                  reconstruction, data,
                                                  args.device, batch_size)
        loss.backward()
        optimizer.step()

        # Read back the LR actually in effect (last param group wins).
        for param_group in optimizer.param_groups:
            lr_temp = param_group['lr']
        learning_rate.write('%.10f \n' % lr_temp)

        # Calculate accuracy for each step and average accuracy for each epoch
        acc = utils.accuracy(output, labels, args.cuda)
        epoch_tot_acc += acc
        epoch_avg_acc = epoch_tot_acc / (batch_idx + 1)

        train_mloss.write('%.6f \n' % margin_loss)
        train_rloss.write('%.6f \n' % recon_loss)
        train_acc.write('%.6f \n' % acc)

        # Print losses
        if batch_idx % args.log_interval == 0:
            template = 'Epoch {}/{}, ' \
                    'Step {}/{}: ' \
                    '[Total loss: {:.6f},' \
                    '\tMargin loss: {:.6f},' \
                    '\tReconstruction loss: {:.6f},' \
                    '\tBatch accuracy: {:.6f},' \
                    '\tAccuracy: {:.6f}]'
            tqdm.write(
                template.format(
                    epoch, args.epochs, global_step, total_step,
                    loss.data.item(), margin_loss.data.item(),
                    recon_loss.data.item() if args.use_reconstruction_loss else
                    0, acc, epoch_avg_acc))

    # Print time elapsed for an epoch
    end_time = timer()

    # Maintain a running average of epoch duration in a module-level global.
    # NOTE(review): dividing by `epoch` assumes epoch >= 1.
    global avg_training_time_per_epoch

    avg_training_time_per_epoch = (avg_training_time_per_epoch *
                                   (epoch - 1) + end_time - start_time) / epoch

    print('Time elapsed for epoch {}: {:.0f}s.'.format(epoch,
                                                       end_time - start_time))
Example #7
0
def job(tuning, params_path, devices, resume):
    """
    Example:
        python exp0.py job --devices 0,1 -s
        python exp0.py tuning --devices 0,1 --n-gpu 1 --mode 'random' --n-iter 4
    """
    # The global declaration must precede every use of `params`: the original
    # code read `params` (for exp_path) before `global params`, which raises
    # "SyntaxError: name 'params' is used prior to global declaration".
    global params

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    if tuning:
        # In tuning mode the hyper-parameters come from a JSON file.
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}'
                           for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    # Hold out a fixed validation slice.
    train_df = pd.read_csv(ROOT + 'data/train.csv')
    train_df, val_df = train_test_split(train_df,
                                        test_size=1024,
                                        random_state=params['seed'])

    model = models.UNet(in_channels=3,
                        n_classes=2,
                        depth=4,
                        ch_first=32,
                        padding=True,
                        batch_norm=False,
                        up_mode='upconv').cuda()

    optimizer = utils.get_optim(model, params)

    if resume is not None:
        model, optimizer = utils.load_checkpoint(model,
                                                 resume,
                                                 optimizer=optimizer)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'val':
        transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }
    image_datasets = {
        'train': data_utils.CSVDataset(train_df, data_transforms['train']),
        'val': data_utils.CSVDataset(val_df, data_transforms['val'])
    }
    data_loaders = {
        'train':
        DataLoader(image_datasets['train'],
                   batch_size=params['batch_size'],
                   pin_memory=True,
                   shuffle=True,
                   drop_last=True,
                   num_workers=params['workers']),
        'val':
        DataLoader(image_datasets['val'],
                   batch_size=params['test_batch_size'],
                   pin_memory=True,
                   shuffle=False,
                   num_workers=params['workers'])
    }

    criterion = nn.CrossEntropyLoss()
    # Step the LR down at 70% and 90% of training.
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(params['epochs'] * 0.7),
                    int(params['epochs'] * 0.9)],
        gamma=0.1)

    for epoch in range(params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}'
        )

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (x, y) in tqdm(enumerate(data_loaders['train']),
                              total=len(data_loaders['train']),
                              miniters=50):
            x = x.to('cuda:0')
            y = y.to('cuda:0', non_blocking=True)

            outputs = model(x)
            loss = criterion(outputs, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))

        train_loss = losses.avg
        train_acc = prec1.avg

        # ============================== validation ============================== #
        model.train(False)
        losses.reset()
        prec1.reset()

        for i, (x, y) in tqdm(enumerate(data_loaders['val']),
                              total=len(data_loaders['val'])):
            x = x.cuda()
            y = y.cuda(non_blocking=True)

            with torch.no_grad():
                outputs = model(x)
                loss = criterion(outputs, y)

            acc = utils.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc.item(), x.size(0))

        val_loss = losses.avg
        val_acc = prec1.avg

        logger.info(f'[Val] Loss: \033[1m{val_loss:.4f}\033[0m | '
                    f'Acc: \033[1m{val_acc:.4f}\033[0m\n')

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalars('Loss', {'val': val_loss}, epoch)
        writer.add_scalars('Acc', {'val': val_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        scheduler.step()

        if not tuning:
            utils.save_checkpoint(model, epoch, exp_path + 'model_optim.pth',
                                  optimizer)

    if tuning:
        # Collect the final epoch's metrics explicitly rather than eval()-ing
        # variable names — same values, no dynamic evaluation.
        tuning_result = {
            'train_loss': [train_loss],
            'train_acc': [train_acc],
            'val_loss': [val_loss],
            'val_acc': [val_acc],
        }
        utils.write_tuning_result(params, tuning_result,
                                  exp_path + 'tuning/results.csv')
Example #8
0
# Combine the plain softmax cross-entropy with two externally defined
# regularizers (weights 1.0, 0.1, 0.1) into a single weighted objective,
# then attach an RMSProp optimizer with learning rate 1e-3.
vanilla_loss = net.softmax_crossent(predict, y)
regularized_loss = net.weighted_loss((vanilla_loss, 1.0), (regularizer1, .1),
                                     (regularizer2, .1))
net.optimize(regularized_loss, 'rmsprop', 1e-3)


# Helper: effective (unpadded) length of each sequence in a batch.
def real_len(x_batch):
    """Return the index of the first minimum of each sequence after
    appending a 0 sentinel — for 0-padded sequences of positive ids this
    is the number of real tokens before the padding starts."""
    lengths = []
    for seq in x_batch:
        lengths.append(np.argmin(seq + [0]))
    return lengths


# Training: stream minibatches from the dataset and run one update per batch.
batch = int(64)
epoch = int(15)
step = int(0)
for sentences, label in dat.yield_batch(batch, epoch):
    # keep=.8 enables dropout during training; center is a fixed scalar input.
    feed = {
        x: sentences,
        y: label,
        keep: .8,
        lens: real_len(sentences),
        center: 0.
    }
    pred, loss = net.train([predict], feed)
    acc = accuracy(pred, label)
    print('Step {}, Loss {}, Accuracy {}%'.format(step + 1, loss, acc * 100))
    step += 1

# Final evaluation on the held-out test set (keep=1.0 disables dropout).
x_test, y_test = dat.yield_test()
feed = {x: x_test, keep: 1., lens: real_len(x_test)}
pred = net.forward([predict], feed)[0]
acc = accuracy(pred, y_test)
print('Accuracy on test set: {}'.format(acc))