Example #1
import math
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm import tqdm

# MyDataset, CharacterLevelCNN and load_data are project-local helpers and are
# assumed to be importable from the surrounding repository.


def run(args):

    batch_size = args.batch_size

    training_params = {"batch_size": batch_size,
                       "shuffle": True,
                       "num_workers": args.workers}

    texts, labels, number_of_classes, sample_weights = load_data(args)
    train_texts, _, train_labels, _, _, _ = train_test_split(texts,
                                                             labels,
                                                             sample_weights,
                                                             test_size=args.validation_split,
                                                             random_state=42,
                                                             stratify=labels)

    training_set = MyDataset(train_texts, train_labels, args)
    training_generator = DataLoader(training_set, **training_params)
    model = CharacterLevelCNN(args, number_of_classes)

    if torch.cuda.is_available():
        model.cuda()

    model.train()

    criterion = nn.CrossEntropyLoss()

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(), lr=args.start_lr, momentum=0.9
        )
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=args.start_lr
        )

    start_lr = args.start_lr
    end_lr = args.end_lr
    lr_find_epochs = args.epochs
    smoothing = args.smoothing

    # exponentially ramp the LR from start_lr to end_lr over the whole run
    def lr_lambda(x):
        return math.exp(
            x * math.log(end_lr / start_lr) /
            (lr_find_epochs * len(training_generator)))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    losses = []
    learning_rates = []

    for epoch in range(lr_find_epochs):
        print(f'[epoch {epoch + 1} / {lr_find_epochs}]')
        progress_bar = tqdm(enumerate(training_generator),
                            total=len(training_generator))
        for step, batch in progress_bar:
            features, labels = batch
            if torch.cuda.is_available():
                features = features.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()

            predictions = model(features)
            loss = criterion(predictions, labels)

            loss.backward()
            optimizer.step()
            scheduler.step()

            lr = optimizer.state_dict()["param_groups"][0]["lr"]
            learning_rates.append(lr)

            if step == 0:
                losses.append(loss.item())
            else:
                loss = smoothing * loss.item() + (1 - smoothing) * losses[-1]
                losses.append(loss)

    # persist the LR range-test curve
    os.makedirs('./plots', exist_ok=True)
    plt.semilogx(learning_rates, losses)
    plt.savefig('./plots/losses_vs_lr.png')
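
Example #1 is a learning-rate range test: LambdaLR multiplies the optimizer's base LR (start_lr) by exp(x · log(end_lr / start_lr) / total_steps) at every batch, so the LR ramps exponentially from start_lr to end_lr while the smoothed loss is recorded. The snippet only saves the plot; a common follow-up is to pick the LR where the loss falls fastest. The sketch below assumes the learning_rates and losses lists built above, and suggest_lr is a hypothetical helper, not part of the original code.

import numpy as np

def suggest_lr(learning_rates, losses, skip_start=10, skip_end=5):
    # Drop the noisy head and tail of the range test before analysing it.
    lrs = np.array(learning_rates[skip_start:-skip_end])
    ls = np.array(losses[skip_start:-skip_end])
    # Pick the LR where the loss decreases fastest with respect to log10(lr).
    grads = np.gradient(ls, np.log10(lrs))
    return float(lrs[np.argmin(grads)])

# print(f'suggested lr: {suggest_lr(learning_rates, losses):.2e}')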
Example #2
def run(args, both_cases=False):

    if args.flush_history == 1:
        objects = os.listdir(args.log_path)
        for f in objects:
            if os.path.isdir(args.log_path + f):
                shutil.rmtree(args.log_path + f)

    now = datetime.now()
    logdir = args.log_path + now.strftime("%Y%m%d-%H%M%S") + "/"
    os.makedirs(logdir)
    log_file = logdir + 'log.txt'
    writer = SummaryWriter(logdir)

    batch_size = args.batch_size

    training_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": args.workers,
        "drop_last": True
    }

    validation_params = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": args.workers,
        "drop_last": True
    }

    texts, labels, number_of_classes, sample_weights = load_data(args)

    class_names = sorted(list(set(labels)))
    class_names = [str(class_name) for class_name in class_names]

    train_texts, val_texts, train_labels, val_labels, train_sample_weights, _ = train_test_split(
        texts,
        labels,
        sample_weights,
        test_size=args.validation_split,
        random_state=42,
        stratify=labels)
    training_set = MyDataset(train_texts, train_labels, args)
    validation_set = MyDataset(val_texts, val_labels, args)

    if bool(args.use_sampler):
        train_sample_weights = torch.from_numpy(train_sample_weights)
        sampler = WeightedRandomSampler(
            train_sample_weights.type('torch.DoubleTensor'),
            len(train_sample_weights))
        training_params['sampler'] = sampler
        training_params['shuffle'] = False

    training_generator = DataLoader(training_set, **training_params)
    validation_generator = DataLoader(validation_set, **validation_params)

    model = CharacterLevelCNN(args, number_of_classes)
    if torch.cuda.is_available():
        model.cuda()

    if not bool(args.focal_loss):
        if bool(args.class_weights):
            class_counts = dict(Counter(train_labels))
            m = max(class_counts.values())
            for c in class_counts:
                class_counts[c] = m / class_counts[c]
            weights = []
            for k in sorted(class_counts.keys()):
                weights.append(class_counts[k])

            weights = torch.Tensor(weights)
            if torch.cuda.is_available():
                weights = weights.cuda()
            print(f'passing weights to CrossEntropyLoss : {weights}')
            criterion = nn.CrossEntropyLoss(weight=weights)
        else:
            criterion = nn.CrossEntropyLoss()

    else:
        if args.alpha is None:
            criterion = FocalLoss(gamma=args.gamma, alpha=None)
        else:
            criterion = FocalLoss(gamma=args.gamma,
                                  alpha=[args.alpha] * number_of_classes)

    if args.optimizer == 'sgd':
        if args.scheduler == 'clr':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=1,
                                        momentum=0.9,
                                        weight_decay=0.00001)
        else:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.learning_rate,
                                        momentum=0.9)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    best_f1 = 0
    best_epoch = 0

    if args.scheduler == 'clr':
        stepsize = int(args.stepsize * len(training_generator))
        clr = utils.cyclical_lr(stepsize, args.min_lr, args.max_lr)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    else:
        scheduler = None

    for epoch in range(args.epochs):
        training_loss, training_accuracy, train_f1 = train(
            model, training_generator, optimizer, criterion, epoch, writer,
            log_file, scheduler, class_names, args, args.log_every)

        validation_loss, validation_accuracy, validation_f1 = evaluate(
            model, validation_generator, criterion, epoch, writer, log_file,
            args.log_every)

        print(
            '[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'
            .format(epoch + 1, args.epochs, training_loss, training_accuracy,
                    validation_loss, validation_accuracy))
        print("=" * 50)

        # learning rate scheduling

        if args.scheduler == 'step':
            if args.optimizer == 'sgd' and (epoch + 1) % 3 == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                print('Decreasing learning rate to {0}'.format(current_lr))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr

        # model checkpoint

        if validation_f1 > best_f1:
            best_f1 = validation_f1
            best_epoch = epoch
            if args.checkpoint == 1:
                torch.save(
                    model.state_dict(), args.output +
                    'model_{}_epoch_{}_maxlen_{}_lr_{}_loss_{}_acc_{}_f1_{}.pth'
                    .format(args.model_name, epoch, args.max_length,
                            optimizer.state_dict()['param_groups'][0]['lr'],
                            round(validation_loss, 4),
                            round(validation_accuracy, 4),
                            round(validation_f1, 4)))

        if bool(args.early_stopping):
            if epoch - best_epoch > args.patience > 0:
                print(
                    "Stop training at epoch {}. The best validation F1 achieved is {} at epoch {}"
                    .format(epoch, best_f1, best_epoch))
                break
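
Example #2 builds its 'clr' scheduler from a utils.cyclical_lr helper that is not shown here. Since the SGD optimizer is created with lr=1 and the returned callable is handed straight to LambdaLR, that helper has to map the batch index to the absolute learning rate. A minimal sketch of a triangular cyclical schedule with that contract (an assumption, not necessarily the repository's exact implementation):

import math

def cyclical_lr(stepsize, min_lr=3e-4, max_lr=3e-3):
    # Triangular wave in [0, 1]: rises for `stepsize` batches, then falls back.
    def relative(it, stepsize):
        cycle = math.floor(1 + it / (2 * stepsize))
        x = abs(it / stepsize - 2 * cycle + 1)
        return max(0.0, 1.0 - x)

    # The optimizer above uses lr=1, so the multiplicative factor returned
    # here to LambdaLR *is* the learning rate.
    return lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize)

With stepsize = args.stepsize * len(training_generator), as computed in the example, each half-cycle then spans a fixed number of epochs.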
Example #3
def run(args, both_cases=False):
    print("3 : -> Entered in Run")

    #log generation for TensorBoardX
    log_path = args.log_path
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)

    os.makedirs(log_path)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    writer = SummaryWriter(log_path)

    batch_size = args.batch_size

    training_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": args.workers
    }

    validation_params = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": args.workers
    }

    full_dataset = MyDataset(args)

    # train size is 80% by default
    # (note: args.validation_split is used here as the training fraction)
    train_size = int(args.validation_split * len(full_dataset))
    print("4 :-> ", train_size)

    # validation_size = full_dataset - train_size
    validation_size = len(full_dataset) - train_size
    print("5 :-> ", validation_size)

    #torch function to split data into training and validation randomly
    training_set, validation_set = torch.utils.data.random_split(
        full_dataset, [train_size, validation_size])

    training_generator = DataLoader(training_set, **training_params)
    print("6 :-> ", training_generator)

    validation_generator = DataLoader(validation_set, **validation_params)
    print("7 :-> ", validation_generator)

    #passing args to src.scc_model
    model = CharacterLevelCNN(args)
    print("8 :-> ", model)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    print("9 :->  ", criterion)
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    best_loss = 1e10
    best_epoch = 0

    for epoch in range(args.epochs):
        training_loss, training_accuracy = train(model, training_generator,
                                                 optimizer, criterion, epoch,
                                                 writer)

        validation_loss, validation_accuracy = evaluate(
            model, validation_generator, criterion, epoch, writer)

        print(
            '[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'
            .format(epoch + 1, args.epochs, training_loss, training_accuracy,
                    validation_loss, validation_accuracy))
        print("=" * 50)

        # learning rate scheduling

        if args.schedule != 0:
            if args.optimizer == 'sgd' and epoch % args.schedule == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                print('Decreasing learning rate to {0}'.format(current_lr))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr

        # model checkpoint on best validation loss
        if validation_loss < best_loss:
            best_loss = validation_loss
            best_epoch = epoch
            if args.checkpoint == 1:
                torch.save(
                    model, args.output +
                    'char_cnn_epoch_{}_{}_{}_loss_{}_acc_{}.pth'.format(
                        args.model_name, epoch,
                        optimizer.state_dict()['param_groups'][0]['lr'],
                        round(validation_loss, 4), round(
                            validation_accuracy, 4)))

        # early stopping
        if epoch - best_epoch > args.patience > 0:
            print(
                "Stop training at epoch {}. The lowest loss achieved is {} at epoch {}"
                .format(epoch, best_loss, best_epoch))
            break
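
Like the other examples, this run() reads everything from an args namespace. For orientation, a hypothetical argparse setup covering only the flags this function itself touches (default values are illustrative; MyDataset and CharacterLevelCNN read further fields that are not listed here):

import argparse

parser = argparse.ArgumentParser(description='character-level CNN training (illustrative defaults)')
parser.add_argument('--log_path', default='./logs/')
parser.add_argument('--output', default='./models/')
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--workers', type=int, default=4)
# used above as the *training* fraction of the dataset
parser.add_argument('--validation_split', type=float, default=0.8)
parser.add_argument('--optimizer', choices=['sgd', 'adam'], default='sgd')
parser.add_argument('--learning_rate', type=float, default=0.01)
parser.add_argument('--epochs', type=int, default=10)
# halve the SGD learning rate every `schedule` epochs (0 disables it)
parser.add_argument('--schedule', type=int, default=3)
# stop after `patience` epochs without improvement (0 disables it)
parser.add_argument('--patience', type=int, default=3)
parser.add_argument('--checkpoint', type=int, default=1)
parser.add_argument('--model_name', default='char_cnn')

# args = parser.parse_args()
# run(args)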
Example #4
def run(x_train, y_train, features, x_test, model_obj, feats=False, clip=True):
    seed_everything(SEED)
    avg_losses_f = []
    avg_val_losses_f = []
    # matrix for the out-of-fold predictions
    train_preds = np.zeros((len(x_train)))
    # matrix for the predictions on the test set
    test_preds = np.zeros((len(x_test)))
    splits = list(
        StratifiedKFold(n_splits=n_splits, shuffle=True,
                        random_state=SEED).split(x_train, y_train))
    for i, (train_idx, valid_idx) in enumerate(splits):
        seed_everything(i * 1000 + i)
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        if feats:
            features = np.array(features)
        x_train_fold = torch.tensor(x_train[train_idx.astype(int)],
                                    dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis],
                                    dtype=torch.float32).cuda()
        if feats:
            kfold_X_features = features[train_idx.astype(int)]
            kfold_X_valid_features = features[valid_idx.astype(int)]
            test_features = features[valid_idx.astype(int)]
        else:
            # keep the names defined so the train/evaluate/test calls below
            # still work when no extra features are used
            kfold_X_features = kfold_X_valid_features = test_features = None
        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)],
                                  dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis],
                                  dtype=torch.float32).cuda()

        model = copy.deepcopy(model_obj)
        if args.snapshot is not None:
            print('\nLoading model from {}...'.format(args.snapshot))
            model.load_state_dict(torch.load(args.snapshot))
        model.cuda()

        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=args.lr)

        ################################################################################################
        scheduler = False
        ###############################################################################################

        train_dataset = MyDataset(
            torch.utils.data.TensorDataset(x_train_fold, y_train_fold))
        valid_dataset = MyDataset(
            torch.utils.data.TensorDataset(x_val_fold, y_val_fold))

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        print(f'Fold {i + 1}')
        steps = 0
        best_step = 0
        best_val_loss = float('inf')
        for epoch in range(n_epochs):
            start_time = time.time()
            avg_loss = train(model, train_loader, optimizer, loss_fn,
                             scheduler, clip, feats, kfold_X_features)
            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(x_test)))
            avg_val_loss, valid_preds_fold = evaluate(model, valid_loader,
                                                      loss_fn,
                                                      valid_preds_fold, feats,
                                                      kfold_X_valid_features)
            steps += 1

            elapsed_time = time.time() - start_time
            print(
                'Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'
                .format(epoch + 1, n_epochs, avg_loss, avg_val_loss,
                        elapsed_time))
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                best_step = steps
                if args.save_best:
                    checkpoint(model, i + 1, epoch)
            else:
                if steps - best_step >= args.early_stop:
                    print('early stop by {} steps.'.format(steps))
                    break
        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss)
        # predict all samples in the test set batch per batch
        # (test_loader, like batch_size / n_epochs / SEED / args, is assumed
        #  to be defined at module level)
        test_preds = test(model, test_loader, train_preds, valid_idx,
                          test_preds, valid_preds_fold, test_preds_fold,
                          splits, feats, test_features)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(
        np.average(avg_losses_f), np.average(avg_val_losses_f)))
    return train_preds, test_preds
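
The K-fold example calls seed_everything(SEED) but does not define it; SEED, n_splits, n_epochs, batch_size and args are likewise module-level globals. A sketch of a typical seed_everything helper, assuming the usual Python/NumPy/PyTorch seeding pattern rather than this repository's exact code:

import os
import random

import numpy as np
import torch

def seed_everything(seed=42):
    # Seed every RNG involved so the folds are reproducible.
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False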