Example #1
File: train.py  Project: pauking/nnsplit
def train(
    sentences_train,
    labels_train,
    sentences_valid,
    labels_valid,
    batch_size=128,
    n_epochs=10,
):
    train_dataset = data.TensorDataset(sentences_train, labels_train)
    valid_dataset = data.TensorDataset(sentences_valid, labels_valid)

    model = Network()

    train_loader = data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, pin_memory=False
    )
    valid_loader = data.DataLoader(
        valid_dataset, batch_size=batch_size, shuffle=False, pin_memory=False
    )

    databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)
    learn = Learner(databunch, model, loss_func=loss)

    if torch.cuda.is_available():
        learn = learn.to_fp16()

    learn.fit_one_cycle(n_epochs)

    return learn.model
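
Example #1 assumes module-level definitions of `Network`, `loss`, and the `data`/`DataBunch`/`Learner` imports. A minimal sketch of those missing pieces, assuming fastai v1 (the `Network` architecture and `loss` below are hypothetical placeholders, not the nnsplit originals):

import torch
from torch import nn
from torch.utils import data
from fastai.basic_data import DataBunch
from fastai.train import Learner


class Network(nn.Module):
    """Hypothetical stand-in: per-token logits from an embedding + BiLSTM."""

    def __init__(self, vocab_size=256, emb_dim=32, hidden=64):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden, batch_first=True, bidirectional=True)
        self.head = nn.Linear(2 * hidden, 1)

    def forward(self, x):
        out, _ = self.lstm(self.emb(x))
        return self.head(out).squeeze(-1)


def loss(preds, targets):
    # token-level binary cross-entropy against float 0/1 labels
    return nn.functional.binary_cross_entropy_with_logits(preds, targets)
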
Example #2
def load_base_model_if_needed(learner: Learner,
                              lm_training_config: LMTrainingConfig,
                              model_file='best') -> None:
    if lm_training_config.base_model:
        model = os.path.join(lm_training_config.base_model, model_file)
        logger.info(f"Using pretrained model: {model}.pth")
        # not setting purge to False raises a pickle serialization error
        learner.load(model, purge=False)
    else:
        logger.info("Training from scratch")
Example #3
def run(ini_file='tinyimg.ini',
        data_in_dir='./../../dataset',
        model_cfg='../cfg/vgg-tiny.cfg',
        model_out_dir='./models',
        epochs=30,
        lr=3.0e-5,
        batch_sz=256,
        num_worker=4,
        log_freq=20,
        use_gpu=True):
    # Step 1: parse config
    cfg = parse_cfg(ini_file,
                    data_in_dir=data_in_dir,
                    model_cfg=model_cfg,
                    model_out_dir=model_out_dir,
                    epochs=epochs,
                    lr=lr,
                    batch_sz=batch_sz,
                    log_freq=log_freq,
                    num_worker=num_worker,
                    use_gpu=use_gpu)
    print_cfg(cfg)

    # Step 2: create data sets and loaders
    train_ds, val_ds = build_train_val_datasets(cfg, in_memory=True)
    train_loader, val_loader = DLFactory.create_train_val_dataloader(
        cfg, train_ds, val_ds)

    # Step 3: create model
    model = MFactory.create_model(cfg)

    # Step 4: train/valid
    # This demonstrates that our approach integrates easily with our app framework
    device = get_device(cfg)
    data = DataBunch(train_loader, val_loader, device=device)
    learn = Learner(data,
                    model,
                    loss_func=torch.nn.CrossEntropyLoss(),
                    metrics=accuracy)
    #  callback_fns=[partial(EarlyStoppingCallback, monitor='accuracy', min_delta=0.01, patience=2)])

    # lr_find(learn, start_lr=1e-7, end_lr=10)
    # learn.recorder.plot()
    # lrs_losses = [(lr, loss) for lr, loss in zip(learn.recorder.lrs, learn.recorder.losses)]
    # min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    # lr = min_lr/10.0
    # plt.show()
    # print(f'Minimal lr rate is {min_lr} propose init lr {lr}')
    # fit_one_cycle(learn, epochs, lr)

    learn.fit(epochs, lr)
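
The commented-out block in Step 4 sketches picking the learning rate with fastai's LR finder; a cleaned-up version of that same idea (fastai v1 API, with the skip-10/skip-5 window and the divide-by-10 heuristic taken straight from those comments) could replace the final `learn.fit(epochs, lr)`:

    from fastai.train import lr_find, fit_one_cycle

    lr_find(learn, start_lr=1e-7, end_lr=10)
    lrs_losses = list(zip(learn.recorder.lrs, learn.recorder.losses))
    # skip the first/last few points, pick the lr with the lowest recorded loss
    min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    print(f'Minimal lr is {min_lr}, proposed init lr {min_lr / 10.0}')
    fit_one_cycle(learn, epochs, min_lr / 10.0)
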
Example #4
    def train_(self, x_train, y_train, y_aux_train, x_test):
        y_train_torch = torch.tensor(np.hstack([y_train, y_aux_train]),
                                     dtype=torch.float32)
        test_dataset = data.TensorDataset(x_test, self.test_lengths)
        train_dataset = data.TensorDataset(x_train, self.train_lengths,
                                           y_train_torch)
        valid_dataset = data.Subset(train_dataset, indices=[0, 1])
        del x_train, x_test
        gc.collect()

        train_collator = SequenceBucketCollator(lambda lengths: lengths.max(),
                                                sequence_index=0,
                                                length_index=1,
                                                label_index=2)

        train_loader = data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       collate_fn=train_collator)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       collate_fn=train_collator)

        databunch = DataBunch(train_dl=train_loader,
                              valid_dl=valid_loader,
                              collate_fn=train_collator)

        del train_dataset, valid_dataset
        gc.collect()

        all_test_preds = []
        for model_idx in range(NUM_MODELS):
            print('Model ', model_idx)
            self.seed_everything(1234 + model_idx)
            model = NeuralNet(self.embedding_matrix, y_aux_train.shape[-1],
                              y_train.shape[-1] - 1)
            if y_train.shape[-1] > 2:
                learn = Learner(databunch, model, loss_func=self.custom_loss1)
            else:
                learn = Learner(databunch, model, loss_func=self.custom_loss)
            test_preds = self.train_model(learn,
                                          test_dataset,
                                          output_dim=y_train.shape[-1] +
                                          y_aux_train.shape[-1] - 1)
            all_test_preds.append(test_preds)
        preds = np.mean(all_test_preds, axis=0)
        return preds
Example #5
def make_embedings():

    model = NeuralNet(embedding_matrix, y_aux_train.shape[-1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    val_preds, test_preds = train_model(learn,
                                        output_dim=y_aux_train.shape[-1] + 1,
                                        batch_size=BATCH_SIZE,
                                        n_epochs=N_EPOCH)

    return val_preds, test_preds
Example #6
def create_cnn(data, arch, pretrained=False, is_mono_input=True, **kwargs):
    meta = cnn_config(arch)
    body = create_body(arch, pretrained)

    # sum up the weights of in_channels axis, to reduce to single input channel
    # Suggestion by David Gutman
    # https://forums.fast.ai/t/black-and-white-images-on-vgg16/2479/2
    if is_mono_input:
        first_conv_layer = body[0]
        first_conv_weights = first_conv_layer.state_dict()['weight']
        assert first_conv_weights.size(1) == 3  # RGB channels dim
        summed_weights = torch.sum(first_conv_weights, dim=1, keepdim=True)
        first_conv_layer.weight.data = summed_weights
        first_conv_layer.in_channels = 1
    else:
        # In this case, the input is stereo: keep only 2 of the 3 RGB channel weights
        first_conv_layer = body[0]
        first_conv_weights = first_conv_layer.state_dict()['weight']
        assert first_conv_weights.size(1) == 3  # RGB channels dim
        first_conv_layer.weight.data = first_conv_weights[:, :2, :, :]
        first_conv_layer.in_channels = 2

    nf = num_features_model(body) * 2
    head = create_head(nf, data.c, None, 0.5)
    model = nn.Sequential(body, head)
    learn = Learner(data, model, **kwargs)
    learn.split(meta['split'])
    if pretrained:
        learn.freeze()
    apply_init(model[1], nn.init.kaiming_normal_)
    return learn
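
The `is_mono_input` branch above implements David Gutman's trick of summing the pretrained RGB filter weights into a single input channel. A self-contained sketch of the same idea on a plain torchvision ResNet (torchvision assumed available; with pretrained weights the summed filters keep the learned responses):

import torch
from torch import nn
from torchvision.models import resnet18

m = resnet18()
w = m.conv1.weight.data                         # shape (64, 3, 7, 7): RGB stem
mono = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
mono.weight.data = w.sum(dim=1, keepdim=True)   # shape (64, 1, 7, 7)
m.conv1 = mono
out = m(torch.randn(2, 1, 224, 224))            # now accepts single-channel input
print(out.shape)                                # torch.Size([2, 1000])
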
Example #7
def get_score():
    print('Make Train Features.')
    with open(args.temporary_file, 'rb') as f:
        x_train, x_feat_train, y_train_o, y_aux_train, embedding_matrix = pickle.load(
            f)

    def power_mean(series, p=-5):
        total = sum(np.power(series, p))
        return np.power(total / len(series), 1 / p)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # all, sub, s&t, !s&t, s&!t, !s&!t
    weight_factor = list(map(float, args.weight_factor.split(',')))
    identity_factor_1 = list(map(float, args.identity_factor_1.split(',')))
    identity_factor_2 = list(map(float, args.identity_factor_2.split(',')))
    model_factor = list(map(int, args.model_factor.split(',')))
    print('weight_factor =', weight_factor)
    print('identity_factor_1 = ', identity_factor_1)
    print('identity_factor_2 = ', identity_factor_2)
    print('model_factor = ', model_factor)
    train = read_competision_file(train=True)
    identity_columns = [
        'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
        'muslim', 'black', 'white', 'psychiatric_or_mental_illness'
    ]
    index_subgroup, index_bpsn, index_bnsp = dict(), dict(), dict()
    for col in identity_columns:
        index_subgroup[col] = (train[col].fillna(0).values >= 0.5).astype(bool)
        index_bpsn[col] = (
            (((train['target'].values < 0.5).astype(bool).astype(np.int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int))
             > 1).astype(bool)) + ((
                 ((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                  (train[col].fillna(0).values < 0.5).astype(bool).astype(
                      np.int)) > 1).astype(bool))
        index_bnsp[col] = (
            (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int))
             > 1).astype(bool)) + ((
                 ((train['target'].values < 0.5).astype(bool).astype(np.int) +
                  (train[col].fillna(0).values < 0.5).astype(bool).astype(
                      np.int)) > 1).astype(bool))
    # Overall
    weights = np.ones((len(x_train), )) * weight_factor[0]
    # Subgroup
    weights += (train[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(np.int) * weight_factor[1]
    weights += (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[2]
    weights += (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[3]
    weights += (((train['target'].values < 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[4]
    weights += (((train['target'].values < 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[5]
    index_id1, index_id2 = dict(), dict()
    for col in identity_columns:
        index_id1[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
             (train['target'].values >= 0.5).astype(bool).astype(np.int)) >
            1).astype(bool)
        index_id2[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
             (train['target'].values < 0.5).astype(bool).astype(np.int)) >
            1).astype(bool)
    for col, id1 in zip(identity_columns, identity_factor_1):
        weights[index_id1[col]] += id1
    for col, id2 in zip(identity_columns, identity_factor_2):
        weights[index_id2[col]] += id2

    loss_weight = 1.0 / weights.mean()

    aux_impact_factor = list(map(float, args.aux_impact_factor.split(',')))
    aux_identity_factor = list(map(float, args.aux_identity_factor.split(',')))
    print('aux_impact_factor =', aux_impact_factor)
    print('aux_identity_factor =', aux_identity_factor)

    weights_aux = np.ones((len(x_train), ))
    weights_aux[(train['target'].values >= 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[0]
    weights_aux[(train['target'].values >= 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[1]
    weights_aux[(train['target'].values < 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[2]
    weights_aux[(train['target'].values < 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[3]

    y_train = np.vstack([y_train_o, weights, weights_aux]).T

    del train

    def custom_loss_aux(data, targets):
        ''' Define custom loss function for weighted BCE on 'target' column '''
        bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(
            data[:, :1], targets[:, :1])
        bce_loss_aux_1 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 1:2], targets[:, 3:4])
        bce_loss_aux_2 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 2:3], targets[:, 4:5])
        bce_loss_aux_3 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 3:4], targets[:, 5:6])
        bce_loss_aux_4 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 4:5], targets[:, 6:7])
        bce_loss_aux_5 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 5:6], targets[:, 7:8])
        bce_loss_aux_6 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 6:7], targets[:, 8:9])
        return (bce_loss_1 * loss_weight
                + bce_loss_aux_1 * aux_impact_factor[0]
                + bce_loss_aux_2 * aux_impact_factor[1]
                + bce_loss_aux_3 * aux_impact_factor[2]
                + bce_loss_aux_4 * aux_impact_factor[3]
                + bce_loss_aux_5 * aux_impact_factor[4]
                + bce_loss_aux_6 * aux_impact_factor[5])

    from sklearn.model_selection import KFold, train_test_split
    from sklearn.metrics import classification_report, roc_auc_score
    batch_size = args.batch_size
    lr = args.learning_ratio
    max_features = np.max(x_train)
    kf = KFold(n_splits=5, random_state=12, shuffle=True)
    final_epoch_score_cv = dict()
    final_fold_count = 0
    for fold_id, (big_index, small_index) in enumerate(kf.split(y_train)):
        final_fold_count += 1
        if args.minimize == 1:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.5,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 2:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.666,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 3:
            train_index, test_index = big_index[:25600], small_index[:12800]
        else:
            train_index, test_index = big_index, small_index

        if len(args.model_file) > 0:
            train_index = np.arange(len(x_train))

        if args.use_feats_url:
            x_train_train = np.hstack(
                [x_feat_train[train_index], x_train[train_index]])
            x_train_test = np.hstack(
                [x_feat_train[test_index], x_train[test_index]])
            feats_nums = x_feat_train.shape[1]
        else:
            x_train_train = x_train[train_index]
            x_train_test = x_train[test_index]
            feats_nums = 0

        x_train_torch = torch.tensor(x_train_train, dtype=torch.long)
        x_test_torch = torch.tensor(x_train_test, dtype=torch.long)
        y_train_torch = torch.tensor(np.hstack([y_train,
                                                y_aux_train])[train_index],
                                     dtype=torch.float32)
        y_test_torch = torch.tensor(np.hstack([y_train,
                                               y_aux_train])[test_index],
                                    dtype=torch.float32)

        train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
        valid_dataset = data.TensorDataset(x_test_torch, y_test_torch)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)

        checkpoint_predictions = []
        weights = []
        seed_everything(args.random_seed + fold_id)
        num_units = list(map(int, args.num_units.split(',')))
        model = get_model(model_factor, num_units[0], num_units[1],
                          embedding_matrix, max_features,
                          y_aux_train.shape[-1], args.num_words, feats_nums)
        model = model.cuda(device=cuda)
        if args.optimizer == 'Nadam':
            from NadamLocal import Nadam
            learn = Learner(databunch,
                            model,
                            loss_func=custom_loss_aux,
                            opt_func=Nadam)
        else:
            learn = Learner(databunch, model, loss_func=custom_loss_aux)
        all_test_preds = []
        checkpoint_weights = [2**epoch for epoch in range(args.num_epochs)]
        test_loader = valid_loader
        n = len(learn.data.train_dl)
        phases = [(TrainingPhase(n).schedule_hp('lr', lr * (0.6**(i))))
                  for i in range(args.num_epochs)]
        sched = GeneralScheduler(learn, phases)
        learn.callbacks.append(sched)
        final_epoch_score = 0
        for global_epoch in range(args.num_epochs):
            print("Fold#", fold_id, "epoch#", global_epoch)
            learn.fit(1)
            if args.minimize < 2 or (args.minimize >= 2 and global_epoch
                                     == int(args.num_epochs - 1)):
                test_preds = np.zeros((len(test_index), 7))
                for i, x_batch in enumerate(test_loader):
                    X = x_batch[0].cuda()
                    y_pred = sigmoid(learn.model(X).detach().cpu().numpy())
                    test_preds[i * batch_size:(i + 1) * batch_size, :] = y_pred

                all_test_preds.append(test_preds)

                prediction_one = test_preds[:, 0].flatten()
                checkpoint_predictions.append(prediction_one)

                weights.append(2**global_epoch)
                predictions = np.average(checkpoint_predictions,
                                         weights=weights,
                                         axis=0)
                y_true = (y_train[test_index, 0]).reshape(
                    (-1, )).astype(np.int)
                roc_sub, roc_bpsn, roc_bnsp = [], [], []
                roc_sub_one, roc_bpsn_one, roc_bnsp_one = [], [], []
                for col in identity_columns:
                    if args.vervose:
                        print("Subgroup#", col, ":")
                        print(
                            classification_report(
                                y_true[index_subgroup[col][test_index]],
                                (predictions[index_subgroup[col][test_index]]
                                 >= 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_sub.append(
                            roc_auc_score(
                                y_true[index_subgroup[col][test_index]],
                                predictions[index_subgroup[col][test_index]]))
                    roc_sub_one.append(
                        roc_auc_score(
                            y_true[index_subgroup[col][test_index]],
                            prediction_one[index_subgroup[col][test_index]]))
                    if args.vervose:
                        print("BPSN#", col, ":")
                        print(
                            classification_report(
                                y_true[index_bpsn[col][test_index]],
                                (predictions[index_bpsn[col][test_index]] >=
                                 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bpsn.append(
                            roc_auc_score(
                                y_true[index_bpsn[col][test_index]],
                                predictions[index_bpsn[col][test_index]]))
                    roc_bpsn_one.append(
                        roc_auc_score(
                            y_true[index_bpsn[col][test_index]],
                            prediction_one[index_bpsn[col][test_index]]))
                    if args.vervose:
                        print("BNSP#", col, ":")
                        print(
                            classification_report(
                                y_true[index_bnsp[col][test_index]],
                                (predictions[index_bnsp[col][test_index]] >=
                                 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bnsp.append(
                            roc_auc_score(
                                y_true[index_bnsp[col][test_index]],
                                predictions[index_bnsp[col][test_index]]))
                    roc_bnsp_one.append(
                        roc_auc_score(
                            y_true[index_bnsp[col][test_index]],
                            prediction_one[index_bnsp[col][test_index]]))
                if args.minimize < 2:
                    roc_all = roc_auc_score(y_true, predictions)
                    pm_roc_sub = power_mean(roc_sub)
                    pm_roc_bpsn = power_mean(roc_bpsn)
                    pm_roc_bnsp = power_mean(roc_bnsp)
                    final_epoch_score = (roc_all + pm_roc_sub + pm_roc_bpsn +
                                         pm_roc_bnsp) / 4
                roc_all_one = roc_auc_score(y_true, prediction_one)
                pm_roc_sub_one = power_mean(roc_sub_one)
                pm_roc_bpsn_one = power_mean(roc_bpsn_one)
                pm_roc_bnsp_one = power_mean(roc_bnsp_one)
                final_epoch_score_one = (roc_all_one + pm_roc_sub_one +
                                         pm_roc_bpsn_one + pm_roc_bnsp_one) / 4
                if args.minimize >= 2:
                    return final_epoch_score_one
                if args.vervose:
                    print("roc_sub:", pm_roc_sub)
                    print("roc_bpsn:", pm_roc_bpsn)
                    print("roc_bnsp:", pm_roc_bnsp)
                    print("final score:",
                          (roc_all + pm_roc_sub + pm_roc_bpsn + pm_roc_bnsp) /
                          4)
                if global_epoch not in final_epoch_score_cv.keys():
                    final_epoch_score_cv[global_epoch] = []
                final_epoch_score_cv[global_epoch].append(
                    (final_epoch_score, final_epoch_score_one))
        if len(args.model_file) > 0:
            if args.model_file.endswith('.bz2'):
                model_file = args.model_file
            else:
                model_file = args.model_file + '.bz2'
            model_json_file = model_file[:-4] + '.json'
            model.save_model(model_file)
            with open(model_json_file, 'w') as pf:
                pf.write('{')
                pf.write('\"model_factor\":[' +
                         ','.join(list(map(str, model_factor))) + ']')
                pf.write(',')
                pf.write('\"num_units\":[' +
                         ','.join(list(map(str, num_units))) + ']')
                pf.write(',')
                pf.write('\"num_aux_targets\":%d' % y_aux_train.shape[-1])
                pf.write(',')
                pf.write('\"feats_nums\":%d' % feats_nums)
                pf.write(',')
                pf.write('\"max_seq_len\":%d' % args.num_words)
                pf.write('}')
            break
        if args.minimize > 0:
            break
    return final_epoch_score_cv
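
For reference, the scoring logic at the bottom of the epoch loop above is the Jigsaw-style bias metric; it reduces to this small standalone restatement of the snippet's own `power_mean` and four-way average:

import numpy as np


def power_mean(series, p=-5):
    # generalized power mean of a list of AUCs, as defined in the snippet above
    return np.power(sum(np.power(series, p)) / len(series), 1 / p)


def final_score(roc_all, roc_sub, roc_bpsn, roc_bnsp):
    # equal-weight average of the overall AUC and the three power-meaned
    # per-identity AUC lists (subgroup, BPSN, BNSP)
    return (roc_all + power_mean(roc_sub) + power_mean(roc_bpsn)
            + power_mean(roc_bnsp)) / 4


print(final_score(0.96, [0.93, 0.91], [0.90, 0.92], [0.94, 0.95]))
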
        
Example #8
        self.features = nn.Sequential(*layers)
        
    def forward(self, x): return self.features(x)
    
def wrn_22(): 
    return WideResNet(n_groups=3, N=3, n_classes=10, k=6)

model = wrn_22()

from fastai.basic_data import DataBunch
from fastai.train import Learner
from fastai.metrics import accuracy

data = DataBunch.create(train_ds, valid_ds, bs=batch_size, path='./data/cifar10')
learner = Learner(data, model, loss_func=F.cross_entropy, metrics=[accuracy])
learner.clip = 0.1 # gradient is clipped to be in range of [-0.1, 0.1]

# Find best learning rate
learner.lr_find()
learner.recorder.plot() # select lr with largest negative gradient (about 5e-3)

# Training
epochs = 1
lr = 5e-3
wd = 1e-4

import time

t0 = time.time()
learner.fit_one_cycle(epochs, lr, wd=wd) # wd is the lambda in l2 regularization
Example #9
            collate_fn=train_collator,
        )
        return DataBunch(train_dl=train_loader,
                         valid_dl=valid_loader,
                         collate_fn=train_collator)

    y_train_torch = get_y_train_torch(weights)
    databunch = get_databunch(y_train_torch)

    logging.info("training model 1: para, rawl, w2v...")
    embedding_matrix = np.concatenate(
        [para_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
    seed_everything(42)
    model = NeuralNet(embedding_matrix,
                      output_aux_sub=subgroup_target.shape[1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    cb = OneCycleScheduler(learn, lr_max=0.001)
    learn.callbacks.append(cb)
    learn.fit(EPOCHS)
    save_nn_without_embedding_weights(learn.model,
                                      "./models/Notebook_100_1.bin")

    logging.info("training model 2: glove, crawl, w2v...")
    embedding_matrix = np.concatenate(
        [glove_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
    seed_everything(43)
    model = NeuralNet(embedding_matrix,
                      output_aux_sub=subgroup_target.shape[1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    cb = OneCycleScheduler(learn, lr_max=0.001)
    learn.callbacks.append(cb)
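
Appending `OneCycleScheduler` by hand, as this fragment does, should be essentially equivalent in fastai v1 to the shortcut below, since `fit_one_cycle` builds the same scheduler internally with matching defaults:

    learn.fit_one_cycle(EPOCHS, max_lr=0.001)
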
Example #10
    cv_train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
    cv_fake_val_dataset = data.TensorDataset(x_train_torch[:1000],
                                             y_train_torch[:1000])
    cv_val_dataset = data.TensorDataset(x_val_torch, y_val_torch)

    cv_train_loader = data.DataLoader(cv_train_dataset,
                                      batch_size=512,
                                      shuffle=True)
    cv_fake_val_loader = data.DataLoader(cv_fake_val_dataset,
                                         batch_size=512,
                                         shuffle=False)
    cv_databunch = DataBunch(train_dl=cv_train_loader,
                             valid_dl=cv_fake_val_loader)

    cv_model = MLP(y_aux_train.shape[-1])
    cv_learn = Learner(cv_databunch, cv_model, loss_func=custom_loss)

    cv_predictions, _, _, _, _ = train_model_per_epoch(
        cv_learn,
        cv_val_dataset,
        output_dim=7,
        model_idx=0,
        lr=0.001,
        lr_decay=1,
        n_epochs=10,
        save_models='last',
        model_name='mlp_stacking')

    # cv_test_predictions = cv_model.predict(x_test)
    y_train_predictions[val_index] = cv_predictions
    # test_predictions += cv_test_predictions / float(N_SPLITS)
Example #11
            '.',
            bert_train.iloc[trn_idx, :].sample(frac=1,
                                               random_state=SEED + CUR_STEP),
            bert_train.iloc[val_idx, :],
            bert_test,
            tokenizer=fastai_tokenizer,
            vocab=fastai_bert_vocab,
            include_bos=False,
            include_eos=False,
            text_cols='comment_text',
            label_cols=label_cols,
            bs=BATCH_SIZE,
            collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
        )

        learner = Learner(databunch, bert_model, loss_func=bert_custom_loss)
        if CUR_STEP != 1:
            learner.load('/kaggle/input/freeze-bert-1-s-uc-260ml-3e-8f-s-' +
                         str(CUR_STEP - 1) + '-f-' + str(MAKE_FOLD) +
                         '/models/' + FILE_NAME)

        learner.fit_one_cycle(N_EPOCH, max_lr=MAX_LR)

        oof[val_idx] = get_preds_as_nparray(DatasetType.Valid).astype(
            np.float32)
        predictions += get_preds_as_nparray(DatasetType.Test).astype(
            np.float32) / NFOLDS

        validate_df(train.iloc[val_idx], oof[val_idx, 0], verbose=True)

        learner.save(FILE_NAME)
Example #12
File: train.py  Project: expz/fast-training
def build_learner(params, project_dir, pindex=0, comm_file=None, queues=None):
    """
    Builds a fastai `Learner` object containing the model and data specified by
    `params`. It is configured to run on GPU `device_id`. Assumes it is GPU
    `pindex` of `world_size` total GPUs. In case more than one GPU is being
    used, a file named `comm_file` is used to communicate between processes.
    """
    # For user friendly error messages, check these parameters exist.
    check_params(params, [
        'cpu',
        'data.batch_size',
        'data.dir',
        'data.epoch_size',
        'data.max_length',
        'data.max_val_size',
        'data.src',
        'data.tgt',
        'data.vocab',
        'decoder.embedding_dim',
        'decoder.embedding_dropout',
        'decoder.prediction_dropout',
        'encoder.embedding_dim',
        'encoder.embedding_dropout',
        'network.bias',
        'network.block_sizes',
        'network.division_factor',
        'network.dropout',
        'network.efficient',
        'network.growth_rate',
        'network.kernel_size',
    ])

    model_name = params['model_name']

    # Try to make the directory for saving models.
    model_dir = os.path.join(project_dir, 'model', model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Configure GPU/CPU device settings.
    cpu = params['cpu']
    gpu_ids = params['gpu_ids'] if not cpu else []
    world_size = len(gpu_ids) if len(gpu_ids) > 0 else 1
    distributed = world_size > 1
    if gpu_ids:
        device_id = gpu_ids[pindex]
        device = torch.device(device_id)
        torch.cuda.set_device(device_id)
    else:
        device_id = None
        device = torch.device('cpu')

    # If distributed, initialize inter-process communication using shared file.
    if distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=world_size,
                                             rank=pindex,
                                             init_method=f'file://{comm_file}')

    # Load vocabulary.
    vocab_path = os.path.join(params['data']['dir'], params['data']['vocab'])
    vocab = VocabData(vocab_path)

    # Load data.
    src_l = params['data']['src']
    tgt_l = params['data']['tgt']
    loader = PervasiveDataLoader(os.path.join(params['data']['dir'],
                                              f'{src_l}.h5'),
                                 os.path.join(params['data']['dir'],
                                              f'{tgt_l}.h5'),
                                 vocab,
                                 vocab,
                                 params['data']['batch_size'] // world_size,
                                 params['data']['max_length'],
                                 epoch_size=params['data']['epoch_size'],
                                 max_val_size=params['data']['max_val_size'],
                                 distributed=distributed,
                                 world_size=world_size,
                                 pindex=pindex)

    # Define neural network.
    # Max length is 1 more than setting to account for BOS.
    if params['network']['type'] == 'pervasive-embeddings':
        model = PervasiveEmbedding(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, loader.datasets['train'].arrays[0].shape[2],
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'])
        # Rescale loss by 100 for easier display in training output.
        loss_func = scaled_mse_loss
    elif params['network']['type'] == 'pervasive-downsample':
        model = PervasiveDownsample(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive-bert':
        model = PervasiveBert(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive-original':
        model = PervasiveOriginal(
            params['network']['block_sizes'], len(vocab), vocab.bos,
            loader.max_length, loader.max_length,
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive':
        model = Pervasive(
            params['network']['block_sizes'], len(vocab), vocab.bos,
            loader.max_length, loader.max_length,
            params['encoder']['initial_emb_dim'],
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    else:
        raise ValueError(f"Unknown network type: {params['network']['type']}")

    model.init_weights()
    if device_id is not None:
        if not torch.cuda.is_available():
            raise ValueError(
                f'Requested training on GPU {device_id}, but no GPU was found.')
        model.cuda(device_id)
        if distributed:
            model = DistributedDataParallel(model, device_ids=[device_id])
    data = DataBunch(loader.loaders['train'],
                     loader.loaders['valid'],
                     loader.loaders['valid'],
                     device=device)

    # Create Learner with Adam optimizer.
    learn = Learner(data, model, loss_func=loss_func, model_dir=model_dir)
    AdamP = partial(torch.optim.Adam,
                    betas=(params['optim']['beta1'], params['optim']['beta2']))
    learn.opt_func = AdamP
    learn.wd = params['optim']['wd']

    return (learn, loader.loaders['train'].src_vocab,
            loader.loaders['train'].tgt_vocab)
Example #13
def train(train_dataset: torch.utils.data.Dataset,
          test_dataset: torch.utils.data.Dataset,
          training_config: dict = train_config,
          global_config: dict = global_config):
    """
    Template training routine. Takes a training and a test dataset wrapped
    as torch.utils.data.Dataset type and two corresponding generic
    configs for both global path settings and training settings.
    Returns the fitted fastai.train.Learner object which can be
    used to assess the resulting metrics and error curves etc.
    """

    for path in global_config.values():
        create_dirs(path)

    # wrap datasets with Dataloader classes
    train_loader = torch.utils.data.DataLoader(
        train_dataset, **train_config["DATA_LOADER_CONFIG"])
    test_loader = torch.utils.data.DataLoader(
        test_dataset, **train_config["DATA_LOADER_CONFIG"])
    databunch = DataBunch(train_loader, test_loader)

    # instantiate model and learner
    if training_config["WEIGHTS"] is None:
        model = training_config["MODEL"](**training_config["MODEL_CONFIG"])
    else:
        model = load_model(training_config["MODEL"],
                           training_config["MODEL_CONFIG"],
                           training_config["WEIGHTS"],
                           training_config["DEVICE"])

    learner = Learner(databunch,
                      model,
                      metrics=train_config["METRICS"],
                      path=global_config["ROOT_PATH"],
                      model_dir=global_config["WEIGHT_DIR"],
                      loss_func=train_config["LOSS"])

    # model name & paths
    name = "_".join([train_config["DATE"], train_config["SESSION_NAME"]])
    modelpath = os.path.join(global_config["WEIGHT_DIR"], name)

    if train_config["MIXED_PRECISION"]:
        learner.to_fp16()

    learner.save(modelpath)

    torch.backends.cudnn.benchmark = True

    cbs = [
        SaveModelCallback(learner),
        LearnerTensorboardWriter(
            learner,
            Path(os.path.join(global_config["LOG_DIR"]), "tensorboardx"),
            name),
        TerminateOnNaNCallback()
    ]

    # perform training iteration
    try:
        if train_config["ONE_CYCLE"]:
            learner.fit_one_cycle(train_config["EPOCHS"],
                                  max_lr=train_config["LR"],
                                  callbacks=cbs)
        else:
            learner.fit(train_config["EPOCHS"],
                        lr=train_config["LR"],
                        callbacks=cbs)
    # save model files
    except KeyboardInterrupt:
        learner.save(modelpath)
        raise KeyboardInterrupt

    learner.save(modelpath)
    val_loss = min(learner.recorder.val_losses)
    val_metrics = learner.recorder.metrics

    # log using the logging tool
    logger = log.Log(train_config, run_name=train_config['SESSION_NAME'])
    logger.log_metric('Validation Loss', val_loss)
    logger.log_metrics(val_metrics)
    logger.end_run()

    #write csv log file
    log_content = train_config.copy()
    log_content["VAL_LOSS"] = val_loss
    log_content["VAL_METRICS"] = val_metrics
    log_path = os.path.join(global_config["LOG_DIR"], train_config["LOGFILE"])
    write_log(log_path, log_content)

    return learner, log_content, name
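
The template above reads all of its settings from two plain dictionaries. A hypothetical minimal shape covering just the keys it touches (every value is a placeholder, and `MyModel` stands in for whatever `nn.Module` subclass is actually trained):

import torch
from torch import nn
from fastai.metrics import accuracy


class MyModel(nn.Module):  # hypothetical stand-in model
    def __init__(self, n_classes=10):
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, n_classes))

    def forward(self, x):
        return self.net(x)


train_config = {
    "DATE": "2020-01-01",
    "SESSION_NAME": "baseline",
    "MODEL": MyModel,
    "MODEL_CONFIG": {"n_classes": 10},          # kwargs passed to MODEL(...)
    "WEIGHTS": None,                            # or a path to a saved checkpoint
    "DEVICE": "cuda",
    "DATA_LOADER_CONFIG": {"batch_size": 64, "shuffle": True, "num_workers": 4},
    "METRICS": [accuracy],
    "LOSS": torch.nn.CrossEntropyLoss(),
    "MIXED_PRECISION": False,
    "ONE_CYCLE": True,
    "EPOCHS": 10,
    "LR": 1e-3,
    "LOGFILE": "train_log.csv",
}
global_config = {
    "ROOT_PATH": "./",
    "WEIGHT_DIR": "weights",
    "LOG_DIR": "logs",
}
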
Example #14
import config
from dataset import Dataset
from model import WideResNet22
from fastai.train import Learner
from fastai.metrics import accuracy
from torch.nn import functional as f
from fastai.basic_data import DataBunch

cifar10 = Dataset()
# cifar10.download_dataset()
train_dataloader, valid_dataloader = cifar10.get_dataloader()
model = WideResNet22(3, 10)

data = DataBunch(train_dataloader, valid_dataloader)
learner = Learner(data, model, loss_func=f.cross_entropy, metrics=[accuracy])
learner.clip = 0.1
learner.fit_one_cycle(config.EPOCHS, config.LEARNING_RATE, wd=1e-4)
Example #15
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = model(images)
    print('out.shape:', out.shape)
    break


# now we will use the fastai library to help us out (I still need to install this module)
from fastai.basic_data import DataBunch
from fastai.train import Learner
from fastai.metrics import accuracy


data = DataBunch.create(train_ds, valid_ds, bs=batch_size, path='./data/cifar10')
learner = Learner(data, model, loss_func=F.cross_entropy, metrics=[accuracy])
learner.clip = 0.1


# this starts with a low lr then adjusts it and tracks the loss
learner.lr_find()


# plot the marked lr that gives the fastest reduction in loss
learner.recorder.plot()


learner.fit_one_cycle(9, 5e-3, wd=1e-4) # epochs, lr, weight decay


# plot all the weights, losses and accuracy of the model
Example #16
                                           shuffle=False)

databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)


def custom_loss(data, targets):
    ''' Define custom loss function for weighted BCE on 'target' column '''
    bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(data[:, :1],
                                                              targets[:, :1])
    bce_loss_2 = nn.BCEWithLogitsLoss()(data[:, 1:], targets[:, 2:])
    return (bce_loss_1 * loss_weight) + bce_loss_2


all_test_preds = []

for model_idx in range(NUM_MODELS):
    print('Model ', model_idx)
    seed_everything(1234 + model_idx)
    model = NeuralNet(embedding_matrix, out_shape)
    learn = Learner(databunch, model, loss_func=custom_loss)
    test_preds = train_model(learn, test_dataset, output_dim=3)
    all_test_preds.append(test_preds)

submission = pd.DataFrame.from_dict({
    'id':
    testid,
    'prediction':
    np.mean(all_test_preds, axis=0)[:, 0]
})

submission.to_csv('submission.csv', index=False)
Example #17
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
embed_size = 50
# Note: these parameters can be tuned to improve model performance


# targets
num_targets = 12


# init LSTM model
LUT = ModelEmbedding(max_features, embed_size, dropout = 0.3)
NET = NeuralNet(embed_size, LSTM_UNITS, DENSE_HIDDEN_UNITS, num_targets)
model = LSTM_model(LUT, NET)

# Fast ai learner
learn = Learner(databunch, model, loss_func=loss_function)



# train model
LSTM_valid_raw_preds = train_model(learn,output_dim=num_targets, lr = 1.0e-3)


# test set prediction

LSTM_pred_raw = torch.zeros(len(X_test), num_targets)
test_preds = np.zeros((len(X_test)))
learn.model.eval()

for i, x_batch in enumerate(test_loader):
    X = x_batch[0].cuda()