Example #1
def run(ini_file='tinyimg.ini',
        data_in_dir='./../../dataset',
        model_cfg='../cfg/vgg-tiny.cfg',
        model_out_dir='./models',
        epochs=30,
        lr=3.0e-5,
        batch_sz=256,
        num_worker=4,
        log_freq=20,
        use_gpu=True):
    # Step 1: parse config
    cfg = parse_cfg(ini_file,
                    data_in_dir=data_in_dir,
                    model_cfg=model_cfg,
                    model_out_dir=model_out_dir,
                    epochs=epochs,
                    lr=lr,
                    batch_sz=batch_sz,
                    log_freq=log_freq,
                    num_worker=num_worker,
                    use_gpu=use_gpu)
    print_cfg(cfg)

    # Step 2: create data sets and loaders
    train_ds, val_ds = build_train_val_datasets(cfg, in_memory=True)
    train_loader, val_loader = DLFactory.create_train_val_dataloader(
        cfg, train_ds, val_ds)

    # Step 3: create model
    model = MFactory.create_model(cfg)

    # Step 4: train/valid
    # This demonstrates that our approach can be easily integrated with our app framework
    device = get_device(cfg)
    data = DataBunch(train_loader, val_loader, device=device)
    learn = Learner(data,
                    model,
                    loss_func=torch.nn.CrossEntropyLoss(),
                    metrics=accuracy)
    #  callback_fns=[partial(EarlyStoppingCallback, monitor='accuracy', min_delta=0.01, patience=2)])

    # lr_find(learn, start_lr=1e-7, end_lr=10)
    # learn.recorder.plot()
    # lrs_losses = [(lr, loss) for lr, loss in zip(learn.recorder.lrs, learn.recorder.losses)]
    # min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    # lr = min_lr/10.0
    # plt.show()
    # print(f'Minimal lr rate is {min_lr} propose init lr {lr}')
    # fit_one_cycle(learn, epochs, lr)

    learn.fit(epochs, lr)
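
A minimal way to call this entry point, assuming it is executed as a script and that parse_cfg, DLFactory, MFactory and get_device (referenced above but not defined in this snippet) are importable in the module:

if __name__ == '__main__':
    # Hypothetical invocation; every keyword below matches a parameter of run()
    run(ini_file='tinyimg.ini',
        epochs=5,
        batch_sz=64,
        use_gpu=torch.cuda.is_available())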
Example #2
def get_score():
    print('Make Train Features.')
    with open(args.temporary_file, 'rb') as f:
        x_train, x_feat_train, y_train_o, y_aux_train, embedding_matrix = pickle.load(
            f)

    def power_mean(series, p=-5):
        total = sum(np.power(series, p))
        return np.power(total / len(series), 1 / p)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # weight_factor order: overall, subgroup, subgroup & toxic, non-subgroup & toxic, subgroup & non-toxic, non-subgroup & non-toxic
    weight_factor = list(map(float, args.weight_factor.split(',')))
    identity_factor_1 = list(map(float, args.identity_factor_1.split(',')))
    identity_factor_2 = list(map(float, args.identity_factor_2.split(',')))
    model_factor = list(map(int, args.model_factor.split(',')))
    print('weight_factor =', weight_factor)
    print('identity_factor_1 = ', identity_factor_1)
    print('identity_factor_2 = ', identity_factor_2)
    print('model_factor = ', model_factor)
    train = read_competision_file(train=True)
    identity_columns = [
        'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
        'muslim', 'black', 'white', 'psychiatric_or_mental_illness'
    ]
    index_subgroup, index_bpsn, index_bnsp = dict(), dict(), dict()
    for col in identity_columns:
        index_subgroup[col] = (train[col].fillna(0).values >= 0.5).astype(bool)
        index_bpsn[col] = (
            (((train['target'].values < 0.5).astype(bool).astype(np.int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int))
             > 1).astype(bool)) + ((
                 ((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                  (train[col].fillna(0).values < 0.5).astype(bool).astype(
                      np.int)) > 1).astype(bool))
        index_bnsp[col] = (
            (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int))
             > 1).astype(bool)) + ((
                 ((train['target'].values < 0.5).astype(bool).astype(np.int) +
                  (train[col].fillna(0).values < 0.5).astype(bool).astype(
                      np.int)) > 1).astype(bool))
    # Overall
    weights = np.ones((len(x_train), )) * weight_factor[0]
    # Subgroup
    weights += (train[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(np.int) * weight_factor[1]
    weights += (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[2]
    weights += (((train['target'].values >= 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[3]
    weights += (((train['target'].values < 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[4]
    weights += (((train['target'].values < 0.5).astype(bool).astype(np.int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(np.int)) >
                1).astype(bool).astype(np.int) * weight_factor[5]
    index_id1, index_id2 = dict(), dict()
    for col in identity_columns:
        index_id1[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
             (train['target'].values >= 0.5).astype(bool).astype(np.int)) >
            1).astype(bool)
        index_id2[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
             (train['target'].values < 0.5).astype(bool).astype(np.int)) >
            1).astype(bool)
    for col, id1 in zip(identity_columns, identity_factor_1):
        weights[index_id1[col]] += id1
    for col, id2 in zip(identity_columns, identity_factor_2):
        weights[index_id2[col]] += id2

    loss_weight = 1.0 / weights.mean()

    aux_impact_factor = list(map(float, args.aux_impact_factor.split(',')))
    aux_identity_factor = list(map(float, args.aux_identity_factor.split(',')))
    print('aux_impact_factor =', aux_impact_factor)
    print('aux_identity_factor =', aux_identity_factor)

    weights_aux = np.ones((len(x_train), ))
    weights_aux[(train['target'].values >= 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[0]
    weights_aux[(train['target'].values >= 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[1]
    weights_aux[(train['target'].values < 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[2]
    weights_aux[(train['target'].values < 0.5).astype(np.int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).
                astype(bool).astype(np.int) > 1] = aux_identity_factor[3]

    y_train = np.vstack([y_train_o, weights, weights_aux]).T

    del train

    def custom_loss_aux(data, targets):
        ''' Define custom loss function for weighted BCE on 'target' column '''
        bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:,
                                                         1:2])(data[:, :1],
                                                               targets[:, :1])
        bce_loss_aux_1 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 1:2], targets[:, 3:4])
        bce_loss_aux_2 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 2:3], targets[:, 4:5])
        bce_loss_aux_3 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 3:4], targets[:, 5:6])
        bce_loss_aux_4 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 4:5], targets[:, 6:7])
        bce_loss_aux_5 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 5:6], targets[:, 7:8])
        bce_loss_aux_6 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 6:7], targets[:, 8:9])
        return (bce_loss_1 * loss_weight) + (
            bce_loss_aux_1 *
            aux_impact_factor[0]) + (bce_loss_aux_2 * aux_impact_factor[1]) + (
                bce_loss_aux_3 * aux_impact_factor[2]
            ) + (bce_loss_aux_4 * aux_impact_factor[3]) + (
                bce_loss_aux_5 * aux_impact_factor[4]) + (bce_loss_aux_6 *
                                                          aux_impact_factor[5])

    from sklearn.model_selection import KFold, train_test_split
    from sklearn.metrics import classification_report, roc_auc_score
    batch_size = args.batch_size
    lr = args.learning_ratio
    max_features = np.max(x_train)
    kf = KFold(n_splits=5, random_state=12, shuffle=True)
    final_epoch_score_cv = dict()
    final_fold_count = 0
    for fold_id, (big_index, small_index) in enumerate(kf.split(y_train)):
        final_fold_count += 1
        if args.minimize == 1:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.5,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 2:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.666,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 3:
            train_index, test_index = big_index[:25600], small_index[:12800]
        else:
            train_index, test_index = big_index, small_index

        if len(args.model_file) > 0:
            train_index = np.arange(len(x_train))

        if args.use_feats_url:
            x_train_train = np.hstack(
                [x_feat_train[train_index], x_train[train_index]])
            x_train_test = np.hstack(
                [x_feat_train[test_index], x_train[test_index]])
            feats_nums = x_feat_train.shape[1]
        else:
            x_train_train = x_train[train_index]
            x_train_test = x_train[test_index]
            feats_nums = 0

        x_train_torch = torch.tensor(x_train_train, dtype=torch.long)
        x_test_torch = torch.tensor(x_train_test, dtype=torch.long)
        y_train_torch = torch.tensor(np.hstack([y_train,
                                                y_aux_train])[train_index],
                                     dtype=torch.float32)
        y_test_torch = torch.tensor(np.hstack([y_train,
                                               y_aux_train])[test_index],
                                    dtype=torch.float32)

        train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
        valid_dataset = data.TensorDataset(x_test_torch, y_test_torch)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)

        checkpoint_predictions = []
        weights = []
        seed_everything(args.random_seed + fold_id)
        num_units = list(map(int, args.num_units.split(',')))
        model = get_model(model_factor, num_units[0], num_units[1],
                          embedding_matrix, max_features,
                          y_aux_train.shape[-1], args.num_words, feats_nums)
        model = model.cuda(device=cuda)
        if args.optimizer == 'Nadam':
            from NadamLocal import Nadam
            learn = Learner(databunch,
                            model,
                            loss_func=custom_loss_aux,
                            opt_func=Nadam)
        else:
            learn = Learner(databunch, model, loss_func=custom_loss_aux)
        all_test_preds = []
        checkpoint_weights = [2**epoch for epoch in range(args.num_epochs)]
        test_loader = valid_loader
        n = len(learn.data.train_dl)
        phases = [(TrainingPhase(n).schedule_hp('lr', lr * (0.6**(i))))
                  for i in range(args.num_epochs)]
        sched = GeneralScheduler(learn, phases)
        learn.callbacks.append(sched)
        final_epoch_score = 0
        for global_epoch in range(args.num_epochs):
            print("Fold#", fold_id, "epoch#", global_epoch)
            learn.fit(1)
            if args.minimize < 2 or (args.minimize >= 2 and global_epoch
                                     == int(args.num_epochs - 1)):
                test_preds = np.zeros((len(test_index), 7))
                for i, x_batch in enumerate(test_loader):
                    X = x_batch[0].cuda()
                    y_pred = sigmoid(learn.model(X).detach().cpu().numpy())
                    test_preds[i * batch_size:(i + 1) * batch_size, :] = y_pred

                all_test_preds.append(test_preds)

                prediction_one = test_preds[:, 0].flatten()
                checkpoint_predictions.append(prediction_one)

                weights.append(2**global_epoch)
                predictions = np.average(checkpoint_predictions,
                                         weights=weights,
                                         axis=0)
                y_true = (y_train[test_index, 0]).reshape(
                    (-1, )).astype(np.int)
                roc_sub, roc_bpsn, roc_bnsp = [], [], []
                roc_sub_one, roc_bpsn_one, roc_bnsp_one = [], [], []
                for col in identity_columns:
                    if args.vervose:
                        print("Subgroup#", col, ":")
                        print(
                            classification_report(
                                y_true[index_subgroup[col][test_index]],
                                (predictions[index_subgroup[col][test_index]]
                                 >= 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_sub.append(
                            roc_auc_score(
                                y_true[index_subgroup[col][test_index]],
                                predictions[index_subgroup[col][test_index]]))
                    roc_sub_one.append(
                        roc_auc_score(
                            y_true[index_subgroup[col][test_index]],
                            prediction_one[index_subgroup[col][test_index]]))
                    if args.vervose:
                        print("BPSN#", col, ":")
                        print(
                            classification_report(
                                y_true[index_bpsn[col][test_index]],
                                (predictions[index_bpsn[col][test_index]] >=
                                 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bpsn.append(
                            roc_auc_score(
                                y_true[index_bpsn[col][test_index]],
                                predictions[index_bpsn[col][test_index]]))
                    roc_bpsn_one.append(
                        roc_auc_score(
                            y_true[index_bpsn[col][test_index]],
                            prediction_one[index_bpsn[col][test_index]]))
                    if args.vervose:
                        print("BNSP#", col, ":")
                        print(
                            classification_report(
                                y_true[index_bnsp[col][test_index]],
                                (predictions[index_bnsp[col][test_index]] >=
                                 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bnsp.append(
                            roc_auc_score(
                                y_true[index_bnsp[col][test_index]],
                                predictions[index_bnsp[col][test_index]]))
                    roc_bnsp_one.append(
                        roc_auc_score(
                            y_true[index_bnsp[col][test_index]],
                            prediction_one[index_bnsp[col][test_index]]))
                if args.minimize < 2:
                    roc_all = roc_auc_score(y_true, predictions)
                    pm_roc_sub = power_mean(roc_sub)
                    pm_roc_bpsn = power_mean(roc_bpsn)
                    pm_roc_bnsp = power_mean(roc_bnsp)
                    final_epoch_score = (roc_all + pm_roc_sub + pm_roc_bpsn +
                                         pm_roc_bnsp) / 4
                roc_all_one = roc_auc_score(y_true, prediction_one)
                pm_roc_sub_one = power_mean(roc_sub_one)
                pm_roc_bpsn_one = power_mean(roc_bpsn_one)
                pm_roc_bnsp_one = power_mean(roc_bnsp_one)
                final_epoch_score_one = (roc_all_one + pm_roc_sub_one +
                                         pm_roc_bpsn_one + pm_roc_bnsp_one) / 4
                if args.minimize >= 2:
                    return final_epoch_score_one
                if args.vervose:
                    print("roc_sub:", pm_roc_sub)
                    print("roc_bpsn:", pm_roc_bpsn)
                    print("roc_bnsp:", pm_roc_bnsp)
                    print("final score:",
                          (roc_all + pm_roc_sub + pm_roc_bpsn + pm_roc_bnsp) /
                          4)
                if global_epoch not in final_epoch_score_cv.keys():
                    final_epoch_score_cv[global_epoch] = []
                final_epoch_score_cv[global_epoch].append(
                    (final_epoch_score, final_epoch_score_one))
        if len(args.model_file) > 0:
            if args.model_file.endswith('.bz2'):
                model_file = args.model_file
            else:
                model_file = args.model_file + '.bz2'
            model_json_file = model_file[:-4] + '.json'
            model.save_model(model_file)
            with open(model_json_file, 'w') as pf:
                pf.write('{')
                pf.write('\"model_factor\":[' +
                         ','.join(list(map(str, model_factor))) + ']')
                pf.write(',')
                pf.write('\"num_units\":[' +
                         ','.join(list(map(str, num_units))) + ']')
                pf.write(',')
                pf.write('\"num_aux_targets\":%d' % y_aux_train.shape[-1])
                pf.write(',')
                pf.write('\"feats_nums\":%d' % feats_nums)
                pf.write(',')
                pf.write('\"max_seq_len\":%d' % args.num_words)
                pf.write('}')
            break
        if args.minimize > 0:
            break
    return final_epoch_score_cv
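
The cross-validated score assembled above is the mean of the overall AUC and the power means (p = -5) of the per-identity subgroup, BPSN and BNSP AUCs. A compact sketch of that final combination, reusing the same power-mean definition as the helper inside get_score (final_bias_score is a hypothetical name):

import numpy as np

def final_bias_score(roc_all, roc_sub, roc_bpsn, roc_bnsp, p=-5):
    # Generalized power mean, identical to the power_mean helper above
    def pm(series):
        return np.power(np.sum(np.power(series, p)) / len(series), 1 / p)
    return (roc_all + pm(roc_sub) + pm(roc_bpsn) + pm(roc_bnsp)) / 4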
Example #3
def train(train_dataset: torch.utils.data.Dataset,
          test_dataset: torch.utils.data.Dataset,
          training_config: dict = train_config,
          global_config: dict = global_config):
    """
    Template training routine. Takes a training and a test dataset wrapped
    as torch.utils.data.Dataset type and two corresponding generic
    configs for both gobal path settings and training settings.
    Returns the fitted fastai.train.Learner object which can be
    used to assess the resulting metrics and error curves etc.
    """

    for path in global_config.values():
        create_dirs(path)

    # wrap datasets with Dataloader classes
    train_loader = torch.utils.data.DataLoader(
        train_dataset, **train_config["DATA_LOADER_CONFIG"])
    test_loader = torch.utils.data.DataLoader(
        test_dataset, **train_config["DATA_LOADER_CONFIG"])
    databunch = DataBunch(train_loader, test_loader)

    # instantiate model and learner
    if training_config["WEIGHTS"] is None:
        model = training_config["MODEL"](**training_config["MODEL_CONFIG"])
    else:
        model = load_model(training_config["MODEL"],
                           training_config["MODEL_CONFIG"],
                           training_config["WEIGHTS"],
                           training_config["DEVICE"])

    learner = Learner(databunch,
                      model,
                      metrics=train_config["METRICS"],
                      path=global_config["ROOT_PATH"],
                      model_dir=global_config["WEIGHT_DIR"],
                      loss_func=train_config["LOSS"])

    # model name & paths
    name = "_".join([train_config["DATE"], train_config["SESSION_NAME"]])
    modelpath = os.path.join(global_config["WEIGHT_DIR"], name)

    if train_config["MIXED_PRECISION"]:
        learner.to_fp16()

    learner.save(modelpath)

    torch.backends.cudnn.benchmark = True

    cbs = [
        SaveModelCallback(learner),
        LearnerTensorboardWriter(
            learner,
            Path(os.path.join(global_config["LOG_DIR"]), "tensorboardx"),
            name),
        TerminateOnNaNCallback()
    ]

    # perform training iteration
    try:
        if train_config["ONE_CYCLE"]:
            learner.fit_one_cycle(train_config["EPOCHS"],
                                  max_lr=train_config["LR"],
                                  callbacks=cbs)
        else:
            learner.fit(train_config["EPOCHS"],
                        lr=train_config["LR"],
                        callbacks=cbs)
    # save model files
    except KeyboardInterrupt:
        learner.save(modelpath)
        raise

    learner.save(modelpath)
    val_loss = min(learner.recorder.val_losses)
    val_metrics = learner.recorder.metrics

    # log using the logging tool
    logger = log.Log(train_config, run_name=train_config['SESSION_NAME'])
    logger.log_metric('Validation Loss', val_loss)
    logger.log_metrics(val_metrics)
    logger.end_run()

    # write csv log file
    log_content = train_config.copy()
    log_content["VAL_LOSS"] = val_loss
    log_content["VAL_METRICS"] = val_metrics
    log_path = os.path.join(global_config["LOG_DIR"], train_config["LOGFILE"])
    write_log(log_path, log_content)

    return learner, log_content, name
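
A hedged usage sketch for this routine, assuming train_dataset and test_dataset are already built torch Datasets and the two config dicts are populated as described in the docstring:

learner, log_content, name = train(train_dataset, test_dataset)
print(name, log_content["VAL_LOSS"])
learner.recorder.plot_losses()  # fastai v1: plot the recorded training and validation loss curves

The returned log_content mirrors what write_log persists, and learner.recorder also exposes the val_losses and metrics that were logged above.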
                         valid_dl=valid_loader,
                         collate_fn=train_collator)

    y_train_torch = get_y_train_torch(weights)
    databunch = get_databunch(y_train_torch)

    logging.info("training model 1: para, rawl, w2v...")
    embedding_matrix = np.concatenate(
        [para_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
    seed_everything(42)
    model = NeuralNet(embedding_matrix,
                      output_aux_sub=subgroup_target.shape[1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    cb = OneCycleScheduler(learn, lr_max=0.001)
    learn.callbacks.append(cb)
    learn.fit(EPOCHS)
    save_nn_without_embedding_weights(learn.model,
                                      "./models/Notebook_100_1.bin")

    logging.info("training model 2: glove, crawl, w2v...")
    embedding_matrix = np.concatenate(
        [glove_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
    seed_everything(43)
    model = NeuralNet(embedding_matrix,
                      output_aux_sub=subgroup_target.shape[1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    cb = OneCycleScheduler(learn, lr_max=0.001)
    learn.callbacks.append(cb)
    learn.fit(EPOCHS)
    save_nn_without_embedding_weights(learn.model,
                                      "./models/Notebook_100_2.bin")