Example #1
def blja_test():
    from common import load_train
    from common import MANAGER_ID
    from common import TARGET

    train_df = load_train()
    visualize_condit_distrib_of_target_on_top_N_values_of_col(
        train_df, MANAGER_ID, TARGET, 3)


# blja_test()

# def visualize_exp_lambda():
#     import matplotlib.pyplot as plt
#
#     fig, ax = plt.subplots()
#     sz=300
#     ixes = range(sz)
#     l0=get_exp_lambda(50, 1)
#     l1=get_exp_lambda(50, 5)
#     l2=get_exp_lambda(50, 0.2)
#     to_plot0 = [l0(x) for x in ixes]
#     to_plot1 = [l1(x) for x in ixes]
#     to_plot2 = [l2(x) for x in ixes]
#     ax.plot(ixes, to_plot0, label='f=1')
#     ax.plot(ixes, to_plot1, label='f=5')
#     ax.plot(ixes, to_plot2, label='f=0.2')
#     ax.legend()
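
The commented-out plot above depends on a get_exp_lambda factory that this listing does not include. A minimal sketch, assuming it is the usual sigmoid smoothing weight from high-cardinality target encoding (k is the count at which the weight reaches 0.5; larger f gives a flatter curve); this is an inferred reconstruction, not the original:

import math

def get_exp_lambda(k, f):
    # Hypothetical: weight as a function of a category's sample count n,
    # rising from 0 toward 1 around n == k, with smoothness controlled by f.
    return lambda n: 1.0 / (1.0 + math.exp(-(n - k) / f))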
Example #2
import common
from sklearn.ensemble import GradientBoostingClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = GradientBoostingClassifier(n_estimators=35,
                                     subsample=0.7,
                                     max_features=0.7,
                                     max_depth=4)
    common.predict_and_report(est, X, y)
Example #3
import common
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    clf = make_pipeline(PCA(), GaussianNB())
    clf = BaggingClassifier(base_estimator=clf,
                            max_samples=0.2,
                            n_estimators=25)
    common.predict_and_report(clf, X, y)
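
Design note: BaggingClassifier clones the entire PCA-plus-GaussianNB pipeline for each of its 25 members, so every member re-fits PCA on its own 20% sample of the data rather than reusing a single shared projection.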
Example #4
import common
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values

    clf = Pipeline([
        ('vec', PolynomialFeatures()),
        ('scale', MinMaxScaler()),
        ('clf', LogisticRegression()),
    ])

    common.predict_and_report(clf, X, y, cv=10)
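
Assuming common.predict_and_report wraps cross-validated scoring (its implementation is not part of this listing), an equivalent check with stock scikit-learn would be:

from sklearn.model_selection import cross_val_score

scores = cross_val_score(clf, X, y, cv=10, scoring='roc_auc')
print('mean AUC: %.4f (+/- %.4f)' % (scores.mean(), scores.std()))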
Example #5
import common
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = BaggingClassifier(base_estimator=GaussianNB())
    params = dict(max_features=[0.4, 0.6, 0.8],
                  max_samples=[0.4, 0.6, 0.8],
                  n_estimators=[8, 18],
                  bootstrap=[False, True])
    clf = GridSearchCV(est, params, scoring='roc_auc')
    clf.fit(X, y)
    common.predict_and_report(clf, X, y)
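
After the search, the winning configuration can be read off the fitted GridSearchCV object through its standard attributes:

print(clf.best_params_)   # best hyper-parameter combination found
print(clf.best_score_)    # its mean cross-validated ROC AUC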
Example #6
import common
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = BaggingClassifier(base_estimator=GaussianNB())
    params = dict(
            max_features=[0.4, 0.6, 0.8],
            max_samples=[0.4, 0.6, 0.8],
            n_estimators=[8, 18],
            bootstrap=[False, True]
    )
    clf = GridSearchCV(est, params, scoring='roc_auc')
    clf.fit(X, y)
    common.predict_and_report(clf, X, y)
Example #7
def maybe_save_model(model,
                     opt,
                     schd,
                     epoch,
                     save_path,
                     curr_val,
                     other_values,
                     model_path=None):
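    # Save whenever curr_val beats the best value seen so far; if it falls
    # more than 1 below that best, reload the last checkpoint and step the
    # scheduler to decay the learning rate.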

    path = model_path if model_path is not None else ''

    if not other_values or curr_val > max(other_values):
        path = save_train(save_path, model, opt, schd, epoch)
        print(f'saving model at path {path} new max psnr {curr_val}')
        clean(save_path, save_count=10)
    elif curr_val < max(other_values) - 1:
        load_train(path, model, opt)
        schd.step()
        print(
            f'model diverged; reloaded last model state, current lr {schd.get_lr()}'
        )

    return path
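
A minimal usage sketch for maybe_save_model; validate is a hypothetical stand-in for the caller's validation routine (the real call sites are in the train functions below). Note the history list is appended only after the call, matching how train() uses it:

best_path, psnr_history = None, []
for epoch in range(num_epochs):
    val_psnr = validate(model)  # hypothetical validation helper
    best_path = maybe_save_model(model, optimizer, scheduler, epoch,
                                 save_dir, val_psnr, psnr_history, best_path)
    psnr_history.append(val_psnr)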
Example #8
def train(model, args):

    #optimizer = optim.Adam(
    #    [
    #        {'params': model.softthrsh0.parameters()},
    #        {'params': model.softthrsh1.parameters()},
    #        {'params': model.encode_conv0.parameters()},
    #        {'params': model.encode_conv1.parameters()},
    #        {'params': model.decode_conv1.parameters(), 'lr':
    #         args['learning_rate']},
    #    ],
    #    lr=args['learning_rate']
    #)
    optimizer = optim.Adam(model.parameters(), lr=args['learning_rate'])
    break_down_sum = sum(
        map(common.count_parameters, [
            model.softthrsh0, model.encode_conv0, model.softthrsh1,
            model.encode_conv1, model.decode_conv1
        ]))

    # ReduceLROnPlateau(optimizer, 'min', verbose=True)
    train_loader, valid_loader = get_train_valid_loaders(
        args['dataset_path'], args['batch_size'], args['noise'])

    valid_loss = reconsturction_loss(use_cuda=True)

    criterion = common.get_criterion(
        losses_types=['l1', 'l2'],  #, 'msssim'],
        factors=[0.8, 0.2],
        use_cuda=USE_CUDA)

    print('train args:')
    _pprint(args)

    model_path = None
    _train_loss = []
    _valid_loss = []
    _valid_psnr = []
    running_loss = 0
    compare_loss = 1
    valid_every = int(0.1 * len(train_loader))

    gamma = 0.1
    #if model.ista_iters < 20 else\
    #        0.1 * (20 / args['noise']) * (1 / model.ista_iters)**0.5

    scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=gamma)

    if args.get('load_path', '') != '':
        ld_p = args['load_path']
        print('loading from %s' % ld_p)
        load_train(ld_p, model, optimizer, scheduler)
        print('Done!')

    itr = 0
    for e in range(args['epoch']):
        print('Epoch number {}'.format(e))
        for img, img_n in train_loader:
            itr += 1

            _loss, _ = step(model, img, img_n, optimizer, criterion=criterion)
            running_loss += float(_loss)
            compare_loss += 1e-1 * float(_loss)

            if itr % valid_every == 0 or itr % len(train_loader) == 0:
                _v_loss, _v_psnr = run_valid(model, valid_loader, valid_loss,
                                             args['save_dir'],
                                             f'perf_iter{itr}',
                                             itr == valid_every)

                # StepLR.step() takes no validation metric (unlike
                # ReduceLROnPlateau), so step it once per validation.
                scheduler.step()

                model_path = maybe_save_model(model, optimizer, scheduler, e,
                                              args['save_dir'], _v_psnr,
                                              _valid_psnr, model_path)
            if itr % valid_every == 0:
                _train_loss.append(running_loss / valid_every)
                _valid_loss.append(_v_loss)
                _valid_psnr.append(_v_psnr)
                print("epoch {} train loss: {} valid loss: {}, valid psnr: {}".
                      format(e, running_loss / valid_every, _v_loss, _v_psnr))
                running_loss = 0

    plot_losses(_train_loss, _valid_loss, _valid_psnr, args['save_dir'])
    return model_path, _valid_loss[-1], _valid_psnr[-1]
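
train() relies on a step() helper that is not part of this listing. A plausible sketch, assuming img is the clean target and img_n its noisy counterpart (the interface is inferred from the call site, not taken from the original code):

def step(model, img, img_n, optimizer, criterion):
    # One optimization step: denoise img_n and score it against the clean img.
    if USE_CUDA:
        img, img_n = img.cuda(), img_n.cuda()
    optimizer.zero_grad()
    output = model(img_n)
    loss = criterion(output, img)
    loss.backward()
    optimizer.step()
    return loss, output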
Example #9
import common
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    common.predict_and_report(GaussianNB(), X, y)
Example #10
def train(model, args):

    optimizer = optim.Adam(model.parameters(), lr=args['learning_rate'])
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
    sup_criterion = get_sup_criterion(use_cuda=USE_CUDA)
    unsup_criterion = get_unsup_criterion(args['unsup_factors'])

    labeled_loader, unlabeled_loader, valid_loader = get_train_loaders(
        labeled_size=args["label_count"],
        valid_size=5000,
        batch_size=args['batch_size'],
        pin_memory=USE_CUDA)

    print('Running Train\ntrain args:\n')
    _pprint(args)

    print('labeled count: {}  unlabeled count: {}'.format(
        len(labeled_loader), len(unlabeled_loader)))

    if args['load_path'] != '':
        ld_p = args['load_path']
        print('loading from %s' % ld_p)
        load_train(ld_p, model, optimizer, scheduler)
        print('Done!')

    _train_label_loss = []
    _valid_loss = []
    _train_unlabel_loss = []
    _model_path = ''

    running_label_loss = 0
    running_unlabel_loss = 0
    valid_every = int(0.1 * (len(labeled_loader) + len(unlabeled_loader)))

    itr = 0
    unsupervised_epochs = args['unsupervised_epochs']
    for e in range(args['epoch']):
        print('Epoch number {}'.format(e))
        for (x, y), (u, _) in zip(cycle(labeled_loader), unlabeled_loader):
            itr += 1

            if USE_CUDA:
                x = x.cuda()
                y = y.cuda()
                u = u.cuda()

            optimizer.zero_grad()

            if e < unsupervised_epochs:
                y = None

            ll_unsup, loss_sup = train_step(model, sup_criterion,
                                            unsup_criterion, args['noise'],
                                            x, y)
            ul_unsup, _ = train_step(model, sup_criterion, unsup_criterion,
                                     args['noise'], u)

            loss_unsup = 0.5 * (ll_unsup + ul_unsup)
            _loss = loss_unsup + loss_sup

            _loss.backward()
            optimizer.step()

            running_label_loss += float(loss_sup)
            running_unlabel_loss += float(loss_unsup)

            if itr % valid_every == 0:
                _train_label_loss.append(running_label_loss / valid_every)
                _train_unlabel_loss.append(running_unlabel_loss / valid_every)

                _v_loss, acc = run_valid(
                    model,
                    valid_loader,
                    sup_criterion,
                )
                scheduler.step(_v_loss)

                _model_path = maybe_save_model(model, optimizer,
                        scheduler, e, args['save_dir'],
                        _v_loss, _valid_loss, _model_path)

                _valid_loss.append(_v_loss)

                if e >= unsupervised_epochs:
                    line = "epoch ssl {}:{} train loss labeld: {} "
                    line += "train unlabeld loss: {}valid loss: {} valid accuracy {}"
                    print(line.format(
                        e, args['epoch'],
                        running_label_loss / valid_every, running_unlabel_loss /
                        valid_every, _v_loss, acc))
                else:
                    avg_train_loss = ((running_label_loss + running_unlabel_loss) /
                                      (valid_every * 2))
                    print("epoch unsupervised {}:{} train loss {} valid loss: {} valid accuracy {}"
                          .format(e, args['epoch'], avg_train_loss, _v_loss, acc))

                running_label_loss = 0
                running_unlabel_loss = 0

    _, acc = run_valid(
        model, valid_loader,
        sup_criterion
    )
    return _model_path, acc
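
train_step() is likewise not shown. A sketch of the assumed interface, returning an (unsupervised loss, supervised loss) pair; the ladder-style model output is an assumption inferred from how the two losses are combined above:

def train_step(model, sup_criterion, unsup_criterion, noise_std, x, y=None):
    # Assumed model interface: a noisy forward pass returning class logits
    # plus (clean, denoised) activation pairs for the reconstruction loss.
    logits, clean_acts, denoised_acts = model(x, noise_std)
    loss_unsup = unsup_criterion(denoised_acts, clean_acts)
    loss_sup = sup_criterion(logits, y) if y is not None else 0.0
    return loss_unsup, loss_sup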
Example #11
import common
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    common.predict_and_report(GaussianNB(), X, y)
Example #12
import common
from sklearn.ensemble import RandomForestClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = RandomForestClassifier(n_estimators=125)
    common.predict_and_report(est, X, y)
Example #13
import common
from sklearn.ensemble import RandomForestClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = RandomForestClassifier(n_estimators=125)
    common.predict_and_report(est, X, y)
Example #14
import common
from sklearn.ensemble import GradientBoostingClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = GradientBoostingClassifier(n_estimators=35,
                                     subsample=0.7,
                                     max_features=0.7,
                                     max_depth=4)
    common.predict_and_report(est, X, y)