def blja_test():
    from common import load_train
    from common import MANAGER_ID
    from common import TARGET
    train_df = load_train()
    visualize_condit_distrib_of_target_on_top_N_values_of_col(
        train_df, MANAGER_ID, TARGET, 3)

# blja_test()


# def visualize_exp_lambda():
#     import matplotlib.pyplot as plt
#
#     fig, ax = plt.subplots()
#     sz = 300
#     ixes = range(sz)
#     l0 = get_exp_lambda(50, 1)
#     l1 = get_exp_lambda(50, 5)
#     l2 = get_exp_lambda(50, 0.2)
#     to_plot0 = [l0(x) for x in ixes]
#     to_plot1 = [l1(x) for x in ixes]
#     to_plot2 = [l2(x) for x in ixes]
#     ax.plot(ixes, to_plot0, label='f=1')
#     ax.plot(ixes, to_plot1, label='f=5')
#     ax.plot(ixes, to_plot2, label='f=0.2')
#     ax.legend()
import common
from sklearn.ensemble import GradientBoostingClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = GradientBoostingClassifier(n_estimators=35, subsample=0.7,
                                     max_features=0.7, max_depth=4)
    common.predict_and_report(est, X, y)
import common
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    clf = make_pipeline(PCA(), GaussianNB())
    clf = BaggingClassifier(base_estimator=clf, max_samples=0.2,
                            n_estimators=25)
    common.predict_and_report(clf, X, y)
import common
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    clf = Pipeline([
        ('vec', PolynomialFeatures()),
        ('scale', MinMaxScaler()),
        ('clf', LogisticRegression()),
    ])
    common.predict_and_report(clf, X, y, cv=10)
import common
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = BaggingClassifier(base_estimator=GaussianNB())
    params = dict(
        max_features=[0.4, 0.6, 0.8],
        max_samples=[0.4, 0.6, 0.8],
        n_estimators=[8, 18],
        bootstrap=[False, True],
    )
    clf = GridSearchCV(est, params, scoring='roc_auc')
    clf.fit(X, y)
    common.predict_and_report(clf, X, y)
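# A minimal sketch of inspecting the fitted grid search above. It relies only
# on standard GridSearchCV attributes (best_params_, best_score_,
# best_estimator_); report_grid_search itself is a hypothetical helper, not
# part of the local common module.
def report_grid_search(clf):
    # clf must already be fitted (clf.fit(X, y) is called in main()).
    print('best params: {}'.format(clf.best_params_))
    print('best cv roc_auc: {:.4f}'.format(clf.best_score_))
    return clf.best_estimator_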
def maybe_save_model(model, opt, schd, epoch, save_path, curr_val,
                     other_values, model_path=None):
    path = model_path if model_path is not None else ''
    if not other_values or curr_val > max(other_values):
        # New best validation value: checkpoint and prune old saves.
        path = save_train(save_path, model, opt, schd, epoch)
        print(f'saving model at path {path} new max psnr {curr_val}')
        clean(save_path, save_count=10)
    elif curr_val < max(other_values) - 1:
        # Validation value dropped sharply: reload the last checkpoint
        # and step the scheduler to decay the learning rate.
        load_train(path, model, opt)
        schd.step()
        print(f'model diverged, reloaded last model state, '
              f'current lr {schd.get_lr()}')
    return path
def train(model, args):
    # optimizer = optim.Adam(
    #     [
    #         {'params': model.softthrsh0.parameters()},
    #         {'params': model.softthrsh1.parameters()},
    #         {'params': model.encode_conv0.parameters()},
    #         {'params': model.encode_conv1.parameters()},
    #         {'params': model.decode_conv1.parameters(),
    #          'lr': args['learning_rate']},
    #     ],
    #     lr=args['learning_rate']
    # )
    optimizer = optim.Adam(model.parameters(), lr=args['learning_rate'])

    # Total parameter count across the listed sub-modules.
    break_down_sum = sum(
        map(common.count_parameters, [
            model.softthrsh0, model.encode_conv0, model.softthrsh1,
            model.encode_conv1, model.decode_conv1
        ]))

    # ReduceLROnPlateau(optimizer, 'min', verbose=True)
    train_loader, valid_loader = get_train_valid_loaders(
        args['dataset_path'], args['batch_size'], args['noise'])

    valid_loss = reconsturction_loss(use_cuda=True)
    criterion = common.get_criterion(
        losses_types=['l1', 'l2'],  # , 'msssim'],
        factors=[0.8, 0.2],
        use_cuda=USE_CUDA)

    print('train args:')
    _pprint(args)

    model_path = None
    _train_loss = []
    _valid_loss = []
    _valid_psnr = []
    running_loss = 0
    compare_loss = 1
    valid_every = int(0.1 * len(train_loader))

    gamma = 0.1  # if model.ista_iters < 20 else\
    #     0.1 * (20 / args['noise']) * (1 / model.ista_iters)**0.5
    scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=gamma)

    if args.get('load_path', '') != '':
        ld_p = args['load_path']
        print('loading from %s' % ld_p)
        load_train(ld_p, model, optimizer, scheduler)
        print('Done!')

    itr = 0
    for e in range(args['epoch']):
        print('Epoch number {}'.format(e))
        for img, img_n in train_loader:
            itr += 1
            _loss, _ = step(model, img, img_n, optimizer, criterion=criterion)
            running_loss += float(_loss)
            compare_loss += 1e-1 * float(_loss)

            if itr % valid_every == 0 or itr % len(train_loader) == 0:
                _v_loss, _v_psnr = run_valid(model, valid_loader, valid_loss,
                                             args['save_dir'],
                                             f'perf_iter{itr}',
                                             itr == valid_every)
                scheduler.step(_v_loss)
                model_path = maybe_save_model(model, optimizer, scheduler, e,
                                              args['save_dir'], _v_psnr,
                                              _valid_psnr, model_path)

            if itr % valid_every == 0:
                _train_loss.append(running_loss / valid_every)
                _valid_loss.append(_v_loss)
                _valid_psnr.append(_v_psnr)
                print("epoch {} train loss: {} valid loss: {}, valid psnr: {}".
                      format(e, running_loss / valid_every, _v_loss, _v_psnr))
                running_loss = 0

    plot_losses(_train_loss, _valid_loss, _valid_psnr, args['save_dir'])
    return model_path, _valid_loss[-1], _valid_psnr[-1]
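# run_valid is defined elsewhere in this repo; as an illustration only, the
# PSNR it reports for images in [0, 1] can be computed from the
# reconstruction MSE as below. This is a sketch of the standard formula
# PSNR = 10 * log10(MAX^2 / MSE), not the repo's actual run_valid code.
import torch


def psnr(img, img_ref, max_val=1.0):
    # Mean squared error between reconstruction and reference.
    mse = torch.mean((img - img_ref) ** 2)
    return 10 * torch.log10(max_val ** 2 / mse)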
import common
from sklearn.naive_bayes import GaussianNB


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    common.predict_and_report(GaussianNB(), X, y)
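# common.predict_and_report is defined in the local common module and is not
# shown in this section. A rough, hypothetical equivalent, guessed from how
# it is called in these scripts (estimator, X, y, optional cv) and from the
# roc_auc scoring used by the grid searches above, might look like this:
from sklearn.model_selection import cross_val_score


def predict_and_report_sketch(est, X, y, cv=5):
    # Cross-validated ROC AUC for the given estimator.
    scores = cross_val_score(est, X, y, scoring='roc_auc', cv=cv)
    print('roc_auc: {:.4f} +/- {:.4f}'.format(scores.mean(), scores.std()))
    return scores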
def train(model, args):
    optimizer = optim.Adam(model.parameters(), lr=args['learning_rate'])
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)

    sup_criterion = get_sup_criterion(use_cuda=USE_CUDA)
    unsup_criterion = get_unsup_criterion(args['unsup_factors'])

    labeled_loader, unlabeled_loader, valid_loader = \
        get_train_loaders(labeled_size=args["label_count"],
                          valid_size=5000,
                          batch_size=args['batch_size'],
                          pin_memory=USE_CUDA)

    print('Running Train\ntrain args:\n')
    _pprint(args)
    print('labeled count: {} unlabeled count: {}'.
          format(len(labeled_loader), len(unlabeled_loader)))

    if args['load_path'] != '':
        ld_p = args['load_path']
        print('loading from %s' % ld_p)
        load_train(ld_p, model, optimizer, scheduler)
        print('Done!')

    _train_label_loss = []
    _valid_loss = []
    _train_unlabel_loss = []
    _model_path = ''
    running_label_loss = 0
    running_unlabel_loss = 0
    valid_every = \
        int(0.1 * (len(labeled_loader) + len(unlabeled_loader)))
    itr = 0
    unsupervised_epochs = args['unsupervised_epochs']

    for e in range(args['epoch']):
        print('Epoch number {}'.format(e))
        for (x, y), (u, _) in zip(cycle(labeled_loader), unlabeled_loader):
            itr += 1
            if USE_CUDA:
                x = x.cuda()
                y = y.cuda()
                u = u.cuda()
            optimizer.zero_grad()

            # During the warm-up epochs train purely unsupervised.
            if e < unsupervised_epochs:
                y = None

            ll_unsup, loss_sup = \
                train_step(model, sup_criterion, unsup_criterion,
                           args['noise'], x, y)
            ul_unsup, _ = \
                train_step(model, sup_criterion, unsup_criterion,
                           args['noise'], u)
            loss_unsup = 0.5 * (ll_unsup + ul_unsup)

            _loss = loss_unsup + loss_sup
            _loss.backward()
            optimizer.step()

            running_label_loss += float(loss_sup)
            running_unlabel_loss += float(loss_unsup)

            if itr % valid_every == 0:
                _train_label_loss.append(running_label_loss / valid_every)
                _train_unlabel_loss.append(
                    running_unlabel_loss / valid_every)

                _v_loss, acc = run_valid(model, valid_loader, sup_criterion)
                scheduler.step(_v_loss)
                _model_path = maybe_save_model(model, optimizer, scheduler,
                                               e, args['save_dir'], _v_loss,
                                               _valid_loss, _model_path)
                _valid_loss.append(_v_loss)

                if e >= unsupervised_epochs:
                    line = "epoch ssl {}:{} train loss labeled: {} "
                    line += ("train unlabeled loss: {} valid loss: {} "
                             "valid accuracy {}")
                    print(line.format(
                        e, args['epoch'],
                        running_label_loss / valid_every,
                        running_unlabel_loss / valid_every,
                        _v_loss, acc))
                else:
                    avg_train_loss = ((running_label_loss +
                                       running_unlabel_loss) /
                                      (valid_every * 2))
                    print("epoch unsupervised {}:{} train loss {} "
                          "valid loss: {} valid accuracy {}".format(
                              e, args['epoch'], avg_train_loss,
                              _v_loss, acc))

                running_label_loss = 0
                running_unlabel_loss = 0

    _, acc = run_valid(model, valid_loader, sup_criterion)
    return _model_path, acc
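# The zip(cycle(labeled_loader), unlabeled_loader) pattern above pairs every
# unlabeled batch with a labeled one, re-cycling the (smaller) labeled loader
# as needed. A tiny self-contained illustration with plain lists in place of
# data loaders:
from itertools import cycle

labeled_batches = ['l0', 'l1']
unlabeled_batches = ['u0', 'u1', 'u2', 'u3', 'u4']
pairs = list(zip(cycle(labeled_batches), unlabeled_batches))
# -> [('l0', 'u0'), ('l1', 'u1'), ('l0', 'u2'), ('l1', 'u3'), ('l0', 'u4')]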
import common
from sklearn.ensemble import RandomForestClassifier


def main():
    df_train = common.load_train()
    X, y = df_train.loc[:, common.X_cols].values, df_train.target.values
    est = RandomForestClassifier(n_estimators=125)
    common.predict_and_report(est, X, y)