def _eval(model, dataloader, CONFIG):
    model.eval()
    torch.set_grad_enabled(False)

    correct = 0
    total = 0
    all_targ = torch.tensor([]).to(dtype=torch.int64).cuda(CONFIG['NUM_GPU'])
    all_pred = torch.tensor([]).to(dtype=torch.int64).cuda(CONFIG['NUM_GPU'])
    for test_data in dataloader:
        X, y = test_data
        X, y = X.cuda(CONFIG['NUM_GPU']), y.cuda(CONFIG['NUM_GPU'])

        y_pred = model(X)
        all_pred = torch.cat((all_pred, torch.argmax(y_pred, dim=1)))
        all_targ = torch.cat((all_targ, y.to(torch.int64)))
        total += y.size(0)
        correct += accuracy(torch.argmax(y_pred, dim=1), y,
                            method='5_class_vec_output') * y.size(0)

    acc = round(correct / total, 4)
    c_matrix, kappa = quadratic_weighted_kappa(all_targ.cpu().numpy(),
                                               all_pred.cpu().numpy())

    model.train()
    torch.set_grad_enabled(True)
    return acc, c_matrix, kappa, all_pred.cpu().numpy()
def _eval(model, dataloader, CONFIG):
    model.eval()
    torch.set_grad_enabled(False)

    correct = 0
    total = 0
    all_targ = torch.tensor([]).to(dtype=torch.int64).cuda()
    all_pred = torch.tensor([]).to(dtype=torch.int64).cuda()
    for test_data in dataloader:
        X, y = test_data
        X, y = X.cuda(), y.cuda()

        y_pred = model(X)
        y_pred_classified = y_pred.view(-1).clone()
        for i in range(len(y_pred)):
            y_pred_classified[i] = classify(y_pred[i])

        all_pred = torch.cat((all_pred, y_pred_classified.to(torch.int64)))
        all_targ = torch.cat((all_targ, y.to(torch.int64)))
        total += y.size(0)
        correct += accuracy(y_pred.cpu(), y.cpu().float()) * y.size(0)

    acc = round(correct / total, 4)
    c_matrix, kappa = quadratic_weighted_kappa(all_targ.cpu().numpy(),
                                               all_pred.cpu().numpy())

    model.train()
    torch.set_grad_enabled(True)
    return acc, c_matrix, kappa, all_pred.cpu().numpy()
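# `classify` is called above (and in the EfficientNet `evaluate` below) but is
# not defined in this section. A minimal sketch, assuming the usual
# regression-to-grade thresholds at 0.5/1.5/2.5/3.5 over a 0-4 label range;
# both the cut points and the range are assumptions, not taken from the
# original code:
def classify(prediction, thresholds=(0.5, 1.5, 2.5, 3.5)):
    # count how many thresholds the raw regression score exceeds,
    # yielding an integer grade in [0, len(thresholds)]
    grade = 0
    for t in thresholds:
        if prediction >= t:
            grade += 1
    return grade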
def run():
    data_df = pd.read_csv('../input/train.csv')
    train_df, valid_df = train_test_split(data_df,
                                          random_state=42,
                                          test_size=0.1)
    train_df = train_df.reset_index(drop=True)
    valid_df = valid_df.reset_index(drop=True)

    train_y = train_df['median_relevance'].values
    valid_y = valid_df['median_relevance'].values

    train_dataset = CrowdFlowerDataset(
        query=train_df['query'].values,
        prod_title=train_df['product_title'].values,
        prod_description=train_df['product_description'].values,
        targets=train_y)
    valid_dataset = CrowdFlowerDataset(
        query=valid_df['query'].values,
        prod_title=valid_df['product_title'].values,
        prod_description=valid_df['product_description'].values,
        targets=valid_y)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=configs.TRAIN_BATCH_SIZE, shuffle=True)
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=configs.VALID_BATCH_SIZE, shuffle=False)

    num_train_steps = int(
        len(train_dataset) / configs.TRAIN_BATCH_SIZE * configs.EPOCHS)

    device = configs.DEVICE
    model = BERTBaseUncased().to(device)
    optimizer = configs.OPTIMIZER(model.parameters(), lr=configs.LR)
    scheduler = configs.SCHEDULER(optimizer,
                                  num_warmup_steps=0,
                                  num_training_steps=num_train_steps)

    for epoch in range(configs.EPOCHS):
        epoch_start = time.time()
        epoch_train_loss = train_loop_fn(train_dataloader, model, optimizer,
                                         scheduler)
        outputs, targets, epoch_valid_loss = eval_loop_fn(
            valid_dataloader, model)
        epoch_end = time.time()
        epoch_time_elapsed = (epoch_end - epoch_start) / 60.0
        print(f'time taken to run an epoch - {epoch_time_elapsed}')
        print(f'Epoch - Training loss - {epoch_train_loss} '
              f'Valid loss - {epoch_valid_loss}')
        qw_kappa = quadratic_weighted_kappa(targets.flatten(),
                                            outputs.flatten())
        print(f'Quadratic Weighted Kappa: {qw_kappa}')
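# `quadratic_weighted_kappa` is used throughout these snippets but defined in
# none of them, and its signature varies: here (and in the SVM/CNN snippets
# below) it returns a scalar, while the `_eval` variants above expect a
# (confusion_matrix, kappa) pair. A minimal sketch of the scalar form,
# assuming sklearn is acceptable:
from sklearn.metrics import cohen_kappa_score

def quadratic_weighted_kappa(y_true, y_pred):
    # Cohen's kappa with quadratic weights over the ordinal labels
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')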
def evaluate(model_path, test_dataset, CONFIG):
    trained_model = torch.load(model_path).cuda(CONFIG['NUM_GPU'])
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    # _eval returns (acc, confusion_matrix, kappa, predictions)
    test_acc, test_c_matrix, test_kappa, _ = _eval(trained_model, test_loader,
                                                   CONFIG)
    print('==============================')
    print('Finished! test acc: {}'.format(test_acc))
    print('Confusion Matrix:')
    print(test_c_matrix)
    print('quadratic kappa: {}'.format(test_kappa))
    print('==============================')
def evaluate(CONFIG):
    # create result folder
    if not os.path.isdir(CONFIG['SAVE_PATH']):
        os.makedirs(CONFIG['SAVE_PATH'])

    # create dataset
    test_dataset = generate_stem_dataset(CONFIG['DATA_PATH'],
                                         CONFIG['INPUT_SIZE'],
                                         CONFIG['DATA_AUGMENTATION'],
                                         cv=False,
                                         mode='evaluate')

    # create dataloader
    test_loader = DataLoader(test_dataset,
                             batch_size=CONFIG['BATCH_SIZE'],
                             num_workers=CONFIG['NUM_WORKERS'],
                             shuffle=False)

    # define model
    model_name = CONFIG['MODEL_NAME']
    model = EfficientNet.from_pretrained(model_name)
    feature = model._fc.in_features
    model._fc = nn.Linear(in_features=feature, out_features=1, bias=True)

    # multi-GPU setting
    torch.cuda.set_device(CONFIG['GPU_NUM'][0])
    model = torch.nn.DataParallel(
        model, device_ids=CONFIG['GPU_NUM']).to(device=torch.device('cuda'))

    # load pretrained weights
    if CONFIG['PRETRAINED_PATH']:
        state_dict = torch.load(CONFIG['PRETRAINED_PATH'])
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        # remap keys so checkpoints saved with or without DataParallel load
        for k, v in state_dict.items():
            if 'module' not in k:
                k = 'module.' + k
            else:
                k = k.replace('features.module.', 'module.features.')
            new_state_dict[k] = v
        model.load_state_dict(new_state_dict)

    # evaluate
    model.eval()
    torch.set_grad_enabled(False)
    correct = 0
    total = 0
    all_targ = torch.tensor([]).to(dtype=torch.int64).cuda()
    all_pred = torch.tensor([]).to(dtype=torch.int64).cuda()
    logit_pred_y = []
    logit_targ_y = []
    for test_data in test_loader:
        X, y = test_data
        X, y = X.cuda(), y.cuda()

        y_pred = model(X)
        y_pred_classified = y_pred.view(-1).clone()
        for i in range(len(y_pred)):
            y_pred_classified[i] = classify(y_pred[i])

        all_pred = torch.cat((all_pred, y_pred_classified.to(torch.int64)))
        all_targ = torch.cat((all_targ, y.to(torch.int64)))
        total += y.size(0)
        correct += accuracy(y_pred.cpu(), y.cpu().float()) * y.size(0)
        logit_pred_y += list(y_pred.view(-1).cpu().numpy())
        logit_targ_y += list(y.cpu().float().numpy())

    acc = round(correct / total, 4)
    c_matrix, kappa = quadratic_weighted_kappa(all_targ.cpu().numpy(),
                                               all_pred.cpu().numpy())

    ks_dataframe = pd.DataFrame({'pred': logit_pred_y, 'targ': logit_targ_y})
    ks_dataframe.to_csv(os.path.join(CONFIG['SAVE_PATH'],
                                     model_name + '_eval_results.csv'),
                        index=False,
                        sep=',')

    print('==============================')
    print('Test acc: {}'.format(acc))
    print('Confusion Matrix:\n{}'.format(c_matrix))
    print('quadratic kappa: {}'.format(kappa))
    print('==============================')
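# A hypothetical CONFIG illustrating the keys `evaluate` reads; every value
# below is an assumption for demonstration, not taken from the original repo:
EXAMPLE_CONFIG = {
    'SAVE_PATH': './results',
    'DATA_PATH': './data/test',
    'INPUT_SIZE': 256,
    'DATA_AUGMENTATION': None,
    'BATCH_SIZE': 32,
    'NUM_WORKERS': 4,
    'MODEL_NAME': 'efficientnet-b0',
    'GPU_NUM': [0],
    'PRETRAINED_PATH': './weights/model.pth',
}
# evaluate(EXAMPLE_CONFIG)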
def main(argv):
    if len(argv) < 5:
        sys.exit('please provide\n'
                 ' the path to your data sets;\n'
                 ' train or test or both keyword;\n'
                 ' data file(s) extension (pk or csv);\n'
                 ' what column to fit if train mode is set'
                 '/how to call output if test mode is set;\n'
                 ' id column for the final result;\n'
                 ' optional:\n'
                 ' simple fit or grid search;\n'
                 ' save the model created\n')
    path = argv[1]
    if not path.endswith('/'):
        path = path + '/'
    train_or_test = argv[2]
    ext = argv[3]
    fit_y = argv[4]
    id = 'none'
    if len(argv) > 5:
        id = argv[5]
    simple_fit = 0
    dump_model = 0
    if len(argv) > 6:
        if argv[6] == 'simple':
            simple_fit = 1
            if len(argv) > 7 and argv[7] == 'save':
                dump_model = 1
        elif argv[6] == 'save':
            dump_model = 1

    if train_or_test != 'test':
        if ext == 'pk':
            train_features = pd.read_pickle(path + 'train.pk')
        else:
            train_features = pd.read_csv(path + 'train.csv').fillna("")
        if fit_y not in train_features:
            sys.exit(fit_y + ' not found in the provided data set, '
                     'verify your data and try again')
        y = train_features[fit_y]
        train_features = train_features.drop([fit_y], axis=1)
        if id in train_features:
            train_features = train_features.drop([id], axis=1)

        # -- the model
        svd = TruncatedSVD()
        scl = StandardScaler()
        model = LinearSVC()
        pip = pipeline.Pipeline([('svd', svd), ('scl', scl), ('svm', model)])

        if simple_fit:
            X_train, X_test, y_train, y_test = train_test_split(
                train_features, y, test_size=0.1, random_state=0)
            pip.fit(X_train, y_train)
            predicted = pip.predict(X_test)
            sc = quadratic_weighted_kappa(y_test, predicted)
            print("score: %0.3f" % sc)
            best_model = pip
        else:
            # -- grid parameter search
            param_grid = {'svd__n_components': [2, 3], 'svm__C': [5, 10]}
            scorer = make_scorer(quadratic_weighted_kappa,
                                 greater_is_better=True)
            model = grid_search.GridSearchCV(estimator=pip,
                                             param_grid=param_grid,
                                             scoring=scorer,
                                             verbose=10,
                                             n_jobs=-1,
                                             iid=True,
                                             refit=True,
                                             cv=3)
            model.fit(train_features, y)
            print("Best score: %0.3f" % model.best_score_)
            print("Best parameters set:")
            best_parameters = model.best_estimator_.get_params()
            for param_name in sorted(param_grid.keys()):
                print("\t%s: %r" % (param_name, best_parameters[param_name]))
            best_model = model.best_estimator_
            best_model.fit(train_features, y)

        if dump_model:
            with open(path + 'model.dmp', 'wb') as f:
                pickle.dump(best_model, f)
            result = "model.dmp"
        else:
            result = "N/A"

    if train_or_test != 'train':
        if ext == 'pk':
            test_features = pd.read_pickle(path + 'test.pk')
        else:
            test_features = pd.read_csv(path + 'test.csv').fillna("")
        if train_or_test == 'test':
            with open(path + 'model.dmp', 'rb') as f:
                best_model = pickle.load(f)
        out_id = None
        if id in test_features:
            out_id = test_features[id]
            test_features = test_features.drop([id], axis=1)
        predictions = best_model.predict(test_features)
        if out_id is not None:
            result = pd.DataFrame({id: out_id, fit_y: predictions})
        else:
            result = pd.DataFrame({
                "ID": test_features.index.tolist(),
                fit_y: predictions
            })
        result.to_csv(path + "result.csv", index=False)
        result = "result.csv"

    if result == 'N/A':
        return 'none'
    else:
        return path + result
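# Hypothetical invocations of main() via sys.argv (the script name, paths, and
# column names below are placeholders, not from the original repo):
#
#   python fit_svm.py ./data/ train csv median_relevance id simple save
#   python fit_svm.py ./data/ test  csv median_relevance id
#
# i.e. main(sys.argv) trains a TruncatedSVD -> StandardScaler -> LinearSVC
# pipeline on train.csv (optionally grid-searching svd__n_components and
# svm__C with the QWK scorer), and/or writes predictions for test.csv to
# result.csv.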
def train_cnn(run_name, trn_x, val_x, trn_y, val_y, cfg):
    train_loader = factory.get_dataloader(trn_x, trn_y, cfg.data.train)
    valid_loader = factory.get_dataloader(val_x, val_y, cfg.data.valid)

    model = factory.get_model(cfg).to(device)
    criterion = factory.get_loss(cfg)
    optimizer = factory.get_optim(cfg, model.parameters())
    scheduler = factory.get_scheduler(cfg, optimizer)

    best_epoch = -1
    best_val_score = -np.inf
    best_coef = []
    mb = master_bar(range(cfg.data.train.epochs))

    train_loss_list = []
    val_loss_list = []
    val_score_list = []
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for epoch in mb:
        start_time = time.time()

        model, avg_loss = train_epoch(model, train_loader, criterion,
                                      optimizer, mb, cfg)
        valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion,
                                              cfg)

        if cfg.model.n_classes > 1:
            # classification head: score the argmax over class logits
            val_score = quadratic_weighted_kappa(val_y, valid_preds.argmax(1))
            cm = confusion_matrix(val_y, valid_preds.argmax(1))
        else:
            # regression head: fit optimal rounding thresholds on the fold
            optR = QWKOptimizedRounder()
            optR.fit(valid_preds.copy(), val_y, initial_coef)
            coef = optR.coefficients()
            valid_preds_class = optR.predict(valid_preds.copy(), coef)
            val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
            cm = confusion_matrix(val_y, valid_preds_class)
            # cm = np.round(cm / np.sum(cm, axis=1, keepdims=True), 3)

        train_loss_list.append(avg_loss)
        val_loss_list.append(avg_val_loss)
        val_score_list.append(val_score)

        if cfg.scheduler.name != 'ReduceLROnPlateau':
            scheduler.step()
        elif cfg.scheduler.name == 'ReduceLROnPlateau':
            scheduler.step(avg_val_loss)

        elapsed = time.time() - start_time
        mb.write(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} '
                 f'avg_val_loss: {avg_val_loss:.4f} '
                 f'val_score: {val_score:.4f} time: {elapsed:.0f}s')
        logging.debug(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} '
                      f'avg_val_loss: {avg_val_loss:.4f} '
                      f'val_score: {val_score:.4f} time: {elapsed:.0f}s')

        if val_score > best_val_score:
            best_epoch = epoch + 1
            best_val_score = val_score
            best_valid_preds = valid_preds
            if cfg.model.multi_gpu:
                best_model = model.module.state_dict()
            else:
                best_model = model.state_dict()
            if cfg.model.n_classes == 1:
                best_coef = coef
            best_cm = cm

    # report the confusion matrix from the best epoch
    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(best_cm)
    logging.debug(best_cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    print(f'BEST EPOCH: {best_epoch}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    logging.debug(f'BEST EPOCH: {best_epoch}\n\n')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/oof.npy', best_valid_preds)
    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)
    torch.save(best_model, f'../logs/{run_name}/weight_best.pt')
    save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list)

    return result
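# `QWKOptimizedRounder` is used above and in `train_ordinal_reg` below but is
# not defined in this section. A minimal sketch in the spirit of the standard
# Kaggle "OptimizedRounder" recipe, assuming scipy is available; the class
# name and method signatures match the usage above, but the internals are an
# assumption:
import numpy as np
import scipy.optimize as opt
from functools import partial

class QWKOptimizedRounder:
    """Fit rounding thresholds that maximize quadratic weighted kappa."""

    def _loss(self, coef, X, y):
        # negative QWK of the thresholded predictions (we minimize)
        preds = self.predict(X, coef)
        return -quadratic_weighted_kappa(y, preds)

    def fit(self, X, y, initial_coef):
        loss_partial = partial(self._loss, X=X, y=y)
        self.coef_ = opt.minimize(loss_partial, initial_coef,
                                  method='nelder-mead')

    def predict(self, X, coef):
        # digitize raw scores into ordinal classes at the given cut points
        return np.digitize(X, coef)

    def coefficients(self):
        return self.coef_['x']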
def train_ordinal_reg(run_name, trn_x, val_x, trn_y, val_y, cfg):
    ordinal_val_preds = np.zeros_like(val_y)

    for i, col in enumerate(trn_y.columns[1:]):
        print(f'\n\n==================== {col} ====================')
        logging.debug(f'\n\n==================== {col} ====================')

        train_loader = factory.get_dataloader(trn_x, trn_y[col],
                                              cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, val_y[col],
                                              cfg.data.valid)

        model = factory.get_model(cfg).to(device)
        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        best_epoch = -1
        best_val_loss = np.inf
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []
        val_score_list = []
        initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion,
                                          optimizer, mb, cfg)
            valid_preds, avg_val_loss = val_epoch(model, valid_loader,
                                                  criterion, cfg)

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)

            if cfg.scheduler.name != 'ReduceLROnPlateau':
                scheduler.step()
            elif cfg.scheduler.name == 'ReduceLROnPlateau':
                scheduler.step(avg_val_loss)

            elapsed = time.time() - start_time
            mb.write(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} '
                     f'avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')
            logging.debug(
                f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} '
                f'avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s')

            if avg_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = avg_val_loss
                best_valid_preds = valid_preds
                if cfg.model.multi_gpu:
                    best_model = model.module.state_dict()
                else:
                    best_model = model.state_dict()

        print(f'epoch: {best_epoch} loss: {best_val_loss}')
        # store the sigmoid of this binary head's best validation predictions
        ordinal_val_preds[:, i] = 1 / (1 + np.exp(-1 * best_valid_preds))

        np.save(f'../logs/{run_name}/oof_{col}.npy', best_valid_preds)
        torch.save(best_model, f'../logs/{run_name}/weight_best_{col}.pt')

    # sum the per-threshold probabilities and decode targets back to labels
    valid_preds = np.sum(ordinal_val_preds, axis=1)
    val_y = (np.sum(val_y.values, axis=1) - 1).astype(int)

    optR = QWKOptimizedRounder()
    optR.fit(valid_preds.copy(), val_y, initial_coef)
    best_coef = optR.coefficients()
    valid_preds_class = optR.predict(valid_preds.copy(), best_coef)
    best_val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
    cm = confusion_matrix(val_y, valid_preds_class)

    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(cm)
    logging.debug(cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)

    return result
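# Worked example of the ordinal decoding assumed above (the exact column
# layout is an inference from `trn_y.columns[1:]` and `np.sum(...) - 1`):
# if grade 2 on a 0-4 scale is encoded as the cumulative row [1, 1, 1, 0, 0]
# (a leading always-on column plus one binary column per threshold), then
# np.sum(row) - 1 == 2 recovers the grade. Summing each head's sigmoid
# output likewise yields a continuous score near grade + 1, which
# QWKOptimizedRounder then thresholds back into a discrete class.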
def train(net, net_size, input_size, feature_dim, train_dataset, val_dataset,
          epochs, learning_rate, batch_size, save_path,
          pretrained_model=None):
    # create dataloader
    train_targets = [sampler[1] for sampler in train_dataset.imgs]
    weighted_sampler = ScheduledWeightedSampler(len(train_dataset),
                                                train_targets, 0.975, True)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=weighted_sampler,
                              drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # define model
    model = net(net_size, input_size, feature_dim).cuda()
    print_msg('Trainable layers: ',
              ['{}\t{}'.format(k, v) for k, v in model.layer_configs()])

    # load pretrained weights
    if pretrained_model:
        pretrained_dict = model.load_weights(pretrained_model,
                                             ['fc', 'dense'])
        print_msg('Loaded weights from {}: '.format(pretrained_model),
                  sorted(pretrained_dict.keys()))

    # define loss and optimizer
    MSELoss = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9,
                                nesterov=True,
                                weight_decay=0.0005)
    # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
    #                              weight_decay=0.0005)

    # learning rate warmup and decay
    milestones = [160, 230]
    warmup_epoch = 10
    # len(train_loader) is already the number of batches per epoch, and the
    # warmup scheduler steps once per batch, so the warmup spans exactly
    # warmup_epoch epochs
    warmup_batch = len(train_loader) * warmup_epoch
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1)
    warmup_scheduler = WarmupLRScheduler(optimizer, warmup_batch,
                                         learning_rate)

    # train
    max_kappa = 0
    record_epochs, accs, losses = [], [], []
    model.train()
    for epoch in range(1, epochs + 1):
        # resampling weight update
        weighted_sampler.step()

        # learning rate update
        lr_scheduler.step()
        if epoch in milestones:
            curr_lr = optimizer.param_groups[0]['lr']
            print_msg('Learning rate decayed to {}'.format(curr_lr))
        if epoch > 1 and epoch <= warmup_epoch:
            curr_lr = optimizer.param_groups[0]['lr']
            print_msg('Learning rate warmup to {}'.format(curr_lr))

        epoch_loss = 0
        correct = 0
        total = 0
        progress = tqdm(enumerate(train_loader))
        for step, train_data in progress:
            if epoch <= warmup_epoch:
                warmup_scheduler.step()

            X, y = train_data
            X, y = X.cuda(), y.float().cuda()

            # forward
            y_pred = model(X)
            loss = MSELoss(y_pred, y)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # metrics
            epoch_loss += loss.item()
            total += y.size(0)
            correct += accuracy(y_pred, y) * y.size(0)
            avg_loss = epoch_loss / (step + 1)
            avg_acc = correct / total
            progress.set_description(
                'epoch: {}, loss: {:.6f}, acc: {:.4f}'.format(
                    epoch, avg_loss, avg_acc))

        # save model
        # note: this snippet's _eval fills c_matrix in place and returns only
        # acc, and its quadratic_weighted_kappa takes a confusion matrix;
        # both differ from the variants defined earlier in this section
        c_matrix = np.zeros((5, 5), dtype=int)
        acc = _eval(model, val_loader, c_matrix)
        kappa = quadratic_weighted_kappa(c_matrix)
        print('validation accuracy: {}, kappa: {}'.format(acc, kappa))
        if kappa > max_kappa:
            torch.save(model, save_path)
            max_kappa = kappa
            print_msg('Model saved at {}'.format(save_path))

        # record
        record_epochs.append(epoch)
        accs.append(acc)
        losses.append(avg_loss)

    return record_epochs, accs, losses
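# `WarmupLRScheduler` is referenced above but not defined in this section.
# A minimal sketch of a per-batch linear warmup, assuming the constructor
# signature used above (optimizer, total warmup steps, target LR); the
# internals are an assumption:
class WarmupLRScheduler:
    def __init__(self, optimizer, warmup_batch, target_lr):
        self.optimizer = optimizer
        self.warmup_batch = warmup_batch
        self.target_lr = target_lr
        self.batch_count = 0

    def step(self):
        # ramp the LR linearly from ~0 to target_lr over warmup_batch steps
        if self.batch_count < self.warmup_batch:
            self.batch_count += 1
            lr = self.target_lr * self.batch_count / self.warmup_batch
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = lr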