def main(fold):
    """Run single-model inference on ./test.csv with one fold's checkpoint.

    Loads the "best loss" checkpoint for *fold*, restores the weights into a
    fresh MultiModalNet, and hands the test DataLoader to ``test``.

    NOTE(review): a later ``def main()`` in this file rebinds the name
    ``main`` at import time -- rename one of the two if both entry points
    are meant to be callable.

    Args:
        fold: fold index used to build the checkpoint filename.
    """
    mdl_path = "%s/%s_fold_%s_model_best_loss.pth.tar" % (
        config.best_models, config.model_name, str(fold))
    args = get_args(mdl_path)
    model = MultiModalNet("se_resnext101_32x4d", "dpn26", 0.5)
    # Fix: map_location keeps the load working on hosts without the device
    # the checkpoint was saved from (e.g. CPU-only); the model is moved to
    # `device` right afterwards anyway.
    model_dict = torch.load(args.model_path, map_location=device)
    model.load_state_dict(model_dict['state_dict'])
    model.to(device)
    model.eval()
    test_files = pd.read_csv("./test.csv")
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis,
                                 augument=False, mode="test")
    # batch_size=1; num_workers kept low (shared-memory limits in Docker).
    test_loader = DataLoader(test_gen, 1, shuffle=False,
                             pin_memory=True, num_workers=1)
    test(test_loader, model, fold)
# Fragment: builds a second model for ensembling.  NOTE(review): ``model1``
# is not defined in this chunk -- presumably constructed just above; confirm.
model2 = MultiModalNet('se_resnext101_32x4d', 'DPN26', 0.5)
checkpoint2 = torch.load(
    'checkpoints/se_resnext101_32x4d_fold_0_checkpoint.pth')
# Strip the "module." prefix that nn.DataParallel prepends to every state
# dict key, so the weights load into the bare (unwrapped) model2.
new_state_dict = OrderedDict()
for k, v in checkpoint2['state_dict'].items():
    name = k[7:]  # remove module.
    new_state_dict[name] = v
model2.load_state_dict(new_state_dict)
# if torch.cuda.device_count() > 1:
# NOTE(review): the device-count guard above is commented out, so both
# models are DataParallel-wrapped unconditionally -- confirm intended.
model1 = nn.DataParallel(model1)
model2 = nn.DataParallel(model2)
model1.to(device)
model2.to(device)
model1.eval()
model2.eval()
# Let cuDNN auto-tune kernels for the fixed input shapes used at inference.
torch.backends.cudnn.benchmark = True
test_files = pd.read_csv("./test.csv")
test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis,
                             augument=False, mode="test")
test_loader = DataLoader(test_gen, batch_size=1, shuffle=False,
                         pin_memory=True, num_workers=1)
def main():
    """Driver for the whole pipeline, gated by ``config`` flags.

    Phases (each independently enabled):
      * setup    -- create submit/weights/best-model/log directories;
      * OOF      -- export out-of-fold softmax probabilities (config.OOF);
      * training -- stratified K-fold (config.train and config.FOLD > 1) or
                    a single 90/10 split (config.FOLD == 1);
      * predict  -- 5-fold checkpoint ensemble on the test set.

    Relies on module-level globals defined elsewhere in this file: config,
    log, device, train, evaluate, test, save_checkpoint, Nadam, etc.

    NOTE(review): this rebinds ``main`` over the earlier ``main(fold)``
    definition -- whichever ``def main`` runs last wins the name.
    """
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    for fold in range(config.FOLD):
        if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
            os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    # with open('../data/train_lgb.pkl', 'rb') as f:
    #     magic_trains = pickle.load(f)
    # with open('../data/test_lgb.pkl', 'rb') as f:
    #     magic_tests = pickle.load(f)
    # resume = False
    # if resume:
    #     checkpoint = torch.load(r'./checkpoints/best_models/seresnext101_dpn92_defrog_multimodal_fold_0_model_best_loss.pth.tar')
    #     best_acc = checkpoint['best_acc']
    #     best_loss = checkpoint['best_loss']
    #     best_f1 = checkpoint['best_f1']
    #     start_epoch = checkpoint['epoch']
    start = timer()  # wall-clock reference for the progress logs below
    # from torchsummary import summary
    # print(summary(model, [(3, 100, 100), (7*26, 24)]))
    all_files = pd.read_csv("../data/train.csv")
    # fixed-seed shuffle so the row order (and thus fold membership) is
    # reproducible across runs
    all_files = all_files.sample(frac=1, random_state=666)
    test_files = pd.read_csv("../data/test.csv")
    max_epoch = config.epochs
    if config.debug:
        # smoke-test mode: tiny subsets, batch of 2, single epoch
        all_files = all_files.iloc[:1000]
        test_files = test_files.iloc[:100]
        config.batch_size = 2
        max_epoch = 1
    train_label = np.array(all_files['Target'])
    if config.OOF:
        # Export out-of-fold probabilities (9 classes) from each fold's
        # "best acc" checkpoint into ../data/oof2.pkl for stacking.
        result = np.zeros((len(all_files), 9))
        # print(result.shape)
        # NOTE(review): recent scikit-learn raises when random_state is set
        # together with shuffle=False -- confirm the pinned sklearn version.
        skf = StratifiedKFold(n_splits=config.FOLD, random_state=2019, shuffle=False)
        for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            val_data_list = all_files.iloc[val_idx]
            # load dataset
            val_gen = MultiModalDataset(val_data_list, config.train_data,
                                        config.train_vis, augument=False, mode="train")
            val_loader = DataLoader(val_gen, batch_size=config.batch_size,
                                    shuffle=False, pin_memory=True, num_workers=1)
            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" % (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            # state-dict keys must match the (possibly DataParallel-wrapped)
            # module, hence loading after the wrapping above
            model.load_state_dict(best_model["state_dict"])
            result_oof = []
            with torch.no_grad():
                for i, (images, (visit, ), target) in tqdm(enumerate(val_loader)):
                    image_var = images.to(device)
                    # print(image_var.shape)
                    # magic = magic.to(device)
                    visit = visit.to(device)
                    indx_target = target.clone()
                    target = torch.from_numpy(
                        np.array(target)).float().to(device)
                    # NOTE(review): F.softmax without dim= is deprecated --
                    # confirm the intended axis is the class dimension.
                    y_oof = np.array(
                        F.softmax(model(image_var, visit)).cpu().data.numpy())
                    # print(y_oof.shape)
                    result_oof.extend(y_oof)
            result_oof = np.array(result_oof)
            print(len(val_idx), result_oof.shape)
            # scatter this fold's probabilities into the full OOF matrix
            result[val_idx] = result_oof
            print(result.shape)
        with open("../data/oof2.pkl", 'wb') as f:
            pickle.dump(result, f)
    if config.train and config.FOLD > 1:
        # --- K-fold training: one model trained per stratified fold ---
        # train_data_list,val_data_list = train_test_split(all_files, test_size=0.1, random_state = 2050)
        skf = StratifiedKFold(n_splits=config.FOLD, random_state=2019, shuffle=False)
        for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            train_data_list = all_files.iloc[train_idx]
            val_data_list = all_files.iloc[val_idx]
            # train_magic = magic_trains.iloc[train_idx]
            # val_magic = magic_trains.iloc[val_idx]
            # load dataset
            train_gen = MultiModalDataset(train_data_list, config.train_data,
                                          config.train_vis, mode="train")
            train_loader = DataLoader(
                train_gen, batch_size=config.batch_size, shuffle=True,
                pin_memory=True, num_workers=1
            )  #num_worker is limited by shared memory in Docker!
            val_gen = MultiModalDataset(val_data_list, config.train_data,
                                        config.train_vis, augument=False, mode="train")
            val_loader = DataLoader(val_gen, batch_size=config.batch_size,
                                    shuffle=False, pin_memory=True, num_workers=1)
            start_epoch = 0
            best_acc = 0
            best_loss = np.inf
            best_f1 = 0
            # metric triple layout everywhere below: [accuracy, loss, f1]
            best_results = [0, np.inf, 0]
            val_metrics = [0, np.inf, 0]
            #model
            # 4.2 get model
            model = MultiModalNet(drop=0.5)
            if fold == 0:
                # report parameter counts once, on the first fold only
                total_num = sum(p.numel() for p in model.parameters())
                trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print('Total', total_num, 'Trainable', trainable_num)
            # 4.3 optim & criterion
            optimizer = Nadam(model.parameters(), lr=5e-4)  #torch.optim.Adamax(model.parameters(), 0.001)
            # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
            criterion = nn.CrossEntropyLoss().to(device)
            # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
            # halve the learning rate at epochs 6, 12 and 18
            scheduler = lr_scheduler.MultiStepLR(optimizer, [6, 12, 18], gamma=0.5)
            # lr_scheduler.ReduceLROnPlateau(optimizer)
            # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
            # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
            # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            #train
            best_acc_epoch = 0
            for epoch in range(0, max_epoch):
                # early stop after >5 epochs without a val-accuracy best
                if epoch - best_acc_epoch > 5:
                    break
                # NOTE(review): scheduler stepped before the epoch's
                # optimizer steps -- the pre-1.1 PyTorch convention; confirm
                # the installed torch version expects this order.
                scheduler.step(epoch)
                # train
                # train_metrics = None
                train_metrics = train(train_loader, model, criterion,
                                      optimizer, epoch, val_metrics, best_results, start)
                # val
                val_metrics = evaluate(val_loader, model, criterion, epoch,
                                       train_metrics, best_results, start)
                # check results
                is_best_acc = val_metrics[0] > best_results[0]
                if is_best_acc:
                    best_acc_epoch = epoch
                best_results[0] = max(val_metrics[0], best_results[0])
                is_best_loss = val_metrics[1] < best_results[1]
                best_results[1] = min(val_metrics[1], best_results[1])
                is_best_f1 = val_metrics[2] > best_results[2]
                best_results[2] = max(val_metrics[2], best_results[2])
                # save model
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "model_name": config.model_name,
                        "state_dict": model.state_dict(),
                        "best_acc": best_results[0],
                        "best_loss": best_results[1],
                        "optimizer": optimizer.state_dict(),
                        "fold": fold,
                        "best_f1": best_results[2],
                    }, is_best_acc, is_best_loss, is_best_f1, fold)
                # print logs
                print('\r', end='', flush=True)
                log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (
                    "best", epoch, epoch,
                    train_metrics[0], train_metrics[1], train_metrics[2],
                    val_metrics[0], val_metrics[1], val_metrics[2],
                    str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
                    time_to_str((timer() - start), 'min'))
                )
                log.write("\n")
                time.sleep(0.01)
    if config.train and config.FOLD == 1:
        # --- single 90/10 split training path ---
        # NOTE(review): ``magic_trains`` is only loaded in the commented-out
        # block near the top of this function, so executing this branch
        # as-is raises NameError; the ``fold`` passed to save_checkpoint
        # below is whatever the mkdir loop left behind.  Confirm whether
        # this branch is still meant to run.
        train_data_list, val_data_list, train_magic, val_magic = train_test_split(
            all_files, magic_trains, test_size=0.1, random_state=2050)
        # skf = StratifiedKFold(n_splits=config.FOLD, random_state=2019, shuffle=False)
        # for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
        #     print('fold:', fold)
        #     train_data_list = all_files.iloc[train_idx]
        #     val_data_list = all_files.iloc[val_idx]
        # load dataset
        train_gen = MultiModalDataset(train_data_list, train_magic,
                                      config.train_data, config.train_vis, mode="train")
        train_loader = DataLoader(
            train_gen, batch_size=config.batch_size, shuffle=True,
            pin_memory=True, num_workers=1)  #num_worker is limited by shared memory in Docker!
        val_gen = MultiModalDataset(val_data_list, val_magic, config.train_data,
                                    config.train_vis, augument=False, mode="train")
        val_loader = DataLoader(val_gen, batch_size=config.batch_size,
                                shuffle=False, pin_memory=True, num_workers=1)
        start_epoch = 0
        best_acc = 0
        best_loss = np.inf
        best_f1 = 0
        # metric triple layout: [accuracy, loss, f1]
        best_results = [0, np.inf, 0]
        val_metrics = [0, np.inf, 0]
        #model
        # 4.2 get model
        model = MultiModalNet(drop=0.5)
        # 4.3 optim & criterion
        optimizer = torch.optim.Adamax(model.parameters(), 0.001)
        # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
        criterion = nn.CrossEntropyLoss().to(device)
        # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
        # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
        # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
        # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)
        #train
        best_acc_epoch = 0
        for epoch in range(0, max_epoch):
            # early stop after >5 epochs without a val-accuracy best
            if epoch - best_acc_epoch > 5:
                break
            # NOTE(review): ReduceLROnPlateau.step() expects a quantity to
            # monitor (e.g. validation loss), not the epoch index -- as
            # written the "plateau" is tested on a monotonically increasing
            # value, so the LR will never be reduced as intended.
            scheduler.step(epoch)
            # train
            # train_metrics = None
            train_metrics = train(train_loader, model, criterion,
                                  optimizer, epoch, val_metrics, best_results, start)
            # val
            val_metrics = evaluate(val_loader, model, criterion, epoch,
                                   train_metrics, best_results, start)
            # check results
            is_best_acc = val_metrics[0] > best_results[0]
            if is_best_acc:
                best_acc_epoch = epoch
            best_results[0] = max(val_metrics[0], best_results[0])
            is_best_loss = val_metrics[1] < best_results[1]
            best_results[1] = min(val_metrics[1], best_results[1])
            is_best_f1 = val_metrics[2] > best_results[2]
            best_results[2] = max(val_metrics[2], best_results[2])
            # save model
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_acc": best_results[0],
                    "best_loss": best_results[1],
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "best_f1": best_results[2],
                }, is_best_acc, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1], train_metrics[2],
                val_metrics[0], val_metrics[1], val_metrics[2],
                str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
                time_to_str((timer() - start), 'min'))
            )
            log.write("\n")
            time.sleep(0.01)
    if config.predict:
        # test data
        # --- ensemble prediction: load each fold's "best acc" checkpoint ---
        models = []
        for fold in range(5):
            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" % (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            model.load_state_dict(best_model["state_dict"])
            models.append(model)
        # TTA=True: dataset yields test-time-augmented views for averaging
        test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis,
                                     augument=False, mode="test", TTA=True)
        test_loader = DataLoader(test_gen, batch_size=config.batch_size,
                                 shuffle=False, pin_memory=True, num_workers=1)
        # predict
        test(test_loader, models)