def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()

    # criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss().cuda()
    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    resume = False

    all_files = pd.read_csv("./train.csv")
    # test_files = pd.read_csv("./sample_submission.csv")
    train_data_list, val_data_list = train_test_split(all_files, test_size=0.13, random_state=2050)

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)
    # test_gen = HumanDataset(test_files, config.test_data, augument=False, mode="test")
    # test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    start = timer()

    # train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f | %0.3f %0.4f | %s %s | %s' % (
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1],
            val_metrics[0], val_metrics[1],
            str(best_results[0])[:8], str(best_results[1])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)
def training(model, fold, log, train_image_names, train_image_labels, val_image_names, val_image_labels):
    # logging issues
    log.write("\n---------------------------- [START %s] %s\n\n" %
              (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))
    log.write('----------------------|--------- Train ---------|-------- Valid ---------|-------Best Results-------|----------|\n')
    log.write('mode   iter   epoch   |   loss    f1_macro      |   loss    f1_macro     |   loss    f1_macro       |   time   |\n')
    log.write('--------------------------------------------------------------------------------------------------------------\n')

    # training params
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_start, momentum=0.9,
                          weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))

    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer, step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(logging_pattern % (
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1],
            val_metrics[0], val_metrics[1],
            str(best_results[0])[:8], str(best_results[1])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)
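# A minimal sketch of how training() might be driven from a script entry point.
# get_net(), the log file location, and the .npy split files are assumptions made
# for illustration, not part of the routine above.
if __name__ == '__main__':
    model = get_net()
    model.cuda()
    log = open("./logs/%s.log" % config.model_name, "a")                 # assumed log target
    names = np.load("./input/train_names.npy", allow_pickle=True)        # assumed split file
    labels = np.load("./input/train_labels.npy", allow_pickle=True)      # assumed split file
    # simple hold-out split; the repo itself may use a stratified split instead
    n_val = int(0.1 * len(names))
    training(model, fold=0, log=log,
             train_image_names=names[n_val:], train_image_labels=labels[n_val:],
             val_image_names=names[:n_val], val_image_labels=labels[:n_val])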
def find_lr(init_value=1e-8, final_value=10., beta=0.98):
    # 1. load dataset
    all_files = pd.read_csv(config.CSV_TRAIN)
    train_data_list, _ = multilabel_stratification(all_files, test_size=0.2, random_state=42)
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=8)

    # 2. get the model, and set the optimizer and criterion
    model = get_net()
    model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=init_value, momentum=0.9, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss(opt_class_weight).cuda()

    # 3. set init values
    num = len(train_loader) - 1                        # num = samples_per_epoch / batch_size
    mult = (final_value / init_value) ** (1 / num)     # init_value * mult**num == final_value
    lr = init_value
    optimizer.param_groups[0]['lr'] = lr
    avg_loss = 0.
    best_loss = 0.
    batch_num = 0
    losses = []
    log_lrs = []
    best_lr = 111    # placeholder; overwritten on the first batch

    model.train()
    model.zero_grad()
    for i, (images, target) in enumerate(train_loader):
        batch_num += 1
        # 0. get the loss of this batch
        images = images.cuda(non_blocking=True)
        target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)
        output = model(images)
        loss = criterion(output, target)

        # 1. compute the smoothed loss
        avg_loss = beta * avg_loss + (1 - beta) * loss.item()
        smoothed_loss = avg_loss / (1 - beta ** batch_num)

        # 2. stop if the loss is exploding
        if batch_num > 1 and smoothed_loss > 4 * best_loss:
            return log_lrs, losses

        # 3. record the best loss
        if smoothed_loss < best_loss or batch_num == 1:
            best_loss = smoothed_loss
            best_lr = lr

        # 4. store the values
        losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))

        # 5. do the SGD step
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # 6. update the lr for the next step
        lr *= mult
        optimizer.param_groups[0]['lr'] = lr
        print('%d: factor:%.3f smoothed_loss:%f best_loss:%f lr:%f best_lr:%f' %
              (i, smoothed_loss / best_loss, smoothed_loss, best_loss, lr, best_lr))

    return log_lrs, losses
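# Usage sketch for the LR range test above: run it once, plot smoothed loss against
# log10(lr), and pick a rate roughly an order of magnitude below the loss minimum.
# The plotting code is an illustration added here, not part of the original script.
import matplotlib.pyplot as plt

log_lrs, losses = find_lr(init_value=1e-8, final_value=10., beta=0.98)

plt.plot(log_lrs, losses)
plt.xlabel('log10(learning rate)')
plt.ylabel('smoothed loss')
plt.savefig('lr_finder.png')

# heuristic: take a learning rate ~10x smaller than the one at the loss minimum
best_idx = losses.index(min(losses))
suggested_lr = (10 ** log_lrs[best_idx]) / 10.0
print('suggested lr: %g' % suggested_lr)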
def main():
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + 'fold_' + str(config.fold)):
        os.makedirs(config.weights + config.model_name + os.sep + 'fold_' + str(config.fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.logs):
        os.mkdir(config.logs)

    all_files = pd.read_csv("./input/train.csv")

    # -------------------------------------------------------
    # training
    # -------------------------------------------------------
    if config.mode == 'train':
        for fold in range(config.fold):
            # 4.2 get model
            model = get_net()
            model.cuda()

            # ================================================================== #
            #                          Loss criterion                            #
            # ================================================================== #
            # Use the optim package to define an Optimizer that will update the
            # weights of the model for us. Here we use Adam; the optim package
            # contains many other optimization algorithms. The first argument to
            # the Adam constructor tells the optimizer which tensors it should update.
            optimizer = optim.Adam(model.parameters(), lr=config.lr)
            # optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)

            assert config.loss in ['bcelog', 'f1_loss', 'focal_loss'], \
                "Loss type {0} is unknown".format(config.loss)
            if config.loss == 'bcelog':
                criterion = nn.BCEWithLogitsLoss().cuda()
            elif config.loss == 'f1_loss':
                criterion = F1_loss().cuda()
            elif config.loss == 'focal_loss':
                criterion = FocalLoss().cuda()

            # best_loss = 999
            # best_f1 = 0
            best_results = [np.inf, 0]
            val_metrics = [np.inf, 0]

            # k-fold --------------------------------
            # tflogger
            tflogger = TFLogger(os.path.join(
                'results', 'TFlogs',
                config.model_name + "_fold{0}_{1}".format(config.fold, fold)))

            with open(os.path.join("./input/fold_{0}".format(config.fold),
                                   'train_fold{0}_{1}.txt'.format(config.fold, fold)), 'r') as text_file:
                train_names = text_file.read().split('\n')
            # # oversample
            # s = Oversampling("./input/train.csv")
            # train_names = [idx for idx in train_names for _ in range(s.get(idx))]
            train_data_list = all_files[all_files['Id'].isin(train_names)]
            # train_data_list = all_files.copy().set_index('Id')
            # train_data_list = train_data_list.reindex(train_names)  # 57150 -> 29016
            # reset index
            # train_data_list = train_data_list.rename_axis('Id').reset_index()

            with open(os.path.join("./input/fold_{0}".format(config.fold),
                                   'test_fold{0}_{1}.txt'.format(config.fold, fold)), 'r') as text_file:
                val_names = text_file.read().split('\n')
            val_data_list = all_files[all_files['Id'].isin(val_names)]

            # load dataset
            train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
            train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True,
                                      pin_memory=True, num_workers=4)
            val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
            val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False,
                                    pin_memory=True, num_workers=4)

            # initialize the early_stopping object
            early_stopping = EarlyStopping(patience=7, verbose=True)

            if config.resume:
                log.write('\tinitial_checkpoint = %s\n' % config.initial_checkpoint)
                checkpoint_path = os.path.join(config.weights, config.model_name, str(config.fold),
                                               config.initial_checkpoint, 'checkpoint.pth.tar')
                loaded_model = torch.load(checkpoint_path)
                model.load_state_dict(loaded_model["state_dict"])
                start_epoch = loaded_model["epoch"]
            else:
                start_epoch = 0

            scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
            start = timer()

            # train
            for epoch in range(start_epoch, config.epochs):
                scheduler.step(epoch)
                # train
                lr = get_learning_rate(optimizer)
                train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                                      val_metrics, best_results, start, config.threshold)
                # val
                val_metrics = evaluate(val_loader, model, criterion, epoch,
                                       train_metrics, best_results, start, config.threshold)
                # check results
                is_best_loss = val_metrics[0] < best_results[0]
                best_results[0] = min(val_metrics[0], best_results[0])
                is_best_f1 = val_metrics[1] > best_results[1]
                best_results[1] = max(val_metrics[1], best_results[1])

                # save model
                save_checkpoint({
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_loss": best_results[0],
                    "optimizer": optimizer.state_dict(),
                    "fold": config.fold,
                    "kfold": fold,
                    "best_f1": best_results[1],
                }, is_best_loss, is_best_f1, config.fold, fold)

                # print logs
                print('\r', end='', flush=True)
                log.write('%s %5.1f %6.1f %.2E| %0.3f %0.3f | %0.3f %0.4f | %s %s | %s |%s ' % (
                    "best", epoch, epoch, Decimal(lr),
                    train_metrics[0], train_metrics[1],
                    val_metrics[0], val_metrics[1],
                    str(best_results[0])[:8], str(best_results[1])[:8],
                    time_to_str((timer() - start), 'min'), fold))
                log.write("\n")
                time.sleep(0.01)

                # ================================================================== #
                #                        Tensorboard Logging                         #
                # ================================================================== #
                # 1. Log scalar values (scalar summary)
                info = {'Train_loss': train_metrics[0], 'Train_F1_macro': train_metrics[1],
                        'Valid_loss': val_metrics[0], 'Valid_F1_macro': val_metrics[1],
                        'Learning_rate': lr}
                for tag, value in info.items():
                    tflogger.scalar_summary(tag, value, epoch)
                # 2. Log values and gradients of the parameters (histogram summary)
                for tag, value in model.named_parameters():
                    tag = tag.replace('.', '/')
                    tflogger.histo_summary(tag, value.data.cpu().numpy(), epoch)
                    tflogger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), epoch)
                # end tflogger

                # ================================================================== #
                #                          Early stopping                            #
                # ================================================================== #
                # early_stopping needs the validation metric to check if it has decreased,
                # and if it has, it will make a checkpoint of the current model
                early_stopping(val_metrics[1], model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break
        # ========================================================== #
        #                       End of k-fold                        #
        # ========================================================== #

    # -------------------------------------------------------
    # testing
    # -------------------------------------------------------
    elif config.mode == 'test':
        test_files = pd.read_csv("./input/sample_submission.csv")
        test_gen = HumanDataset(test_files, config.test_data, augument=False, mode="test")
        test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=4)

        # build the model and load the checkpoint for the requested fold
        model = get_net()
        model.cuda()
        # checkpoint_path = os.path.join(config.best_models,
        #     '{0}_fold_{1}_model_best_loss.pth.tar'.format(config.model_name, config.fold))
        checkpoint_path = os.path.join(config.weights, config.model_name,
                                       'fold_{0}'.format(config.fold),
                                       'checkpoint_{}.pth.tar'.format(config.checkpoint))
        best_model = torch.load(checkpoint_path)
        # best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
        model.load_state_dict(best_model["state_dict"])

        thresholds = [-0.13432257, -0.4642075, -0.50726506, -0.49715518, -0.41125674, 0.11581507,
                      -1.0143597, -0.18461785, -0.61600877, -0.47275479, -0.9142859, -0.44323673,
                      -0.58404387, -0.22959213, -0.26110631, -0.43723898, -0.97624685, -0.44612319,
                      -0.4492785, -0.56681327, -0.16156543, -0.12577745, -0.75476121, -0.91473052,
                      -0.53361931, -0.19337344, -0.0857145, -0.45739976]
        # thresholds = [-0.27631527, -0.31156957, -0.61893745, -1.01863398, -0.3141709, -0.14000374,
        #               -0.6285302, -0.43241383, -1.60594984, -0.14425374, -0.03979607, -0.25717957,
        #               -0.84905692, -0.37668712, 1.3710663, -0.11193908, -0.81109447, 0.72506607,
        #               -0.05454339, -0.47056617, -0.16024197, -0.44002794, -0.65929407, -1.00900269,
        #               -0.86197429, -0.12346229, -0.4946575, -0.52420557]
        test(test_loader, model, thresholds)
        print('Test successful!')
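# The test() call above receives one raw-logit threshold per class. A minimal sketch of how
# such per-class thresholds are typically applied to model outputs; the helper name and the
# fallback rule are assumptions for illustration, not the repo's own test() implementation.
import numpy as np

def apply_thresholds(logits, thresholds):
    # logits: (N, 28) raw model outputs; thresholds: 28 per-class cut-offs in logit space
    thresholds = np.asarray(thresholds)
    preds = logits > thresholds[None, :]
    labels = []
    for row_logits, row_preds in zip(logits, preds):
        idx = np.nonzero(row_preds)[0]
        if len(idx) == 0:
            # fall back to the single most confident class if nothing crosses its threshold
            idx = [int(np.argmax(row_logits - thresholds))]
        labels.append(' '.join(str(i) for i in idx))
    return labels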
def main():
    fold = 8
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()

    # criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)  # , nesterov=True)
    criterion = nn.BCEWithLogitsLoss().cuda()
    # criterion = FocalLoss().cuda()
    # criterion = F1Loss().cuda()
    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    resume = False

    # all_files = pd.read_csv("./train.csv")
    train_df = pd.read_csv("./train_appended2.csv")
    train_df_orig = pd.read_csv("./total_train.csv")
    # disabled oversampling of low-frequency classes, kept for reference:
    """
    print(type(train_df_orig))
    lows = [15, 15, 15, 8, 9, 10, 8, 9, 10, 8, 9, 10, 17, 20, 24, 26, 15, 27, 15, 20, 24, 17, 8, 15, 27, 27, 27]
    for i in lows:
        target = str(i)
        indicies = train_df_orig.loc[train_df_orig['Target'] == target].index
        train_df = pd.concat([train_df, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.startswith(target + " ")].index
        train_df = pd.concat([train_df, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.endswith(" " + target)].index
        train_df = pd.concat([train_df, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.contains(" " + target + " ")].index
        train_df = pd.concat([train_df, train_df_orig.loc[indicies]], ignore_index=True)
    # print(train_df)
    # input()
    """

    test_files = pd.read_csv("./sample_submission.csv")
    train_data_list, val_data_list = train_test_split(train_df, test_size=0.13, random_state=2050)
    train_data_list_fake, val_data_list_fake = train_test_split(train_df_orig, test_size=0.01, random_state=2050)

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)
    test_gen = HumanDataset(test_files, config.test_data, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    start = timer()

    # train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f | %0.3f %0.4f | %s %s | %s' % (
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1],
            val_metrics[0], val_metrics[1],
            str(best_results[0])[:8], str(best_results[1])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)
    model.load_state_dict(torch.load(
        'checkpoints/best_models/%s_fold_%d_model_best_f1.pth.tar' %
        (config.model_name, fold))['state_dict'])
    model.cuda()
    criterion = nn.BCEWithLogitsLoss().cuda()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2, min_lr=1e-5)

    train_gen = HumanDataset(X_train, y_train, augment=True)
    val_gen = HumanDataset(X_val, y_val, augment=False)
    train_loader = torch.utils.data.DataLoader(train_gen, batch_size=config.batch_size,
                                               shuffle=True, num_workers=6, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_gen, batch_size=config.batch_size,
                                             num_workers=6, pin_memory=True)
    # allPred = featExt(val_loader, model)
    break
    # train_data_list
    train_data_list = train_data_list.reindex(train_names)  # 57150 -> 29016
    # reset index
    train_data_list = train_data_list.rename_axis('Id').reset_index()

    with open(os.path.join("./input/protein-trainval-split", 'val_names.txt'), 'r') as text_file:
        val_names = text_file.read().split(',')
    val_data_list = all_files[all_files['Id'].isin(val_names)]

    # 4.2 get model
    model = get_net()
    model.cuda()
    fold = 0

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    # checkpoint_path = os.path.join(config.best_models,
    #     '{0}_fold_{1}_model_best_loss.pth.tar'.format(config.model_name, fold))
    checkpoint_path = os.path.join(config.weights, config.model_name, 'fold_{0}'.format(fold),
                                   'checkpoint_{}.pth.tar'.format(config.checkpoint))
    best_model = torch.load(checkpoint_path)
    # best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
    model.load_state_dict(best_model["state_dict"])
    preds, y = validate(val_loader, model)
def main():
    fold = config.fold
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()

    if config.is_train_after_crash:
        best_model_name = (config.weights + config.model_name + os.sep +
                           str(fold - 10) + os.sep + "checkpoint.pth.tar")
        best_model = torch.load(best_model_name)
        print(best_model_name)
        model.load_state_dict(best_model["state_dict"])
        best_results = [np.inf, 0]
        val_metrics = [np.inf, 0]
        best_results[0] = best_model["best_loss"]
        best_results[1] = best_model["best_f1"]
    else:
        best_results = [np.inf, 0]
        val_metrics = [np.inf, 0]
    print(best_results)

    train_files = pd.read_csv(config.train_csv)
    external_files = pd.read_csv(config.external_csv)
    test_files = pd.read_csv(config.test_csv)
    all_files, test_files, weight_log = process_df(train_files, external_files, test_files)
    # train_data_list, val_data_list = train_test_split(all_files, test_size=0.13, random_state=2050)
    train_data_list, val_data_list = tra_val_split(all_files)
    print(len(all_files))
    print(len(train_data_list))
    print(len(val_data_list))
    # train_data_list = train_data_list.iloc[np.arange(10000)]
    # val_data_list = val_data_list.iloc[np.arange(1000)]

    # load dataset
    train_gen = HumanDataset(train_data_list, mode="train")
    sampler = WeightedRandomSampler(train_data_list['freq'].values,
                                    num_samples=int(len(train_data_list) * config.multiply),
                                    replacement=True)
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, drop_last=True,
                              sampler=sampler, pin_memory=True, num_workers=6)
    # train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=6)
    val_gen = HumanDataset(val_data_list, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, drop_last=True, shuffle=False,
                            pin_memory=True, num_workers=6)
    test_gen = HumanDataset(test_files, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=6)
    search_gen = HumanDataset(val_data_list, augument=False, mode="train")
    search_loader = DataLoader(search_gen, batch_size=config.batch_size * 4, drop_last=False,
                               shuffle=False, pin_memory=True, num_workers=6)

    # optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=1e-4, amsgrad=True)
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss().cuda()
    # criterion = nn.BCEWithLogitsLoss(torch.from_numpy(process_loss_weight(weight_log)).float()).cuda()
    # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs, eta_min=4e-8)
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=0, threshold=1e-3)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[6, 13, 20], gamma=0.1)
    start = timer()

    # train
    if config.is_train:
        for epoch in range(0, config.epochs):
            scheduler.step(epoch)
            # train
            lr = get_learning_rate(optimizer)
            train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                                  val_metrics, best_results, start, lr)
            # val
            val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
            # check results
            is_best_loss = val_metrics[0] < best_results[0]
            best_results[0] = min(val_metrics[0], best_results[0])
            is_best_f1 = val_metrics[1] > best_results[1]
            best_results[1] = max(val_metrics[1], best_results[1])
            # scheduler.step(val_metrics[0])
            # save model
            save_checkpoint({
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_loss": best_results[0],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[1],
            }, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s %5.1f %6.1f | %0.3f %0.3f | %0.3f %0.4f | %s %s | %s' % (
                "best", epoch + 1, epoch + 1,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')))
            log.write("\n")
            time.sleep(0.01)

    if config.is_search_thres:
        best_model_name = "%s/%s_fold_%s_model_best_%s.pth.tar" % (
            config.best_models, config.model_name, str(fold), config.best)
        # best_model_name = config.weights + config.model_name + os.sep + str(fold) + os.sep + "checkpoint.pth.tar"
        print(best_model_name)
        best_model = torch.load(best_model_name)
        model.load_state_dict(best_model["state_dict"])
        search_thresholds(search_loader, model)

    if config.is_test:
        knums = config.threshold_factor
        for knum in knums:
            for f in range(5):
                best_model_name = "%s/%s_fold_%s_model_best_%s.pth.tar" % (
                    config.best_models, config.model_name, str(fold + f), config.best)
                # best_model_name = config.weights + config.model_name + os.sep + str(fold) + os.sep + "checkpoint.pth.tar"
                print(best_model_name)
                best_model = torch.load(best_model_name)
                model.load_state_dict(best_model["state_dict"])
                test(test_loader, model, (fold + f), knum)
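# The WeightedRandomSampler above reads per-sample weights from a 'freq' column that
# process_df() is expected to produce. A hedged sketch of one common way to derive such
# weights (inverse frequency of a sample's rarest label); the helper name and the exact
# weighting rule are assumptions, not the repo's process_df() implementation.
import numpy as np
import pandas as pd

def add_freq_column(df, n_classes=28):
    # weight each sample by the inverse frequency of its rarest label, so the sampler
    # draws images containing rare classes more often
    counts = np.zeros(n_classes)
    targets = df['Target'].apply(lambda s: [int(t) for t in s.split(' ')])
    for labels in targets:
        for c in labels:
            counts[c] += 1
    df = df.copy()
    df['freq'] = targets.apply(lambda labels: 1.0 / min(counts[c] for c in labels))
    return df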
def main():
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # 4.2 get model
    model = get_net()
    model.cuda()
    # load old weight trained model
    # model.load_state_dict(torch.load("{}/{}_fold_{}_model_best_loss.pth.tar".format(
    #     config.best_models, config.model_name, str(fold)))["state_dict"])

    start_epoch = 0
    best_loss = 999
    best_f1 = 0
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    resume = False

    # get train data; this data includes external data
    df1 = pd.read_csv(config.train_kaggle_csv)
    df2 = pd.read_csv(config.train_external_csv)
    all_files = pd.concat([df1, df2])

    # create duplicates for low-frequency classes
    # https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/74374#437548
    train_df_orig = all_files.copy()
    lows = [15, 15, 15, 8, 9, 10, 8, 9, 10, 8, 9, 10, 17, 20, 24, 26, 15, 27, 15, 20, 24, 17, 8, 15, 27, 27, 27]
    for i in lows:
        target = str(i)
        indicies = train_df_orig.loc[train_df_orig['Target'] == target].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.startswith(target + " ")].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.endswith(" " + target)].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]], ignore_index=True)
        indicies = train_df_orig.loc[train_df_orig['Target'].str.contains(" " + target + " ")].index
        all_files = pd.concat([all_files, train_df_orig.loc[indicies]], ignore_index=True)
    del df1, df2, train_df_orig
    gc.collect()

    # compute class weight
    target = all_files.apply(lambda x: x['Target'].split(' '), axis=1)
    y = target.tolist()
    y = MultiLabelBinarizer().fit_transform(y)
    labels_dict = dict()
    count_classes = np.sum(y, axis=0)
    for i, count in enumerate(count_classes):
        labels_dict[i] = count
    del target, y
    gc.collect()
    dampened_cw = create_class_weight(labels_dict)[1]
    tmp = list(dampened_cw.values())
    class_weight = torch.FloatTensor(tmp).cuda()

    # criterion
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    criterion = nn.BCEWithLogitsLoss(weight=class_weight).cuda()

    # print(all_files)
    test_files = pd.read_csv(config.sample_submission)
    train_data_list, val_data_list = train_test_split(all_files, test_size=0.13, random_state=2050)

    # load dataset
    train_gen = HumanDataset(train_data_list, config.train_data, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_data_list, config.train_data, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=4)
    test_gen = HumanDataset(test_files, config.test_data, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=4)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    start = timer()

    # train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        lr = get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])
        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f | %0.3f %0.4f | %s %s | %s' % (
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1],
            val_metrics[0], val_metrics[1],
            str(best_results[0])[:8], str(best_results[1])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)

    best_model = torch.load("{}/{}_fold_{}_model_best_loss.pth.tar".format(
        config.best_models, config.model_name, str(fold)))
    # best_model = torch.load("checkpoints/bninception_bcelog/0/checkpoint.pth.tar")
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
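# The script above relies on create_class_weight() for the dampened weights passed to
# BCEWithLogitsLoss, but its definition is not shown here. A minimal sketch of a common
# log-dampened formulation, given as an assumption about what it might look like; it
# matches the create_class_weight(labels_dict)[1] usage above.
import math

def create_class_weight(labels_dict, mu=0.5):
    # returns (raw_counts, dampened_weights); mu controls how strongly the weights are dampened
    total = float(sum(labels_dict.values()))
    dampened = {}
    for cls, count in labels_dict.items():
        score = math.log(mu * total / float(count))
        dampened[cls] = max(score, 1.0)  # never down-weight a class below 1.0
    return labels_dict, dampened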