"Valid CE: {:.4f} ".format(val_bce) + "Valid LWLRAP: {:.4f} ".format(val_lwlrap) + "sec: {:.1f}".format(endtime)) # save log and weights train_log_epoch = pd.DataFrame([[ epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, val_bce, val_lwlrap, endtime ]], columns=log_columns) train_log = pd.concat([train_log, train_log_epoch]) train_log.to_csv("{}/train_log_fold{}.csv".format( OUTPUT_DIR, fold + 1), index=False) if val_lwlrap > min_val_lwlrap: min_val_lwlrap = val_lwlrap trigger = 0 torch.save( model.state_dict(), "{}/weight_fold_{}_best.pth".format(OUTPUT_DIR, fold + 1, epoch + 1)) if (epoch + 1) % NUM_CYCLE == 0: torch.save( model.state_dict(), "{}/weight_fold_{}_epoch_{}.pth".format( OUTPUT_DIR, fold + 1, epoch + 1)) if trigger > 100: break trigger += 1
def main():
    """Train one ResNet per CV fold on the curated set, mixing in noisy data.

    Reads the curated/noisy/test tables, builds one boolean column per class,
    splits the curated data with KFold, and for each selected fold trains for
    NUM_EPOCH epochs with cosine-annealed Adam, logging metrics to CSV and
    checkpointing at the end of every cosine cycle.

    Relies on module-level globals: NUM_FOLD, SEED, FOLD_LIST, NUM_CLASS,
    CROP_LENGTH, BATCH_SIZE, LR, NUM_CYCLE, NUM_EPOCH, OUTPUT_DIR, and
    `starttime` (wall-clock reference for the per-epoch timing column).
    """
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")

    # Multi-hot label columns: 'labels' is a delimited string, so substring
    # containment marks class membership.
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    # BUG FIX: test paths were built from df_train['fname'];
    # use the test table's own filenames.
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # fold splitting
    folds = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_train))))

    # Training
    log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy',
                   'val_bce', 'val_lwlrap', 'time']
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(df_train_fold['path'],
                                   df_train_fold[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='random',
                                   mixup=True,
                                   freqmask=True,
                                   gain=True,
                                   )
        train_loader = DataLoader(dataset_train,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=1,
                                  pin_memory=True,
                                  )

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(df_valid['path'], df_valid[labels].values,)
        # batch_size=1 so variable-length clips need no padding at eval time
        valid_loader = DataLoader(dataset_valid,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=1,
                                  pin_memory=True,
                                  )

        dataset_noisy = MelDataset(df_noisy['path'],
                                   df_noisy[labels].values,
                                   crop=CROP_LENGTH,
                                   crop_mode='random',
                                   mixup=True,
                                   freqmask=True,
                                   gain=True,
                                   )
        noisy_loader = DataLoader(dataset_noisy,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=1,
                                  pin_memory=True,
                                  )
        # endless iterator so the noisy stream never exhausts mid-epoch
        noisy_itr = cycle(noisy_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()), lr=LR[0])
        scheduler = CosineLR(optimizer, step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE, tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy = train(
                (train_loader, noisy_itr), model, optimizer, scheduler, epoch)
            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy,
                  val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(
                OUTPUT_DIR, fold + 1), index=False)
            # checkpoint at the end of every cosine-annealing cycle
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(model.state_dict(),
                           "{}/weight_fold_{}_epoch_{}.pth".format(
                               OUTPUT_DIR, fold + 1, epoch + 1))
# params=model.parameters(), # lr=0.1, # momentum=0.9, # weight_decay=5e-4, # nesterov=True # ) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[6, 12, 16], gamma=0.2) print("Start training") start_time = time.time() for epoch in range(epochs): train_one_epoch(model, criterion, optimizer, train_loader, device, epoch) lr_scheduler.step() evaluate(model, criterion, valid_loader, device=device) checkpoint = { 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), 'epoch': epoch } torch.save(checkpoint, os.path.join(output_dir, 'model_{}.pth'.format(epoch))) torch.save(checkpoint, os.path.join(output_dir, 'checkpoint.pth')) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) print('Training time {}'.format(total_time_str))
# Orphan fragment of an extractor training loop: `outputs`, `label`,
# `batch_size`, `it`, `args`, `loss_record`, `acc_record`, `best_acc`,
# `model`, `hash_bit`, `criterion`, `optimizer` and `osp` are defined
# earlier, outside this view; `break` targets an enclosing iteration loop.
oo = outputs.data.cpu().numpy()
# predicted class = arg-max over the class axis
oc = np.argmax(oo, axis=1)
acc = np.sum(oc == label) / batch_size
acc_record += acc
''' Show loss and accuracy, save best model. '''
# NOTE(review): indentation was lost in this fragment; the block grouping
# below is reconstructed — confirm against the original file.
if it > 0 and (it + 1) % args.show_iters == 0:
    # report running averages over the last `show_iters` iterations
    loss_record /= args.show_iters
    acc_record /= args.show_iters
    print(
        '\rExtractor | iter %05d, average batch loss: %.5f, average batch accuracy: %.3f'
        % (it + 1, loss_record, acc_record))
    # keep only the best-accuracy snapshot
    if acc_record > best_acc:
        best_acc = acc_record
        torch.save(
            model.state_dict(),
            osp.join(args.model_dir, 'best_extractor_%d.pth' % (hash_bit)))
    loss_record = 0.
    acc_record = 0.
    # early stop once the extractor is essentially converged
    if best_acc > 0.98:
        print('Extractor | Early Stop Training.')
        break
# free stage-one training objects before the next stage
del criterion
del optimizer
''' ==================== Triplet Similarity Learning ==================== '''
# presumably switches the model to normalized-embedding output for metric
# learning — confirm against the model definition
model.norm = True
model.zero_grad()
def train(k, epochs):
    """Train a ResNet classifier and save its weights and loss history.

    Args:
        k: width/variant parameter forwarded to ResNet and used in the
           output filenames.
        epochs: number of passes over `train_loader`.

    Returns:
        dict with keys 'epoch', 'train', 'val' holding per-epoch summed
        losses (Horovod-averaged across workers when enabled).

    Relies on module-level globals: use_gpu, use_horovod, hvd, train_loader,
    val_loader, average_loss, save_obj.
    """
    model = ResNet(k=k)
    opt = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        model.to('cuda')
    if use_horovod:
        # broadcast parameters and optimizer state from root device to
        # other devices
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(opt, root_rank=0)
        # Wraps the optimizer for multiGPU operation
        opt = hvd.DistributedOptimizer(
            opt, named_parameters=model.named_parameters(), op=hvd.Adasum)

    loss_dict = {'epoch': [], 'train': [], 'val': []}
    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0

        # train block
        # BUG FIX: re-enter train mode each epoch so batch-norm/dropout
        # behave correctly after the eval() switch below.
        model.train()
        for img_batch, labels_batch in train_loader:
            if use_gpu:
                img_batch = img_batch.to('cuda')
                labels_batch = labels_batch.to('cuda')
            pred = model(img_batch)
            opt.zero_grad()
            loss = criterion(pred, labels_batch)
            loss.backward()
            opt.step()
            train_loss += loss.item()

        # val block
        # BUG FIX: the original validated with the model left in train mode,
        # so batch-norm used batch statistics and dropout stayed active;
        # switch to eval mode for deterministic validation.
        model.eval()
        with torch.no_grad():
            for img_batch, labels_batch in val_loader:
                if use_gpu:
                    img_batch = img_batch.to('cuda')
                    labels_batch = labels_batch.to('cuda')
                pred = model(img_batch)
                loss = criterion(pred, labels_batch)
                val_loss += loss.item()

        if use_horovod:
            # average the summed losses across all workers
            train_loss = average_loss(train_loss, 'avg_train_loss')
            val_loss = average_loss(val_loss, 'avg_val_loss')

        loss_dict['epoch'].append(epoch + 1)
        loss_dict['train'].append(train_loss)
        loss_dict['val'].append(val_loss)
        print(",".join([
            "{}:{:.2f}".format(key, val[epoch])
            for key, val in loss_dict.items()
        ]))

    torch.save(model.state_dict(),
               "models/modelsdata/ResNet18_Cifar10_d{}.ckpt".format(k))
    save_obj(loss_dict, "models/modelsdata/losses/ResNet18_Cifar10_d{}".format(k))
    return loss_dict
def main():
    """Train ResNet-18 on the project image data, checkpoint on each new best
    training accuracy, and write test-set predictions every epoch.

    Relies on project helpers defined elsewhere: data_evaluation, dataLoader,
    ResNet, lr_decay. The epoch loop iterates itertools.count(1) and never
    terminates on its own — stop the process manually.
    """
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # --- hyper parameters --- #
    BATCH_SIZE = 256
    LR = 1e-3
    WEIGHT_DECAY = 1e-4
    N_layer = 18
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # --- data process --- #
    # info
    src_path = './data/'
    target_path = './saved/ResNet18/'
    model_path = target_path + 'pkls/'
    pred_path = target_path + 'preds/'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # evaluation: num of classify labels & image size
    # output testing id csv
    label2num_dict, num2label_dict = data_evaluation(src_path)

    # load
    train_data = dataLoader(src_path, 'train', label2num_dict)
    train_len = len(train_data)
    test_data = dataLoader(src_path, 'test')
    train_loader = Data.DataLoader(
        dataset=train_data,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=12,
    )
    test_loader = Data.DataLoader(
        dataset=test_data,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=12,
    )

    # --- model training --- #
    # fp: for storing data (NOTE: close() below is unreachable because the
    # epoch loop is infinite; the OS flushes on process exit)
    fp_train_acc = open(target_path + 'train_acc.txt', 'w')
    fp_time = open(target_path + 'time.txt', 'w')

    # train
    highest_acc, train_acc_seq = 0, []
    loss_funct = nn.CrossEntropyLoss()
    net = ResNet(N_layer).to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=LR,
                                 weight_decay=WEIGHT_DECAY)
    print(net)

    for epoch_i in count(1):
        right_count = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # clear gradient
            optimizer.zero_grad()

            # forward & backward
            # IDIOM FIX: call the module (runs hooks) instead of .forward()
            output = net(batch_x.float())
            highest_out = torch.max(output, 1)[1]
            # IDIOM FIX: tensor .sum() instead of the builtin sum() loop
            right_count += (batch_y == highest_out).sum().item()
            loss = loss_funct(output, batch_y)
            loss.backward()

            # update parameters
            optimizer.step()

        # calculate accuracy
        train_acc = right_count / train_len
        train_acc_seq.append(train_acc * 100)
        if train_acc > highest_acc:
            highest_acc = train_acc
            # save model
            torch.save(
                net.state_dict(),
                '{}{}_{}_{}.pkl'.format(model_path, target_path.split('/')[2],
                                        round(train_acc * 1000), epoch_i))

        # write data
        fp_train_acc.write(str(train_acc * 100) + '\n')
        fp_time.write(
            str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        print('\n{} Epoch {}, Training accuracy: {}'.format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), epoch_i,
            train_acc))

        # test
        net.eval()
        test_df = pd.read_csv(src_path + 'testing_data/testing_labels.csv')
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(test_loader):
                batch_x = batch_x.to(device)
                output = net(batch_x.float())
                highest_out = torch.max(output, 1)[1].cpu()
                labels = [
                    num2label_dict[out_j.item()] for out_j in highest_out
                ]
                # BUG FIX: the original chained assignment
                # test_df['label'].iloc[...] = labels writes to a temporary
                # under pandas copy-on-write; index the frame directly with
                # .loc so the update always sticks.
                test_df.loc[
                    test_df.index[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                    'label'] = labels
        test_df.to_csv('{}{}_{}_{}.csv'.format(pred_path,
                                               target_path.split('/')[2],
                                               round(train_acc * 1000),
                                               epoch_i),
                       index=False)
        net.train()
        lr_decay(optimizer)

    fp_train_acc.close()
    fp_time.close()
def main():
    """Fine-tune per-fold ResNets using noisy data and sharpened pseudo labels.

    Loads the curated/noisy/test tables, builds multi-hot label columns,
    sharpens previously predicted noisy-set probabilities into pseudo labels,
    then for each selected fold resumes from an epoch-512 checkpoint and
    trains with three streams (curated, noisy, semi/pseudo-labeled), logging
    metrics to CSV and checkpointing every cosine cycle.

    Relies on module-level globals: NUM_FOLD, SEED, FOLD_LIST, NUM_CLASS,
    TEMPERATURE, CROP_LENGTH, CROP_RATE, BATCH_SIZE, LR, NUM_CYCLE,
    NUM_EPOCH, LOAD_DIR, OUTPUT_DIR, and `starttime`.
    """
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")

    # Multi-hot label columns via substring containment on the 'labels' string
    labels = df_test.columns[1:].tolist()
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    # BUG FIX: test paths were built from df_train['fname'];
    # use the test table's own filenames.
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # calc sampling weight: down-weight the (much larger) noisy pool so the
    # semi loader draws curated and noisy rows in roughly equal proportion
    df_train['weight'] = 1
    df_noisy['weight'] = len(df_train) / len(df_noisy)

    # generate pseudo label with sharpening (temperature exponent + renorm)
    tmp = np.load("../input/pseudo_label/preds_noisy.npy").mean(axis=(0, 1))
    tmp = tmp**TEMPERATURE
    tmp = tmp / tmp.sum(axis=1)[:, np.newaxis]
    df_noisy_pseudo = df_noisy.copy()
    df_noisy_pseudo[labels] = tmp

    # fold splitting (same SEED keeps folds aligned with the pretraining run)
    folds = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_train))))
    folds_noisy = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_noisy))))

    # Training
    log_columns = [
        'epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'semi_mse',
        'val_bce', 'val_lwlrap', 'time'
    ]
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model, resuming from the pretraining checkpoint
        model = ResNet(NUM_CLASS).cuda()
        model.load_state_dict(
            torch.load("{}/weight_fold_{}_epoch_512.pth".format(
                LOAD_DIR, fold + 1)))

        # prepare data loaders
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(
            df_train_fold['path'],
            df_train_fold[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        train_loader = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )

        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(
            df_valid['path'],
            df_valid[labels].values,
        )
        # batch_size=1 so variable-length clips need no padding at eval time
        valid_loader = DataLoader(
            dataset_valid,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        dataset_noisy = MelDataset(
            df_noisy['path'],
            df_noisy[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        noisy_loader = DataLoader(
            dataset_noisy,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )
        # endless iterator so the noisy stream never exhausts mid-epoch
        noisy_itr = cycle(noisy_loader)

        # semi stream: curated rows of this fold plus pseudo-labeled noisy
        # rows from the matching noisy fold, drawn by sampling weight
        df_semi = pd.concat([
            df_train.iloc[ids_train_split],
            df_noisy_pseudo.iloc[folds_noisy[fold][0]]
        ]).reset_index(drop=True)
        semi_sampler = torch.utils.data.sampler.WeightedRandomSampler(
            df_semi['weight'].values, len(df_semi))
        dataset_semi = MelDataset(
            df_semi['path'],
            df_semi[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        semi_loader = DataLoader(
            dataset_semi,
            batch_size=BATCH_SIZE,
            shuffle=False,  # sampler and shuffle are mutually exclusive
            num_workers=1,
            pin_memory=True,
            sampler=semi_sampler,
        )
        semi_itr = cycle(semi_loader)

        # set optimizer and loss
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()), lr=LR[0])
        scheduler = CosineLR(optimizer, step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE, tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch over all three streams
            bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi = train(
                (train_loader, noisy_itr, semi_itr), model, optimizer,
                scheduler, epoch)
            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Semi MSE: {:.4f} ".format(mse_semi) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights
            train_log_epoch = pd.DataFrame(
                [[epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi,
                  val_bce, val_lwlrap, endtime]],
                columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(
                OUTPUT_DIR, fold + 1), index=False)
            # checkpoint at the end of every cosine-annealing cycle
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(model.state_dict(),
                           "{}/weight_fold_{}_epoch_{}.pth".format(
                               OUTPUT_DIR, fold + 1, epoch + 1))