def train(train_loaders, model, optimizer, scheduler, epoch):
    """Train one epoch on the curated loader plus a paired noisy iterator (BCE loss).

    Parameters
    ----------
    train_loaders : tuple
        (curated DataLoader, iterator over noisy batches); one noisy batch is
        drawn per curated batch.
    model : nn.Module
        Must accept (input, aux) and expose a ``.noisy`` head with the same
        signature.
    optimizer, scheduler : torch optimizer / LR scheduler (scheduler stepped
        per batch, matching the original behavior).
    epoch : int
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    (avg curated BCE, curated lwlrap, avg noisy BCE, noisy lwlrap)
    """
    train_loader, noisy_itr = train_loaders
    bce_avr = AverageMeter()
    bce_noisy_avr = AverageMeter()
    criterion_bce = nn.BCEWithLogitsLoss()
    sigmoid = nn.Sigmoid()

    # switch to train mode
    model.train()

    # 80x80 identity matrix used as an auxiliary model input
    # (presumably a one-hot class-embedding input — TODO confirm).
    # torch.eye replaces the original manual fill loop.
    inp = torch.eye(80, dtype=torch.float32)

    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    preds_noisy = np.zeros([0, NUM_CLASS], np.float32)
    y_true_noisy = np.zeros([0, NUM_CLASS], np.float32)

    for i, (input, target) in enumerate(train_loader):
        # torch.autograd.Variable is a deprecated no-op since PyTorch 0.4;
        # plain tensors carry autograd state.
        input = input.to(device)
        target = target.to(device)
        input_noisy, target_noisy = next(noisy_itr)
        input_noisy = input_noisy.to(device)
        target_noisy = target_noisy.to(device)

        # compute output
        output = model(input, inp.to(device))
        bce = criterion_bce(output, target)
        output_noisy = model.noisy(input_noisy, inp.to(device))
        # BUG FIX: BCEWithLogitsLoss applies sigmoid internally; the original
        # passed sigmoid(output_noisy), double-applying it on the noisy branch.
        # Feed the raw logits, mirroring the curated branch.
        bce_noisy = criterion_bce(output_noisy, target_noisy)
        loss = bce + bce_noisy

        pred = sigmoid(output).data.cpu().numpy()
        pred_noisy = sigmoid(output_noisy).data.cpu().numpy()

        # backprop (scheduler stepped per batch, as in the original)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # record log
        bce_avr.update(bce.data, input.size(0))
        bce_noisy_avr.update(bce_noisy.data, input.size(0))
        preds = np.concatenate([preds, pred])
        y_true = np.concatenate([y_true, target.data.cpu().numpy()])
        preds_noisy = np.concatenate([preds_noisy, pred_noisy])
        y_true_noisy = np.concatenate([y_true_noisy, target_noisy.data.cpu().numpy()])

    # calc metric
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noisy, preds_noisy)
    lwlrap_noisy = np.sum(per_class_lwlrap * weight_per_class)
    return bce_avr.avg.item(), lwlrap, bce_noisy_avr.avg.item(), lwlrap_noisy
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Train one epoch on the curated loader plus a paired noisy iterator (KL loss).

    Parameters
    ----------
    train_loaders : tuple
        (curated DataLoader, iterator over noisy batches); one noisy batch is
        drawn per curated batch.
    model : nn.Module
        Called as ``model(input)``; must also expose a ``.noisy`` head.
    optimizer, scheduler : torch optimizer / LR scheduler (scheduler stepped
        per batch, matching the original behavior).
    epoch : int
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    (avg curated KL, curated lwlrap, avg noisy KL, noisy lwlrap)
    """
    kl_avr = AverageMeter()
    kl_noisy_avr = AverageMeter()
    # Removed an unused local (``lsigmoid = nn.LogSigmoid()``) from the original.
    lsoftmax = nn.LogSoftmax(dim=1).cuda()
    softmax = nn.Softmax(dim=1).cuda()
    # NOTE(review): KLDivLoss() uses the legacy default 'mean' reduction,
    # which averages over all elements rather than the batch ('batchmean').
    # Kept as-is to preserve the original loss scale.
    criterion_kl = nn.KLDivLoss().cuda()
    train_loader, noisy_itr = train_loaders

    # switch to train mode
    model.train()

    preds = np.zeros([0, NUM_CLASS], np.float32)
    y_true = np.zeros([0, NUM_CLASS], np.float32)
    preds_noisy = np.zeros([0, NUM_CLASS], np.float32)
    y_true_noisy = np.zeros([0, NUM_CLASS], np.float32)

    for i, (input, target) in enumerate(train_loader):
        # torch.autograd.Variable is a deprecated no-op since PyTorch 0.4;
        # plain tensors carry autograd state.
        input = input.cuda()
        target = target.cuda()
        input_noisy, target_noisy = next(noisy_itr)
        input_noisy = input_noisy.cuda()
        target_noisy = target_noisy.cuda()

        # KLDivLoss expects log-probabilities as input; targets are
        # presumably probability distributions over classes — TODO confirm.
        output = model(input)
        kl = criterion_kl(lsoftmax(output), target)
        output_noisy = model.noisy(input_noisy)
        kl_noisy = criterion_kl(lsoftmax(output_noisy), target_noisy)
        loss = kl + kl_noisy

        pred = softmax(output).data.cpu().numpy()
        pred_noisy = softmax(output_noisy).data.cpu().numpy()

        # backprop (scheduler stepped per batch, as in the original)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # record log
        kl_avr.update(kl.data, input.size(0))
        kl_noisy_avr.update(kl_noisy.data, input.size(0))
        preds = np.concatenate([preds, pred])
        y_true = np.concatenate([y_true, target.data.cpu().numpy()])
        preds_noisy = np.concatenate([preds_noisy, pred_noisy])
        y_true_noisy = np.concatenate([y_true_noisy, target_noisy.data.cpu().numpy()])

    # calc metric
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noisy, preds_noisy)
    lwlrap_noisy = np.sum(per_class_lwlrap * weight_per_class)
    return kl_avr.avg.item(), lwlrap, kl_noisy_avr.avg.item(), lwlrap_noisy
def validate(val_loader, model):
    """Run the model over `val_loader` and score it with BCE-with-logits.

    Returns a tuple ``(mean BCE over the set, lwlrap score)``.
    """
    loss_meter = AverageMeter()
    act = torch.nn.Sigmoid().cuda()
    loss_fn = nn.BCEWithLogitsLoss().cuda()

    # evaluation mode: freeze dropout / batch-norm statistics
    model.eval()

    all_probs = np.zeros([0, NUM_CLASS], np.float32)
    all_targets = np.zeros([0, NUM_CLASS], np.float32)

    for batch_x, batch_y in val_loader:
        batch_x = torch.autograd.Variable(batch_x.cuda())
        batch_y = torch.autograd.Variable(batch_y.cuda())

        # forward pass only — no gradients needed for validation
        with torch.no_grad():
            logits = model(batch_x)
            batch_loss = loss_fn(logits, batch_y)
            probs = act(logits).data.cpu().numpy()

        # accumulate running loss and per-sample predictions/targets
        loss_meter.update(batch_loss.data, batch_x.size(0))
        all_probs = np.concatenate([all_probs, probs])
        all_targets = np.concatenate([all_targets, batch_y.data.cpu().numpy()])

    # label-weighted label-ranking average precision over the whole set
    per_class, weights = calculate_per_class_lwlrap(all_targets, all_probs)
    return loss_meter.avg.item(), np.sum(per_class * weights)
def validate(val_loader, model):
    """Run the model over `val_loader` and score it with KL divergence.

    Returns a tuple ``(mean KL over the set, lwlrap score)``.
    """
    loss_meter = AverageMeter()
    log_sm = nn.LogSoftmax(dim=1).cuda()
    sm = torch.nn.Softmax(dim=1).cuda()
    loss_fn = nn.KLDivLoss().cuda()

    # evaluation mode: freeze dropout / batch-norm statistics
    model.eval()

    all_probs = np.zeros([0, NUM_CLASS], np.float32)
    all_targets = np.zeros([0, NUM_CLASS], np.float32)

    for batch_x, batch_y in val_loader:
        batch_x = torch.autograd.Variable(batch_x.cuda())
        batch_y = torch.autograd.Variable(batch_y.cuda())

        # forward pass only — KLDivLoss consumes log-probabilities
        with torch.no_grad():
            logits = model(batch_x)
            batch_loss = loss_fn(log_sm(logits), batch_y)
            probs = sm(logits).data.cpu().numpy()

        # accumulate running loss and per-sample predictions/targets
        loss_meter.update(batch_loss.data, batch_x.size(0))
        all_probs = np.concatenate([all_probs, probs])
        all_targets = np.concatenate([all_targets, batch_y.data.cpu().numpy()])

    # label-weighted label-ranking average precision over the whole set
    per_class, weights = calculate_per_class_lwlrap(all_targets, all_probs)
    return loss_meter.avg.item(), np.sum(per_class * weights)
def test(traindatadir, testdatadir, traindatacsv, testdatacsv, device,
         model_path=''):
    """Evaluate a trained CNNModelv2 checkpoint and log its lwlrap.

    Test data is loaded in same format as validation data.
    """
    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir, testdatadir, traindatacsv,
                              testdatacsv, batch_size=8)

    print('create model ... ')
    cnnmodel = models.CNNModelv2(voiceDataset.get_class_num()).to(device)

    # restore trained weights
    print('loading model from {}...'.format(model_path))
    cnnmodel.load_state_dict(torch.load(model_path))
    cnnmodel.eval()

    n_classes = voiceDataset.get_class_num()
    n_batches = voiceDataset.get_numof_batch(False)
    test_preds = np.array([]).reshape(0, n_classes)
    test_labels = np.array([]).reshape(0, n_classes)

    for tbidx in tqdm(range(n_batches)):
        test_data, test_samplenumbatch, test_label = voiceDataset.get_data(
            tbidx, False)
        # collapse per-clip sample outputs into one prediction per clip
        pred = oneSampleOutput(
            cnnmodel(test_data.to(device)).detach(),
            test_samplenumbatch).to(device)
        test_preds = np.vstack((test_preds, pred.cpu().numpy()))
        test_labels = np.vstack((test_labels, test_label.cpu().numpy()))

    score, weight = utils.calculate_per_class_lwlrap(test_labels, test_preds)
    lwlrap = (score * weight).sum()
    msg = '[TESTING] lwlrap:{}'.format(lwlrap)
    print(msg)
    logger.info(msg)
def finetune(traindatadir, valdatadir, traindatacsv, valdatacsv, device,
             model_path, cfg_path, save_model_filename, frorm_scratch=True):
    # Fine-tune (or train from scratch) a prunable CNN on the FAT dataset with
    # BCE-with-logits loss, validating every `eval_steps` batches and saving
    # the checkpoint when validation lwlrap improves (or at the epoch's last
    # batch). NOTE: `frorm_scratch` is a typo for "from_scratch" but is part
    # of the public signature, so it is kept as-is.
    EPOCH = 100
    printout_steps = 50   # log training loss every N batches
    eval_steps = 200      # run validation every N batches
    lr_steps = 300        # step the cosine LR scheduler every N batches
    lr = 5e-4
    t_max = 200
    eta_min = 3e-6
    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir, valdatadir, traindatacsv,
                              valdatacsv, batch_size=8)
    print('create model ... ')
    print('cfg_path', cfg_path)
    # `cfg` is a pickled per-layer channel configuration produced by pruning
    # (presumably — TODO confirm against model4prune).
    cfg = pkl.load(open(cfg_path, 'rb'))
    if config.get('Parameters', 'model_arch').lower() == 'basic':
        cnnmodel = model4prune.CNNModelBasic(voiceDataset.get_class_num(),
                                             cfg).to(device)
    elif config.get('Parameters', 'model_arch').lower() == 'poolrevised':
        cnnmodel = model4prune.CNNModelPoolingRevised(
            voiceDataset.get_class_num(), cfg).to(device)
    print(cnnmodel)
    if not frorm_scratch:
        # fine-tuning: start from previously trained weights
        print('loading model from {}...'.format(model_path))
        cnnmodel.load_state_dict(torch.load(model_path))
    optimizer = torch.optim.Adam(cnnmodel.parameters(), lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
    criterion = nn.BCEWithLogitsLoss()
    bestlwlrap = -1  # best validation lwlrap seen so far
    for e in range(EPOCH):
        voiceDataset.shuffle_trainingdata()
        num_step_per_epoch = voiceDataset.get_numof_batch(istraindata=True)
        for bidx in tqdm(range(num_step_per_epoch)):
            batch_data, samplenumbatch, label_batch = voiceDataset.get_data(
                bidx, True)  # [M, 128, duration, 3]
            if batch_data.shape[0] <= 1:
                # skip degenerate batches (batch-norm needs > 1 sample)
                continue
            bx = batch_data.to(device)
            output = cnnmodel(bx)
            # merge per-sample outputs back into one prediction per clip
            output = oneSampleOutput(output, samplenumbatch).to(device)
            by = label_batch.to(device)
            loss = criterion(output, by)
            if bidx % printout_steps == 0:
                msg = '[TRAINING] Epoch:{}, step:{}/{}, loss:{}'.format(
                    e, bidx, num_step_per_epoch, loss)
                print(msg)
                logger.info(msg)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (bidx + 1) % eval_steps == 0:
                # doing validation
                cnnmodel.eval()
                val_batches_num = voiceDataset.get_numof_batch(False)
                val_preds = np.array([]).reshape(0,
                                                 voiceDataset.get_class_num())
                val_labels = np.array([]).reshape(0,
                                                  voiceDataset.get_class_num())
                val_loss = 0.
                for vbidx in tqdm(range(val_batches_num)):
                    val_data, val_samplenumbatch, val_label = voiceDataset.get_data(
                        vbidx, False)
                    pred = oneSampleOutput(
                        cnnmodel(val_data.to(device)).detach(),
                        val_samplenumbatch).to(device)
                    val_preds = np.vstack((val_preds, pred.cpu().numpy()))
                    # per-batch loss normalized by batch size, summed over batches
                    val_loss += criterion(
                        pred, val_label.to(device)).item() / val_label.shape[0]
                    val_labels = np.vstack(
                        (val_labels, val_label.cpu().numpy()))
                score, weight = utils.calculate_per_class_lwlrap(
                    val_labels, val_preds)
                lwlrap = (score * weight).sum()
                msg = '[VALIDATION] Epoch:{}, step:{}:/{}, loss:{}, lwlrap:{}'.format(
                    e, bidx, num_step_per_epoch, val_loss, lwlrap)
                print(msg)
                logger.info(msg)
                # Save on improvement, and always at the epoch's last batch.
                # NOTE(review): the last-batch save also overwrites
                # `bestlwlrap` even when the score did not improve — looks
                # unintentional; confirm before changing.
                if lwlrap > bestlwlrap or bidx == num_step_per_epoch - 1:
                    bestlwlrap = lwlrap
                    #save model
                    save_model_path = os.path.join(checkpoint_dir,
                                                   save_model_filename)
                    torch.save(cnnmodel.state_dict(), save_model_path)
                    msg = 'save model to: {}'.format(save_model_path)
                    print(msg)
                    logger.info(msg)
                cnnmodel.train()
            if bidx % lr_steps == 0:
                scheduler.step()
def train(device, model_path='', loadModel=False):
    """Stage-2 semi-supervised training with a consistency-style loss.

    Each batch is fetched twice so the two forward passes (Z1, Z2) see
    different random augmentations of the same clips; `loss_func` combines
    them with the (identical) labels. Validates every `eval_steps` batches
    and checkpoints on lwlrap improvement.

    Parameters
    ----------
    device : torch.device to run on.
    model_path : str — checkpoint to resume from when `loadModel` is True.
    loadModel : bool — whether to load `model_path` before training.
    """
    EPOCH = 50
    printout_steps = 50   # log training loss every N batches
    eval_steps = 200      # run validation every N batches
    lr_steps = 300        # step the cosine LR scheduler every N batches
    lr = 3e-3
    t_max = 200
    eta_min = 3e-6
    print('initialize dataset...')
    curated_data_path = os.path.join(config.get('DataPath', 'split_dir'),
                                     'mels_train_curated_split.pkl')
    curated_csv = os.path.join(config.get('DataPath', 'split_dir'),
                               'train_curated_split.csv')
    noisy_data_path = config.get('DataPath', 'noisy_data_path')
    noisy_csv = config.get('DataPath', 'filtered_noisy_csv_path')
    val_datapath = os.path.join(config.get('DataPath', 'split_dir'),
                                'mels_val_curated_split.pkl')
    val_csv_path = os.path.join(config.get('DataPath', 'split_dir'),
                                'val_curated_split.csv')
    voiceDataset = SemiSupervisedDataset(curated_data_path, curated_csv,
                                         noisy_data_path, noisy_csv,
                                         val_datapath, val_csv_path,
                                         batch_size=4)
    print('create model ... ')
    cnnmodel = models.CNNModelv2(voiceDataset.get_class_num()).to(device)
    if loadModel:
        print('loading model from {}...'.format(model_path))
        cnnmodel.load_state_dict(torch.load(model_path))
    optimizer = torch.optim.Adam(cnnmodel.parameters(), lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)
    criterion = nn.BCEWithLogitsLoss()
    bestlwlrap = -1  # best validation lwlrap seen so far
    for e in range(EPOCH):
        voiceDataset.shuffle_and_cut()
        num_step_per_epoch = voiceDataset.get_numof_batch(True)
        for bidx in tqdm(range(num_step_per_epoch)):
            # Z1 and Z2 come from the exact same batch but with different
            # random data augmentation applied by the dataset.
            batch_data1, samplenumbatch1, label_batch1 = voiceDataset.get_data(
                bidx, True)  # [M, 128, duration, 3]
            batch_data2, samplenumbatch2, label_batch2 = voiceDataset.get_data(
                bidx, True)  # [M, 128, duration, 3]
            if batch_data1.shape[0] <= 1:
                # skip degenerate batches (batch-norm needs > 1 sample)
                continue
            bx1 = batch_data1.to(device)
            bx2 = batch_data2.to(device)
            output1 = cnnmodel(bx1)
            output2 = cnnmodel(bx2)
            Z1 = oneSampleOutput(output1, samplenumbatch1).to(device)
            Z2 = oneSampleOutput(output2, samplenumbatch2).to(device)
            # label_batch1 should be exactly same as label_batch2
            by = label_batch1.to(device)
            # BUG FIX: this assignment was commented out in the original,
            # leaving `loss` undefined and crashing with NameError at
            # loss.backward(). Restored the loss computation.
            loss = loss_func(criterion, Z1, Z2, by)
            if bidx % printout_steps == 0:
                msg = '[TRAINING] Epoch:{}, step:{}/{}, loss:{}'.format(
                    e, bidx, num_step_per_epoch, loss)
                print(msg)
                logger.info(msg)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (bidx + 1) % eval_steps == 0:
                # doing validation
                cnnmodel.eval()
                val_batches_num = voiceDataset.get_numof_batch(False)
                val_preds = np.array([]).reshape(0,
                                                 voiceDataset.get_class_num())
                val_labels = np.array([]).reshape(0,
                                                  voiceDataset.get_class_num())
                for vbidx in tqdm(range(val_batches_num)):
                    val_data, val_samplenumbatch, val_label = voiceDataset.get_data(
                        vbidx, False)
                    pred = oneSampleOutput(
                        cnnmodel(val_data.to(device)).detach(),
                        val_samplenumbatch).to(device)
                    val_preds = np.vstack((val_preds, pred.cpu().numpy()))
                    val_labels = np.vstack(
                        (val_labels, val_label.cpu().numpy()))
                score, weight = utils.calculate_per_class_lwlrap(
                    val_labels, val_preds)
                lwlrap = (score * weight).sum()
                msg = '[VALIDATION] Epoch:{}, step:{}:/{}, lwlrap:{}'.format(
                    e, bidx, num_step_per_epoch, lwlrap)
                print(msg)
                logger.info(msg)
                if lwlrap > bestlwlrap:
                    bestlwlrap = lwlrap
                    # save the best checkpoint so far
                    save_model_filename = config.get('SaveModel',
                                                     'stage2_model')
                    save_model_path = os.path.join(
                        config.get('DataPath', 'checkpoint_dir'),
                        save_model_filename)
                    torch.save(cnnmodel.state_dict(), save_model_path)
                    msg = 'save model to: {}'.format(save_model_path)
                    print(msg)
                    logger.info(msg)
                cnnmodel.train()
            if bidx % lr_steps == 0:
                scheduler.step()
def test(traindatadir, testdatadir, traindatacsv, testdatacsv, device,
         model_path=''):
    """Evaluate a (possibly pruned) checkpoint; report lwlrap, FLOPs, params
    and wall-clock inference time.

    Test data is loaded in same format as validation data.
    """
    print('initialize dataset...')
    voiceDataset = FATDataset(traindatadir, testdatadir, traindatacsv,
                              testdatacsv, batch_size=8)
    print('create model ... ')
    # Pick the architecture from config; 'basic' also serves as the fallback
    # for unknown values, matching the original behavior. (The original
    # always constructed a CNNModelBasic first and then possibly discarded
    # and rebuilt it — this builds exactly one model.)
    arch = config.get('Parameters', 'model_arch').lower()
    if arch == 'poolrevised':
        cnnmodel = model4prune.CNNModelPoolingRevised(
            voiceDataset.get_class_num()).to(device)
    else:
        cnnmodel = model4prune.CNNModelBasic(
            voiceDataset.get_class_num()).to(device)
    # loading trained model
    print('loading model from {}...'.format(model_path))
    cnnmodel.load_state_dict(torch.load(model_path))
    # testing
    cnnmodel.eval()
    test_batches_num = voiceDataset.get_numof_batch(False)
    test_preds = np.array([]).reshape(0, voiceDataset.get_class_num())
    test_labels = np.array([]).reshape(0, voiceDataset.get_class_num())
    # calculate flops and params on a single test sample
    flop_test_data, _, __ = voiceDataset.get_data(0, False)
    flop_test_data = flop_test_data[0:1, :, :, :].to(device)
    flops, params = profile(cnnmodel, inputs=(flop_test_data, ))
    eval_start = timeit.default_timer()
    for tbidx in tqdm(range(test_batches_num)):
        test_data, test_samplenumbatch, test_label = voiceDataset.get_data(
            tbidx, False)
        # collapse per-clip sample outputs into one prediction per clip
        pred = oneSampleOutput(
            cnnmodel(test_data.to(device)).detach(),
            test_samplenumbatch).to(device)
        test_preds = np.vstack((test_preds, pred.cpu().numpy()))
        test_labels = np.vstack((test_labels, test_label.cpu().numpy()))
    eval_stop = timeit.default_timer()
    score, weight = utils.calculate_per_class_lwlrap(test_labels, test_preds)
    lwlrap = (score * weight).sum()
    msg = '[TESTING] lwlrap:{}, flops:{}, params:{}, running time:{}'.format(
        lwlrap, flops, params, eval_stop - eval_start)
    print(msg)
    logger.info(msg)