def train_and_eval_eccv(train, test):
    """
    Fit a Bayesian Ridge regressor on the ECCV facial-beauty dataset and
    report MAE / RMSE / Pearson correlation on the test split.

    :param train: dict mapping image key -> ground-truth score (training split)
    :param test: dict mapping image key -> ground-truth score (test split)
    :return:
    """

    def _deep_feature(image_key):
        # Feature vector: conv5_2 and conv5_3 activations concatenated.
        return np.concatenate((extract_feature(image_key, layer_name="conv5_2"),
                               extract_feature(image_key, layer_name="conv5_3")),
                              axis=0)

    train_vec = [_deep_feature(k) for k in train]
    train_label = [train[k] for k in train]
    test_vec = [_deep_feature(k) for k in test]
    test_label = [test[k] for k in test]

    reg = linear_model.BayesianRidge()
    reg.fit(np.array(train_vec), np.array(train_label))

    # Persist the fitted regressor before evaluating.
    mkdirs_if_not_exist('./model')
    joblib.dump(reg, config['eccv_fbp_reg_model'])

    predicted_label = reg.predict(np.array(test_vec))
    gt = np.array(test_label)
    mae_lr = round(mean_absolute_error(gt, predicted_label), 4)
    rmse_lr = round(math.sqrt(mean_squared_error(gt, predicted_label)), 4)
    pc = round(np.corrcoef(test_label, predicted_label)[0, 1], 4)

    print('===============The Mean Absolute Error of Model is {0}===================='
          .format(mae_lr))
    print('===============The Root Mean Square Error of Model is {0}===================='
          .format(rmse_lr))
    print('===============The Pearson Correlation of Model is {0}===================='
          .format(pc))

    # Timestamp tags both result files so repeated runs never collide.
    csv_tag = time.time()
    mkdirs_if_not_exist('./result')
    pd.DataFrame([mae_lr, rmse_lr, pc]).to_csv(
        './result/performance_%s.csv' % csv_tag, index=False)
    out_result(list(test.keys()),
               predicted_label.flatten().tolist(),
               test_label,
               None,
               path='./result/detail_%s.csv' % csv_tag)
def train_and_eval_scutfbp(train_set_vector, test_set_vector, trainset_label,
                           testset_label, testset_filenames):
    """
    Train a Bayesian Ridge regressor on SCUT-FBP, evaluate on the held-out
    split, and write the model plus a prediction/ground-truth report to disk.

    :param train_set_vector: training feature vectors
    :param test_set_vector: test feature vectors
    :param trainset_label: training scores
    :param testset_label: test scores
    :param testset_filenames: filenames aligned with the test vectors
    :return:
    """
    print("The shape of training set is {0}".format(
        np.array(train_set_vector).shape))
    print("The shape of test set is {0}".format(
        np.array(test_set_vector).shape))

    reg = linear_model.BayesianRidge()
    reg.fit(train_set_vector, trainset_label)
    predicted_label = reg.predict(test_set_vector)

    # Three standard regression metrics, rounded to 4 decimals.
    mae_lr = round(mean_absolute_error(testset_label, predicted_label), 4)
    rmse_lr = round(
        math.sqrt(mean_squared_error(testset_label, predicted_label)), 4)
    pc = round(np.corrcoef(testset_label, predicted_label)[0, 1], 4)

    print('===============The Mean Absolute Error of Model is {0}===================='
          .format(mae_lr))
    print('===============The Root Mean Square Error of Model is {0}===================='
          .format(rmse_lr))
    print('===============The Pearson Correlation of Model is {0}===================='
          .format(pc))

    mkdirs_if_not_exist('./model')
    joblib.dump(reg, './model/BayesRidge_SCUTFBP.pkl')
    print('The regression model has been persisted...')

    mkdirs_if_not_exist('./result')
    out_result(testset_filenames,
               predicted_label,
               testset_label,
               None,
               path='./result/Pred_GT_SCUTFBP.csv')
    pd.DataFrame([mae_lr, rmse_lr, pc]).to_csv(
        './result/BayesRidge_SCUTFBP.csv', index=False)
    print('The result csv file has been generated...')
def cv_train(dataset, labels, cv=10):
    """
    Evaluate a Bayesian Ridge regressor with k-fold cross validation, then
    fit it on the full dataset and persist the fitted model.

    :param dataset: feature matrix
    :param labels: target scores
    :param cv: number of cross-validation folds
    :return:
    """
    reg = linear_model.BayesianRidge()
    # neg_* scorers return negated errors, hence the leading minus signs.
    mae_list = -cross_val_score(reg, dataset, labels, cv=cv, n_jobs=-1,
                                scoring='neg_mean_absolute_error')
    rmse_list = np.sqrt(-cross_val_score(reg, dataset, labels, cv=cv,
                                         n_jobs=-1,
                                         scoring='neg_mean_squared_error'))
    pc_list = cross_val_score(reg, dataset, labels, cv=cv, n_jobs=-1,
                              scoring='r2')

    print(mae_list)
    print(rmse_list)
    print(pc_list)

    print('=========The Mean Absolute Error of Model is {0}========='.format(
        np.mean(mae_list)))
    print(
        '=========The Root Mean Square Error of Model is {0}========='.format(
            np.mean(rmse_list)))
    print('=========The Pearson Correlation of Model is {0}========='.format(
        np.mean(pc_list)))

    # BUG FIX: cross_val_score fits *clones* of `reg`, so `reg` itself was
    # previously dumped unfitted and the persisted model was unusable.
    # Fit on the full dataset before serialization.
    reg.fit(dataset, labels)
    mkdirs_if_not_exist('./model')
    joblib.dump(reg, "./model/BayesRidge_SCUT-FBP.pkl")
    print('The regression model has been persisted...')
def train_and_eval_eccv_with_align_or_lean(aligned_train, aligned_test,
                                           lean_train, lean_test):
    """
    Train and evaluate two Bayesian Ridge regressors on the ECCV dataset:
    one for frontal (aligned) faces and one for side (lean) faces. Reports
    combined and per-subset MAE / RMSE / Pearson correlation.

    :param aligned_train: dict of aligned-face training samples (key -> score)
    :param aligned_test: dict of aligned-face test samples (key -> score)
    :param lean_train: dict of lean-face training samples (key -> score)
    :param lean_test: dict of lean-face test samples (key -> score)
    :return:
    """

    def _deep_feature(image_key):
        # Feature vector: conv5_2 and conv5_3 activations concatenated.
        return np.concatenate((extract_feature(image_key, layer_name="conv5_2"),
                               extract_feature(image_key, layer_name="conv5_3")),
                              axis=0)

    def _collect(split, filenames=None, attributes=None, tag=None):
        # Turn a {filename: score} dict into (features, labels). For test
        # splits, also record filenames and the subset tag for the report.
        vectors, scores = [], []
        for k, v in split.items():
            if filenames is not None:
                filenames.append(k)
                attributes.append(tag)
            vectors.append(_deep_feature(k))
            scores.append(v)
        return vectors, scores

    def _metrics(gt, pred):
        # (MAE, RMSE, Pearson correlation), each rounded to 4 decimals.
        mae = round(mean_absolute_error(np.array(gt), np.array(pred)), 4)
        rmse = round(
            math.sqrt(mean_squared_error(np.array(gt), np.array(pred))), 4)
        corr = round(np.corrcoef(gt, pred)[0, 1], 4)
        return mae, rmse, corr

    test_filenames = []
    attribute_list = []
    aligned_train_vec, aligned_train_label = _collect(aligned_train)
    aligned_test_vec, aligned_test_label = _collect(
        aligned_test, test_filenames, attribute_list, 'aligned')
    lean_train_vec, lean_train_label = _collect(lean_train)
    lean_test_vec, lean_test_label = _collect(
        lean_test, test_filenames, attribute_list, 'lean')

    # One independent regressor per face orientation.
    aligned_reg = linear_model.BayesianRidge()
    lean_reg = linear_model.BayesianRidge()
    aligned_reg.fit(np.array(aligned_train_vec), np.array(aligned_train_label))
    lean_reg.fit(np.array(lean_train_vec), np.array(lean_train_label))

    mkdirs_if_not_exist('./model')
    joblib.dump(aligned_reg, './model/eccv_fbp_dcnn_bayes_reg_aligned.pkl')
    joblib.dump(lean_reg, './model/eccv_fbp_dcnn_bayes_reg_lean.pkl')

    aligned_predicted_label = aligned_reg.predict(np.array(aligned_test_vec))
    lean_predicted_label = lean_reg.predict(np.array(lean_test_vec))

    # Combined evaluation: aligned predictions first, then lean — same order
    # as test_filenames/attribute_list above.
    predicted_label = aligned_predicted_label.tolist() + \
        lean_predicted_label.tolist()
    test_label = aligned_test_label + lean_test_label

    mae_lr, rmse_lr, pc = _metrics(test_label, predicted_label)
    aligned_mae_lr, aligned_rmse_lr, aligned_pc = _metrics(
        aligned_test_label, aligned_predicted_label.tolist())
    lean_mae_lr, lean_rmse_lr, lean_pc = _metrics(
        lean_test_label, lean_predicted_label)

    print('===============The Mean Absolute Error of Model is {0}===================='
          .format(mae_lr))
    print('===============The Root Mean Square Error of Model is {0}===================='
          .format(rmse_lr))
    print('===============The Pearson Correlation of Model is {0}===================='
          .format(pc))

    # Timestamp tags all four result files of this run.
    mkdirs_if_not_exist('./result')
    csv_file_tag = time.time()
    pd.DataFrame([mae_lr, rmse_lr, pc]).to_csv(
        './result/%f_all.csv' % csv_file_tag, index=False)
    pd.DataFrame([aligned_mae_lr, aligned_rmse_lr, aligned_pc]).to_csv(
        './result/%f_aligned.csv' % csv_file_tag, index=False)
    pd.DataFrame([lean_mae_lr, lean_rmse_lr, lean_pc]).to_csv(
        './result/%f_lean.csv' % csv_file_tag, index=False)
    out_result(test_filenames, predicted_label, test_label, attribute_list,
               './result/%f_detail.csv' % csv_file_tag)
def train(train_set, test_set, train_label, test_label, data_name,
          test_filenames, dimension_reduce=False, distribute_training=False):
    """
    Train a Bayesian Ridge regressor, evaluate it on the test set, and
    serialize it into a binary pickle file.

    :param train_set: training feature vectors
    :param test_set: test feature vectors
    :param train_label: training scores
    :param test_label: test scores
    :param data_name: dataset name used to tag the model/result files
    :param test_filenames: filenames aligned with the test vectors
    :param dimension_reduce: whether to project features with PCA (128 dims)
    :param distribute_training: whether to wrap the arrays as dask arrays
    :return:
    :Version:1.0
    """
    train_set = np.array(train_set)
    test_set = np.array(test_set)
    print("The shape of training set before dimension reduction is {0}".format(
        train_set.shape))
    print("The shape of test set before dimension reduction is {0}".format(
        test_set.shape))
    print('Use distribute training ? >> {0}'.format(distribute_training))
    reg = linear_model.BayesianRidge()

    if dimension_reduce:
        pca = PCA(n_components=128)
        train_set = pca.fit_transform(train_set)
        # BUG FIX: the projection must be fitted on the training set only.
        # Previously fit_transform was also called on the test set, which
        # fitted a *different* basis (test leakage + incompatible spaces).
        test_set = pca.transform(test_set)

    print("The shape of training set after dimension reduction is {0}".format(
        train_set.shape))
    print("The shape of test set after dimension reduction is {0}".format(
        test_set.shape))

    if not distribute_training:
        reg.fit(train_set, train_label)
    else:
        # NOTE(review): dask arrays are handed straight to sklearn's fit;
        # presumably relies on dask-ml/joblib integration — confirm upstream.
        train_set, test_set, train_label, test_label = da.array(
            train_set), da.array(test_set), da.array(train_label), da.array(
                test_label)
        reg.fit(train_set, train_label)

    predicted_label = reg.predict(test_set)
    mae_lr = round(mean_absolute_error(test_label, predicted_label), 4)
    rmse_lr = round(
        math.sqrt(mean_squared_error(test_label, predicted_label)), 4)
    pc = round(np.corrcoef(test_label, predicted_label)[0, 1], 4)

    print('===============The Mean Absolute Error of Model is {0}===================='
          .format(mae_lr))
    print('===============The Root Mean Square Error of Model is {0}===================='
          .format(rmse_lr))
    print('===============The Pearson Correlation of Model is {0}===================='
          .format(pc))

    mkdirs_if_not_exist('./model')
    joblib.dump(reg, './model/BayesRidge_%s.pkl' % data_name)
    print('The regression model has been persisted...')

    mkdirs_if_not_exist('./result')
    out_result(test_filenames,
               predicted_label,
               test_label,
               None,
               path='./result/Pred_GT_{0}.csv'.format(data_name))
    pd.DataFrame([mae_lr, rmse_lr, pc]).to_csv(
        './result/%s.csv' % data_name, index=False)
    print('The result csv file has been generated...')
def train_combinator(model, dataloaders, criterion, optimizer, scheduler,
                     num_epochs, inference=False):
    """
    Train (or run inference with) the combinator network, which emits a
    regression score and a classification distribution per image. Selects the
    best checkpoint by validation Pearson correlation.

    :param model: torch.nn.Module with forward -> (regression, classification)
    :param dataloaders: dict with 'train' / 'val' / 'test' DataLoaders
    :param criterion: loss taking (reg_out, scores, softmax, predicted, classes)
    :param optimizer: optimizer over model parameters
    :param scheduler: LR scheduler
    :param num_epochs: number of training epochs
    :param inference: if True, skip training and evaluate on the test split
    :return:
    """
    print(model)
    model_name = model.__class__.__name__
    model = model.float()
    device = torch.device(
        'cuda:0' if torch.cuda.is_available() and cfg['use_gpu'] else 'cpu')
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    # BUG FIX: previously used dataloaders[x].__len__() (number of *batches*),
    # but running_loss accumulates loss.item() * batch_size, so epoch_loss was
    # mis-normalized. Count samples instead, matching train_model.
    dataset_sizes = {
        x: len(dataloaders[x].dataset)
        for x in ['train', 'val', 'test']
    }

    if not inference:
        print('Start training %s...' % model_name)
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_record = {'pc': 0.0, 'epoch': 0}

        for epoch in range(num_epochs):
            print('-' * 100)
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    # Pre-1.1.0 torch expects scheduler.step() before the
                    # optimizer steps of the epoch; 1.1.0+ expects it after.
                    if torch.__version__ <= '1.1.0':
                        scheduler.step()
                    model.train()  # Set model to training mode
                else:
                    model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                epoch_gt = []
                epoch_pred = []

                # Iterate over data.
                for i, data in enumerate(dataloaders[phase], 0):
                    inputs = data['image'].to(device)
                    scores = data['score'].to(device).float()
                    classes = data['class'].to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward; track history only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        regression_output, classification_output = model(
                            inputs)
                        regression_output = regression_output.view(-1)
                        _, predicted = torch.max(classification_output.data, 1)
                        loss = criterion(regression_output, scores,
                                         F.softmax(classification_output, 1),
                                         predicted, classes)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    epoch_gt += scores.to(
                        'cpu').detach().numpy().ravel().tolist()
                    epoch_pred += regression_output.to(
                        'cpu').detach().numpy().ravel().tolist()

                if phase == 'train':
                    if torch.__version__ >= '1.1.0':
                        scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_mae = round(
                    mean_absolute_error(
                        np.array(epoch_gt).flatten(),
                        np.array(epoch_pred).flatten()), 4)
                epoch_rmse = round(
                    np.math.sqrt(
                        mean_squared_error(
                            np.array(epoch_gt).flatten(),
                            np.array(epoch_pred).flatten())), 4)
                epoch_pc = round(
                    np.corrcoef(
                        np.array(epoch_gt).flatten(),
                        np.array(epoch_pred).flatten())[0, 1], 4)
                print('[{}] Loss: {:.4f} MAE: {} RMSE: {} PC: {}'.format(
                    phase, epoch_loss, epoch_mae, epoch_rmse, epoch_pc))

                # deep copy the model when validation PC improves
                if phase == 'val' and epoch_pc > best_record['pc']:
                    best_record['pc'] = epoch_pc
                    best_record['epoch'] = epoch
                    best_model_wts = copy.deepcopy(model.state_dict())

                    model.load_state_dict(best_model_wts)
                    mkdirs_if_not_exist('./model')
                    torch.save(
                        model.state_dict(),
                        './model/{0}_best_epoch-{1}.pth'.format(
                            model_name, epoch))

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('+' * 100)
        print('Epoch {} achieves best PC: {:4f}'.format(
            best_record['epoch'], best_record['pc']))
        print('+' * 100)

        # load best model weights
        model.load_state_dict(best_model_wts)
        mkdirs_if_not_exist('./model')
        torch.save(model.state_dict(), './model/%s.pth' % model_name)
    else:
        print('Start testing %s...' % model_name)
        model.load_state_dict(
            torch.load(os.path.join('./model/%s.pth' % model_name)))
        model.eval()

        total = 0
        y_pred = []
        y_true = []
        filenames = []

        with torch.no_grad():
            for data in dataloaders['test']:
                images = data['image'].to(device)
                filenames += data['filename']

                regression_output, classification_output = model(images)
                probs = F.softmax(classification_output, dim=1)
                # BUG FIX: np.float was removed from NumPy (>=1.24); it was an
                # alias of builtin float, i.e. float64.
                cls = torch.from_numpy(
                    np.array([[1.0, 2.0, 3.0, 4.0, 5.0]],
                             dtype=np.float64).T).to(device)  # for SCUT-FBP*
                # cls = torch.from_numpy(np.array([[1.0, 2.0, 3.0]], dtype=np.float64).T).to(device)  # for HotOrNot
                # expectation = torch.matmul(probs, cls.float()).view(-1).view(-1, 1)
                # output = (2 * regression_output + expectation) / 3
                output = regression_output

                total += images.size(0)
                y_pred += output.to("cpu").detach().numpy().tolist()
                y_true += data['score'].detach().numpy().tolist()

        mae = round(
            mean_absolute_error(
                np.array(y_true).ravel(),
                np.array(y_pred).ravel()), 4)
        rmse = round(
            np.math.sqrt(
                mean_squared_error(
                    np.array(y_true).ravel(),
                    np.array(y_pred).ravel())), 4)
        pc = round(
            np.corrcoef(np.array(y_true).ravel(),
                        np.array(y_pred).ravel())[0, 1], 4)
        print('===============The Mean Absolute Error of {0} is {1}===================='
              .format(model_name, mae))
        print('===============The Root Mean Square Error of {0} is {1}===================='
              .format(model_name, rmse))
        print('===============The Pearson Correlation of {0} is {1}===================='
              .format(model_name, pc))

        col = ['filename', 'gt', 'pred']
        df = pd.DataFrame([[filenames[i], y_true[i], y_pred[i][0]]
                           for i in range(len(y_true))],
                          columns=col)
        df.to_excel("./{0}.xlsx".format(model_name),
                    sheet_name='Output',
                    index=False)
        print('Output Excel has been generated~')
def train_model(model, dataloaders, criterion, optimizer, scheduler,
                num_epochs, inference=False):
    """
    Train (or run inference with) an image-reconstruction model (autoencoder
    style: the loss compares the output against the input image). Selects the
    best checkpoint by lowest validation L2 distance.

    :param model: torch.nn.Module whose forward returns a reconstruction
    :param dataloaders: dict with 'train' / 'val' / 'test' DataLoaders
    :param criterion: reconstruction loss taking (outputs, inputs)
    :param optimizer: optimizer over model parameters
    :param scheduler: LR scheduler
    :param num_epochs: number of training epochs
    :param inference: if True, skip training and evaluate on the test split
    :return:
    """
    print(model)
    model_name = model.__class__.__name__
    model = model.float()
    device = torch.device(
        'cuda:0' if torch.cuda.is_available() and cfg['use_gpu'] else 'cpu')
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    dataset_sizes = {
        x: len(dataloaders[x].dataset)
        for x in ['train', 'val', 'test']
    }
    for k, v in dataset_sizes.items():
        print('Dataset size of {0} is {1}...'.format(k, v))

    if not inference:
        print('Start training %s...' % model_name)
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_ssim = 0.0
        best_cosine_similarity = 0.0
        best_l2_dis = float('inf')

        for epoch in range(num_epochs):
            print('-' * 100)
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    # Pre-1.1.0 torch steps the scheduler before the epoch;
                    # 1.1.0+ steps it after (see the post-loop step below).
                    if torch.__version__ <= '1.1.0':
                        scheduler.step()
                    model.train()  # Set model to training mode
                else:
                    model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                running_ssim = 0.0
                running_l2_dis = 0.0
                running_cos_sim = 0.0

                # Iterate over data.
                for i, data in enumerate(dataloaders[phase], 0):
                    inputs = data['image'].to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward; track history only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        loss = criterion(outputs, inputs)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.sum().backward()
                            optimizer.step()

                    # statistics
                    # BUG FIX: previously accumulated the graph-attached loss
                    # tensor, keeping every batch's autograd graph (and GPU
                    # memory) alive for the whole epoch; .item() detaches.
                    running_loss += loss.sum().item() * inputs.size(0)
                    running_cos_sim += 1 - spatial.distance.cosine(
                        outputs.to('cpu').detach().numpy().ravel(),
                        inputs.to('cpu').detach().numpy().ravel())
                    running_l2_dis += np.linalg.norm(
                        outputs.to('cpu').detach().numpy().ravel() -
                        inputs.to('cpu').detach().numpy().ravel())
                    # detach + float() for the same reason as the loss above.
                    running_ssim += float(ssim.ssim(outputs.detach(), inputs))

                if phase == 'train':
                    if torch.__version__ >= '1.1.0':
                        scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_l2_dis = running_l2_dis / dataset_sizes[phase]
                epoch_cos_sim = running_cos_sim / dataset_sizes[phase]
                epoch_ssim = running_ssim / dataset_sizes[phase]
                print(
                    '{} Loss: {:.4f} L2_Distance: {} Cosine_Similarity: {} SSIM: {}'
                    .format(phase, epoch_loss, epoch_l2_dis, epoch_cos_sim,
                            epoch_ssim))

                # deep copy the model when validation L2 distance improves
                if phase == 'val' and epoch_l2_dis <= best_l2_dis:
                    best_l2_dis = epoch_l2_dis
                    best_model_wts = copy.deepcopy(model.state_dict())

                    model.load_state_dict(best_model_wts)
                    model_path_dir = './model'
                    mkdirs_if_not_exist(model_path_dir)
                    # Unwrap DataParallel so the checkpoint loads on 1 GPU too.
                    state_dict = model.module.state_dict(
                    ) if torch.cuda.device_count() > 1 else model.state_dict()
                    torch.save(
                        state_dict,
                        './model/{0}_best_epoch-{1}.pth'.format(
                            model_name, epoch))

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best L2_Distance: {:4f}'.format(best_l2_dis))

        # load best model weights
        model.load_state_dict(best_model_wts)
        model_path_dir = './model'
        mkdirs_if_not_exist(model_path_dir)
        state_dict = model.module.state_dict(
        ) if torch.cuda.device_count() > 1 else model.state_dict()
        torch.save(state_dict, './model/%s.pth' % model_name)
    else:
        print('Start testing %s...' % model.__class__.__name__)
        model.load_state_dict(
            torch.load(os.path.join('./model/%s.pth' % model_name)))
        model.eval()

        cos_sim, l2_dist, ssim_ = 0.0, 0.0, 0.0
        with torch.no_grad():
            for data in dataloaders['test']:
                images = data['image'].to(device)
                outputs = model(images)

                cos_sim += 1 - spatial.distance.cosine(
                    outputs.to('cpu').detach().numpy().ravel(),
                    images.to('cpu').detach().numpy().ravel())
                l2_dist += np.linalg.norm(
                    outputs.to('cpu').detach().numpy().ravel() -
                    images.to('cpu').detach().numpy().ravel())
                ssim_ += float(ssim.ssim(outputs, images))

        print('*' * 200)
        print('Avg L2 Distance of {0} on test set: {1}'.format(
            model_name, l2_dist / dataset_sizes['test']))
        print('Avg CosineSimilarity of {0} on test set: {1}'.format(
            model_name, cos_sim / dataset_sizes['test']))
        print('Avg SSIM of {0} on test set: {1}'.format(
            model_name, ssim_ / dataset_sizes['test']))
        print('*' * 200)