import copy
import os

import torch
import tqdm
from sklearn import metrics

# Project-internal modules (cross_val, conversions, tensorify, statistics,
# helper, plotting, write_utils, definitions, list_mean) are assumed to be
# importable from this repo.

# precision_recall_fscore_support returns (precision, recall, f-score, support).
F_SCORE_INDEX = 2


def search_multitask_auto_encoder(hyper_parameters_list, data: dict):
    splits = cross_val.get_k_fod_cross_val_splits_stratified_by_students(data)
    student_list = conversions.extract_distinct_student_idsfrom_keys(
        data['data'].keys())
    tensorified_data = tensorify.tensorify_data_gru_d(copy.deepcopy(data),
                                                      torch.cuda.is_available())
    final_scores_for_each_config = []

    print("Label Distribution")
    print(statistics.get_train_test_val_label_counts_from_raw_data(data))

    for model_params_no, model_params in enumerate(hyper_parameters_list):
        print("###################### Param Config No: {} ########################"
              .format(model_params_no))
        print("Params: ", model_params)

        (use_histogram, autoencoder_bottle_neck_feature_size,
         autoencoder_num_layers, alpha, beta, decay, num_features,
         num_covariates, shared_hidden_layer_size, user_dense_layer_hidden_size,
         num_classes, learning_rate, n_epochs, shared_layer_dropout_prob,
         user_head_dropout_prob, class_weights,
         device) = helper.get_params_from_model(model_params, data)

        best_val_scores = []

        for split_no, split in enumerate(splits):
            print("Split {}".format(split_no))

            best_split_score = -1
            tensorified_data['train_ids'] = split["train_ids"]
            tensorified_data['val_ids'] = split["val_ids"]
            tensorified_data['test_ids'] = []

            model, reconstruction_criterion, classification_criterion, optimizer = \
                helper.init_multitask_autoencoder_learner(
                    num_features, autoencoder_bottle_neck_feature_size,
                    autoencoder_num_layers, shared_hidden_layer_size,
                    user_dense_layer_hidden_size, num_classes, num_covariates,
                    shared_layer_dropout_prob, user_head_dropout_prob,
                    learning_rate, decay, class_weights, student_list)

            total_loss_over_epochs, scores_over_epochs = \
                plotting.get_empty_stat_over_n_epoch_dictionaries()
            reconstruction_loss_over_epochs = copy.deepcopy(total_loss_over_epochs)
            classification_loss_over_epochs = copy.deepcopy(total_loss_over_epochs)

            for epoch in tqdm.tqdm(range(n_epochs)):
                (train_total_loss, train_total_reconstruction_loss,
                 train_total_classification_loss, train_labels, train_preds,
                 train_users), (val_total_loss, val_total_reconstruction_loss,
                                val_total_classification_loss, val_labels,
                                val_preds, val_users) = helper.train_for_one_epoch(
                    tensorified_data, num_classes, model,
                    reconstruction_criterion, classification_criterion, device,
                    optimizer, alpha, beta, use_histogram)

                ######## Appending losses ########
                total_loss_over_epochs['train_loss'].append(train_total_loss)
                total_loss_over_epochs['val_loss'].append(val_total_loss)
                reconstruction_loss_over_epochs['train_loss'].append(
                    train_total_reconstruction_loss)
                reconstruction_loss_over_epochs['val_loss'].append(
                    val_total_reconstruction_loss)
                classification_loss_over_epochs['train_loss'].append(
                    train_total_classification_loss)
                classification_loss_over_epochs['val_loss'].append(
                    val_total_classification_loss)

                ######## Appending Metrics ########
                train_label_list = conversions.tensor_list_to_int_list(train_labels)
                train_pred_list = conversions.tensor_list_to_int_list(train_preds)
                val_label_list = conversions.tensor_list_to_int_list(val_labels)
                val_pred_list = conversions.tensor_list_to_int_list(val_preds)

                train_scores = metrics.precision_recall_fscore_support(
                    train_label_list, train_pred_list,
                    average='weighted')[F_SCORE_INDEX]
                val_scores = metrics.precision_recall_fscore_support(
                    val_label_list, val_pred_list,
                    average='weighted')[F_SCORE_INDEX]

                scores_over_epochs['train_scores'].append(train_scores)
                scores_over_epochs['val_scores'].append(val_scores)

                # Keep the best weighted F1 seen on this split so far.
                if val_scores > best_split_score:
                    best_split_score = val_scores
            best_val_scores.append(best_split_score)

        avg_val_score = list_mean(best_val_scores)
        final_scores_for_each_config.append((avg_val_score, model_params))
        print("Average score for current configuration: {}".format(avg_val_score))

    grid_search_details_file_path = os.path.join(definitions.DATA_DIR,
                                                 "grid_search_details.pkl")
    write_utils.data_structure_to_pickle(final_scores_for_each_config,
                                         grid_search_details_file_path)
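
# Minimal usage sketch. `make_param_grid` and `load_raw_data` are hypothetical
# placeholders for whatever produces the hyper-parameter configs and the raw
# `data` dict in this repo; each config must unpack via
# helper.get_params_from_model.
#
#   hyper_parameters_list = make_param_grid()
#   data = load_raw_data()
#   search_multitask_auto_encoder(hyper_parameters_list, data)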
shared_layer_dropout_prob = 0.00
user_head_dropout_prob = 0.00
# Select the GPU only when one is actually available.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
class_weights = torch.tensor([0.6456, 0.5635, 1.0000])

print("Num Features:", num_features)
print("Device: ", device)
print("Num_covariates:", num_covariates)
print("Class Weights:", class_weights)
print("alpha: {} Beta: {}".format(alpha, beta))

cuda_enabled = torch.cuda.is_available()
tensorified_data = tensorify.tensorify_data_gru_d(deepcopy(data), cuda_enabled)
student_list = conversions.extract_distinct_student_idsfrom_keys(
    data['data'].keys())

# K fold Cross val score.
for student_filter_ids, learning_rate in student_filter_list:
    splits = cross_val.get_k_fod_cross_val_splits_stratified_by_students(
        data=data, n_splits=5, filter_by_student_ids=student_filter_ids)

    split_val_scores = []
    best_score_epoch_log = []

    for split_no, split in enumerate(splits):
        best_split_score = -1
        epoch_at_best_score = 0
def train_gru():
    # Data
    student_list = GRU_D_CONFIG[definitions.STUDENT_LIST_CONFIG_KEY]
    data = helper.get_data_for_gru_d(*student_list)

    # Parameter Setup
    output_size = GRU_D_CONFIG['classes']
    first_key = next(iter(data['data'].keys()))
    num_features = len(data['data'][first_key][0])
    hidden_size = num_features
    num_layers = GRU_D_CONFIG['num_layers']
    x_mean = GRU_D_CONFIG['x_mean']
    learning_rate = GRU_D_CONFIG['learning_rate']
    n_epochs = GRU_D_CONFIG['epochs']

    # CUDA Enabled.
    cuda_enabled = torch.cuda.device_count() > 0
    print("CUDA Status: ", cuda_enabled, end="\n\n")

    # Data to tensors
    data = tensorify.tensorify_data_gru_d(data, cuda_enabled)

    model, criterion, optimizer = initialize_gru(num_features, hidden_size,
                                                 output_size, x_mean,
                                                 num_layers, learning_rate)

    loss_over_epochs, scores_over_epochs = \
        plotting.get_empty_stat_over_n_epoch_dictionaries()

    for epoch in range(1, n_epochs + 1):
        print("xxxxxxxxxxxxxx epoch: {} xxxxxxxxxxxxxx".format(epoch))

        train_loss, train_labels, train_preds = trainer.evaluate_set(
            data, 'train_ids', model, criterion, optimizer)
        val_loss, val_labels, val_preds = trainer.evaluate_set(
            data, 'val_ids', model, criterion)
        test_loss, test_labels, test_preds = trainer.evaluate_set(
            data, 'test_ids', model, criterion)

        loss_over_epochs['train_loss'].append(train_loss)
        loss_over_epochs['val_loss'].append(val_loss)
        loss_over_epochs['test_loss'].append(test_loss)

        train_scores, val_scores, test_scores = scoring.get_precission_recall_f_scores(
            train_labels=train_labels, train_preds=train_preds,
            val_labels=val_labels, val_preds=val_preds,
            test_labels=test_labels, test_preds=test_preds)

        scores_over_epochs['train_scores'].append(train_scores)
        scores_over_epochs['val_scores'].append(val_scores)
        scores_over_epochs['test_scores'].append(test_scores)

        # Plot every 10 Epochs.
        if epoch % 10 == 0 and not CLUSTER_MODE:
            plotting.plot_score_over_n_epochs(
                scores_over_epochs, score_type='f1',
                file_path="./gru_d/f1_at_epoch_{}".format(epoch))
            plotting.plot_loss_over_n_epochs(
                loss_over_epochs,
                file_path="./gru_d/loss_at_epoch_{}".format(epoch))

        # This epoch's losses.
        print("Train Loss: {} Val Loss: {} Test Loss: {}".format(
            train_loss, val_loss, test_loss))

        val_precision, val_recall, val_f1, _ = val_scores
        print("Precision: {} Recall: {} F1 Score: {}".format(
            val_precision, val_recall, val_f1))
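
# Minimal entry point, assuming this module is run directly as a script;
# GRU_D_CONFIG, definitions, helper, tensorify, trainer, scoring, plotting,
# initialize_gru and CLUSTER_MODE are module-level names this file already
# relies on.
if __name__ == "__main__":
    train_gru()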