def naive_bayes_classification(PARAMS, train_data, train_label, test_data, test_label):
    NB_model = GaussianNB()
    start = time.process_time()

    ''' Checking if model is already available '''
    NB_ModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split('/')[-1].split('.')[0] + '.pkl'
    if not os.path.exists(NB_ModelFileName):
        NB_model.fit(train_data, train_label.flatten())
        if PARAMS['save_flag']:
            misc.save_obj(NB_model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    else:
        NB_model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    trainingTimeTaken = time.process_time() - start

    start = time.process_time()
    PtdLabels_train = NB_model.predict(train_data)
    PtdLabels_test = NB_model.predict(test_data)
    # Predictions_train = NB_model.predict_proba(train_data)
    Predictions_test = NB_model.predict_proba(test_data)
    accuracy_train = np.mean(PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)
    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    testingTimeTaken = time.process_time() - start

    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])

    Train_Params = {
        'model': NB_model,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
        }
    return Train_Params, Test_Params
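# Illustrative sketch (not part of the repo API): the fit-once, cache-to-disk
# pattern used by naive_bayes_classification() above, reduced to plain pickle
# and scikit-learn. The file name and synthetic data here are hypothetical.
def _nb_cache_demo(model_path='nb_demo.pkl'):
    import pickle
    from sklearn.naive_bayes import GaussianNB
    X = np.random.randn(100, 4)
    y = (X[:, 0] > 0).astype(int)
    if not os.path.exists(model_path):
        # Train and persist the model on the first run
        model = GaussianNB().fit(X, y)
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
    else:
        # Subsequent runs reuse the cached model
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
    return np.mean(model.predict(X) == y) * 100  # accuracy in percent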
def test_NB_ensemble(PARAMS, All_Test_Params):
    PtdLabels_Ensemble = []
    GroundTruth_Ensemble = []
    Predictions_Ensemble = np.empty([])
    featCount = 0
    testingTimeTaken = 0
    start = time.process_time()
    for featName in All_Test_Params.keys():
        if 'PtdLabels' not in All_Test_Params[featName].keys():
            continue
        Test_Params = All_Test_Params[featName]
        if featCount == 0:
            Predictions_Ensemble = np.array(Test_Params['PtdLabels'], ndmin=2).T
            GroundTruth_Ensemble = Test_Params['GroundTruth']
        else:
            # print('Predictions_Ensemble: ', np.shape(Predictions_Ensemble), np.shape(Test_Params['PtdLabels']))
            Predictions_Ensemble = np.append(Predictions_Ensemble, np.array(Test_Params['PtdLabels'], ndmin=2).T, axis=1)
        featCount += 1
    # Majority vote across classifiers; flatten so the label vector is 1-D
    # like the ground truth before computing the confusion matrix
    PtdLabels_Ensemble, mode_count = scipy.stats.mode(Predictions_Ensemble, axis=1)
    PtdLabels_Ensemble = np.array(PtdLabels_Ensemble).flatten()
    ConfMat_Ensemble, fscore_Ensemble = misc.getPerformance(PtdLabels_Ensemble, GroundTruth_Ensemble)
    accuracy_Ensemble = np.sum(np.diag(ConfMat_Ensemble)) / np.sum(ConfMat_Ensemble)
    testingTimeTaken = time.process_time() - start
    print('NB Ensemble: ', accuracy_Ensemble, fscore_Ensemble)
    Ensemble_Test_Params = {
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        }
    return Ensemble_Test_Params
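# Illustrative sketch (hypothetical data): the majority-voting rule used by
# test_NB_ensemble() above. Each column holds one classifier's labels;
# scipy.stats.mode along axis=1 picks the per-sample majority label.
def _majority_vote_demo():
    votes = np.array([[0, 0, 1],
                      [1, 1, 1],
                      [2, 0, 2]])            # 3 samples x 3 classifiers
    labels, counts = scipy.stats.mode(votes, axis=1)
    return np.array(labels).flatten()        # -> array([0, 1, 2])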
def test_model(PARAMS, data_dict, Train_Params):
    loss = 0
    performance = 0
    testingTimeTaken = 0
    PtdLabels = []
    test_data = data_dict['test_data']
    start = time.process_time()
    if not PARAMS['data_generator']:
        OHE_testLabel = to_categorical(data_dict['test_label'])
        loss, performance = Train_Params['model'].evaluate(x=test_data, y=OHE_testLabel)
        Predictions = Train_Params['model'].predict(test_data)
        PtdLabels = np.argmax(Predictions, axis=1)
        GroundTruth = data_dict['test_label']
    else:
        class_wise_numFiles = [len(files[0]) for files in Train_Params['FL_Ret']['file_list_test'].values()]
        totTestFiles = np.sum(class_wise_numFiles)
        SPE = int(totTestFiles / Train_Params['batch_size'])
        loss, performance = Train_Params['model'].evaluate_generator(
            generator(Train_Params['FL_Ret']['file_list_test'], PARAMS, Train_Params['batch_size']),
            steps=SPE, verbose=1)
        PtdLabels = []
        GroundTruth = []
        count = -1
        Predictions = np.empty([])
        file_keys = [key for key in Train_Params['FL_Ret']['file_list_test'].keys()]
        for clNum in range(len(file_keys)):
            files = Train_Params['FL_Ret']['file_list_test'][file_keys[clNum]][0]
            for fl in files:
                count += 1
                file_name = fl
                batchData, batchLabel = generator_test(file_name, PARAMS, clNum)
                pred = Train_Params['model'].predict(x=batchData)
                pred_lab = np.argmax(pred, axis=1)
                PtdLabels.extend(pred_lab)
                GroundTruth.extend(batchLabel)
                if np.size(Predictions) <= 1:
                    Predictions = pred
                else:
                    Predictions = np.append(Predictions, pred, 0)
    testingTimeTaken = time.process_time() - start
    print('Time taken for model testing: ', testingTimeTaken)
    ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)
    return loss, performance, testingTimeTaken, ConfMat, fscore, PtdLabels, Predictions
def test_model(PARAMS, Train_Params):
    start = time.process_time()
    PtdLabels = []
    GroundTruth = []
    Predictions = np.empty([])
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}
    startTime = time.process_time()
    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        for fl in files:
            fName = PARAMS['test_folder'] + '/' + PARAMS['featName'] + '/' + classname + '/' + fl
            if not os.path.exists(fName):
                continue
            count += 1
            batchData, batchLabel = generator_test(PARAMS, PARAMS['featName'], fl, clNum)
            endTime = time.process_time()
            print('Data loading time: ', endTime - startTime)
            startTime = time.process_time()
            pred = Train_Params['model'].predict(x=batchData)
            print('Prediction time: ', time.process_time() - startTime)
            pred_lab = np.argmax(pred, axis=1)
            PtdLabels.extend(pred_lab)
            GroundTruth.extend(batchLabel.tolist())
            print('pred_lab: ', np.sum(pred_lab == 0), np.sum(pred_lab == 1))
            print('ground_truth: ', np.sum(batchLabel == 0), np.sum(batchLabel == 1))
            if np.size(Predictions) <= 1:
                Predictions = pred
            else:
                Predictions = np.append(Predictions, pred, 0)
            print(PARAMS['classes'][clNum], fl, np.shape(batchData), ' acc=',
                  np.round(np.sum(pred_lab == batchLabel) * 100 / len(batchLabel), 2))
    testingTimeTaken = time.process_time() - start
    print('Time taken for model testing: ', testingTimeTaken)
    ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)
    return ConfMat, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken
def performance_dump(PARAMS, PtdLabels, GroundTruths, labels, info='', fName_suffix=''):
    ConfMat, precision, recall, fscore = misc.getPerformance(PtdLabels, GroundTruths, labels)
    accuracy = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
    print('Total data performance: ', fscore)
    print(ConfMat)
    if len(labels) == 2:
        classnames = ['neg', 'pos']
    else:
        classnames = ['mu', 'sp', 'spmu']
    res_dict = {}
    res_dict['0'] = 'feature_name:' + PARAMS['featName'][PARAMS['Model']]
    res_dict['1'] = 'model:' + PARAMS['Model']
    ln = 2
    if not info == '':
        res_dict[str(ln)] = info
        ln += 1
    res_dict[str(ln)] = 'loss:--'
    ln += 1
    res_dict[str(ln)] = 'accuracy:' + str(accuracy)
    ln += 1
    # One precision/recall/F1 triple per class
    for i in range(len(labels)):
        res_dict[str(ln)] = 'Prec_' + classnames[i] + ':' + str(precision[i])
        ln += 1
        res_dict[str(ln)] = 'Rec_' + classnames[i] + ':' + str(recall[i])
        ln += 1
        res_dict[str(ln)] = 'F1_' + classnames[i] + ':' + str(fscore[i])
        ln += 1
    res_dict[str(ln)] = 'F1_avg:' + str(np.round(np.mean(fscore), 4))
    misc.print_results(PARAMS, fName_suffix, res_dict)
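# Illustrative sketch: the accuracy computation used in performance_dump(),
# i.e. the trace of the confusion matrix divided by its total count.
def _confmat_accuracy_demo():
    ConfMat = np.array([[40, 10],
                        [5, 45]])  # rows: ground truth, cols: predicted
    return np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)  # -> 0.85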
def perform_testing(PARAMS, Train_Params):
    metrics, metric_names, testingTimeTaken = test_model_generator(PARAMS, Train_Params)
    Test_Params = {
        'metrics': metrics,
        'metric_names': metric_names,
        'testingTimeTaken': testingTimeTaken,
        }
    ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken = test_model(PARAMS, Train_Params, None)
    Test_Params['testingTimeTaken_annot'] = testingTimeTaken
    Test_Params['ConfMat_annot'] = ConfMat
    Test_Params['precision_annot'] = precision
    Test_Params['recall_annot'] = recall
    Test_Params['fscore_annot'] = fscore
    Test_Params['PtdLabels_test_annot'] = PtdLabels
    Test_Params['Predictions_test_annot'] = Predictions
    Test_Params['GroundTruth_test_annot'] = GroundTruth
    if len(PARAMS['classes']) == 3:
        PtdLabels_All = []
        GroundTruths_All = []
        for target_dB in PARAMS['test_SMR_levels']:
            ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken = test_model(PARAMS, Train_Params, target_dB)
            PtdLabels_All.extend(PtdLabels)
            GroundTruths_All.extend(GroundTruth)
            Test_Params['testingTimeTaken_' + str(target_dB) + 'dB'] = testingTimeTaken
            Test_Params['ConfMat_' + str(target_dB) + 'dB'] = ConfMat
            Test_Params['precision_' + str(target_dB) + 'dB'] = precision
            Test_Params['recall_' + str(target_dB) + 'dB'] = recall
            Test_Params['fscore_' + str(target_dB) + 'dB'] = fscore
            Test_Params['PtdLabels_test_' + str(target_dB) + 'dB'] = PtdLabels
            Test_Params['Predictions_test_' + str(target_dB) + 'dB'] = Predictions
            Test_Params['GroundTruth_test_' + str(target_dB) + 'dB'] = GroundTruth
        labels = [key for key in PARAMS['classes'].keys()]
        ConfMat_All, precision_All, recall_All, fscore_All = misc.getPerformance(PtdLabels_All, GroundTruths_All, labels)
        Test_Params['ConfMat_All'] = ConfMat_All
        Test_Params['precision_All'] = precision_All
        Test_Params['recall_All'] = recall_All
        Test_Params['fscore_All'] = fscore_All
    return Test_Params
def patch_probability_generator(PARAMS, fl, Train_Params):
    startTime = time.process_time()
    labels_sp = []
    labels_mu = []
    pred_opDir = PARAMS['opDir'] + '/__Frame_Predictions_CNN/'
    if not os.path.exists(pred_opDir):
        os.makedirs(pred_opDir)
    result_fName = fl + '_fold' + str(PARAMS['fold']) + '_result'
    n_fft = PARAMS['n_fft'][PARAMS['Model']]
    n_mels = PARAMS['n_mels'][PARAMS['Model']]
    featName = PARAMS['featName'][PARAMS['Model']]
    if not os.path.exists(pred_opDir + result_fName + '.pkl'):
        fName_path = PARAMS['test_path'] + '/features/' + fl + '.npy'
        if not os.path.exists(fName_path):
            return {}
        fv = np.load(fName_path, allow_pickle=True)
        fv = get_featuregram(PARAMS, PARAMS['feature_opDir'], fl, fv, n_fft, n_mels, featName, save_feat=True)
        if 'HarmPerc' not in featName:
            fv = fv.T
            fv = StandardScaler(copy=False).fit_transform(fv)
            fv = fv.T
        else:
            # Standardize the harmonic and percussive halves separately
            nDim = np.shape(fv)[0]
            fv_H = fv[:int(nDim / 2), :]
            fv_H = fv_H.T
            fv_H = StandardScaler(copy=False).fit_transform(fv_H)
            fv_H = fv_H.T
            fv_P = fv[int(nDim / 2):, :]
            fv_P = fv_P.T
            fv_P = StandardScaler(copy=False).fit_transform(fv_P)
            fv_P = fv_P.T
            fv = np.append(fv_H.astype(np.float32), fv_P.astype(np.float32), axis=0)
        nFrames = np.shape(fv)[1]
        annotations_mu, annotations_sp, music_marker, speech_marker = get_annotations(PARAMS['test_path'], fl, nFrames, PARAMS['opDir'])
        pred = np.empty([])
        pred_lab = np.empty([])
        batch_size = 10000
        labels_mu = []
        labels_sp = []
        # for batchStart in range(0, np.shape(fv_patches)[0], batch_size):
        for batchStart in range(0, np.shape(fv)[1], batch_size):
            # batchEnd = np.min([batchStart+batch_size, np.shape(fv_patches)[0]])
            batchEnd = np.min([batchStart + batch_size, np.shape(fv)[1]])
            # fv_patches_temp = fv_patches[batchStart:batchEnd,:]
            fv_temp = fv[:, batchStart:batchEnd]
            music_marker_temp = music_marker[batchStart:batchEnd]
            speech_marker_temp = speech_marker[batchStart:batchEnd]
            print('\tBatch: (', batchStart, batchEnd, ') ', np.shape(fv_temp),
                  ' mu=', np.sum(music_marker_temp), ' sp=', np.sum(speech_marker_temp), end=' ', flush=True)
            fv_patches_temp = get_feature_patches(PARAMS, fv_temp, PARAMS['W'], PARAMS['W_shift_test'], featName)
            labels_mu_patches = cextract_patches(np.array(music_marker_temp, ndmin=2), np.shape(np.array(music_marker_temp, ndmin=2)), PARAMS['W'], PARAMS['W_shift_test']).astype(int)
            labels_mu_temp = ((np.sum(np.squeeze(labels_mu_patches, axis=1), axis=1) / np.shape(labels_mu_patches)[2]) > 0.5).astype(int)
            labels_sp_patches = cextract_patches(np.array(speech_marker_temp, ndmin=2), np.shape(np.array(speech_marker_temp, ndmin=2)), PARAMS['W'], PARAMS['W_shift_test']).astype(int)
            labels_sp_temp = ((np.sum(np.squeeze(labels_sp_patches, axis=1), axis=1) / np.shape(labels_sp_patches)[2]) > 0.5).astype(int)
            if 'Lemaire_et_al' in PARAMS['Model']:
                # TCN input shape=(batch_size, timesteps, ndim)
                fv_patches_temp = np.transpose(fv_patches_temp, axes=(0, 2, 1))
            if PARAMS['signal_type'] == 'music':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(np.array((pred_temp > 0.5).astype(int)), labels_mu_temp)
            elif PARAMS['signal_type'] == 'speech':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(np.array((pred_temp > 0.5).astype(int)), labels_sp_temp)
            pred_lab_temp = np.array(pred_temp > 0.5).astype(int)
            if np.size(pred) <= 1:
                pred = pred_temp
                pred_lab = pred_lab_temp
            else:
                pred = np.append(pred, pred_temp)
                pred_lab = np.append(pred_lab, pred_lab_temp)
            labels_mu.extend(labels_mu_temp)
            labels_sp.extend(labels_sp_temp)
            print(np.shape(fv_patches_temp), np.shape(pred_temp), np.shape(pred), ' acc=', acc, F1)
        if PARAMS['signal_type'] == 'music':
            ConfMat, precision, recall, fscore = misc.getPerformance(pred_lab, labels_mu, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf mu: ', acc, precision, recall, fscore)
        elif PARAMS['signal_type'] == 'speech':
            ConfMat, precision, recall, fscore = misc.getPerformance(pred_lab, labels_sp, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf sp: ', acc, precision, recall, fscore)
        print('\n\n\n')
        probability_genTime = time.process_time() - startTime
        result = {
            'pred': pred,
            'pred_lab': pred_lab,
            'labels_sp': labels_sp,
            'labels_mu': labels_mu,
            'probability_genTime': probability_genTime,
            'ConfMat': ConfMat,
            'precision': precision,
            'recall': recall,
            'fscore': fscore,
            'accuracy': acc,
            }
        misc.save_obj(result, pred_opDir, result_fName)
        print('Test predictions saved!!!')
    else:
        result = misc.load_obj(pred_opDir, result_fName)
    return result
        All_Classifier_Predictions, np.array(Test_Params['PtdLabels_test'], ndmin=2), 0)
    data_dict_Classifier_Part = None
    Train_Params = None
    Test_Params = None
    del data_dict_Classifier_Part
    del Train_Params
    del Test_Params
    if PARAMS['use_GPU']:
        reset_GPU_session()
    PtdLabels_majority_voting = np.argmax(Majority_Voting_Ensemble_Result, axis=1)
    ConfMat_majority_voting, fscore_majority_voting = misc.getPerformance(PtdLabels_majority_voting, data_dict['test_label'])
    print('\n\n\nMajority Voting Ensemble Avg. F1-score: ', np.mean(fscore_majority_voting))
    resultFile = PARAMS['opDir'] + '/Ensemble_performance_' + PARAMS['featName'] + '.csv'
    result_fid = open(resultFile, 'a+', encoding='utf-8')
    # result_fid.write('Majority Voting Ensemble Average=' + str(np.round(fscore_majority_voting[-1],4)) + ' F1-score= ' + str([str(np.round(fscore_majority_voting[i], 2)) for i in range(len(fscore_majority_voting)-1)]) + '\n')
    result_fid.write('Majority Voting Ensemble Average\t' + str(fscore_majority_voting[0]) + '\t' + str(fscore_majority_voting[1]) + '\t' + str(fscore_majority_voting[2]) + '\n')
    result_fid.close()
    kwargs = {
        '0': ':',
        All_PtdLabels.extend(PtdLabels)
        All_GroundTruths.extend(GroundTruths)
        Predictions = None
        PtdLabels = None
        GroundTruths = None
        del Predictions
        del PtdLabels
        del GroundTruths
    # plt.subplot(211)
    # plt.plot(All_GroundTruths)
    # plt.subplot(212)
    # plt.plot(All_PtdLabels)
    # plt.show()
    ConfMat, fscore = misc.getPerformance(All_PtdLabels, All_GroundTruths)
    print('ConfMat: ', ConfMat)
    print('fscore: ', fscore)
    get_segment_level_statistics(All_GroundTruths, All_PtdLabels, PARAMS['fold'], feature_type, PARAMS['opDir'] + '/Segment_Level_Performance.csv')
    compute_segmentation_performance(PARAMS, All_Labels, All_Predictions, [1000, 500], feature_type, win_size=101, plot_fig=True)
    if PARAMS['use_GPU']:
def test_model(PARAMS, Train_Params, target_dB):
    PtdLabels = np.empty([])
    GroundTruth = np.empty([])
    Predictions = np.empty([])
    startTime = time.process_time()
    if target_dB is None:
        # class_labels = {PARAMS['classes'][key]:key for key in PARAMS['classes'].keys()}
        for classname in ['music', 'speech']:
            files = PARAMS['test_files'][classname]
            fl_count = 0
            for fl in files:
                fl_count += 1
                fName = PARAMS['folder'] + '/' + classname + '/' + fl
                if not os.path.exists(fName):
                    continue
                if classname == 'speech':
                    batchData, batchLabel = test_file_wise_generator(PARAMS, fName, '', None)
                    pred = Train_Params['model'].predict(x=batchData)
                elif classname == 'music':
                    batchData, batchLabel = test_file_wise_generator(PARAMS, '', fName, None)
                    pred = Train_Params['model'].predict(x=batchData)
                pred_lab = np.argmax(pred, axis=1)
                if np.size(Predictions) <= 1:
                    Predictions = pred
                    PtdLabels = np.argmax(pred, axis=1)
                    if classname == 'speech':
                        GroundTruth = np.array([1] * np.shape(pred)[0])
                    elif classname == 'music':
                        GroundTruth = np.array([0] * np.shape(pred)[0])
                else:
                    Predictions = np.append(Predictions, pred, 0)
                    PtdLabels = np.append(PtdLabels, np.argmax(pred, axis=1))
                    if classname == 'speech':
                        GroundTruth = np.append(GroundTruth, np.array([1] * np.shape(pred)[0]))
                    elif classname == 'music':
                        GroundTruth = np.append(GroundTruth, np.array([0] * np.shape(pred)[0]))
                print(fl_count, '/', len(files), target_dB, 'dB\t', classname, 'pred_lab: ',
                      np.sum(pred_lab == 0), np.sum(pred_lab == 1), np.sum(pred_lab == 2), end='\t', flush=True)
                if classname == 'speech':
                    acc_fl = np.round(np.sum(np.array(pred_lab) == 1) * 100 / len(pred_lab), 4)
                    acc_all = np.round(np.sum(np.array(PtdLabels) == np.array(GroundTruth)) * 100 / len(PtdLabels), 4)
                    print(fl, np.shape(batchData), len(PtdLabels), len(GroundTruth), ' acc=', acc_fl, acc_all)
                elif classname == 'music':
                    acc_fl = np.round(np.sum(np.array(pred_lab) == 0) * 100 / len(pred_lab), 4)
                    acc_all = np.round(np.sum(np.array(PtdLabels) == np.array(GroundTruth)) * 100 / len(PtdLabels), 4)
                    print(fl, np.shape(batchData), len(PtdLabels), len(GroundTruth), ' acc=', acc_fl, acc_all)
    if len(PARAMS['classes']) == 3:
        files_spmu = PARAMS['test_files']['speech+music']
        fl_count = 0
        for spmu_info in files_spmu:
            fl_count += 1
            fl_sp = spmu_info['speech']
            fl_mu = spmu_info['music']
            fName_sp = PARAMS['folder'] + '/speech/' + fl_sp
            fName_mu = PARAMS['folder'] + '/music/' + fl_mu
            if target_dB is None:
                # With target_dB=None each mixture uses its annotated SMR;
                # otherwise the requested target_dB is applied so that
                # performance can be tested at specific SMR values
                batchData, batchLabel = test_file_wise_generator(PARAMS, fName_sp, fName_mu, spmu_info['SMR'])
            else:
                batchData, batchLabel = test_file_wise_generator(PARAMS, fName_sp, fName_mu, target_dB)
            pred = Train_Params['model'].predict(x=batchData)
            pred_lab = np.argmax(pred, axis=1)
            if np.size(Predictions) <= 1:
                Predictions = pred
                GroundTruth = np.array([2] * np.shape(pred)[0])
                PtdLabels = np.argmax(pred, axis=1)
            else:
                Predictions = np.append(Predictions, pred, 0)
                GroundTruth = np.append(GroundTruth, np.array([2] * np.shape(pred)[0]))
                PtdLabels = np.append(PtdLabels, np.argmax(pred, axis=1))
            acc_fl = np.round(np.sum(np.array(pred_lab) == 2) * 100 / len(pred_lab), 4)
            acc_all = np.round(np.sum(np.array(PtdLabels) == np.array(GroundTruth)) * 100 / len(PtdLabels), 4)
            if target_dB is None:
                print(fl_count, '/', len(files_spmu), spmu_info['SMR'], 'dB\tspeech_music pred_lab: ',
                      np.sum(pred_lab == 0), np.sum(pred_lab == 1), np.sum(pred_lab == 2),
                      fl_sp, fl_mu, np.shape(batchData), ' acc=', acc_fl, acc_all)
            else:
                print(fl_count, '/', len(files_spmu), target_dB, 'dB\tspeech_music pred_lab: ',
                      np.sum(pred_lab == 0), np.sum(pred_lab == 1), np.sum(pred_lab == 2),
                      fl_sp, fl_mu, np.shape(batchData), ' acc=', acc_fl, acc_all)
    testingTimeTaken = time.process_time() - startTime
    print('Time taken for model testing: ', testingTimeTaken)
    labels = [key for key in PARAMS['classes'].keys()]
    ConfMat, precision, recall, fscore = misc.getPerformance(PtdLabels, GroundTruth, labels)
    print(ConfMat)
    print('Precision: ', precision)
    print('Recall: ', recall)
    print('fscore: ', fscore)
    return ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken
def grid_search_svm(PARAMS, data_dict):
    pwrs_c = list(np.arange(-5, 1, 1))
    pwrs_gamma = list(np.arange(-5, 1, 1))
    C = np.power(2.0, pwrs_c)
    Gamma = np.power(2.0, pwrs_gamma)
    svm_type = 'single'
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('SVM type=', svm_type, ' CV folds=', cv_folds, ' n_jobs=', njobs)
    trainingTimeTaken = 0
    start = time.process_time()
    if svm_type == 'single':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
        CLF_CV = GridSearchCV(clf, tunable_parameters, cv=cv_folds, refit=True, n_jobs=njobs, verbose=False)
    elif svm_type == 'bagging':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        ''' This function extracts balanced bootstraps '''
        max_features = 1.0
        n_estimators = 10
        bagged_classifier = BalancedBaggingClassifier(base_estimator=clf, sampling_strategy=1.0, n_estimators=n_estimators)
        max_samples = [0.2]  # [0.001, 0.005, 0.01, 0.05]
        print('max_samples=', max_samples, ' max_features=', max_features, ' n_estimators=', n_estimators)
        tunable_parameters = {
            'max_samples': max_samples,
            'base_estimator__gamma': Gamma,
            'base_estimator__C': C,
            }
        ''' Perform grid search over the individual classifiers in the bag '''
        CLF_CV = GridSearchCV(bagged_classifier, tunable_parameters, scoring='accuracy', cv=cv_folds, refit=True, n_jobs=njobs, verbose=True)
    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'], 0)
    All_train_label = np.append(data_dict['train_label'], data_dict['val_label'])

    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(All_train_data, All_train_label)
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(CLF_CV, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()
    if svm_type == 'single':
        optC = str(CLF_CV.best_params_['C'])
        optGamma = str(CLF_CV.best_params_['gamma'])
        countSV = model.n_support_
    elif svm_type == 'bagging':
        optC = str(CLF_CV.best_params_['base_estimator__C'])
        optGamma = str(CLF_CV.best_params_['base_estimator__gamma'])
        countSV = [0, 0]
    countTrPts = [np.sum(All_train_label == lab) for lab in np.unique(All_train_label)]
    PtdLabels_train = model.predict(All_train_data)
    Predictions_train = model.predict_log_proba(All_train_data)
    PtdLabels_test = model.predict(data_dict['test_data'])
    Predictions_test = model.predict_log_proba(data_dict['test_data'])
    accuracy_train = np.mean(PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, data_dict['test_label'])
    Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels_train': PtdLabels_train,
        'Predictions_train': Predictions_train,
        'PtdLabels_test': PtdLabels_test,
        'Predictions_test': Predictions_test,
        'accuracy_train': accuracy_train,
        'accuracy_test': accuracy_test,
        'Performance_train': Performance_train,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        }
    return Train_Params, Test_Params
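# Illustrative sketch (synthetic data, not part of the repo API): the RBF-SVM
# grid search over powers-of-two C/gamma performed by grid_search_svm() above,
# reduced to its scikit-learn core.
def _svm_grid_demo():
    from sklearn.svm import SVC
    from sklearn.model_selection import GridSearchCV
    X = np.random.randn(60, 5)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)
    grid = [{'kernel': ['rbf'],
             'C': np.power(2.0, np.arange(-5, 1)),
             'gamma': np.power(2.0, np.arange(-5, 1))}]
    clf = GridSearchCV(SVC(probability=True), grid, cv=3, refit=True)
    clf.fit(X, y)
    # refit=True leaves the best model in clf.best_estimator_
    return clf.best_params_['C'], clf.best_params_['gamma']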
def test_cnn_noise(PARAMS, Train_Params):
    start = time.process_time()
    GroundTruth = []
    Predictions = {}
    PtdLabels = {}
    for dB in PARAMS['noise_dB_range']:
        Predictions[dB] = np.empty([])
        PtdLabels[dB] = []
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}
    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl, fl_count, '/', len(files))
            count += 1
            numFV = 0
            if not os.path.exists(PARAMS['test_folder'] + '/' + PARAMS['featName'] + '/' + classname + '/' + fl):
                continue
            for targetdB in PARAMS['noise_dB_range']:
                batchData, batchLabel = generator_test_noise(PARAMS, PARAMS['featName'], fl, clNum, targetdB)
                pred = Train_Params['model'].predict(x=batchData)
                numFV = np.shape(pred)[0]
                if np.size(Predictions[targetdB]) <= 1:
                    Predictions[targetdB] = np.array(pred, ndmin=2)
                    PtdLabels[targetdB].extend(np.argmax(pred, axis=1).tolist())
                else:
                    Predictions[targetdB] = np.append(Predictions[targetdB], np.array(pred, ndmin=2), axis=0)
                    PtdLabels[targetdB].extend(np.argmax(pred, axis=1).tolist())
                print('\t\t\t\t dB=', targetdB, ' batchData: ', np.shape(batchData), np.shape(Predictions[targetdB]),
                      ' acc=', np.round(np.sum(np.argmax(pred, axis=1) == np.array([clNum] * numFV)) * 100 / numFV, 2))
            GroundTruth.extend([clNum] * numFV)
    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ', testingTimeTaken)
    ConfMat = {}
    fscore = {}
    accuracy = {}
    for dB in PARAMS['noise_dB_range']:
        ConfMat_dB, fscore_dB = misc.getPerformance(PtdLabels[dB], GroundTruth)
        ConfMat_dB = np.reshape(ConfMat_dB, (len(PARAMS['classes']), len(PARAMS['classes'])))
        accuracy_dB = np.round(np.sum(np.diag(ConfMat_dB)) / np.sum(ConfMat_dB), 4)
        ConfMat[dB] = ConfMat_dB
        fscore[dB] = fscore_dB
        accuracy[dB] = accuracy_dB
    Test_Params_Noise = {
        'loss': -1,
        'accuracy': accuracy,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat': ConfMat,
        'fscore': fscore,
        'PtdLabels': PtdLabels,
        'Predictions': Predictions,
        'GroundTruth': GroundTruth,
        }
    return Test_Params_Noise
def test_cnn_ensemble(PARAMS, Ensemble_Train_Params):
    start = time.process_time()
    PtdLabels_Ensemble = []
    GroundTruth_Ensemble = []
    Predictions_Ensemble = np.empty([])
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}
    # One accumulator per feature set, keyed consistently by 'GroundTruth'
    individual_performances = {
        featName: {'Predictions': np.empty([]), 'GroundTruth': np.empty([]), 'fscore': [0, 0, 0]}
        for featName in ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39', 'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
        }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl, fl_count, '/', len(files), end='\t')
            count += 1
            PtdLabels = None
            PtdLabels_temp = np.empty([])
            GroundTruth = np.empty([])
            Predictions = np.empty([])
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():
                empty_predictions = False
                curr_fName = PARAMS['test_folder'] + '/' + featName + '/' + classname + '/' + fl
                # print('curr_fName: ', curr_fName)
                if not os.path.exists(curr_fName):
                    # print('curr_file not found')
                    empty_predictions = True
                    break
                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                temp_file = 'pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + fl.split('.')[0]
                # print(temp_folder, temp_file)
                # print(featName, Train_Params['model'].layers[0].output_shape, PARAMS['input_shape'][featName])
                if not os.path.exists(temp_folder + '/' + temp_file + '.pkl'):
                    batchData, batchLabel = generator_test_ensemble(PARAMS, featName, fl, clNum)
                    # print('batchData: ', np.shape(batchData), np.shape(batchLabel))
                    pred = Train_Params['model'].predict(x=batchData)
                    # print('pred: ', np.shape(pred))
                    misc.save_obj(pred, temp_folder, temp_file)
                else:
                    try:
                        pred = misc.load_obj(temp_folder, temp_file)
                    except Exception:
                        # Cached predictions unreadable; recompute and overwrite
                        batchData, batchLabel = generator_test_ensemble(PARAMS, featName, fl, clNum)
                        pred = Train_Params['model'].predict(x=batchData)
                        misc.save_obj(pred, temp_folder, temp_file)
                # print('indv_labels: ', np.shape(indv_labels), np.shape(individual_performances[featName]['PtdLabels']))
                if np.size(individual_performances[featName]['Predictions']) <= 1:
                    individual_performances[featName]['Predictions'] = np.array(pred, ndmin=2)
                    individual_performances[featName]['GroundTruth'] = np.ones(np.shape(pred)[0]) * clNum
                else:
                    individual_performances[featName]['Predictions'] = np.append(individual_performances[featName]['Predictions'], np.array(pred, ndmin=2), axis=0)
                    individual_performances[featName]['GroundTruth'] = np.append(individual_performances[featName]['GroundTruth'], np.ones(np.shape(pred)[0]) * clNum)
                if np.size(Predictions) <= 1:
                    Predictions = np.array(pred, ndmin=2)
                    PtdLabels_temp = np.array(np.argmax(pred, axis=1), ndmin=2).T
                else:
                    # print('PtdLabels_temp: ', np.shape(PtdLabels_temp), np.shape(pred))
                    empty_predictions = False
                    if np.shape(pred)[0] != np.shape(Predictions)[0]:
                        if np.shape(pred)[0] > np.shape(Predictions)[0]:
                            pred = pred[:np.shape(Predictions)[0], :]
                        else:
                            empty_predictions = True
                            break
                    Predictions = np.add(Predictions, np.array(pred, ndmin=2))
                    PtdLabels_temp = np.append(PtdLabels_temp, np.array(np.argmax(pred, axis=1), ndmin=2).T, axis=1)
            if empty_predictions:
                print(' ', end='\n')
                continue
            GroundTruth = np.ones(np.shape(Predictions)[0]) * clNum
            PtdLabels = np.argmax(Predictions, axis=1)
            # PtdLabels, label_counts = scipy.stats.mode(PtdLabels_temp, axis=1)
            # PtdLabels = np.array(PtdLabels.flatten())
            # print('PtdLabels: ', np.shape(PtdLabels), ' GroundTruth: ', np.shape(GroundTruth))
            print(np.shape(Predictions), ' acc=',
                  np.round(np.sum(PtdLabels == GroundTruth) * 100 / np.size(GroundTruth), 2), end='\n')
            if np.size(PtdLabels_Ensemble) <= 1:
                PtdLabels_Ensemble = PtdLabels
                GroundTruth_Ensemble = GroundTruth
                Predictions_Ensemble = Predictions
            else:
                PtdLabels_Ensemble = np.append(PtdLabels_Ensemble, PtdLabels)
                GroundTruth_Ensemble = np.append(GroundTruth_Ensemble, GroundTruth)
                Predictions_Ensemble = np.append(Predictions_Ensemble, Predictions, axis=0)
    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ', testingTimeTaken)
    ConfMat_Ensemble, fscore_Ensemble = misc.getPerformance(PtdLabels_Ensemble, GroundTruth_Ensemble)
    accuracy_Ensemble = np.round(np.sum(PtdLabels_Ensemble == GroundTruth_Ensemble) * 100 / len(GroundTruth_Ensemble), 4)
    for featName in Ensemble_Train_Params.keys():
        # print(featName, 'individual_performances: ', np.shape(individual_performances[featName]['PtdLabels']), np.shape(GroundTruth_Ensemble))
        indv_PtdLabels = np.argmax(individual_performances[featName]['Predictions'], axis=1)
        ConfMat_indv, fscore_indv = misc.getPerformance(indv_PtdLabels, individual_performances[featName]['GroundTruth'])
        individual_performances[featName]['fscore'] = fscore_indv
    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
        }
    return Ensemble_Test_Params
def grid_search_gmm(PARAMS, data_dict):
    K = 10
    gmmModel_mu = GaussianMixture(n_components=K, max_iter=1000)
    gmmModel_sp = GaussianMixture(n_components=K, max_iter=1000)
    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'], 0)
    All_train_label = np.append(data_dict['train_label'], data_dict['val_label'])
    mu_idx = np.squeeze(np.where(All_train_label == 0))
    sp_idx = np.squeeze(np.where(All_train_label == 1))

    ''' Checking if model is already available '''
    gmmModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K) + '.pkl'
    if not os.path.exists(gmmModelFileName):
        gmmModel_mu.fit(All_train_data[mu_idx, :])
        gmmModel_sp.fit(All_train_data[sp_idx, :])
        if PARAMS['save_flag']:
            misc.save_obj(gmmModel_mu, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K))
            misc.save_obj(gmmModel_sp, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_sp_K=' + str(K))
    else:
        gmmModel_mu = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K))
        gmmModel_sp = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_sp_K=' + str(K))

    score_train_mu = np.array(gmmModel_mu.score_samples(All_train_data), ndmin=2).T
    score_train_sp = np.array(gmmModel_sp.score_samples(All_train_data), ndmin=2).T
    print('scores shape: ', np.shape(score_train_mu), np.shape(score_train_sp))
    score_train = np.append(score_train_mu, score_train_sp, 1)
    print('score_train: ', np.shape(score_train))
    PtdLabels_train = np.argmax(score_train, axis=1)

    score_test_mu = np.array(gmmModel_mu.score_samples(data_dict['test_data']), ndmin=2).T
    score_test_sp = np.array(gmmModel_sp.score_samples(data_dict['test_data']), ndmin=2).T
    score_test = np.append(score_test_mu, score_test_sp, 1)
    print('score_test: ', np.shape(score_test))
    PtdLabels_test = np.argmax(score_test, axis=1)

    accuracy_train = np.mean(PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, data_dict['test_label'])
    Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])
    return score_test, PtdLabels_test, K, Performance_train, Performance_test
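# Illustrative sketch (synthetic data): the two-GMM classification rule in
# grid_search_gmm() above — fit one GMM per class, then label each test point
# by the higher per-class log-likelihood from score_samples().
def _gmm_two_class_demo(K=2):
    from sklearn.mixture import GaussianMixture
    X_mu = np.random.randn(100, 3) - 2.0       # "music" class cluster
    X_sp = np.random.randn(100, 3) + 2.0       # "speech" class cluster
    gmm_mu = GaussianMixture(n_components=K).fit(X_mu)
    gmm_sp = GaussianMixture(n_components=K).fit(X_sp)
    X_test = np.vstack((X_mu[:5], X_sp[:5]))
    scores = np.stack((gmm_mu.score_samples(X_test),
                       gmm_sp.score_samples(X_test)), axis=1)
    return np.argmax(scores, axis=1)            # 0 = music, 1 = speech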
def test_model(PARAMS, test_data, test_label, Train_Params):
    loss = 0
    performance = 0
    testingTimeTaken = 0
    PtdLabels = []
    start = time.process_time()
    if not PARAMS['data_generator']:
        loss, performance = Train_Params['model'].evaluate(x=test_data, y=test_label)
        Predictions = Train_Params['model'].predict(test_data)
        PtdLabels = np.array(Predictions > 0.5).astype(int)
        GroundTruth = test_label
        testingTimeTaken = time.process_time() - start
        print('Time taken for model testing: ', testingTimeTaken)
        ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)
    else:
        loss, performance = Train_Params['model'].evaluate_generator(
            generator(PARAMS, PARAMS['test_folder'], PARAMS['test_files'], PARAMS['batch_size']),
            steps=PARAMS['test_steps'],
            verbose=1,
            )
        print('loss: ', loss, ' performance: ', performance)
        PtdLabels = []
        GroundTruth = []
        Predictions = np.empty([])
        count = -1
        class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}
        # startTime = time.process_time()
        for classname in PARAMS['test_files'].keys():
            clNum = class_labels[classname]
            files = PARAMS['test_files'][classname]
            # print('test_files: ', files)
            for fl in files:
                count += 1
                batchData, batchLabel = generator_test(PARAMS, PARAMS['featName'], fl, clNum)
                if np.size(batchData) == 0:  # skip files that yielded no patches
                    continue
                # endTime = time.process_time()
                # print('Data loading time: ', endTime-startTime)
                # startTime = time.process_time()
                pred = Train_Params['model'].predict(x=batchData)
                # print('Prediction time: ', time.process_time()-startTime, np.shape(pred))
                if len(PARAMS['classes']) > 2:
                    pred_lab = np.argmax(pred, axis=1)
                else:
                    pred_lab = np.squeeze(np.array(np.array(pred) > 0.5).astype(int))
                # print(clNum, ' batchLabel: ', batchLabel)
                # print(clNum, ' pred_lab: ', pred_lab)
                PtdLabels.extend(pred_lab)
                GroundTruth.extend(batchLabel.tolist())
                # print('pred_lab: ', np.sum(pred_lab==0), np.sum(pred_lab==1))
                # print('ground_truth: ', np.sum(batchLabel==0), np.sum(batchLabel==1))
                if np.size(Predictions) <= 1:
                    Predictions = pred
                else:
                    Predictions = np.append(Predictions, pred, 0)
                print(PARAMS['classes'][clNum], fl, np.shape(batchData), ' acc=',
                      np.round(np.sum(pred_lab == batchLabel) * 100 / len(batchLabel), 2))
        testingTimeTaken = time.process_time() - start
        print('Time taken for model testing: ', testingTimeTaken)
        ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)
    return loss, performance, testingTimeTaken, ConfMat, fscore, PtdLabels, Predictions, GroundTruth
def test_cnn_ensemble_noise(PARAMS, Ensemble_Train_Params):
    start = time.process_time()
    GroundTruth_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    Predictions_Ensemble = {dB: np.empty([]) for dB in PARAMS['noise_dB_range']}
    PtdLabels_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}

    def basic_storage_cell():
        # Build a fresh dict per (feature, dB) entry so that the accumulators
        # are not shared (aliased) across feature sets and noise levels
        return {'Predictions': np.empty([]), 'GroundTruth': np.empty([]), 'fscore': [0, 0, 0]}

    individual_performances = {
        featName: {dB: basic_storage_cell() for dB in PARAMS['noise_dB_range']}
        for featName in ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39', 'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
        }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl, fl_count, '/', len(files), end='\n')
            count += 1
            GroundTruth = np.empty([])
            Predictions = {dB: np.empty([]) for dB in PARAMS['noise_dB_range']}
            PtdLabels = {dB: [] for dB in PARAMS['noise_dB_range']}
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():
                empty_predictions = False
                curr_fName = PARAMS['test_folder'] + '/' + featName + '/' + classname + '/' + fl
                if not os.path.exists(curr_fName):
                    empty_predictions = True
                    break
                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                for targetdB in PARAMS['noise_dB_range']:
                    temp_file = temp_folder + '/pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + str(targetdB) + 'dB_' + fl.split('.')[0] + '.pkl'
                    if not os.path.exists(temp_file):
                        batchData, batchLabel = generator_test_noise(PARAMS, featName, fl, clNum, targetdB)
                        pred = Train_Params['model'].predict(x=batchData)
                        misc.save_obj(pred, temp_folder, 'pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + str(targetdB) + 'dB_' + fl.split('.')[0])
                    else:
                        pred = misc.load_obj(temp_folder, 'pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + str(targetdB) + 'dB_' + fl.split('.')[0])
                    if np.size(individual_performances[featName][targetdB]['Predictions']) <= 1:
                        individual_performances[featName][targetdB]['Predictions'] = np.array(pred, ndmin=2)
                        individual_performances[featName][targetdB]['GroundTruth'] = np.ones(np.shape(pred)[0]) * clNum
                    else:
                        individual_performances[featName][targetdB]['Predictions'] = np.append(individual_performances[featName][targetdB]['Predictions'], np.array(pred, ndmin=2), axis=0)
                        individual_performances[featName][targetdB]['GroundTruth'] = np.append(individual_performances[featName][targetdB]['GroundTruth'], np.ones(np.shape(pred)[0]) * clNum)
                    if np.size(Predictions[targetdB]) <= 1:
                        Predictions[targetdB] = np.array(pred, ndmin=2)
                    else:
                        empty_predictions = False
                        if np.shape(pred)[0] != np.shape(Predictions[targetdB])[0]:
                            if np.shape(pred)[0] > np.shape(Predictions[targetdB])[0]:
                                # Keep only as many rows as already accumulated
                                pred = pred[:np.shape(Predictions[targetdB])[0], :]
                            else:
                                empty_predictions = True
                                break
                        Predictions[targetdB] = np.add(Predictions[targetdB], np.array(pred, ndmin=2))
            if empty_predictions:
                print(' ', end='\n')
                continue
            for dB in PARAMS['noise_dB_range']:
                GroundTruth = np.array(np.ones(np.shape(Predictions[dB])[0]) * clNum, ndmin=2).T
                PtdLabels[dB] = np.array(np.argmax(Predictions[dB], axis=1), ndmin=2).T
                # print('PtdLabels[dB]: ', np.shape(PtdLabels[dB]), np.shape(GroundTruth), np.sum(PtdLabels[dB]==GroundTruth), np.shape(GroundTruth)[0])
                print('\t\t\t\t', dB, 'dB\t', np.shape(Predictions[dB]), ' acc=',
                      np.round(np.sum(PtdLabels[dB] == GroundTruth) * 100 / np.shape(GroundTruth)[0], 2), end='\n')
                if np.size(PtdLabels_Ensemble[dB]) <= 1:
                    PtdLabels_Ensemble[dB] = PtdLabels[dB]
                    Predictions_Ensemble[dB] = Predictions[dB]
                    GroundTruth_Ensemble[dB] = GroundTruth
                else:
                    PtdLabels_Ensemble[dB] = np.append(PtdLabels_Ensemble[dB], PtdLabels[dB])
                    Predictions_Ensemble[dB] = np.append(Predictions_Ensemble[dB], Predictions[dB], axis=0)
                    GroundTruth_Ensemble[dB] = np.append(GroundTruth_Ensemble[dB], GroundTruth)
    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ', testingTimeTaken)
    ConfMat_Ensemble = {}
    fscore_Ensemble = {}
    accuracy_Ensemble = {}
    for dB in PARAMS['noise_dB_range']:
        # print(dB, np.shape(PtdLabels_Ensemble[dB]), np.shape(GroundTruth_Ensemble[dB]))
        ConfMat_dB, fscore_dB = misc.getPerformance(PtdLabels_Ensemble[dB], GroundTruth_Ensemble[dB])
        ConfMat_dB = np.reshape(ConfMat_dB, (len(PARAMS['classes']), len(PARAMS['classes'])))
        accuracy_dB = np.round(np.sum(np.diag(ConfMat_dB)) / np.sum(ConfMat_dB), 4)
        ConfMat_Ensemble[dB] = ConfMat_dB
        fscore_Ensemble[dB] = fscore_dB
        accuracy_Ensemble[dB] = accuracy_dB
    for featName in Ensemble_Train_Params.keys():
        for dB in PARAMS['noise_dB_range']:
            indv_PtdLabels_dB = np.argmax(individual_performances[featName][dB]['Predictions'], axis=1)
            ConfMat_indv_dB, fscore_indv_dB = misc.getPerformance(indv_PtdLabels_dB, individual_performances[featName][dB]['GroundTruth'])
            individual_performances[featName][dB]['fscore'] = fscore_indv_dB
    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
        }
    return Ensemble_Test_Params
def load_model_NB(PARAMS, test_data, test_label, input_shape):
    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        print('NB model does not exist')
        return {}, {}
    NB_model = misc.load_obj('/'.join(PARAMS['modelName'].split('/')[:-1]), PARAMS['modelName'].split('/')[-1].split('.')[0])
    start = time.process_time()
    # PtdLabels_test = NB_model.predict(test_data)
    # Predictions_test = NB_model.predict_proba(test_data)
    # GroundTruth = test_label
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    PtdLabels_test = []
    Predictions_test = []
    GroundTruth = []
    for clNum in PARAMS['classes'].keys():
        files = PARAMS['test_files'][PARAMS['classes'][clNum]]
        for fl in files:
            temp_file = 'pred_' + PARAMS['classes'][clNum] + '_fold' + str(PARAMS['fold']) + '_' + PARAMS['featName'] + '_' + fl.split('.')[0]
            if not os.path.exists(temp_folder + temp_file + '.pkl'):
                FV = np.load(PARAMS['test_folder'] + '/' + PARAMS['featName'] + '/' + PARAMS['classes'][clNum] + '/' + fl, allow_pickle=True)
                FV = misc.get_feature_patches(FV, PARAMS['CNN_patch_size'], PARAMS['CNN_patch_shift_test'], input_shape)
                FV = PARAMS['std_scale'].transform(FV)
                pred_lab = NB_model.predict(FV)
                pred = NB_model.predict_proba(FV)
                misc.save_obj({'pred': pred, 'pred_lab': pred_lab}, temp_folder, temp_file)
            else:
                pred = misc.load_obj(temp_folder, temp_file)['pred']
                pred_lab = misc.load_obj(temp_folder, temp_file)['pred_lab']
            PtdLabels_test.extend(pred_lab)
            Predictions_test.extend(pred)
            GroundTruth.extend([clNum] * np.shape(pred)[0])
            print(fl, ' acc=', np.sum(np.array(pred_lab) == np.array([clNum] * np.shape(pred)[0])) / np.size(pred_lab))
    PtdLabels_test = np.array(PtdLabels_test)
    GroundTruth = np.array(GroundTruth)
    accuracy_test = np.mean(PtdLabels_test.ravel() == GroundTruth.ravel()) * 100
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, GroundTruth)
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    testingTimeTaken = time.process_time() - start
    print('Accuracy: test=', np.round(accuracy_test, 4), 'F-score: test=', np.round(fscore_test, 4))
    Train_Params = {
        'model': NB_model,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': GroundTruth,
        }
    return Train_Params, Test_Params
def grid_search_svm(PARAMS, train_data, train_label, test_data, test_label):
    C = np.power(2.0, list(np.arange(-5, 5, 1)))
    Gamma = np.power(2.0, list(np.arange(-5, 5, 1)))
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('CV folds=', cv_folds, ' n_jobs=', njobs)
    trainingTimeTaken = 0
    start = time.process_time()
    clf_param_tuning = SVC(decision_function_shape='ovo', verbose=0, probability=True)
    tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
    CLF_CV = GridSearchCV(clf_param_tuning, tunable_parameters, cv=cv_folds, refit=True, n_jobs=njobs, verbose=2)

    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(train_data, train_label.flatten())
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(CLF_CV, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()
    optC = str(CLF_CV.best_params_['C'])
    optGamma = str(CLF_CV.best_params_['gamma'])
    countSV = model.n_support_
    countTrPts = [np.sum(train_label == lab) for lab in np.unique(train_label)]
    PtdLabels_train = model.predict(train_data)
    # Predictions_train = model.predict_log_proba(train_data)
    PtdLabels_test = model.predict(test_data)
    Predictions_test = model.predict_log_proba(test_data)
    accuracy_train = np.mean(PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)
    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
        }
    # return model, optC, optGamma, Predictions_test, Performance_test, countSV, countTrPts
    return Train_Params, Test_Params