def test_model_generator(PARAMS, Train_Params):
    testingTimeTaken = 0
    start = time.process_time()
    if not os.path.exists(PARAMS['opDir'] + '/evaluate_generator_results_fold' + str(PARAMS['fold']) + '.pkl'):
        metrics = Train_Params['model'].evaluate(
            generator(PARAMS, PARAMS['folder'], PARAMS['test_files'], PARAMS['batch_size']),
            steps=PARAMS['TS_STEPS'],
            verbose=1,
            )
        if PARAMS['save_flag']:
            misc.save_obj(metrics, PARAMS['opDir'], 'evaluate_generator_results_fold' + str(PARAMS['fold']))
    else:
        metrics = misc.load_obj(PARAMS['opDir'], 'evaluate_generator_results_fold' + str(PARAMS['fold']))
    metric_names = Train_Params['model'].metrics_names
    print(metric_names)
    print(metrics)
    testingTimeTaken = time.process_time() - start
    print('Time taken for model testing: ', testingTimeTaken)
    return metrics, metric_names, testingTimeTaken
def naive_bayes_classification(PARAMS, train_data, train_label, test_data, test_label):
    NB_model = GaussianNB()
    start = time.process_time()

    ''' Checking if model is already available '''
    NB_ModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split('/')[-1].split('.')[0] + '.pkl'
    if not os.path.exists(NB_ModelFileName):
        NB_model.fit(train_data, train_label.flatten())
        if PARAMS['save_flag']:
            misc.save_obj(NB_model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    else:
        NB_model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    trainingTimeTaken = time.process_time() - start

    start = time.process_time()
    PtdLabels_train = NB_model.predict(train_data)
    PtdLabels_test = NB_model.predict(test_data)
    # Predictions_train = NB_model.predict_proba(train_data)
    Predictions_test = NB_model.predict_proba(test_data)
    accuracy_train = np.mean(PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)
    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    testingTimeTaken = time.process_time() - start

    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])

    Train_Params = {
        'model': NB_model,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
        }
    return Train_Params, Test_Params
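# Standalone sketch (not part of the original pipeline) of the fit-or-load
# caching pattern used by naive_bayes_classification above, with plain pickle
# in place of the repo's misc.save_obj/load_obj helpers. The helper name and
# file path are illustrative assumptions.
import os
import pickle
from sklearn.naive_bayes import GaussianNB

def _fit_or_load_nb_demo(model_path, X, y):
    # Train once, pickle the model, and reuse it on subsequent runs
    if not os.path.exists(model_path):
        model = GaussianNB().fit(X, y.ravel())
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
    else:
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
    return model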
def create_CV_folds(folder, opDir, classes, cv=3):
    # Note: the fold list is saved to and loaded from 'folder', so the
    # existence check must look there as well (it originally checked opDir,
    # which could silently re-shuffle the folds).
    if not os.path.exists(folder + '/cv_file_list.pkl'):
        cv_file_list = {}
        for clNum in classes.keys():
            path = folder + '/' + classes[clNum] + '/'
            files = np.array(os.listdir(path))
            np.random.shuffle(files)
            files_per_fold = int(np.ceil(len(files) / cv))
            cv_file_list[classes[clNum]] = {}
            fl_count = 0
            for cv_num in range(cv):
                cv_file_list[classes[clNum]]['fold' + str(cv_num)] = files[fl_count:np.min([fl_count + files_per_fold, len(files)])]
                fl_count += files_per_fold
        misc.save_obj(cv_file_list, folder, 'cv_file_list')
        print('CV folds created')
    else:
        cv_file_list = misc.load_obj(folder, 'cv_file_list')
        print('\t\t\tCV folds loaded')
    return cv_file_list
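# Illustrative usage of create_CV_folds (assumed directory layout:
# <folder>/<class_name>/<audio files>; the paths are hypothetical).
def _create_cv_folds_demo():
    classes = {0: 'music', 1: 'speech'}
    cv_file_list = create_CV_folds('./data/musan', './output', classes, cv=3)
    # e.g. cv_file_list['music']['fold0'] holds the file names assigned to fold 0
    return cv_file_list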
def patch_probability_generator(PARAMS, fl, Train_Params):
    startTime = time.process_time()
    labels_sp = []
    labels_mu = []
    pred_opDir = PARAMS['opDir'] + '/__Frame_Predictions_CNN/'
    if not os.path.exists(pred_opDir):
        os.makedirs(pred_opDir)
    result_fName = fl + '_fold' + str(PARAMS['fold']) + '_result'
    n_fft = PARAMS['n_fft'][PARAMS['Model']]
    n_mels = PARAMS['n_mels'][PARAMS['Model']]
    featName = PARAMS['featName'][PARAMS['Model']]

    if not os.path.exists(pred_opDir + result_fName + '.pkl'):
        fName_path = PARAMS['test_path'] + '/features/' + fl + '.npy'
        if not os.path.exists(fName_path):
            return {}
        fv = np.load(fName_path, allow_pickle=True)
        fv = get_featuregram(PARAMS, PARAMS['feature_opDir'], fl, fv, n_fft, n_mels, featName, save_feat=True)
        if not 'HarmPerc' in featName:
            fv = fv.T
            fv = StandardScaler(copy=False).fit_transform(fv)
            fv = fv.T
        else:
            # Scale the harmonic and percussive halves separately
            nDim = np.shape(fv)[0]
            fv_H = fv[:int(nDim / 2), :]
            fv_H = fv_H.T
            fv_H = StandardScaler(copy=False).fit_transform(fv_H)
            fv_H = fv_H.T
            fv_P = fv[int(nDim / 2):, :]
            fv_P = fv_P.T
            fv_P = StandardScaler(copy=False).fit_transform(fv_P)
            fv_P = fv_P.T
            fv = np.append(fv_H.astype(np.float32), fv_P.astype(np.float32), axis=0)

        nFrames = np.shape(fv)[1]
        annotations_mu, annotations_sp, music_marker, speech_marker = get_annotations(PARAMS['test_path'], fl, nFrames, PARAMS['opDir'])

        pred = np.empty([])
        pred_lab = np.empty([])
        batch_size = 10000
        labels_mu = []
        labels_sp = []
        for batchStart in range(0, np.shape(fv)[1], batch_size):
            batchEnd = np.min([batchStart + batch_size, np.shape(fv)[1]])
            fv_temp = fv[:, batchStart:batchEnd]
            music_marker_temp = music_marker[batchStart:batchEnd]
            speech_marker_temp = speech_marker[batchStart:batchEnd]
            print('\tBatch: (', batchStart, batchEnd, ') ', np.shape(fv_temp), ' mu=', np.sum(music_marker_temp), ' sp=', np.sum(speech_marker_temp), end=' ', flush=True)
            fv_patches_temp = get_feature_patches(PARAMS, fv_temp, PARAMS['W'], PARAMS['W_shift_test'], featName)
            # A patch inherits a class marker when more than half of its frames carry it
            labels_mu_patches = cextract_patches(np.array(music_marker_temp, ndmin=2), np.shape(np.array(music_marker_temp, ndmin=2)), PARAMS['W'], PARAMS['W_shift_test']).astype(int)
            labels_mu_temp = ((np.sum(np.squeeze(labels_mu_patches, axis=1), axis=1) / np.shape(labels_mu_patches)[2]) > 0.5).astype(int)
            labels_sp_patches = cextract_patches(np.array(speech_marker_temp, ndmin=2), np.shape(np.array(speech_marker_temp, ndmin=2)), PARAMS['W'], PARAMS['W_shift_test']).astype(int)
            labels_sp_temp = ((np.sum(np.squeeze(labels_sp_patches, axis=1), axis=1) / np.shape(labels_sp_patches)[2]) > 0.5).astype(int)
            if 'Lemaire_et_al' in PARAMS['Model']:
                # TCN input shape=(batch_size, timesteps, ndim)
                fv_patches_temp = np.transpose(fv_patches_temp, axes=(0, 2, 1))
            if PARAMS['signal_type'] == 'music':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(np.array((pred_temp > 0.5).astype(int)), labels_mu_temp)
            elif PARAMS['signal_type'] == 'speech':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(np.array((pred_temp > 0.5).astype(int)), labels_sp_temp)
            pred_lab_temp = np.array(pred_temp > 0.5).astype(int)
            if np.size(pred) <= 1:
                pred = pred_temp
                pred_lab = pred_lab_temp
            else:
                pred = np.append(pred, pred_temp)
                pred_lab = np.append(pred_lab, pred_lab_temp)
            labels_mu.extend(labels_mu_temp)
            labels_sp.extend(labels_sp_temp)
            print(np.shape(fv_patches_temp), np.shape(pred_temp), np.shape(pred), ' acc=', acc, F1)

        if PARAMS['signal_type'] == 'music':
            ConfMat, precision, recall, fscore = misc.getPerformance(pred_lab, labels_mu, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf mu: ', acc, precision, recall, fscore)
        elif PARAMS['signal_type'] == 'speech':
            ConfMat, precision, recall, fscore = misc.getPerformance(pred_lab, labels_sp, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf sp: ', acc, precision, recall, fscore)
        print('\n\n\n')

        probability_genTime = time.process_time() - startTime
        result = {
            'pred': pred,
            'pred_lab': pred_lab,
            'labels_sp': labels_sp,
            'labels_mu': labels_mu,
            'probability_genTime': probability_genTime,
            'ConfMat': ConfMat,
            'precision': precision,
            'recall': recall,
            'fscore': fscore,
            'accuracy': acc,
            }
        misc.save_obj(result, pred_opDir, result_fName)
        print('Test predictions saved!!!')
    else:
        result = misc.load_obj(pred_opDir, result_fName)
    return result
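# Minimal numpy sketch (an assumption, not the repo's cextract_patches) of the
# patch-labelling rule used in patch_probability_generator: a patch is
# labelled 1 when more than half of its frames carry the class marker.
import numpy as np

def _majority_patch_labels_demo(marker, W, W_shift):
    # marker: 1-D array of per-frame 0/1 markers
    starts = range(0, len(marker) - W + 1, W_shift)
    patches = np.stack([marker[s:s + W] for s in starts])
    return (patches.mean(axis=1) > 0.5).astype(int)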
def train_dnn(PARAMS, data_dict):  # Updated on 25-05-2019
    # set remaining variables
    epochs = 100
    batch_size = 64
    weightFile = PARAMS['modelName'].split('.')[0] + '.h5'
    architechtureFile = PARAMS['modelName'].split('.')[0] + '.json'
    paramFile = PARAMS['modelName'].split('.')[0] + '_params.npz'

    if not PARAMS['data_generator']:
        PARAMS['input_dim'] = np.shape(data_dict['train_data'])[1]
    else:
        PARAMS['input_dim'] = len(PARAMS['DIM'])
    output_dim = len(PARAMS['classes'])
    print(output_dim)
    FL_Ret = {}
    print('Weight file: ', weightFile, PARAMS['input_dim'], output_dim)

    if not os.path.exists(weightFile):
        model, optimizerName, learning_rate = dnn_model(PARAMS['input_dim'], output_dim)
        print(model.summary())
        model, trainingTimeTaken, FL_Ret, History = dplearn.train_model(PARAMS, data_dict, model, epochs, batch_size, weightFile)
        if PARAMS['save_flag']:
            # Save the weights
            model.save_weights(weightFile)
            # Save the model architecture
            with open(architechtureFile, 'w') as f:
                f.write(model.to_json())
            np.savez(paramFile, ep=str(epochs), bs=str(batch_size), lr=str(learning_rate), TTT=str(trainingTimeTaken))
            misc.save_obj(History, PARAMS['opDir'], 'training_history')
    else:
        if os.path.exists(paramFile):
            epochs = int(np.load(paramFile)['ep'])
            batch_size = int(np.load(paramFile)['bs'])
            learning_rate = float(np.load(paramFile)['lr'])
            trainingTimeTaken = float(np.load(paramFile)['TTT'])
        optimizerName = 'Adam'
        # Model reconstruction from JSON file
        with open(architechtureFile, 'r') as f:
            model = model_from_json(f.read())
        # Load weights into the new model
        model.load_weights(weightFile)
        opt = optimizers.Adam(lr=learning_rate)
        model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
        History = misc.load_obj(PARAMS['opDir'], 'training_history')
        print('DNN model exists! Loaded. Training time required=', trainingTimeTaken)
        print(model.summary())

    Train_Params = {
        'model': model,
        'History': History,
        'trainingTimeTaken': trainingTimeTaken,
        'epochs': epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'optimizerName': optimizerName,
        'FL_Ret': FL_Ret,
        'paramFile': paramFile,
        'architechtureFile': architechtureFile,
        'weightFile': weightFile,
        }
    return Train_Params
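# Minimal sketch of the JSON-architecture + HDF5-weights round trip that
# train_dnn relies on, assuming a tf.keras installation; the model, file
# names, and learning rate here are illustrative.
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential, model_from_json
from tensorflow.keras.layers import Dense

def _save_load_roundtrip_demo():
    model = Sequential([Dense(2, activation='softmax', input_shape=(100,))])
    with open('arch.json', 'w') as f:
        f.write(model.to_json())      # architecture only
    model.save_weights('weights.h5')  # parameters only
    with open('arch.json', 'r') as f:
        restored = model_from_json(f.read())
    restored.load_weights('weights.h5')
    restored.compile(loss='binary_crossentropy',
                     optimizer=optimizers.Adam(learning_rate=1e-3),
                     metrics=['accuracy'])
    return restored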
PARAMS['modelName'] = PARAMS['opDir'] + '/' + PARAMS['clFunc'] + '_Custom_Classifier_' + feature_indexes_dict_keys[classifier_num] + '_iter' + str(numIter) + '.xyz'
if PARAMS['clFunc'] == 'DNN-Ensemble':
    train_params_file = PARAMS['opDir'] + '/train_params_Classifier_' + feature_indexes_dict_keys[classifier_num] + '_iter' + str(numIter) + '.pkl'
    if not os.path.exists(train_params_file):
        Train_Params = DNN.train_dnn(PARAMS, data_dict_Classifier_Part)
        if PARAMS['save_flag']:
            misc.save_obj(Train_Params, PARAMS['opDir'], 'train_params_Classifier_' + feature_indexes_dict_keys[classifier_num] + '_iter' + str(numIter))
    else:
        Train_Params = misc.load_obj(PARAMS['opDir'], 'train_params_Classifier_' + feature_indexes_dict_keys[classifier_num] + '_iter' + str(numIter))
    test_params_file = PARAMS['opDir'] + '/test_params_Classifier_' + feature_indexes_dict_keys[classifier_num] + '_iter' + str(numIter) + '.pkl'
    if not os.path.exists(test_params_file):
        Test_Params = DNN.test_dnn(PARAMS, data_dict_Classifier_Part, Train_Params)
def test_cnn_ensemble(PARAMS, Ensemble_Train_Params, file_sp, file_mu):
    count = -1
    # class_labels = {PARAMS['classes'][key]:key for key in PARAMS['classes'].keys()}
    temp_folder = PARAMS['opDir'] + '/__temp/fold' + str(PARAMS['fold']) + '/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    count += 1
    PtdLabels = np.empty([])
    GroundTruth = np.empty([])
    Predictions = np.empty([])
    error = False
    print('\n\n\n')
    for featName in Ensemble_Train_Params.keys():
        Train_Params = Ensemble_Train_Params[featName]
        temp_file = 'pred_fold' + str(PARAMS['fold']) + '_' + featName + '_' + file_sp.split('.')[0] + '_' + file_mu.split('.')[0]
        print('Temp file: ', temp_file + '.pkl', os.path.exists(temp_folder + '/' + temp_file + '.pkl'))
        if not os.path.exists(temp_folder + '/' + temp_file + '.pkl'):
            fName_sp = PARAMS['test_folder'] + '/' + featName + '/speech/' + file_sp
            if not os.path.exists(fName_sp):
                error = True
                break
            data_sp = np.load(fName_sp, allow_pickle=True)
            fName_mu = PARAMS['test_folder'] + '/' + featName + '/music/' + file_mu
            if not os.path.exists(fName_mu):
                error = True
                break
            data_mu = np.load(fName_mu, allow_pickle=True)
            print('\t\t\t', featName, np.shape(data_sp), np.shape(data_mu))
            # Truncate both signals to the shorter one so they can be stacked
            if np.shape(data_sp)[1] < np.shape(data_mu)[1]:
                data_mu = data_mu[:, :np.shape(data_sp)[1]]
            elif np.shape(data_sp)[1] > np.shape(data_mu)[1]:
                data_sp = data_sp[:, :np.shape(data_mu)[1]]
            data_sp = StandardScaler(copy=False).fit_transform(data_sp)
            data_mu = StandardScaler(copy=False).fit_transform(data_mu)
            data = np.append(data_sp, data_mu, axis=0)
            data = data.T
            print('\t\t\t data: ', np.shape(data.T))
            batchData, label_sp, label_mu = cextract_patches(data, np.shape(data), PARAMS['CNN_patch_size'], PARAMS['CNN_patch_shift_test'], [1] * np.shape(data)[1], [0] * np.shape(data)[1], 'classification')
            if (np.shape(batchData)[1] == 9) or (np.shape(batchData)[1] == 10):
                # Zero-pad the feature dimension up to the model's input shape
                diff_dim = PARAMS['input_shape'][featName][0] - np.shape(batchData)[1]
                zero_padding = np.zeros((np.shape(batchData)[0], diff_dim, np.shape(batchData)[2]))
                batchData = np.append(batchData, zero_padding, axis=1)
            elif np.shape(batchData)[1] == 22:
                batchData = batchData[:, :21, :]
            elif np.shape(batchData)[1] == 39:
                if not PARAMS['39_dim_CC_feat']:
                    # Keep only the first 7 coefficients of each cepstral stream
                    first_7_cep_dim = np.array(list(range(0, 7)) + list(range(13, 20)) + list(range(26, 33)))
                    batchData = batchData[:, first_7_cep_dim, :]
            batchData = np.expand_dims(batchData, axis=3)
            print('\t\t\t batchData: ', np.shape(batchData))
            pred = Train_Params['model'].predict(x=batchData)
            gt = np.ones(np.shape(batchData)[0])
            gt[np.shape(data_sp)[0]:] = 0
            misc.save_obj({'pred': pred, 'gt': gt}, temp_folder, temp_file)
        else:
            pred = misc.load_obj(temp_folder, temp_file)['pred']
            gt = misc.load_obj(temp_folder, temp_file)['gt']
        # Shift the ground truth by half a patch and pad the tail with zeros
        nPatches = len(gt)
        gt = gt[int(PARAMS['CNN_patch_size'] / 2):]
        gt = np.append(gt, [0] * (nPatches - len(gt)))
        print('nPatches: ', nPatches, len(gt))
        print('\t\t', featName, 'pred: ', np.shape(pred))
        if np.size(Predictions) <= 1:
            Predictions = np.array(pred, ndmin=2)
            GroundTruth = gt
        else:
            print('\t\tPredictions: ', np.shape(Predictions), np.shape(pred))
            if np.shape(pred)[0] < np.shape(Predictions)[0]:
                while np.shape(pred)[0] < np.shape(Predictions)[0]:
                    d = np.shape(Predictions)[0] - np.shape(pred)[0]
                    pred = np.append(pred, np.array(pred[-d:, :], ndmin=2), axis=0)
            elif np.shape(pred)[0] > np.shape(Predictions)[0]:
                pred = pred[:np.shape(Predictions)[0], :]
            print('\t\tPredictions reshaped: ', np.shape(Predictions), np.shape(pred))
            Predictions = np.add(Predictions, np.array(pred, ndmin=2))
    print('\tPredictions scaling: ', np.shape(Predictions), len(Ensemble_Train_Params))
    Predictions /= len(Ensemble_Train_Params)
    PtdLabels = np.argmax(Predictions, axis=1)
    if not error:
        print('\t', np.shape(Predictions), ' acc=', np.round(np.sum(PtdLabels == GroundTruth) * 100 / np.size(GroundTruth), 2), end='\n')
    else:
        print('\tError!')
    return Predictions, PtdLabels, GroundTruth, error
    PARAMS['cv_file_list'], PARAMS['fold'])

if PARAMS['use_GPU']:
    PARAMS['GPU_session'] = start_GPU_session()
PARAMS['modelName'] = PARAMS['opDir'] + '/fold' + str(PARAMS['fold']) + '_model.xyz'
print('input_shape: ', PARAMS['input_shape'], PARAMS['modelName'])
Train_Params = perform_training(PARAMS)

if not os.path.exists(PARAMS['opDir'] + '/Test_Params_fold' + str(PARAMS['fold']) + '.pkl'):
    Test_Params = perform_testing(PARAMS, Train_Params)
    if PARAMS['save_flag']:
        misc.save_obj(Test_Params, PARAMS['opDir'], 'Test_Params_fold' + str(PARAMS['fold']))
else:
    Test_Params = misc.load_obj(PARAMS['opDir'], 'Test_Params_fold' + str(PARAMS['fold']))
print('Test_Params: ', Test_Params.keys())
print(Test_Params['precision_annot'], Test_Params['recall_annot'], Test_Params['fscore_annot'])

res_dict = {}
res_dict['0'] = 'SMR:Annot'
res_dict['1'] = Test_Params['metric_names'][0] + ':' + str(Test_Params['metrics'][0])
res_dict['2'] = Test_Params['metric_names'][1] + ':' + str(Test_Params['metrics'][1])
res_dict['3'] = 'Prec_mu:' + str(Test_Params['precision_annot'][0])
def grid_search_svm(PARAMS, data_dict):
    pwrs_c = list(np.arange(-5, 1, 1))
    pwrs_gamma = list(np.arange(-5, 1, 1))
    C = np.power(2.0, pwrs_c)
    Gamma = np.power(2.0, pwrs_gamma)
    svm_type = 'single'
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('SVM type=', svm_type, ' CV folds=', cv_folds, ' n_jobs=', njobs)

    trainingTimeTaken = 0
    start = time.process_time()

    if svm_type == 'single':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
        # Note: the 'iid' argument originally passed here was removed in scikit-learn 0.24
        CLF_CV = GridSearchCV(clf, tunable_parameters, cv=cv_folds, refit=True, n_jobs=njobs, verbose=False)
    elif svm_type == 'bagging':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        ''' This classifier extracts balanced bootstraps '''
        max_features = 1.0
        n_estimators = 10
        bagged_classifier = BalancedBaggingClassifier(base_estimator=clf, sampling_strategy=1.0, n_estimators=n_estimators)
        max_samples = [0.2]  # [0.001, 0.005, 0.01, 0.05]
        print('max_samples=', max_samples, ' max_features=', max_features, ' n_estimators=', n_estimators)
        tunable_parameters = {
            'max_samples': max_samples,
            'base_estimator__gamma': Gamma,
            'base_estimator__C': C,
            }
        ''' Perform grid search over the individual classifiers in the bag '''
        CLF_CV = GridSearchCV(bagged_classifier, tunable_parameters, scoring='accuracy', cv=cv_folds, refit=True, n_jobs=njobs, verbose=True)

    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'], 0)
    All_train_label = np.append(data_dict['train_label'], data_dict['val_label'])

    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(All_train_data, All_train_label)
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(CLF_CV, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()
    if svm_type == 'single':
        optC = str(CLF_CV.best_params_['C'])
        optGamma = str(CLF_CV.best_params_['gamma'])
        countSV = model.n_support_
    elif svm_type == 'bagging':
        optC = str(CLF_CV.best_params_['base_estimator__C'])
        optGamma = str(CLF_CV.best_params_['base_estimator__gamma'])
        countSV = [0, 0]
    countTrPts = [np.sum(All_train_label == lab) for lab in np.unique(All_train_label)]

    PtdLabels_train = model.predict(All_train_data)
    Predictions_train = model.predict_log_proba(All_train_data)
    PtdLabels_test = model.predict(data_dict['test_data'])
    Predictions_test = model.predict_log_proba(data_dict['test_data'])
    accuracy_train = np.mean(PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, data_dict['test_label'])
    Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels_train': PtdLabels_train,
        'Predictions_train': Predictions_train,
        'PtdLabels_test': PtdLabels_test,
        'Predictions_test': Predictions_test,
        'accuracy_train': accuracy_train,
        'accuracy_test': accuracy_test,
        'Performance_train': Performance_train,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        }
    return Train_Params, Test_Params
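# Self-contained sketch of the exponential C/gamma grid used above, run on
# synthetic data; the grid ranges mirror grid_search_svm but the data and
# settings are invented for illustration.
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification

def _svm_grid_demo():
    X, y = make_classification(n_samples=300, n_features=10, random_state=0)
    grid = [{'kernel': ['rbf'],
             'C': np.power(2.0, np.arange(-5, 1)),
             'gamma': np.power(2.0, np.arange(-5, 1))}]
    search = GridSearchCV(SVC(probability=True), grid, cv=3, refit=True).fit(X, y)
    return search.best_params_, search.best_estimator_.n_support_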
def test_cnn_ensemble(PARAMS, Ensemble_Train_Params):
    start = time.process_time()  # time.clock() was removed in Python 3.8
    PtdLabels_Ensemble = []
    GroundTruth_Ensemble = []
    Predictions_Ensemble = np.empty([])
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}
    # One fresh storage cell per feature; the key is 'GroundTruth' (the
    # original initialised 'GroundTruths', which was never read)
    feature_names = ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39', 'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
    individual_performances = {
        featName: {'Predictions': np.empty([]), 'GroundTruth': np.empty([]), 'fscore': [0, 0, 0]}
        for featName in feature_names
        }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl, fl_count, '/', len(files), end='\t')
            count += 1
            PtdLabels = None
            PtdLabels_temp = np.empty([])
            GroundTruth = np.empty([])
            Predictions = np.empty([])
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():
                empty_predictions = False
                curr_fName = PARAMS['test_folder'] + '/' + featName + '/' + classname + '/' + fl
                if not os.path.exists(curr_fName):
                    empty_predictions = True
                    break
                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                temp_file = 'pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + fl.split('.')[0]
                if not os.path.exists(temp_folder + '/' + temp_file + '.pkl'):
                    batchData, batchLabel = generator_test_ensemble(PARAMS, featName, fl, clNum)
                    pred = Train_Params['model'].predict(x=batchData)
                    misc.save_obj(pred, temp_folder, temp_file)
                else:
                    try:
                        pred = misc.load_obj(temp_folder, temp_file)
                    except:
                        # Cached file unreadable; recompute and overwrite it
                        batchData, batchLabel = generator_test_ensemble(PARAMS, featName, fl, clNum)
                        pred = Train_Params['model'].predict(x=batchData)
                        misc.save_obj(pred, temp_folder, temp_file)
                if np.size(individual_performances[featName]['Predictions']) <= 1:
                    individual_performances[featName]['Predictions'] = np.array(pred, ndmin=2)
                    individual_performances[featName]['GroundTruth'] = np.ones(np.shape(pred)[0]) * clNum
                else:
                    individual_performances[featName]['Predictions'] = np.append(individual_performances[featName]['Predictions'], np.array(pred, ndmin=2), axis=0)
                    individual_performances[featName]['GroundTruth'] = np.append(individual_performances[featName]['GroundTruth'], np.ones(np.shape(pred)[0]) * clNum)
                if np.size(Predictions) <= 1:
                    Predictions = np.array(pred, ndmin=2)
                    PtdLabels_temp = np.array(np.argmax(pred, axis=1), ndmin=2).T
                else:
                    empty_predictions = False
                    if np.shape(pred)[0] != np.shape(Predictions)[0]:
                        if np.shape(pred)[0] > np.shape(Predictions)[0]:
                            pred = pred[:np.shape(Predictions)[0], :]
                        else:
                            empty_predictions = True
                            break
                    Predictions = np.add(Predictions, np.array(pred, ndmin=2))
                    PtdLabels_temp = np.append(PtdLabels_temp, np.array(np.argmax(pred, axis=1), ndmin=2).T, axis=1)
            if empty_predictions:
                print(' ', end='\n')
                continue
            GroundTruth = np.ones(np.shape(Predictions)[0]) * clNum
            PtdLabels = np.argmax(Predictions, axis=1)
            # PtdLabels, label_counts = scipy.stats.mode(PtdLabels_temp, axis=1)
            print(np.shape(Predictions), ' acc=', np.round(np.sum(PtdLabels == GroundTruth) * 100 / np.size(GroundTruth), 2), end='\n')
            if np.size(PtdLabels_Ensemble) <= 1:
                PtdLabels_Ensemble = PtdLabels
                GroundTruth_Ensemble = GroundTruth
                Predictions_Ensemble = Predictions
            else:
                PtdLabels_Ensemble = np.append(PtdLabels_Ensemble, PtdLabels)
                GroundTruth_Ensemble = np.append(GroundTruth_Ensemble, GroundTruth)
                Predictions_Ensemble = np.append(Predictions_Ensemble, Predictions, axis=0)

    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ', testingTimeTaken)
    ConfMat_Ensemble, fscore_Ensemble = misc.getPerformance(PtdLabels_Ensemble, GroundTruth_Ensemble)
    accuracy_Ensemble = np.round(np.sum(PtdLabels_Ensemble == GroundTruth_Ensemble) * 100 / len(GroundTruth_Ensemble), 4)

    for featName in Ensemble_Train_Params.keys():
        indv_PtdLabels = np.argmax(individual_performances[featName]['Predictions'], axis=1)
        ConfMat_indv, fscore_indv = misc.getPerformance(indv_PtdLabels, individual_performances[featName]['GroundTruth'])
        individual_performances[featName]['fscore'] = fscore_indv

    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
        }
    return Ensemble_Test_Params
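# Minimal numpy sketch of the late-fusion rule implemented above: per-model
# class scores are summed across the ensemble and the class with the highest
# fused score wins. The prediction matrices here are invented for illustration.
import numpy as np

def _score_fusion_demo():
    rng = np.random.default_rng(0)
    preds_per_model = [rng.random((5, 2)) for _ in range(3)]  # 3 models, 5 patches, 2 classes
    fused = np.sum(preds_per_model, axis=0)
    return np.argmax(fused, axis=1)  # one predicted class per patch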
if PARAMS['frame_level_scaling']:
    stats_fName = 'data_stats_fold' + str(PARAMS['fold']) + '_' + str(len(PARAMS['classes'])) + 'class_train'
    if not os.path.exists(PARAMS['feature_opDir'] + '/' + stats_fName + '.pkl'):
        mean, stdev, nMuFrames, nSpFrames, nSpMuFrames = preproc.get_data_stats(PARAMS, PARAMS['train_files'])
        nFrames = [nMuFrames, nSpFrames, nSpMuFrames]
        PARAMS['mean_fold' + str(PARAMS['fold'])] = mean
        PARAMS['stdev_fold' + str(PARAMS['fold'])] = stdev
        stats = {
            'mean': mean,
            'stdev': stdev,
            'nFrames': [nMuFrames, nSpFrames, nSpMuFrames],
            }
        misc.save_obj(stats, PARAMS['feature_opDir'], stats_fName)
    else:
        stats = misc.load_obj(PARAMS['feature_opDir'], stats_fName)
        PARAMS['mean_fold' + str(PARAMS['fold'])] = stats['mean']
        PARAMS['stdev_fold' + str(PARAMS['fold'])] = stats['stdev']
        nFrames = stats['nFrames']
    # Convert frame counts to hours (10 ms hop, plus 15 ms for the final window)
    print('Class durations: ', np.round(((np.array(nFrames) * 10) + 15) / 1000 / 3600, 2))

if PARAMS['use_GPU']:
    PARAMS['GPU_session'] = start_GPU_session()
PARAMS['modelName'] = PARAMS['opDir'] + '/fold' + str(PARAMS['fold']) + '_model.xyz'
def grid_search_gmm(PARAMS, data_dict):
    K = 10
    gmmModel_mu = GaussianMixture(n_components=K, max_iter=1000)
    gmmModel_sp = GaussianMixture(n_components=K, max_iter=1000)
    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'], 0)
    All_train_label = np.append(data_dict['train_label'], data_dict['val_label'])
    mu_idx = np.squeeze(np.where(All_train_label == 0))
    sp_idx = np.squeeze(np.where(All_train_label == 1))

    ''' Checking if model is already available '''
    gmmModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K) + '.pkl'
    if not os.path.exists(gmmModelFileName):
        gmmModel_mu.fit(All_train_data[mu_idx, :])
        gmmModel_sp.fit(All_train_data[sp_idx, :])
        if PARAMS['save_flag']:
            misc.save_obj(gmmModel_mu, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K))
            misc.save_obj(gmmModel_sp, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_sp_K=' + str(K))
    else:
        gmmModel_mu = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K))
        gmmModel_sp = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_gmmModel_sp_K=' + str(K))

    # Classify by comparing the per-class GMM log-likelihoods
    score_train_mu = np.array(gmmModel_mu.score_samples(All_train_data), ndmin=2).T
    score_train_sp = np.array(gmmModel_sp.score_samples(All_train_data), ndmin=2).T
    print('scores shape: ', np.shape(score_train_mu), np.shape(score_train_sp))
    score_train = np.append(score_train_mu, score_train_sp, 1)
    print('score_train: ', np.shape(score_train))
    PtdLabels_train = np.argmax(score_train, axis=1)

    score_test_mu = np.array(gmmModel_mu.score_samples(data_dict['test_data']), ndmin=2).T
    score_test_sp = np.array(gmmModel_sp.score_samples(data_dict['test_data']), ndmin=2).T
    score_test = np.append(score_test_mu, score_test_sp, 1)
    print('score_test: ', np.shape(score_test))
    PtdLabels_test = np.argmax(score_test, axis=1)

    accuracy_train = np.mean(PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, data_dict['test_label'])
    Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])
    return score_test, PtdLabels_test, K, Performance_train, Performance_test
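# Self-contained sketch of the per-class GMM decision rule used above: fit one
# GaussianMixture per class and label each sample by the higher
# log-likelihood. Data and component count are invented for illustration.
import numpy as np
from sklearn.mixture import GaussianMixture

def _two_gmm_demo():
    rng = np.random.default_rng(0)
    X_mu = rng.normal(0.0, 1.0, size=(200, 5))   # class 0 ('music')
    X_sp = rng.normal(3.0, 1.0, size=(200, 5))   # class 1 ('speech')
    gmm_mu = GaussianMixture(n_components=4, max_iter=1000).fit(X_mu)
    gmm_sp = GaussianMixture(n_components=4, max_iter=1000).fit(X_sp)
    X_test = np.vstack([X_mu[:10], X_sp[:10]])
    scores = np.column_stack([gmm_mu.score_samples(X_test), gmm_sp.score_samples(X_test)])
    return np.argmax(scores, axis=1)  # 0 where the music GMM fits better, else 1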
# folder = '/media/mrinmoy/NTFS_Volume/Phd_Work/Data/Scheirer-slaney/'
folder = '/media/mrinmoy/NTFS_Volume/Phd_Work/Data/musan/'
classes = {0: 'music', 1: 'speech', 2: 'noise'}
CV = 3
dataset_name = list(filter(None, folder.split('/')))[-1]
opDir = './cross_validation_info/' + dataset_name + '_5_class/'
if not os.path.exists(opDir):
    os.makedirs(opDir)
mixing_dB_range = list(range(-5, 21))  # -5dB to 20dB mixing SMR levels

if not os.path.exists(opDir + '/Dataset_Duration.pkl'):
    total_duration, filewise_duration = calculate_dataset_size(folder, classes)
    misc.save_obj({'total_duration': total_duration, 'filewise_duration': filewise_duration}, opDir, 'Dataset_Duration')
else:
    total_duration = misc.load_obj(opDir, 'Dataset_Duration')['total_duration']
    filewise_duration = misc.load_obj(opDir, 'Dataset_Duration')['filewise_duration']

if not os.path.exists(opDir + '/cv_file_list_5_class.pkl'):
    cv_file_list, music_annot, speech_annot, noise_annot = create_CV_folds(folder, dataset_name, classes, CV, total_duration, filewise_duration, mixing_dB_range)
    misc.save_obj(cv_file_list, opDir, 'cv_file_list_5_class')
    misc.save_obj(music_annot, opDir, 'music_annot')
    misc.save_obj(speech_annot, opDir, 'speech_annot')
def train_cnn(PARAMS, data_dict):
    weightFile = PARAMS['modelName'].split('.')[0] + '.h5'
    architechtureFile = PARAMS['modelName'].split('.')[0] + '.json'
    paramFile = PARAMS['modelName'].split('.')[0] + '_params.npz'
    FL_Ret = {}

    if not os.path.exists(weightFile):
        epochs = 16
        batch_size = 128
        input_shape = (1, 200, 200)
        model, learning_rate = get_model(PARAMS['classes'], input_shape)
        model, trainingTimeTaken, FL_Ret, History = dplearn.train_model(PARAMS, data_dict, model, epochs, batch_size, weightFile)
        if PARAMS['save_flag']:
            model.save_weights(weightFile)  # Save the weights
            with open(architechtureFile, 'w') as f:  # Save the model architecture
                f.write(model.to_json())
            np.savez(paramFile, epochs=epochs, batch_size=batch_size, input_shape=input_shape, lr=learning_rate, trainingTimeTaken=trainingTimeTaken)
            misc.save_obj(History, PARAMS['opDir'], 'training_history_iter' + str(PARAMS['iter']))
            misc.save_obj(FL_Ret, PARAMS['opDir'], 'FL_Ret')
        print('CNN model trained.')
    else:
        epochs = np.load(paramFile)['epochs']
        batch_size = np.load(paramFile)['batch_size']
        input_shape = np.load(paramFile)['input_shape']
        learning_rate = np.load(paramFile)['lr']
        trainingTimeTaken = np.load(paramFile)['trainingTimeTaken']
        optimizer = optimizers.SGD(lr=learning_rate, momentum=0.9)
        with open(architechtureFile, 'r') as f:  # Model reconstruction from JSON file
            model = model_from_json(f.read())
        model.load_weights(weightFile)  # Load weights into the new model
        History = misc.load_obj(PARAMS['opDir'], 'training_history_iter' + str(PARAMS['iter']))
        FL_Ret = misc.load_obj(PARAMS['opDir'], 'FL_Ret')
        model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])
        print('CNN model exists! Loaded. Training time required=', trainingTimeTaken)

    Train_Params = {
        'model': model,
        'History': History,
        'trainingTimeTaken': trainingTimeTaken,
        'epochs': epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'FL_Ret': FL_Ret,
        'paramFile': paramFile,
        'architechtureFile': architechtureFile,
        'weightFile': weightFile,
        }
    return Train_Params
This function generates the CBoW feature from the peak-trace matrices of the
classes by learning SEPARATE GMMs and extracting posterior probabilities
from them
'''
for numIter in range(PARAMS['iterations']):
    PARAMS['opDir'] = PARAMS['folder'] + '/' + PARAMS['feat_type'] + '_mix' + str(PARAMS['numMix']) + '_iter' + str(numIter) + '/'
    if not os.path.exists(PARAMS['opDir']):
        os.makedirs(PARAMS['opDir'])

    file_list_fName = PARAMS['folder'] + '/file_list_iter' + str(numIter) + '.pkl'
    if not os.path.exists(file_list_fName):
        FL_Ret = misc.get_file_list(PARAMS)
        misc.save_obj(FL_Ret, PARAMS['folder'], 'file_list_iter' + str(numIter))
        # Also saved in the feature folder of the specific iteration so the
        # correct training and testing files are loaded during classification
        misc.save_obj(FL_Ret, PARAMS['opDir'], 'file_list_iter' + str(numIter))
    else:
        FL_Ret = misc.load_obj(PARAMS['folder'], 'file_list_iter' + str(numIter))
        # Also saved in the feature folder of the specific iteration so the
        # correct training and testing files are loaded during classification
        misc.save_obj(FL_Ret, PARAMS['opDir'], 'file_list_iter' + str(numIter))

    PARAMS['tempFolder'] = PARAMS['folder'] + '/__temp/iter' + str(numIter) + '/'
    if not os.path.exists(PARAMS['tempFolder']):
        os.makedirs(PARAMS['tempFolder'])
    PARAMS['gmmPath'] = PARAMS['folder'] + '/__GMMs/iter' + str(
def test_cnn_ensemble_noise(PARAMS, Ensemble_Train_Params):
    start = time.process_time()  # time.clock() was removed in Python 3.8
    GroundTruth_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    Predictions_Ensemble = {dB: np.empty([]) for dB in PARAMS['noise_dB_range']}
    PtdLabels_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    count = -1
    class_labels = {PARAMS['classes'][key]: key for key in PARAMS['classes'].keys()}

    def basic_storage_cell():
        # Build a fresh dict per feature/dB pair; sharing one dict object
        # across all keys (as the original literal did) aliases every entry
        # to the same storage.
        return {'Predictions': np.empty([]), 'GroundTruth': np.empty([]), 'fscore': [0, 0, 0]}

    feature_names = ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39', 'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
    individual_performances = {
        featName: {dB: basic_storage_cell() for dB in PARAMS['noise_dB_range']}
        for featName in feature_names
        }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl, fl_count, '/', len(files), end='\n')
            count += 1
            GroundTruth = np.empty([])
            Predictions = {dB: np.empty([]) for dB in PARAMS['noise_dB_range']}
            PtdLabels = {dB: [] for dB in PARAMS['noise_dB_range']}
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():
                empty_predictions = False
                curr_fName = PARAMS['test_folder'] + '/' + featName + '/' + classname + '/' + fl
                if not os.path.exists(curr_fName):
                    empty_predictions = True
                    break
                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                for targetdB in PARAMS['noise_dB_range']:
                    temp_file = 'pred_' + classname + '_fold' + str(PARAMS['fold']) + '_' + featName + '_' + str(targetdB) + 'dB_' + fl.split('.')[0]
                    if not os.path.exists(temp_folder + '/' + temp_file + '.pkl'):
                        batchData, batchLabel = generator_test_noise(PARAMS, featName, fl, clNum, targetdB)
                        pred = Train_Params['model'].predict(x=batchData)
                        misc.save_obj(pred, temp_folder, temp_file)
                    else:
                        pred = misc.load_obj(temp_folder, temp_file)
                    if np.size(individual_performances[featName][targetdB]['Predictions']) <= 1:
                        individual_performances[featName][targetdB]['Predictions'] = np.array(pred, ndmin=2)
                        individual_performances[featName][targetdB]['GroundTruth'] = np.ones(np.shape(pred)[0]) * clNum
                    else:
                        individual_performances[featName][targetdB]['Predictions'] = np.append(individual_performances[featName][targetdB]['Predictions'], np.array(pred, ndmin=2), axis=0)
                        individual_performances[featName][targetdB]['GroundTruth'] = np.append(individual_performances[featName][targetdB]['GroundTruth'], np.ones(np.shape(pred)[0]) * clNum)
                    if np.size(Predictions[targetdB]) <= 1:
                        Predictions[targetdB] = np.array(pred, ndmin=2)
                    else:
                        empty_predictions = False
                        if np.shape(pred)[0] != np.shape(Predictions[targetdB])[0]:
                            if np.shape(pred)[0] > np.shape(Predictions[targetdB])[0]:
                                # Slice to match (the original indexed a single row
                                # here by dropping the ':')
                                pred = pred[:np.shape(Predictions[targetdB])[0], :]
                            else:
                                empty_predictions = True
                                break
                        Predictions[targetdB] = np.add(Predictions[targetdB], np.array(pred, ndmin=2))
            if empty_predictions:
                print(' ', end='\n')
                continue
            for dB in PARAMS['noise_dB_range']:
                GroundTruth = np.array(np.ones(np.shape(Predictions[dB])[0]) * clNum, ndmin=2).T
                PtdLabels[dB] = np.array(np.argmax(Predictions[dB], axis=1), ndmin=2).T
                print('\t\t\t\t', dB, 'dB\t', np.shape(Predictions[dB]), ' acc=', np.round(np.sum(PtdLabels[dB] == GroundTruth) * 100 / np.shape(GroundTruth)[0], 2), end='\n')
                if np.size(PtdLabels_Ensemble[dB]) <= 1:
                    PtdLabels_Ensemble[dB] = PtdLabels[dB]
                    Predictions_Ensemble[dB] = Predictions[dB]
                    GroundTruth_Ensemble[dB] = GroundTruth
                else:
                    PtdLabels_Ensemble[dB] = np.append(PtdLabels_Ensemble[dB], PtdLabels[dB])
                    Predictions_Ensemble[dB] = np.append(Predictions_Ensemble[dB], Predictions[dB], axis=0)
                    GroundTruth_Ensemble[dB] = np.append(GroundTruth_Ensemble[dB], GroundTruth)

    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ', testingTimeTaken)
    ConfMat_Ensemble = {}
    fscore_Ensemble = {}
    accuracy_Ensemble = {}
    for dB in PARAMS['noise_dB_range']:
        ConfMat_dB, fscore_dB = misc.getPerformance(PtdLabels_Ensemble[dB], GroundTruth_Ensemble[dB])
        ConfMat_dB = np.reshape(ConfMat_dB, (len(PARAMS['classes']), len(PARAMS['classes'])))
        accuracy_dB = np.round(np.sum(np.diag(ConfMat_dB)) / np.sum(ConfMat_dB), 4)
        ConfMat_Ensemble[dB] = ConfMat_dB
        fscore_Ensemble[dB] = fscore_dB
        accuracy_Ensemble[dB] = accuracy_dB

    for featName in Ensemble_Train_Params.keys():
        for dB in PARAMS['noise_dB_range']:
            indv_PtdLabels_dB = np.argmax(individual_performances[featName][dB]['Predictions'], axis=1)
            ConfMat_indv_dB, fscore_indv_dB = misc.getPerformance(indv_PtdLabels_dB, individual_performances[featName][dB]['GroundTruth'])
            individual_performances[featName][dB]['fscore'] = fscore_indv_dB

    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
        }
    return Ensemble_Test_Params
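# Quick demonstration of the aliasing pitfall fixed above: reusing a single
# dict object across comprehension keys makes every entry the same dict,
# whereas building the dict inside the comprehension gives independent cells.
def _dict_aliasing_demo():
    cell = {'fscore': [0, 0, 0]}
    shared = {dB: cell for dB in (0, 5, 10)}
    assert shared[0] is shared[5]            # all keys alias one dict
    fresh = {dB: {'fscore': [0, 0, 0]} for dB in (0, 5, 10)}
    assert fresh[0] is not fresh[5]          # each key gets its own dict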
def load_model_NB(PARAMS, test_data, test_label, input_shape):
    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        print('NB model does not exist')
        return {}, {}
    NB_model = misc.load_obj('/'.join(PARAMS['modelName'].split('/')[:-1]), PARAMS['modelName'].split('/')[-1].split('.')[0])

    start = time.process_time()
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    PtdLabels_test = []
    Predictions_test = []
    GroundTruth = []
    for clNum in PARAMS['classes'].keys():
        files = PARAMS['test_files'][PARAMS['classes'][clNum]]
        for fl in files:
            temp_file = 'pred_' + PARAMS['classes'][clNum] + '_fold' + str(PARAMS['fold']) + '_' + PARAMS['featName'] + '_' + fl.split('.')[0]
            if not os.path.exists(temp_folder + temp_file + '.pkl'):
                FV = np.load(PARAMS['test_folder'] + '/' + PARAMS['featName'] + '/' + PARAMS['classes'][clNum] + '/' + fl, allow_pickle=True)
                FV = misc.get_feature_patches(FV, PARAMS['CNN_patch_size'], PARAMS['CNN_patch_shift_test'], input_shape)
                FV = PARAMS['std_scale'].transform(FV)
                pred_lab = NB_model.predict(FV)
                pred = NB_model.predict_proba(FV)
                misc.save_obj({'pred': pred, 'pred_lab': pred_lab}, temp_folder, temp_file)
            else:
                pred = misc.load_obj(temp_folder, temp_file)['pred']
                pred_lab = misc.load_obj(temp_folder, temp_file)['pred_lab']
            PtdLabels_test.extend(pred_lab)
            Predictions_test.extend(pred)
            GroundTruth.extend([clNum] * np.shape(pred)[0])
            print(fl, ' acc=', np.sum(np.array(pred_lab) == np.array([clNum] * np.shape(pred)[0])) / np.size(pred_lab))

    PtdLabels_test = np.array(PtdLabels_test)
    GroundTruth = np.array(GroundTruth)
    accuracy_test = np.mean(PtdLabels_test.ravel() == GroundTruth.ravel()) * 100
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, GroundTruth)
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    testingTimeTaken = time.process_time() - start
    print('Accuracy: test=', np.round(accuracy_test, 4), 'F-score: test=', np.round(fscore_test, 4))

    Train_Params = {
        'model': NB_model,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': GroundTruth,
        }
    return Train_Params, Test_Params
def grid_search_svm(PARAMS, train_data, train_label, test_data, test_label):
    C = np.power(2.0, list(np.arange(-5, 5, 1)))
    Gamma = np.power(2.0, list(np.arange(-5, 5, 1)))
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('CV folds=', cv_folds, ' n_jobs=', njobs)

    trainingTimeTaken = 0
    start = time.process_time()
    clf_param_tuning = SVC(decision_function_shape='ovo', verbose=0, probability=True)
    tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
    CLF_CV = GridSearchCV(clf_param_tuning, tunable_parameters, cv=cv_folds, refit=True, n_jobs=njobs, verbose=2)

    ''' Checking if model is already available '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(train_data, train_label.flatten())
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(CLF_CV, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')
    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()
    optC = str(CLF_CV.best_params_['C'])
    optGamma = str(CLF_CV.best_params_['gamma'])
    countSV = model.n_support_
    countTrPts = [np.sum(train_label == lab) for lab in np.unique(train_label)]
    PtdLabels_train = model.predict(train_data)
    # Predictions_train = model.predict_log_proba(train_data)
    PtdLabels_test = model.predict(test_data)
    Predictions_test = model.predict_log_proba(test_data)
    accuracy_train = np.mean(PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)
    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
        }
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
        }
    return Train_Params, Test_Params