def SVM_Classification_BeehiveSTATE(X_flat_train, y_train, X_flat_test, y_test, kerneloption='rbf'):
    """Fit an SVM on the training data and predict on both train and test sets.

    Args:
        X_flat_train: training feature matrix passed to ``svm.SVC.fit``.
        y_train: training labels.
        X_flat_test: test feature matrix.
        y_test: test labels (returned untouched as the test ground truth).
        kerneloption: kernel name forwarded to ``svm.SVC`` (default ``'rbf'``).

    Returns:
        Tuple ``(CLF, Test_GroundT, Train_GroundT, Test_Preds, Train_Preds,
        Test_Preds_Proba, Train_Preds_Proba)``. The two ``*_Proba`` entries are
        column 1 of ``predict_proba`` for the respective set.
    """
    print('\n')
    printb('Starting classification with SVM:')
    print('\n')
    printb('classification Beehive State into : Active or Missing Queen')

    # probability=True is required for predict_proba to be available.
    classifier = svm.SVC(kernel=kerneloption, probability=True)
    classifier.fit(X_flat_train, y_train)

    # Training-set predictions.
    train_predictions = classifier.predict(X_flat_train)
    train_second_class_proba = classifier.predict_proba(X_flat_train)[:, 1]

    # Test-set predictions.
    test_predictions = classifier.predict(X_flat_test)
    test_second_class_proba = classifier.predict_proba(X_flat_test)[:, 1]

    return (
        classifier,
        y_test,
        y_train,
        test_predictions,
        train_predictions,
        test_second_class_proba,
        train_second_class_proba,
    )
def BalanceData_online(y_set, x_set, sample_ids_set):
    """Balance an already-processed set by appending replicated samples.

    The three inputs must share the same ordering. ``get_items2replicate``
    decides which sample ids are replicated and how many extra copies each
    one gets; the copies are appended after the original samples, grouped
    per sample in the order the samples appear in ``sample_ids_set``.

    Args:
        y_set: binary labels of the set.
        x_set: feature maps of the set.
        sample_ids_set: sample names of the set.

    Returns:
        Tuple ``(y, x, sample_ids)`` with the replicated samples concatenated.
        When nothing needs replicating the inputs are returned unchanged.
    """
    printb('Balancing training data:')
    print('will randomly replicate samples from least represented class')

    dict_items_replicate = get_items2replicate(y_set, sample_ids_set)

    # Collect every position to copy first, so that each output needs a single
    # concatenation instead of three per replicated sample (which was
    # quadratic). A zero count contributes nothing; previously it concatenated
    # an empty float list, which cast 1-D labels to float and failed outright
    # on multi-dimensional feature maps.
    replicated_positions = []
    for i in range(len(sample_ids_set)):
        sample_id = sample_ids_set[i]
        if sample_id in dict_items_replicate:
            replicated_positions.extend([i] * dict_items_replicate[sample_id])

    if not replicated_positions:
        return y_set, x_set, sample_ids_set

    y_balanced = np.concatenate(
        [y_set, [y_set[i] for i in replicated_positions]])
    x_balanced = np.concatenate(
        [x_set, [x_set[i] for i in replicated_positions]])
    sample_ids_balanced = np.concatenate(
        [sample_ids_set, [sample_ids_set[i] for i in replicated_positions]])
    return y_balanced, x_balanced, sample_ids_balanced
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test, y_test, nb_epoch, batch_size, model_filename):
    """Train ``model``, reload the best checkpoint and evaluate it on the test set.

    Args:
        model: compiled Keras model exposing ``fit``.
        X_train: training inputs.
        Y_train: categorical (one-hot) training labels.
        X_test: test inputs; also used as validation data during training.
        Y_test: categorical (one-hot) test labels.
        y_test: integer test labels, compared against the argmax predictions.
        nb_epoch: maximum number of training epochs.
        batch_size: batch size for training and prediction.
        model_filename: path where the best model is checkpointed and then
            reloaded from.

    Returns:
        Tuple ``(results, best_acc, report, cnf_matrix)``: the object returned
        by ``model.fit``, the accuracy of the reloaded best model, the
        classification report as a dict, and the confusion matrix.
    """
    print('Train...')
    target_names = ['missing_queen', 'active']

    stopping = EarlyStopping(monitor='val_accuracy', patience=50)
    checkpointer = ModelCheckpoint(filepath=model_filename, verbose=1,
                                   save_best_only=True, save_weights_only=False,
                                   monitor='val_accuracy', mode='max')

    # NOTE(review): the test set doubles as validation data, so the checkpoint
    # is selected on the same samples it is scored on below.
    # `epochs` replaces the legacy Keras-1 keyword `nb_epoch`; the parameter
    # name of this function is unchanged.
    results = model.fit(X_train, Y_train,
                        batch_size=batch_size,
                        epochs=nb_epoch,
                        callbacks=[stopping, checkpointer],
                        verbose=2,
                        validation_data=(X_test, Y_test))

    # Evaluate the best checkpoint rather than the last-epoch weights.
    best_model = load_model(model_filename)
    probabilities = best_model.predict(X_test, batch_size=batch_size)
    predictions = probabilities.argmax(axis=-1)

    best_acc = accuracy_score(y_test, predictions)
    print('Accuracy score (best model): {}'.format(best_acc))

    report = classification_report(y_test, predictions,
                                   target_names=target_names, output_dict=True)
    cnf_matrix = confusion_matrix(y_test, predictions)
    return results, best_acc, report, cnf_matrix
def get_features_from_samples(path_audio_samples, sample_ids, raw_feature, normalization, high_level_features ):
    """Extract one feature map per audio sample and dump the raw features to .mat.

    Args:
        path_audio_samples: prefix prepended to each sample name to locate it.
        sample_ids: iterable of sample file names (the last four characters
            are stripped to build the .mat file name).
        raw_feature: raw feature identifier; ``'MFCCs20'``, ``'TTBOX'`` and
            ``'stft'`` select their own output folder, anything else uses the
            CQT folder.
        normalization: ``'NO'`` to skip normalization, otherwise the type
            forwarded to ``featureMap_normalization_block_level``
            (the original notes mention ``z_norm`` and ``min_max``).
        high_level_features: truthy to reduce each map to summary statistics.

    Returns:
        List with one feature map per sample, in input order.
    """
    # Output folder for the raw-feature .mat files (module-level settings).
    if raw_feature == 'MFCCs20':
        mat_folder = path_working_MFCC20
    elif raw_feature == 'TTBOX':
        mat_folder = path_working_TTBox
    elif raw_feature == 'stft':
        mat_folder = path_working_stft
    else:
        mat_folder = path_working_cqt

    collected_maps = []
    for sample in sample_ids:
        print("sample: ",sample)

        # Raw feature extraction.
        raw_map = raw_feature_fromSample(path_audio_samples + sample, raw_feature)

        # Persist the raw (un-normalized) features as a sparse matrix.
        savemat(mat_folder + sample[:-4] + '.mat', {'b': csr_matrix(raw_map)})

        # Optional block-level normalization.
        if normalization == 'NO':
            normalized_map = raw_map
        else:
            normalized_map = featureMap_normalization_block_level(
                raw_map, normalizationType=normalization)

        if not high_level_features:
            collected_maps.append(normalized_map)
            continue

        # High-level features: statistics computed over the normalized map.
        if 'MFCCs' in raw_feature:
            collected_maps.append(compute_statistics_overMFCCs(normalized_map, 'yes'))
        else:
            collected_maps.append(compute_statistics_overSpectogram(normalized_map))

    return collected_maps
def uniform_block_size(undersized_block, block_size_samples, method='repeat'):
    """Pad a too-short 1-D block at its end up to ``block_size_samples``.

    Args:
        undersized_block: NumPy array holding the short block.
        block_size_samples: target length in samples. Floats are accepted
            (callers pass ``block_size * sr``) and rounded to the nearest
            integer.
        method: ``'zero_padding'`` (pad with zeros), ``'mean_padding'`` (pad
            with the block mean) or ``'repeat'`` (pad with ``np.pad``'s
            ``'reflect'`` mode, i.e. a mirrored copy of the signal).

    Returns:
        The padded array, or the integer ``0`` after printing a hint when
        ``method`` is not recognised.
    """
    # np.pad only accepts integral pad widths; a float target length used to
    # raise TypeError.
    lengthTofill = int(round(block_size_samples)) - undersized_block.size
    pad_width = (0, lengthTofill)

    if method == 'zero_padding':
        return np.pad(undersized_block, pad_width, 'constant', constant_values=(0))
    if method == 'mean_padding':
        return np.pad(undersized_block, pad_width, 'mean')
    if method == 'repeat':
        return np.pad(undersized_block, pad_width, 'reflect')

    print(
        'methods to choose are: \'zero_padding\' ,\'mean_padding\' and \'repeat\' '
    )
    return 0
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        cm: confusion matrix as a 2-D NumPy array (rows are true labels,
            columns are predicted labels, as labelled on the axes).
        classes: class names used as tick labels on both axes.
        normalize: when true, every row is divided by its sum before the
            matrix is printed and drawn.
        title: plot title.
        cmap: matplotlib colormap for the image.

    Draws on the current matplotlib figure; returns nothing.
    """
    # Normalize BEFORE drawing so the colours, the cell text and the
    # text-colour threshold all refer to the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis].astype('float')
        # A class without any true sample has a zero row sum; dividing by 1
        # keeps that row at zero instead of producing NaN.
        row_totals[row_totals == 0] = 1.0
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=25)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Two decimals keep normalized cells readable; raw counts are shown as-is.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def read_beeNotBee_annotations_saves_labels(audiofilename, block_name, blockStart, blockfinish, annotations_path, threshold=0):
    """Label one audio block as bee / nobee from its ``.lab`` annotation file.

    The annotation file is ``<audio name without extension>.lab`` (a leading
    ``#`` in the audio file name is dropped) inside ``annotations_path``.
    Each data line is ``start<TAB>end<TAB>label``; the title line, ``.`` and
    empty lines are ignored, e.g.::

        CF003 - Active - Day - (223)
        0	8.0	bee
        8.01	15.05	nobee
        15.06	300.0	bee
        .

    Despite its name, this function writes nothing; it only returns the label.

    Args:
        audiofilename: audio file name without path, with a 4-character
            extension such as ``.wav``.
        block_name: name of the block/segment (not used here).
        blockStart: block start time in seconds.
        blockfinish: block end time in seconds.
        annotations_path: folder containing the ``.lab`` files.
        threshold: minimum duration in seconds for a ``nobee`` interval to be
            counted; ``0`` counts every interval.

    Returns:
        ``[label, strength]``. ``label`` is ``'nobee'`` when any counted nobee
        interval overlaps the block, else ``'bee'``; ``strength`` is the
        overlapped fraction of the block rounded to 3 decimals. When the
        annotation file is missing or cannot be read the result is
        ``['unknown', -1]``. Any other problem is printed and the label
        computed up to that point is returned.
    """
    block_length = blockfinish - blockStart

    if audiofilename.startswith('#'):
        annotation_filename = audiofilename[1:-4] + '.lab'
    else:
        annotation_filename = audiofilename[0:-4] + '.lab'
    annotation_file = annotations_path + os.sep + annotation_filename
    title_line = annotation_filename[0:-4]

    # Bound before the try so that a failure while opening/reading the file
    # (other than FileNotFoundError) no longer ends in UnboundLocalError.
    labels_th = ['unknown', -1]

    try:
        with open(annotation_file, 'r') as f:
            print(annotation_file)
            lines = f.read().split('\n')

        labels_th = ['bee', 0.0]
        label2assign = 'bee'
        intersected_s = 0
        last_interval_end = None  # end time of the last parsed annotation line

        for line in lines:
            # Ignore the title line, the closing '.' and empty lines.
            if line in (title_line, '.', ''):
                continue

            parsed_line = line.split('\t')
            # Explicit raise instead of assert so the check survives `python -O`;
            # it is reported by the generic handler below exactly as before.
            if len(parsed_line) != 3:
                raise ValueError('expected 3 fields in each line, got: ' +
                                 str(len(parsed_line)))

            tp0 = float(parsed_line[0])
            tp1 = float(parsed_line[1])
            annotation_label = parsed_line[2]
            last_interval_end = tp1

            if blockfinish < tp0:
                # Annotation already starts after the block: nothing further
                # can overlap.
                break

            if annotation_label != 'nobee':
                continue

            if tp1 - tp0 >= threshold:
                # Length of [tp0, tp1] intersected with [blockStart, blockfinish];
                # covers partial overlap on either side, containment and full
                # cover in one expression.
                overlap = min(tp1, blockfinish) - max(tp0, blockStart)
                if overlap > 0:
                    intersected_s = intersected_s + overlap

            if intersected_s > 0:
                label2assign = 'nobee'
            # Proportion of nobee time in the block (0 means a bee segment).
            label_strength = intersected_s / block_length
            labels_th = [label2assign, round(label_strength, 3)]

        # Report (without discarding the label) a block that extends past the
        # last annotated time point. Skipped when the file had no data lines.
        if last_interval_end is not None and blockfinish > last_interval_end:
            raise ValueError(
                'the end of the request block falls outside the file: block ending: '
                + str(blockfinish) + ' end of file at: ' + str(last_interval_end))

    except FileNotFoundError as e:
        print(e, '--Anotation file does not exist! label as unknown')
        labels_th = ['unknown', -1]
    except Exception as e1:
        # Best effort: report the problem and return whatever was computed.
        print('unknown exception: ' + str(e1))

    return labels_th
def load_audioFiles_saves_segments(path_audioFiles, path_save_audio_labels, block_size, thresholds, annotations_path, read_beeNotBee_annotations='yes', save_audioSegments='yes'):
    """Cut every .mp3/.wav file into fixed-length segments, label and save them.

    Each audio file is read in consecutive blocks of ``block_size`` seconds.
    For every non-empty block:

    * when ``read_beeNotBee_annotations == 'yes'``, one row per threshold is
      appended to ``labels_BeeNotBee_th<th>.csv`` in ``path_save_audio_labels``
      (the header is written when the file does not exist yet);
    * a block shorter than ``block_size`` seconds is padded to full length;
    * when ``save_audioSegments == 'yes'`` the block is written as
      ``<file stem>__segment<N>.wav`` unless that file already exists.

    Args:
        path_audioFiles: folder prefix (including the trailing separator) that
            is globbed for ``*.mp3`` and ``*.wav``.
        path_save_audio_labels: folder prefix where label CSVs and segment
            wav files are written.
        block_size: segment length in seconds.
        thresholds: iterable of minimum nobee durations; one label file each.
        annotations_path: folder holding the ``.lab`` annotation files.
        read_beeNotBee_annotations: ``'yes'`` to write bee/nobee labels.
        save_audioSegments: ``'yes'`` to write the segment wav files.
    """
    audiofilenames_list = [
        os.path.basename(x) for x in glob.glob(path_audioFiles + '*.mp3')
    ]
    audiofilenames_list.extend(
        os.path.basename(x) for x in glob.glob(path_audioFiles + '*.wav'))

    printb("Number of audiofiles in folder: " + str(len(audiofilenames_list)))

    label_fields = [
        'sample_name', 'segment_start', 'segment_finish', 'label_strength',
        'label'
    ]

    for file_name in audiofilenames_list:
        offset = 0
        block_id = 0

        while True:
            # Read one block of the audio file.
            try:
                block, sr = librosa.core.load(path_audioFiles + file_name,
                                              offset=offset,
                                              duration=block_size)
            except ValueError as e:
                # 'Input signal length' is raised once the offset is past the
                # end of the file: a normal end of this file. Any other
                # ValueError is reported. Either way stop with this file
                # instead of re-using a stale (or unbound) `block`, which used
                # to loop forever or crash.
                if 'Input signal length' not in str(e):
                    print(e, ' -- stopping at segment', block_id, 'of', file_name)
                break
            except FileNotFoundError as e1:
                print(e1, ' but continuing anyway')
                break

            # When the total length is a multiple of block_size the last block
            # is 0-length: nothing left in this file.
            if block.shape[0] == 0:
                break

            block_name = file_name[0:-4] + '__segment' + str(block_id)

            # Read bee / not-bee annotations and append one label per threshold.
            if read_beeNotBee_annotations == 'yes':
                blockStart = offset
                blockfinish = offset + block_size

                for th in thresholds:
                    label_path = (path_save_audio_labels +
                                  'labels_BeeNotBee_th' + str(th) + '.csv')
                    label_file_exists = os.path.isfile(label_path)

                    with open(label_path, 'a', newline='') as label_file:
                        writer = csv.DictWriter(label_file,
                                                fieldnames=label_fields,
                                                delimiter=',')
                        if not label_file_exists:
                            writer.writeheader()

                        label_block_th = read_beeNotBee_annotations_saves_labels(
                            file_name, block_name, blockStart, blockfinish,
                            annotations_path, th)

                        writer.writerow({
                            'sample_name': block_name,
                            'segment_start': blockStart,
                            'segment_finish': blockfinish,
                            'label_strength': label_block_th[1],
                            'label': label_block_th[0]
                        })

            # Make every block the same size.
            if block.shape[0] < block_size * sr:
                block = uniform_block_size(block, block_size * sr, 'repeat')

            # Save only if requested and if the segment file does not exist yet.
            if save_audioSegments == 'yes' and (
                    not os.path.exists(path_save_audio_labels + block_name +
                                       '.wav')):
                # NOTE(review): librosa.output was removed in librosa 0.8;
                # this call needs an older librosa - confirm the pinned version.
                librosa.output.write_wav(
                    path_save_audio_labels + block_name + '.wav', block, sr)

            offset += block_size
            block_id += 1

    printb(
        '______________________________No more audioFiles___________________________________________________'
    )
    return
def _confusion_counts_or_fallback(ground_truth, predictions):
    """Return the four binary confusion-matrix cells in ``ravel()`` order.

    When the matrix is not 2x2 (only one label present), the counts are
    rebuilt from the 0/1 sums of labels and predictions.
    """
    try:
        c00, c01, c10, c11 = metrics.confusion_matrix(ground_truth, predictions).ravel()
        return c00, c01, c10, c11
    except ValueError:
        n_pred = len(predictions)
        pred_sum = sum(predictions)
        gt_sum = sum(ground_truth)
        if pred_sum == n_pred and gt_sum == n_pred:
            return 0, 0, 0, n_pred
        if pred_sum == 0 and gt_sum == 0:
            return n_pred, 0, 0, 0
        if pred_sum == n_pred and gt_sum == 0:
            return 0, 0, n_pred, 0
        if pred_sum == 0 and gt_sum == len(ground_truth):
            return 0, n_pred, 0, 0
        raise


def _two_group_entropy(count_a, count_b):
    """Entropy of the split ``count_a`` vs ``count_b``; 0 when it cannot be computed."""
    try:
        total = count_a + count_b
        share_a = count_a / total
        share_b = count_b / total
        return -(share_a * log(share_a) + share_b * log(share_b))
    except Exception:
        return 0


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is 0."""
    if denominator == 0:
        return 0
    return numerator / denominator


def _auc_or_error(ground_truth, scores):
    """Return the ROC AUC, or the string ``'error'`` when it cannot be computed."""
    try:
        return metrics.roc_auc_score(ground_truth, scores, average='macro', sample_weight=None)
    except Exception:
        return 'error'


def report_SVM_beehiveState_results(summary_filename, path_results, thresholds, CLF, Test_GroundT, Train_GroundT, Test_Preds, Train_Preds, Test_Preds_Proba, Train_Preds_Proba, classification_idSTRING, testFilenames, chunk_size, save='yes'):
    """Print, save and summarise the results of one SVM beehive-state experiment.

    Side effects:
        * creates ``summary_filename`` with a header row when it does not exist;
        * prints the classification report and confusion matrix of the test set;
        * when ``save == 'yes'``, writes ``path_results + classification_idSTRING
          + '.csv'`` with the report, confusion matrix, accuracy, AUC and one
          line per test prediction;
        * appends one row of train/test metrics to ``summary_filename``.

    Args:
        summary_filename: path of the cumulative summary CSV.
        path_results: folder prefix for the per-experiment CSV.
        thresholds: not used by this function.
        CLF: fitted classifier; only used for its string representation.
        Test_GroundT, Train_GroundT: ground-truth labels; the single-class
            fallback sums them, so 0/1 labels are assumed - TODO confirm.
        Test_Preds, Train_Preds: predicted labels.
        Test_Preds_Proba, Train_Preds_Proba: scores used for the ROC AUC.
        classification_idSTRING: experiment identifier (file name and first
            summary column).
        testFilenames: test sample names, aligned with ``Test_Preds``.
        chunk_size: not used by this function.
        save: ``'yes'`` to write the per-experiment CSV.
    """
    if not os.path.isfile(summary_filename):
        # newline='' is what the csv module expects for files it writes.
        with open(summary_filename, 'w', newline='') as csvfile:
            wtr = csv.writer(csvfile, delimiter=',')
            wtr.writerow(['ExperienceParameters', 'AccuracyTRAIN', 'AUC_TRAIN', 'gtACTIVEpACTIVE_TRAIN', 'gtMQUEENpACTIVE_TRAIN', 'gtACTIVEpMQUEEN_TRAIN', 'gtMQUEENpMQUEEN_TRAIN','ShannonEnthropy_TRAIN','AccuracyTEST', 'AUC_TEST','ConfusionMatrixTEST_gtACTIVEpACTIVE_gtMQUEENpACTIVE_gtACTIVEpMQUEEN_gtMQUEENpMQUEEN', 'PrecisionTEST_on_MQUEEN', 'RecallTEST_on_MQUEEN', 'PrecisionTEST_on_ACTIVE', 'RecallTEST_on_ACTIVE', 'gtACTIVEpACTIVE_TEST', 'gtMQUEENpACTIVE_TEST', 'gtACTIVEpMQUEEN_TEST', 'gtMQUEENpMQUEEN_TEST', 'ShannonEnthropyTEST','accuracyTEST_on_balancedDatasets'])

    # Evaluate classifier (computed once, reused for printing and saving).
    report_text = "Classification report for classifier %s:\n%s\n" % (
        CLF, metrics.classification_report(Test_GroundT, Test_Preds))
    test_confusion = metrics.confusion_matrix(Test_GroundT, Test_Preds)
    confusion_text = "Confusion matrix:\n%s" % test_confusion
    print(report_text)
    print(confusion_text)
    print('\n')

    accuracy = metrics.accuracy_score(Test_GroundT, Test_Preds)
    AccuracyTRAIN = metrics.accuracy_score(Train_GroundT, Train_Preds)
    # AUC is undefined when only one class is present; 'error' is stored then,
    # in the per-experiment file as well as in the summary.
    AUC_TRAIN = _auc_or_error(Train_GroundT, Train_Preds_Proba)
    AUC_TEST = _auc_or_error(Test_GroundT, Test_Preds_Proba)

    # Save classification results.
    if save == 'yes':
        with open(path_results+classification_idSTRING+'.csv', 'w', newline='') as csvfile:
            wtr = csv.writer(csvfile, delimiter=';')
            wtr.writerow([report_text])
            wtr.writerow([confusion_text])
            wtr.writerow([" Accuracy: \n%s" % accuracy])
            wtr.writerow([" Area under Curve: \n%s" % AUC_TEST])
            wtr.writerow(["Predictions: \n"])
            for p in range(len(Test_Preds)):
                wtr.writerow([testFilenames[p] +' GT: ' +str(Test_GroundT[p])+ ' PRED_TEST: '+ str(Test_Preds[p])])

    # Confusion-matrix cells. The names follow the order of
    # confusion_matrix(...).ravel(), i.e. cells [0,0], [0,1], [1,0], [1,1].
    (gtACTIVEpACTIVE, gtMQUEENpACTIVE,
     gtACTIVEpMQUEEN, gtMQUEENpMQUEEN) = _confusion_counts_or_fallback(Test_GroundT, Test_Preds)
    # The train fallback now uses the train length (it used len(PRED_TEST)).
    (gtACTIVEpACTIVE_TRAIN, gtMQUEENpACTIVE_TRAIN,
     gtACTIVEpMQUEEN_TRAIN, gtMQUEENpMQUEEN_TRAIN) = _confusion_counts_or_fallback(Train_GroundT, Train_Preds)

    ShannonEnthropy_TRAIN = _two_group_entropy(
        gtACTIVEpACTIVE_TRAIN + gtACTIVEpMQUEEN_TRAIN,
        gtMQUEENpACTIVE_TRAIN + gtMQUEENpMQUEEN_TRAIN)
    ShannonEnthropy = _two_group_entropy(
        gtACTIVEpACTIVE + gtACTIVEpMQUEEN,
        gtMQUEENpACTIVE + gtMQUEENpMQUEEN)

    # A zero denominator gives 0 for plain ints and for NumPy integers alike
    # (NumPy would otherwise yield NaN instead of raising ZeroDivisionError).
    Precision_on_MQUEEN = _safe_ratio(gtMQUEENpMQUEEN, gtMQUEENpMQUEEN + gtACTIVEpMQUEEN)
    Recall_on_MQUEEN = _safe_ratio(gtMQUEENpMQUEEN, gtMQUEENpMQUEEN + gtMQUEENpACTIVE)
    Precision_on_ACTIVE = _safe_ratio(gtACTIVEpACTIVE, gtACTIVEpACTIVE + gtMQUEENpACTIVE)
    Recall_on_ACTIVE = _safe_ratio(gtACTIVEpACTIVE, gtACTIVEpACTIVE + gtACTIVEpMQUEEN)

    # NOTE(review): 0.9 is only reachable if `log` is base 2 (the natural-log
    # entropy of two groups peaks at ~0.693) - confirm which `log` is imported.
    if ShannonEnthropy > 0.9:
        accuracy_on_balancedDatasets = accuracy
    else:
        accuracy_on_balancedDatasets = 0

    # Append results to the summary file.
    # NOTE(review): the source indentation was lost; this append is kept
    # unconditional (like the header creation above) - confirm it was not
    # meant to sit under `save == 'yes'`.
    with open(summary_filename, 'a', newline='') as summaryFile:
        writer = csv.writer(summaryFile, delimiter=',')
        writer.writerow([classification_idSTRING, AccuracyTRAIN, AUC_TRAIN, gtACTIVEpACTIVE_TRAIN, gtMQUEENpACTIVE_TRAIN, gtACTIVEpMQUEEN_TRAIN, gtMQUEENpMQUEEN_TRAIN , ShannonEnthropy_TRAIN, accuracy , AUC_TEST ,test_confusion,Precision_on_MQUEEN , Recall_on_MQUEEN, Precision_on_ACTIVE, Recall_on_ACTIVE, gtACTIVEpACTIVE, gtMQUEENpACTIVE, gtACTIVEpMQUEEN,gtMQUEENpMQUEEN, ShannonEnthropy,accuracy_on_balancedDatasets])
def extract_long_term_features_fromMEL(Mel_spectrograms, n_slices):
    """Average every spectrogram over ``n_slices`` consecutive slices of axis 1.

    Each sample is split along axis 1 with ``np.array_split`` (so the slices
    need not be equally sized) and every slice is reduced to its mean over
    that axis.

    Args:
        Mel_spectrograms: iterable of arrays with at least two dimensions.
        n_slices: number of slices per sample.

    Returns:
        List with one entry per sample; each entry is a list of ``n_slices``
        arrays holding the per-slice means. An empty input gives ``[]``.
    """
    long_term_features = []

    for sample in Mel_spectrograms:
        print(sample.shape)
        sliced_sample = np.array_split(sample, n_slices, 1)
        print(len(sliced_sample))
        print(sliced_sample[0].shape)

        stacked_averaged_slices = []
        for one_slice in sliced_sample:
            slice_mean = np.mean(one_slice, 1)
            stacked_averaged_slices.append(slice_mean)
            print(slice_mean.shape)

        print(len(stacked_averaged_slices))
        long_term_features.append(stacked_averaged_slices)

    # Debug summary; guarded because indexing [0] used to raise IndexError
    # when no spectrogram was passed in.
    if long_term_features:
        print(len(long_term_features))
        print(len(long_term_features[0]))
        print(long_term_features[0][0].shape)

    return long_term_features
# Top-level script: loads the per-hive STFT samples and prepares the
# train/test arrays for a 4-fold cross-validation.

# Folder prefix of the saved STFT .mat files.
# NOTE(review): mixes a hard-coded '\\' with os.sep, so the path is only
# well-formed on Windows - confirm.
path_save_audio_stft= path+ 'dataset_BeeNoBee_2_second'+str(block_size)+'sec'+'\\stft_matrix.mat'+os.sep
nbits = 16;
# 2 ** (nbits - 1) as a float, i.e. 32768.0 for nbits = 16.
MAX_VAL = pow(2,(nbits-1)) * 1.0;
target_names=['missing_queen', 'active']
#-----------------------------------STFT+CNN -----------------------------------#
# Four groups of (samples, Y, labels, sample ids); presumably one group per
# hive ("ruche") - verify against get_list_samples_name_.
ruche1,Y1,labels1, sample_ids1, ruche2,Y2,labels2, sample_ids2, ruche3,Y3,labels3, sample_ids3, ruche4,Y4,labels4, sample_ids4=get_list_samples_name_('b', path_save_audio_stft)
# save the model history in a list after fitting so we can plot later
# NOTE(review): neither list is filled in the statements visible here.
model_history=[]
val_accuracy=[]
for i in range(4):
    # Folds are numbered 1..4.
    fold= i+1
    print("Training on Fold :", fold)
    x_train, x_test, y_train, y_test,sample_ids_train, sample_ids_test=cross_validation_4folds(fold, ruche1,Y1, ruche2,Y2, ruche3,Y3, ruche4,Y4 , sample_ids1 , sample_ids2 , sample_ids3 , sample_ids4)
    print(len(x_train), len(x_test), len(y_train), len(y_test))
    # Read a sparse matrix
    # Each element exposes .todense(); the dense copies are stacked into
    # arrays below.
    x_train2=[]
    x_test2=[]
    for l in range(len(x_train)):
        x_train2.append(x_train[l].todense())
    for l in range(len(x_test)):
        x_test2.append(x_test[l].todense())
    X_train= np.array(x_train2)
    x_Test= np.array(x_test2)
    y_train=np.array(y_train)
    y_test= np.array(y_test)
    # NOTE(review): y_train is passed together with the *test* features and
    # labels - confirm this matches constrainedsplit's expected arguments.
    X_test, y_test = constrainedsplit(y_train, x_Test, y_test, 0.7)
def fit_and_evaluate(train_x, val_x, train_y, val_y, EPOCHS=50, BATCH_SIZE=145, input_shape=(20, 44, 1)):
    """Build a fresh model, train it and print its score on the held-out set.

    Args:
        train_x: training inputs.
        val_x: held-out inputs, used only for the final ``evaluate`` call.
        train_y: training targets.
        val_y: held-out targets.
        EPOCHS: maximum number of training epochs.
        BATCH_SIZE: training batch size.
        input_shape: shape passed to ``deep_model``; the default keeps the
            previously hard-coded ``(20, 44, 1)``.

    Returns:
        The object returned by ``model.fit``.
    """
    model = deep_model(input_shape)

    # 10% of the training data is split off for validation during fitting;
    # `early_stopping` and `model_checkpoint` are module-level callbacks.
    results = model.fit(train_x, train_y,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1,
                        validation_split=0.1)

    print("Val Score :", model.evaluate(val_x, val_y))
    return results
def StoreExcel(dic):
    """Append names, labels and feature values to the MFCC Excel workbook.

    The workbook ``./MFCC/Name_Label_MFCCfeature.xlsx`` is opened when it
    exists and created otherwise. Row 1 holds the headers (``Name``,
    ``Labal`` and the numbers 1..120 above the feature columns). Every key of
    ``dic`` whose stem (name without its last four characters) is not already
    listed in column A is appended as a new row: stem, label, then one cell
    per feature value. The label is ``'Missing Queen'`` when the stem
    contains ``'NO'`` or ``'Missing'``, else ``'Active'``.

    Args:
        dic: mapping from file name to an iterable of feature values.
    """
    workbook_path = './MFCC/Name_Label_MFCCfeature.xlsx'

    if os.path.isfile(workbook_path):
        xls = load_workbook(workbook_path)
        print("file existed ")
    else:
        xls = openpyxl.Workbook()

    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = xls['Sheet']

    sheet.cell(row=1, column=1, value='Name')
    # NOTE(review): 'Labal' looks like a typo for 'Label'; kept because
    # existing workbooks and their readers may rely on this header.
    sheet.cell(row=1, column=2, value='Labal')

    # Walk column A from row 2 to the first empty cell: this both collects the
    # names already stored and finds the row where new entries start. The
    # names used to come from pd.read_excel(..., delimiter="\t"), which is not
    # a valid read_excel argument; the resulting error was swallowed, so
    # duplicates were never detected.
    stored_names = set()
    x = 2
    while sheet.cell(row=x, column=1).value is not None:
        stored_names.add(sheet.cell(row=x, column=1).value)
        x += 1

    for name in dic:
        currentname = name[0:-4]
        if currentname in stored_names:
            # Already stored in a previous run: skip.
            print('this file has been used , Pass!')
            continue

        # Derive the hive state from the file name.
        if 'NO' in currentname or 'Missing' in currentname:
            currentlabel = 'Missing Queen'
        else:
            currentlabel = 'Active'

        # Name and state go to the first two columns, features from column 3.
        sheet.cell(row=x, column=1, value=currentname)
        sheet.cell(row=x, column=2, value=currentlabel)
        for y, feature in enumerate(dic[name], start=3):
            sheet.cell(row=x, column=y, value=feature)
        x += 1

    # Header numbers 1..120 above the feature columns.
    for count in range(1, 121):
        sheet.cell(row=1, column=count + 2, value=count)

    Excelname = 'Name_Label_MFCCfeature.xlsx'
    xls.save('./MFCC/' + Excelname)
    print(Excelname + " have Done")
def load_audioFiles_saves_segments(audiofilenames_list, path_audioFiles, path_save_audio_labels, block_size, save_audioSegments='yes'):
    """Cut the listed audio files into fixed-length segments and label their hive state.

    Files are processed one by one. A file is skipped when its first segment
    name (``<file stem>__segment0.wav``) is already in the collection returned
    by ``fileisexitornot(path_save_audio_labels)``. Otherwise the file is read
    in consecutive blocks of ``block_size`` seconds until an empty block is
    returned. For every non-empty block:

    * a row ``(sample_name, label)`` is appended to ``state_labels.csv`` in
      ``path_save_audio_labels`` (the header is written when the file does not
      exist yet); the label comes from ``read_HiveState_fromSampleName``;
    * when ``save_audioSegments == 'yes'`` the block is written as
      ``<file stem>__segment<N>.wav`` unless that file already exists.

    Blocks shorter than ``block_size`` seconds are saved as they are.

    Args:
        audiofilenames_list: audio file names (without folder) to process.
        path_audioFiles: folder prefix of the audio files.
        path_save_audio_labels: folder prefix for the label CSV and segments.
        block_size: segment length in seconds.
        save_audioSegments: ``'yes'`` to write the segment wav files.
    """
    printb("Number of audiofiles in folder: " + str(len(audiofilenames_list)))

    states = ['active', 'missing queen', 'swarm']
    label_path = path_save_audio_labels + 'state_labels' + '.csv'

    for fi, file_name in enumerate(audiofilenames_list, start=1):
        print('\n')
        printb('Processing ' + file_name + ' :::file number: ' + str(fi) +
               ' ----->of ' + str(len(audiofilenames_list)))

        offset = 0
        block_id = 0

        # Skip files that were already processed; the name of the first
        # segment tells whether this file has been cut before.
        exitfile = fileisexitornot(path_save_audio_labels)
        first_block_name = file_name[0:-4] + '__segment' + str(
            block_id) + '.wav'
        if first_block_name in exitfile:
            print('this file has been used')
            continue

        while True:
            # Read one block of the audio file, starting at `offset` seconds.
            try:
                block, sr = librosa.core.load(path_audioFiles + file_name,
                                              offset=offset,
                                              duration=block_size)
                print('-----Reading segment ' + str(block_id))
            except ValueError as e:
                # 'Input signal length' means the offset is past the end of
                # the file. Any other ValueError is reported. Either way stop
                # with this file instead of re-using a stale (or unbound)
                # `block`, which used to loop forever or crash.
                if 'Input signal length' not in str(e):
                    print(e, ' -- stopping at segment', block_id, 'of', file_name)
                block = None
            except FileNotFoundError as e1:
                print(e1, ' but continuing anyway')
                block = None

            # When the total length is a multiple of block_size the last block
            # is 0-length: nothing left in this file.
            if block is None or block.shape[0] == 0:
                print('-----no more segments for this file-----')
                print('\n')
                break

            block_name = file_name[0:-4] + '__segment' + str(block_id)
            print(block_name)

            # Append the segment name and its hive state to state_labels.csv.
            # (The file is append-only here; the former attempt to read it
            # back through this handle could never succeed and was removed.)
            label_file_exists = os.path.isfile(label_path)
            with open(label_path, 'a', newline='') as label_file:
                writer = csv.DictWriter(label_file,
                                        fieldnames=['sample_name', 'label'],
                                        delimiter=',')
                if not label_file_exists:
                    writer.writeheader()

                label_state = read_HiveState_fromSampleName(block_name, states)
                writer.writerow({
                    'sample_name': block_name,
                    'label': label_state
                })

            # Save only if requested and if the segment file does not exist yet.
            if save_audioSegments == 'yes' and (
                    not os.path.exists(path_save_audio_labels + block_name +
                                       '.wav')):
                # NOTE(review): librosa.output was removed in librosa 0.8;
                # this call needs an older librosa - confirm the pinned version.
                librosa.output.write_wav(
                    path_save_audio_labels + block_name + '.wav', block, sr)
                print('-----Saved wav file for segment ' + str(block_id))

            offset += block_size
            block_id += 1

    printb('_____No more audioFiles_____')
    return