def LoadData(fe_fd, agg_num, hop, na_list):
    mix_names = sorted(os.listdir(fe_fd + '/mix'))
    chn0_names = sorted(os.listdir(fe_fd + '/chn0'))
    chn1_names = sorted(os.listdir(fe_fd + '/chn1'))

    X2d_list = []
    X3d_list = []
    for na in mix_names:
        if na_in_na_list(na, na_list):
            X = pickle.load(open(fe_fd + '/mix/' + na, 'rb'))
            X2d_list.append(X)
            X_pad = pad_zero_pre(X, agg_num - 1)
            X3d = mat_2d_to_3d(X_pad, agg_num, hop)
            X3d_list.append(X3d)

    y2d_chn0_list = []
    y3d_chn0_list = []
    for na in chn0_names:
        if na_in_na_list(na, na_list):
            X = pickle.load(open(fe_fd + '/chn0/' + na, 'rb'))
            y2d_chn0_list.append(X)
            X_pad = pad_zero_pre(X, agg_num - 1)
            y3d = mat_2d_to_3d(X_pad, agg_num, hop)
            y3d_chn0_list.append(y3d)

    y2d_chn1_list = []
    y3d_chn1_list = []
    for na in chn1_names:
        if na_in_na_list(na, na_list):
            X = pickle.load(open(fe_fd + '/chn1/' + na, 'rb'))
            y2d_chn1_list.append(X)
            X_pad = pad_zero_pre(X, agg_num - 1)
            y3d = mat_2d_to_3d(X_pad, agg_num, hop)
            y3d_chn1_list.append(y3d)

    X2d = np.concatenate(X2d_list, axis=0)              # (n_songs*n_chunks, n_freq)
    X3d = np.concatenate(X3d_list, axis=0)              # (n_songs*n_chunks, n_time, n_freq)
    y2d_chn0 = np.concatenate(y2d_chn0_list, axis=0)    # (n_songs*n_chunks, n_freq)
    y2d_chn1 = np.concatenate(y2d_chn1_list, axis=0)    # (n_songs*n_chunks, n_freq)
    y3d_chn0 = np.concatenate(y3d_chn0_list, axis=0)    # (n_songs*n_chunks, n_time, n_freq)
    y3d_chn1 = np.concatenate(y3d_chn1_list, axis=0)    # (n_songs*n_chunks, n_time, n_freq)

    return X2d, X3d, y2d_chn0, y2d_chn1, y3d_chn0, y3d_chn1

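# The loaders in this file rely on a few helpers (pad_zero_pre, mat_2d_to_3d)
# whose definitions are not included here. The following is only a hedged
# sketch of plausible implementations, inferred from the call sites:
# pad_zero_pre prepends zero frames, and mat_2d_to_3d slices a (n_time, n_freq)
# matrix into overlapping (agg_num, n_freq) blocks with step `hop`. The
# original implementations may differ in edge handling.
import numpy as np


def pad_zero_pre(x, n_pad):
    """Prepend n_pad all-zero frames to a (n_time, n_freq) matrix."""
    pad = np.zeros((n_pad, x.shape[1]))
    return np.concatenate((pad, x), axis=0)


def mat_2d_to_3d(x, agg_num, hop):
    """Segment a (n_time, n_freq) matrix into (n_blocks, agg_num, n_freq)."""
    n_time, n_freq = x.shape
    # pad the tail so that at least one full block can be cut
    if n_time < agg_num:
        x = np.concatenate((x, np.zeros((agg_num - n_time, n_freq))), axis=0)
        n_time = x.shape[0]
    blocks = []
    i = 0
    while i + agg_num <= n_time:
        blocks.append(x[i:i + agg_num])
        i += hop
    return np.array(blocks)
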
def get_matrix_format_data(fe_fd, csv_file, n_concat, hop, scaler):
    """Get training data and ground truth in matrix format.

    Args:
      fe_fd: string. Feature folder.
      csv_file: string. Path of csv file.
      n_concat: integer. Number of frames to concatenate.
      hop: integer. Number of hop frames.
      scaler: None | object.
    """
    with open(csv_file, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    x3d_all = []
    y_all = []

    for li in lis:
        [na, lb] = li[0].split('\t')
        na = na.split('/')[1][0:-4]
        path = fe_fd + '/' + na + '.f'
        x = cPickle.load(open(path, 'rb'))
        if scaler:
            x = scaler.transform(x)
        x3d = mat_2d_to_3d(x, n_concat, hop)    # (n_blocks, n_concat, n_freq)
        x3d_all.append(x3d)
        y_all += [cfg.lb_to_id[lb]] * len(x3d)

    x3d_all = np.concatenate(x3d_all)           # (n_samples, n_concat, n_freq)
    y_all = np.array(y_all)
    y_all = sparse_to_categorical(y_all, len(cfg.labels))   # (n_samples, n_labels)
    return x3d_all, y_all

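# sparse_to_categorical is called by get_matrix_format_data but not defined in
# this file. A minimal sketch of the one-hot conversion its usage implies; the
# original may differ in dtype or argument handling.
import numpy as np


def sparse_to_categorical(y, n_classes):
    """Map integer label ids of shape (n_samples,) to one-hot (n_samples, n_classes)."""
    y = np.asarray(y, dtype=int)
    out = np.zeros((len(y), n_classes))
    out[np.arange(len(y)), y] = 1.
    return out
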
def GetScalerSegData(fe_fd, agg_num, hop, fold, scaler):
    with open(cfg.dev_cv_csv_path, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    tr_Xlist, tr_ylist = [], []
    te_Xlist, te_ylist = [], []

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # get features, tags
        fe_path = fe_fd + '/' + na + '.f'
        csv_path = cfg.dev_wav_fd + '/' + na + '.csv'
        tags = GetTags(csv_path)
        y = TagsToCategory(tags)
        X = cPickle.load(open(fe_path, 'rb'))
        if scaler is not None:
            X = scaler.transform(X)

        # aggregate data
        X3d = mat_2d_to_3d(X, agg_num, hop)

        if curr_fold == fold:
            te_Xlist.append(X3d)
            te_ylist += [y]
        else:
            tr_Xlist.append(X3d)
            tr_ylist += [y]

    return np.array(tr_Xlist), np.array(tr_ylist), \
           np.array(te_Xlist), np.array(te_ylist)

def LoadAllData(fe_fd, txt_file, lb_to_id, agg_num, hop):
    # add acoustic sound and id to Xlist, ylist
    fr = open(txt_file, 'r')
    Xlist, ylist = [], []

    for line in fr.readlines():
        line_list = line.split('\t')

        # parse info
        path, scene, bgn, fin, lb = line_list[0], line_list[1], \
            float(line_list[2]), float(line_list[3]), line_list[4].split('\r')[0]

        # load whole feature
        fe_path = fe_fd + '/' + path.split('/')[-1][0:4] + '.f'
        X = cPickle.load(open(fe_path, 'rb'))

        # get sub feature
        ratio = cfg.fs / cfg.win
        X = X[int(bgn * ratio):int(fin * ratio), :]

        # aggregate feature
        X3d = mat_2d_to_3d(X, agg_num, hop)
        Xlist.append(X3d)
        ylist += [lb_to_id[lb]] * len(X3d)

    fr.close()
    return np.concatenate(Xlist, axis=0), np.array(ylist)

def LoadAllData(fe_fd, ann_fd, lb_to_id, agg_num, hop):
    # anno names
    names = os.listdir(ann_fd)
    names = sorted(names)

    # init space
    Xlist, ylist = [], []

    # each anno file
    for na in names:
        fr = open(ann_fd + '/' + na, 'r')
        for line in fr.readlines():
            line_list = line.split('\t')

            # parse info
            bgn, fin, lb = float(line_list[0]), float(line_list[1]), \
                line_list[2].split('\n')[0]

            # load whole feature
            fe_path = fe_fd + '/' + na[0:4] + '.f'
            X = cPickle.load(open(fe_path, 'rb'))

            # get sub feature
            ratio = cfg.fs / cfg.win
            X = X[int(bgn * ratio):int(fin * ratio), :]

            # aggregate feature
            X3d = mat_2d_to_3d(X, agg_num, hop)
            Xlist.append(X3d)
            ylist += [lb_to_id[lb]] * len(X3d)
        fr.close()

    return np.concatenate(Xlist, axis=0), np.array(ylist)

def recognize(md_path, te_fe_fd, te_csv_file, n_concat, hop, scaler):
    """Recognize and get statistics.

    Args:
      md_path: string. Path of model.
      te_fe_fd: string. Folder path containing testing features.
      te_csv_file: string. Path of test csv file.
      n_concat: integer. Number of frames to concatenate.
      hop: integer. Number of frames to hop.
      scaler: None | scaler object.
    """
    # Load model
    md = serializations.load(md_path)

    # Recognize and get statistics
    n_labels = len(cfg.labels)
    confuse_mat = np.zeros((n_labels, n_labels))    # confusion matrix
    frame_based_accs = []

    # Get test file names
    with open(te_csv_file, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    # Predict for each scene
    for li in lis:
        # Load data
        [na, lb] = li[0].split('\t')
        na = na.split('/')[1][0:-4]
        path = te_fe_fd + '/' + na + '.f'
        x = cPickle.load(open(path, 'rb'))
        if scaler:
            x = scaler.transform(x)
        x = mat_2d_to_3d(x, n_concat, hop)

        # Predict
        p_y_preds = md.predict(x)[0]                # (n_block, n_label)
        pred_ids = np.argmax(p_y_preds, axis=-1)    # (n_block,)
        pred_id = int(get_mode_value(pred_ids))
        gt_id = cfg.lb_to_id[lb]

        # Statistics
        confuse_mat[gt_id, pred_id] += 1
        n_correct_frames = list(pred_ids).count(gt_id)
        frame_based_accs += [float(n_correct_frames) / len(pred_ids)]

    clip_based_acc = np.sum(np.diag(confuse_mat)) / np.sum(confuse_mat)
    frame_based_acc = np.mean(frame_based_accs)

    print 'event_acc:', clip_based_acc
    print 'frame_acc:', frame_based_acc
    print confuse_mat

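# get_mode_value is used above for clip-level majority voting over the
# frame-wise argmax ids. Its implementation is not included here, so this is
# only a hedged sketch of the behaviour the call site implies.
import numpy as np


def get_mode_value(ids):
    """Return the most frequent value in a 1-D array of integer ids."""
    values, counts = np.unique(ids, return_counts=True)
    return values[np.argmax(counts)]
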
def detect_cv():
    # init paths
    if type == 'home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        id_to_lb = cfg.id_to_lb_home
        tr_txt = cfg.dev_evaluation_fd + '/home_fold' + str(fold) + '_train.txt'
        te_txt = cfg.dev_evaluation_fd + '/home_fold' + str(fold) + '_evaluate.txt'
        meta_fd = cfg.dev_meta_home_fd
    if type == 'resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        id_to_lb = cfg.id_to_lb_resi
        tr_txt = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold) + '_train.txt'
        te_txt = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold) + '_evaluate.txt'
        meta_fd = cfg.dev_meta_resi_fd

    n_out = len(labels)

    # load model
    md = serializations.load(md_path)

    # get wav names to be detected
    te_names = pp_dev_data.GetWavNamesFromTxt(te_txt)

    # do recognize for each test audio
    names = os.listdir(fe_fd)
    names = sorted(names)
    y_pred_list = []

    # detect and write out to txt
    pp_dev_data.CreateFolder(cfg.dev_results_fd)
    file_list = []
    for na in names:
        if na[0:4] in te_names:
            print na
            gt_file = meta_fd + '/' + na[0:4] + '.ann'
            out_file = cfg.dev_results_fd + '/' + na[0:4] + '_detect.ann'

            X = cPickle.load(open(fe_fd + '/' + na, 'rb'))
            X = mat_2d_to_3d(X, agg_num, hop)

            y_pred = md.predict(X)
            y_pred_list.append(y_pred)

            out_list = pp_dev_data.OutMatToList(y_pred, thres, id_to_lb)
            pp_dev_data.PrintListToTxt(out_list, out_file)

            file_list.append({'reference_file': gt_file, 'estimated_file': out_file})

    # print results for this fold
    pp_dev_data.PrintScore(file_list, labels)

def GetEvaSegData(fe_fd, agg_num, hop):
    te_Xlist = []
    names = os.listdir(fe_fd)
    te_na_list = []

    # read one line
    for na in names:
        fe_path = fe_fd + '/' + na
        X = cPickle.load(open(fe_path, 'rb'))

        # aggregate data
        X3d = mat_2d_to_3d(X, agg_num, hop)
        te_Xlist.append(X3d)
        te_na_list.append(na[0:-2])

    return np.array(te_Xlist), te_na_list

def recognize():
    # prepare data
    _, _, _, te_X, te_y, _ = pp_dev_data.GetAllData(fe_fd, agg_num, hop, fold)

    # do recognize and evaluation
    thres = 0.4     # thres, tune to prec=recall
    n_labels = len(cfg.labels)

    gt_roll = []
    pred_roll = []
    with open(cfg.dev_cv_csv_path, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        if fold == curr_fold:
            # get features, tags
            fe_path = fe_fd + '/' + na + '.f'
            info_path = cfg.dev_wav_fd + '/' + na + '.csv'
            tags = pp_dev_data.GetTags(info_path)
            y = pp_dev_data.TagsToCategory(tags)
            X = cPickle.load(open(fe_path, 'rb'))

            # aggregate data
            X3d = mat_2d_to_3d(X, agg_num, hop)
            p_y_pred = md.predict(X3d)
            p_y_pred = np.mean(p_y_pred, axis=0)    # shape: (n_label,)

            pred = np.zeros(n_labels)
            pred[np.where(p_y_pred > thres)] = 1
            pred_roll.append(pred)
            gt_roll.append(y)

    pred_roll = np.array(pred_roll)
    gt_roll = np.array(gt_roll)

    # calculate prec, recall, fvalue
    prec, recall, fvalue = prec_recall_fvalue(pred_roll, gt_roll, thres)
    print prec, recall, fvalue

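# prec_recall_fvalue is called above but not defined in this file. A hedged
# sketch of a micro-averaged tagging metric matching the call site (binary
# prediction and ground-truth rolls of shape (n_clips, n_labels)); the original
# implementation may average differently.
import numpy as np


def prec_recall_fvalue(pred_roll, gt_roll, thres):
    """Micro-averaged precision, recall and F-value for binary tag rolls."""
    pred = (np.asarray(pred_roll) > thres).astype(int)
    gt = (np.asarray(gt_roll) > thres).astype(int)
    tp = np.sum(pred * gt)
    prec = tp / max(float(np.sum(pred)), 1e-8)
    recall = tp / max(float(np.sum(gt)), 1e-8)
    fvalue = 2 * prec * recall / max(prec + recall, 1e-8)
    return prec, recall, fvalue
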
def GetAllData(fe_fd, csv_file, agg_num, hop):
    with open(csv_file, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    Xlist = []

    # read one line
    for li in lis:
        na = li[1]

        # get features, tags
        fe_path = fe_fd + '/' + na + '.f'
        X = cPickle.load(open(fe_path, 'rb'))

        # aggregate data
        X3d = mat_2d_to_3d(X, agg_num, hop)
        Xlist.append(X3d)

    return np.concatenate(Xlist, axis=0)

def detect():
    # init paths
    if type == 'home':
        fe_fd = cfg.eva_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        id_to_lb = cfg.id_to_lb_home
    if type == 'resi':
        fe_fd = cfg.eva_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        id_to_lb = cfg.id_to_lb_resi

    n_out = len(labels)

    # load model
    md = serializations.load(md_path)

    # do recognize for each test audio
    names = os.listdir(fe_fd)
    names = sorted(names)

    pp_dev_data.CreateFolder(cfg.eva_results_fd)
    pp_dev_data.CreateFolder(cfg.eva_results_fd + '/' + type)

    # detect and write out for all audios
    for na in names:
        X = cPickle.load(open(fe_fd + '/' + na, 'rb'))
        X = mat_2d_to_3d(X, agg_num, hop)

        y_pred = md.predict(X)
        outlist = pp_dev_data.OutMatToList(y_pred, thres, id_to_lb)

        full_na = type + '/audio/' + na[0:4] + '.wav'
        out_txt_path = cfg.eva_results_fd + '/' + type + '/' + na[0:4] + '_detect.ann'
        f = open(out_txt_path, 'w')
        for li in outlist:
            f.write(full_na + '\t' + str(li['event_onset']) + '\t' +
                    str(li['event_offset']) + '\t' + li['event_label'] + '\n')
        print 'Write out detection result to', out_txt_path, 'successfully!'
        f.close()

def GetAllData(fe_fd, agg_num, hop, fold):
    with open(cfg.dev_cv_csv_path, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    tr_Xlist, tr_ylist = [], []
    te_Xlist, te_ylist = [], []
    tr_na_list, te_na_list = [], []

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # get features, tags
        fe_path = fe_fd + '/' + na + '.f'
        info_path = cfg.dev_wav_fd + '/' + na + '.csv'
        tags = GetTags(info_path)
        y = TagsToCategory(tags)
        X = cPickle.load(open(fe_path, 'rb'))

        # aggregate data
        X3d = mat_2d_to_3d(X, agg_num, hop)

        if curr_fold == fold:
            te_Xlist.append(X3d)
            te_ylist += [y] * len(X3d)
            te_na_list.append(na)
        else:
            tr_Xlist.append(X3d)
            tr_ylist += [y] * len(X3d)
            tr_na_list.append(na)

    if fold is None:
        return np.concatenate(tr_Xlist, axis=0), np.array(tr_ylist), tr_na_list
    else:
        return np.concatenate(tr_Xlist, axis=0), np.array(tr_ylist), tr_na_list, \
               np.concatenate(te_Xlist, axis=0), np.array(te_ylist), te_na_list

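# Hedged usage example (not from the original code): the feature folder path
# and the agg_num / hop / fold values below are placeholders chosen only to
# illustrate how the fold-based variant of GetAllData splits the data into
# training and test portions.
if __name__ == '__main__':
    fe_fd = 'fe/mel'                    # placeholder feature folder
    agg_num, hop, fold = 11, 5, 1       # placeholder segmentation and fold values
    tr_X, tr_y, tr_na_list, te_X, te_y, te_na_list = \
        GetAllData(fe_fd, agg_num, hop, fold)
    print tr_X.shape, tr_y.shape, te_X.shape, te_y.shape
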
md = serializations.load(cfg.dev_md_fd + '/md100.p')

# evaluate for each test feature
names = os.listdir(cfg.dev_ann_fd)
names = sorted(names)

results = []
if not os.path.exists(cfg.dev_results_fd):
    os.makedirs(cfg.dev_results_fd)

for na in names:
    if na[10:12] == test_noise:
        print na

        # load data
        ann_path = cfg.dev_ann_fd + '/' + na
        gt_list = pp_dev_data.ReadAnn(ann_path)     # ground truth list

        te_fe = te_fe_fd + '/' + na[0:-4] + '.f'
        X = cPickle.load(open(te_fe, 'rb'))
        X3d = mat_2d_to_3d(X, agg_num, hop)

        # detect
        p_y_pred = md.predict(X3d)
        out_list = pp_dev_data.OutMatToList(p_y_pred, thres)
        out_path = cfg.dev_results_fd + '/' + na[0:-4] + '.txt'
        pp_dev_data.WriteOutToTxt(out_path, out_list)

        # evaluate
        eva = DCASE2016_EventDetection_SegmentBasedMetrics(cfg.labels)
        r = eva.evaluate(gt_list, out_list).results()
        results.append(r)

# show average results of each file
pp_dev_data.ShowResults(results)

n_labels = len(cfg.labels)

pp_dev_data.CreateFolder(cfg.eva_results_fd)
txt_out_path = cfg.eva_results_fd + '/task4_results.txt'
fwrite = open(txt_out_path, 'w')

with open(cfg.eva_csv_path, 'rb') as f:
    reader = csv.reader(f)
    lis = list(reader)

# read one line
for li in lis:
    na = li[1]
    full_na = na + '.16kHz.wav'

    # get features, tags
    fe_path = cfg.eva_fe_mel_fd + '/' + na + '.f'
    X = cPickle.load(open(fe_path, 'rb'))

    # aggregate data
    X3d = mat_2d_to_3d(X, agg_num, hop)
    p_y_pred = md.predict(X3d)
    p_y_pred = np.mean(p_y_pred, axis=0)    # shape: (n_label,)

    # write out one probability per label
    for j1 in xrange(n_labels):
        fwrite.write(full_na + ',' + cfg.id_to_lb[j1] + ',' + str(p_y_pred[j1]) + '\n')

fwrite.close()
print "Write out to", txt_out_path, "successfully!"