def filter(self, source_label, dest_label='processed', size=(5, 3)):
    '''Median filter a binary spectrogram

    Median-filters the spectrogram stored under `source_label`, saving the
    result under `dest_label`. For instance, if used as a step in processing,
    this function can be used to update the self.processed spectrogram:
        audio.filter(source_label='processed', dest_label='processed')

    Inputs:
        source_label (str): label of the class attribute for the source
            spectrogram, e.g. 'normalized' for self.normalized
        dest_label (str): label of the class attribute where the destination
            spectrogram should be saved
        size (tuple of ints (x, y)): shape of the median filter window
    '''
    source = self.get_spect(source_label)
    new_spect = utils.median_filter(source.spect, size=size)
    self.set_spect(label=dest_label, spect=new_spect,
                   freqs=source.freqs, times=source.times)
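# A minimal usage sketch for the method above, assuming an object `audio` of
# the owning class whose 'normalized' spectrogram has already been set
# (the attribute names here are illustrative):
# audio.filter(source_label='normalized', dest_label='processed', size=(5, 3))
# processed = audio.get_spect('processed')  # the median-filtered spectrogram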
def segmentation(F, M, Mg, L, plot=False):
    """Computes the Foote segmentation.

    Parameters
    ----------
    F : np.array((N, M))
        Features matrix of N beats x M features.
    M : int
        Median filter size for the audio features (in beats).
    Mg : int
        Gaussian kernel size (in beats).
    L : int
        Median filter size for the adaptive threshold (in beats).

    Returns
    -------
    bound_idx : np.array
        Array containing the indices of the boundaries.
    """
    # Filter the features
    F = utils.median_filter(F, M=M)

    # Self-similarity matrix
    S = utils.compute_ssm(F)

    # Compute the Gaussian kernel
    G = utils.compute_gaussian_krnl(Mg)

    # Compute the novelty curve
    nc = utils.compute_nc(S, G)

    # Find peaks in the novelty curve
    return utils.pick_peaks(nc, L=L, plot=plot)
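# A minimal sketch of calling the Foote segmenter above, assuming F is a
# beat-synchronous feature matrix (one row per beat); the window sizes are
# illustrative, not tuned values from the source:
# bound_idxs = segmentation(F, M=8, Mg=32, L=16)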
def filter_activation_matrix(G, R):
    """Filters the activation matrix G, and returns a flattened copy."""
    # Keep only the strongest activation per row, encoded as (argmax index + 1)
    idx = np.argmax(G, axis=1)
    max_idx = np.arange(G.shape[0])
    max_idx = (max_idx, idx.flatten())
    G[:, :] = 0
    G[max_idx] = idx + 1

    # Collapse to a 1-D label sequence and median-filter it
    G = np.sum(G, axis=1)
    G = utils.median_filter(G[:, np.newaxis], R)
    return G.flatten()
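# Worked example of the filtering above: for G = [[0.2, 0.9], [0.8, 0.1]],
# np.argmax gives [1, 0], so G is rewritten to [[0, 2], [1, 0]] and summed
# row-wise to the label sequence [2, 1] (argmax index + 1), which is then
# median-filtered with window size R to remove spurious label flips.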
def compute_ssm(wav_file, h, ssm_read_pk, is_ismir=False, tonnetz=False):
    """Computes the self-similarity matrix from an audio file.

    Parameters
    ----------
    wav_file : str
        Path to the wav file to be read.
    h : float
        Hop size.
    ssm_read_pk : bool
        Whether to read the SSM from a pickle file or not (note: this
        function automatically saves the SSM in a pickle file).
    is_ismir : bool
        Produce the plots that appear in the ISMIR paper.
    tonnetz : bool
        Compute Tonnetz instead of Chroma features.

    Returns
    -------
    X : np.array((N, N))
        Self-similarity matrix.
    """
    if not ssm_read_pk:
        # Read WAV file
        logging.info("Reading the WAV file...")
        C = utils.compute_audio_chromagram(wav_file, h)
        C = utils.median_filter(C, L=9)

        if is_ismir:
            ismir.plot_chroma(C)

        # Compute Tonnetz if needed
        F = C
        if tonnetz:
            F = utils.chroma_to_tonnetz(C)

        # Compute the self-similarity matrix
        logging.info("Computing key-invariant self-similarity matrix...")
        X = utils.compute_key_inv_ssm(F, h)

        utils.write_cPickle(wav_file + "-audio-ssm.pk", X)
    else:
        X = utils.read_cPickle(wav_file + "-audio-ssm.pk")

    if is_ismir:
        ismir.plot_ssm(X)
        ismir.plot_score_examples(X)

    return X
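# A hedged usage sketch (the file name and hop size are illustrative):
# X = compute_ssm("track.wav", h=0.25, ssm_read_pk=False, tonnetz=False)
# On a second run, ssm_read_pk=True reuses the "track.wav-audio-ssm.pk" cache.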
def segmentation(X, rank, R, h, niter=300, seed=None):
    """Gets the segmentation boundaries from the factorization matrices.

    Parameters
    ----------
    X : np.array()
        Features matrix (e.g. chromagram).
    rank : int
        Rank of the decomposition.
    R : int
        Size of the median filter for the activation matrix.
    h : int
        Size of the median filter for the features matrix.
    niter : int
        Number of iterations for the factorization.
    seed : int or None
        Random seed for the factorization (None for no seeding).

    Returns
    -------
    bound_idxs : np.array
        Indices of the boundaries found.
    """
    # Filter the features
    X = utils.median_filter(X, M=h)
    X = X.T

    # Find the boundaries, increasing the rank until enough are found
    bound_idxs = None
    while True:
        try:
            F, G = cnmf(X, rank, niter=niter, seed=seed)
        except Exception:
            return np.empty(0)

        # Filter the activation matrix G
        G = filter_activation_matrix(G.T, R)
        bound_idxs = np.where(np.diff(G) != 0)[0] + 1

        if len(np.unique(bound_idxs)) <= 2:
            rank += 1
            bound_idxs = None
        else:
            break

    return bound_idxs
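# A minimal sketch of running the C-NMF segmenter above on a feature matrix X;
# the rank and filter sizes are illustrative, not values from the source:
# bound_idxs = segmentation(X, rank=4, R=9, h=5, niter=300, seed=0)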
img_list_gauss_005.append(utils.gaussian_noise(img_list[i], 0.05))
img_list_gauss_015.append(utils.gaussian_noise(img_list[i], 0.15))
img_list_impulsive_005.append(utils.salt_and_pepper(img_list[i], 0.05))
img_list_impulsive_015.append(utils.salt_and_pepper(img_list[i], 0.15))

# utils.show_image_list(img_list_gauss_005, "Images with gaussian noise! 0.05")
# utils.show_image_list(img_list_gauss_015, "Images with gaussian noise! 0.15")
# utils.show_image_list(img_list_impulsive_005, "Image with salt and pepper noise! 0.05")
# utils.show_image_list(img_list_impulsive_015, "Image with salt and pepper noise! 0.15")

img_list_impulsive = img_list_impulsive + img_list_impulsive_005 + img_list_impulsive_015

# Median filtering is well suited to impulsive (salt-and-pepper) noise
img_list_impulsive_median = []
img_list_impulsive_neavf = []
for img in img_list_impulsive:
    img_list_impulsive_median.append(utils.median_filter(img))
    img_list_impulsive_neavf.append(NEAVF.NEAVF(img))

# utils.show_image_list(img_list_impulsive_median, "Image with salt and pepper after median filter")
# utils.show_image_list(img_list_impulsive_neavf, "Image with salt and pepper after NEAVF filter")

img_list_gauss = img_list_gauss + img_list_gauss_005 + img_list_gauss_015

# Mean filtering is better suited to Gaussian noise
img_list_gauss_mean = []
img_list_gauss_neavf = []
for img in img_list_gauss:
    img_list_gauss_mean.append(utils.mean_filter(img))
    img_list_gauss_neavf.append(NEAVF.NEAVF(img))

# utils.show_image_list(img_list_gauss_mean, "Image with gauss after mean filter!")
# utils.show_image_list(img_list_gauss_neavf, "Image with gauss after NEAVF filter!")
import glob

import holidays
import numpy as np
import pandas as pd

import utils

if __name__ == "__main__":
    lfiles = glob.glob("data/cycling_Auckland/cycling_counts_????.csv")
    lfiles.sort()

    df_list = []
    for f in lfiles:
        d = pd.read_csv(f, index_col=0, parse_dates=True)
        df_list.append(d)
    df = pd.concat(df_list, axis=0)

    df = df.loc[:, ["Tamaki Drive EB", "Tamaki Drive WB"]]
    Tamaki = df.loc[:, "Tamaki Drive WB"] + df.loc[:, "Tamaki Drive EB"]
    Tamaki = Tamaki.loc["2013":"2018-06-01"]
    Tamaki = Tamaki.to_frame(name="Tamaki Drive")

    dfc = Tamaki.copy()
    dfc.loc[:, "Tamaki Drive, Filtered"] = utils.median_filter(
        dfc, varname="Tamaki Drive")
    data = dfc.loc["2013":, ["Tamaki Drive, Filtered"]].resample("1D").sum()

    # Build the holidays DataFrame (NZ / Auckland public holidays)
    holidays_df = pd.DataFrame([], columns=["ds", "holiday"])
    ldates = []
    lnames = []
    for date, name in sorted(
            holidays.NZ(prov="AUK", years=np.arange(2013, 2018 + 1)).items()):
        ldates.append(date)
        lnames.append(name)
    ldates = np.array(ldates)
    lnames = np.array(lnames)
    holidays_df.loc[:, "ds"] = ldates
    holidays_df.loc[:, "holiday"] = lnames
    holidays_df.loc[:, "holiday"] = holidays_df.loc[:, "holiday"].apply(
        lambda x: x.replace(" (Observed)", ""))
def evaluate(
        self,
        experiment_path: str,
        pred_file='hard_predictions_{}.txt',
        tag_file='tagging_predictions_{}.txt',
        event_file='event_{}.txt',
        segment_file='segment_{}.txt',
        class_result_file='class_result_{}.txt',
        time_ratio=10. / 500,
        postprocessing='double',
        threshold=None,
        window_size=None,
        save_seq=False,
        sed_eval=True,  # Evaluate sound event detection (time stamps, segment/event based)
        **kwargs):
    """evaluate

    :param experiment_path: Path to already trained model using train
    :type experiment_path: str
    :param pred_file: Prediction output file, written into the experiment dir
    :param time_ratio: Ratio between output and time resolution (1. represents the model resolution)
    :param **kwargs: Overwrite standard args, please pass `data` and `label`
    """
    # Update config parameters with new kwargs
    config = torch.load(
        list(Path(f'{experiment_path}').glob("run_config*"))[0],
        map_location='cpu')
    # Use the previous config, but update e.g. the data via kwargs
    config_parameters = dict(config, **kwargs)
    # Default columns to search for in data
    config_parameters.setdefault('colname', ('filename', 'encoded'))
    model_parameters = torch.load(
        glob.glob("{}/run_model*".format(experiment_path))[0],
        map_location=lambda storage, loc: storage)
    encoder = torch.load(
        glob.glob('{}/run_encoder*'.format(experiment_path))[0],
        map_location=lambda storage, loc: storage)
    strong_labels_df = pd.read_csv(config_parameters['label'], sep='\t')

    # Evaluation is done via the filenames, not full paths
    if not np.issubdtype(strong_labels_df['filename'].dtype, np.number):
        strong_labels_df['filename'] = strong_labels_df['filename'].apply(
            os.path.basename)
    if 'audiofilepath' in strong_labels_df.columns:
        # For the AVE dataset, audiofilepath is the main column
        strong_labels_df['audiofilepath'] = strong_labels_df[
            'audiofilepath'].apply(os.path.basename)
        colname = 'audiofilepath'  # AVE
    else:
        colname = 'filename'  # DCASE etc.
    # We iterate over strong_labels_df, whose per-frame rows would be ambiguous
    # here; to save time and resources, reduce the strong labels to weak-label format
    weak_labels_df = strong_labels_df.groupby(
        colname)['event_label'].unique().apply(tuple).to_frame().reset_index()
    if "event_labels" in strong_labels_df.columns:
        assert False, "Data with the column event_labels are used to train, not to evaluate"
    weak_labels_array, encoder = utils.encode_labels(
        labels=weak_labels_df['event_label'], encoder=encoder)
    dataloader = dataset.getdataloader(
        {
            'filename': weak_labels_df['filename'].values,
            'encoded': weak_labels_array,
        },
        config_parameters['data'],
        batch_size=1,
        shuffle=False,
        colname=config_parameters['colname']  # For datasets with different key names
    )
    model = getattr(models, config_parameters['model'])(
        inputdim=dataloader.dataset.datadim,
        outputdim=len(encoder.classes_),
        **config_parameters['model_args'])
    model.load_state_dict(model_parameters)
    model = model.to(DEVICE).eval()
    time_predictions, clip_predictions = [], []
    sequences_to_save = []
    mAP_pred, mAP_tar = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, unit='file', leave=False):
            _, target, filenames = batch
            clip_pred, pred, _ = self._forward(model, batch)
            clip_pred = clip_pred.cpu().detach().numpy()
            mAP_tar.append(target.numpy().squeeze(0))
            mAP_pred.append(clip_pred.squeeze(0))
            pred = pred.cpu().detach().numpy()
            if postprocessing == 'median':
                thres = 0.5 if threshold is None else threshold
                if window_size is None:
                    window_size = 1
                filtered_pred = utils.median_filter(
                    pred, window_size=window_size, threshold=thres)
                decoded_pred = utils.decode_with_timestamps(
                    encoder, filtered_pred)
            elif postprocessing == 'cATP-SDS':
                # cATP-SDS postprocessing uses "optimal" per-class window sizes
                # and assumes we have a prior; the values are taken from the
                # Specialized Decision Surface / Disentangled Feature paper.
                # Classes are (DCASE2018 only):
                # ['Alarm_bell_ringing' 'Blender' 'Cat' 'Dishes' 'Dog'
                #  'Electric_shaver_toothbrush' 'Frying' 'Running_water'
                #  'Speech' 'Vacuum_cleaner']
                assert pred.shape[-1] == 10, "Only supporting DCASE2018 for now"
                thres = 0.5 if threshold is None else threshold
                if window_size is None:
                    window_size = [17, 42, 17, 9, 16, 74, 85, 64, 18, 87]
                # P(y|x) > alpha
                clip_pred = utils.binarize(clip_pred, threshold=thres)
                pred = pred * clip_pred
                filtered_pred = np.zeros_like(pred)
                # Class-specific filtering via the median filter
                for cl in range(pred.shape[-1]):
                    # Median filtering also applies thresholding
                    filtered_pred[..., cl] = utils.median_filter(
                        pred[..., cl],
                        window_size=window_size[cl],
                        threshold=thres)
                decoded_pred = utils.decode_with_timestamps(
                    encoder, filtered_pred)
            elif postprocessing == 'double':
                # Double thresholding as described in
                # https://arxiv.org/abs/1904.03841
                if threshold is None:
                    hi_thres, low_thres = (0.75, 0.2)
                else:
                    hi_thres, low_thres = threshold
                filtered_pred = utils.double_threshold(
                    pred, high_thres=hi_thres, low_thres=low_thres)
                decoded_pred = utils.decode_with_timestamps(
                    encoder, filtered_pred)
            elif postprocessing == 'triple':
                # Triple thresholding: frame-level + clip-level predictions
                if threshold is None:
                    clip_thres, hi_thres, low_thres = (0.5, 0.75, 0.2)
                else:
                    clip_thres, hi_thres, low_thres = threshold
                clip_pred = utils.binarize(clip_pred, threshold=clip_thres)
                # Mask the frame predictions with the binarized clip predictions
                pred = clip_pred * pred
                filtered_pred = utils.double_threshold(
                    pred, high_thres=hi_thres, low_thres=low_thres)
                decoded_pred = utils.decode_with_timestamps(
                    encoder, filtered_pred)

            for num_batch in range(len(decoded_pred)):
                filename = filenames[num_batch]
                cur_pred = pred[num_batch]
                cur_clip = clip_pred[num_batch].reshape(1, -1)
                # Clip predictions, independent of the per-frame predictions
                bin_clips = utils.binarize(cur_clip)  # Binarize with the default threshold 0.5 for clips
                bin_clips = encoder.inverse_transform(
                    bin_clips.reshape(1, -1))[0]  # 0 since there is only a single sample
                # Add each label individually into the list
                for clip_label in bin_clips:
                    clip_predictions.append({
                        'filename': filename,
                        'event_label': clip_label,
                    })
                # Save each frame output for later visualization
                if save_seq:
                    labels = weak_labels_df.loc[
                        weak_labels_df['filename'] == filename]['event_label']
                    to_save_df = pd.DataFrame(pred[num_batch],
                                              columns=encoder.classes_)
                    # True labels
                    to_save_df.rename({'variable': 'event'},
                                      axis='columns',
                                      inplace=True)
                    to_save_df['filename'] = filename
                    to_save_df['pred_labels'] = np.array(labels).repeat(
                        len(to_save_df))
                    sequences_to_save.append(to_save_df)
                label_prediction = decoded_pred[num_batch]
                for event_label, onset, offset in label_prediction:
                    time_predictions.append({
                        'filename': filename,
                        'event_label': event_label,
                        'onset': onset,
                        'offset': offset
                    })

    assert len(time_predictions) > 0, "No outputs, lower threshold?"
    pred_df = pd.DataFrame(
        time_predictions,
        columns=['filename', 'event_label', 'onset', 'offset'])
    clip_pred_df = pd.DataFrame(
        clip_predictions,
        columns=['filename', 'event_label', 'probability'])
    test_data_filename = os.path.splitext(
        os.path.basename(config_parameters['label']))[0]
    if save_seq:
        pd.concat(sequences_to_save).to_csv(
            os.path.join(experiment_path, 'probabilities.csv'),
            index=False,
            sep='\t',
            float_format="%.4f")
    pred_df = utils.predictions_to_time(pred_df, ratio=time_ratio)
    if pred_file:
        pred_df.to_csv(
            os.path.join(experiment_path,
                         pred_file.format(test_data_filename)),
            index=False,
            sep="\t")
    tagging_df = metrics.audio_tagging_results(strong_labels_df, pred_df)
    clip_tagging_df = metrics.audio_tagging_results(strong_labels_df,
                                                    clip_pred_df)
    print("Tagging Classwise Result: \n{}".format(
        tabulate(clip_tagging_df,
                 headers='keys',
                 showindex=False,
                 tablefmt='github')))
    print("mAP: {}".format(
        metrics.mAP(np.array(mAP_tar), np.array(mAP_pred))))
    if tag_file:
        clip_tagging_df.to_csv(
            os.path.join(experiment_path,
                         tag_file.format(test_data_filename)),
            index=False,
            sep='\t')
    if sed_eval:
        event_result, segment_result = metrics.compute_metrics(
            strong_labels_df, pred_df, time_resolution=1.0)
        print("Event Based Results:\n{}".format(event_result))
        event_results_dict = event_result.results_class_wise_metrics()
        class_wise_results_df = pd.DataFrame().from_dict({
            f: event_results_dict[f]['f_measure']
            for f in event_results_dict.keys()
        }).T
        class_wise_results_df.to_csv(
            os.path.join(experiment_path,
                         class_result_file.format(test_data_filename)),
            sep='\t')
        print("Class wise F1-Macro:\n{}".format(
            tabulate(class_wise_results_df,
                     headers='keys',
                     tablefmt='github')))
        if event_file:
            with open(
                    os.path.join(experiment_path,
                                 event_file.format(test_data_filename)),
                    'w') as wp:
                wp.write(event_result.__str__())
        print("=" * 100)
        print(segment_result)
        if segment_file:
            with open(
                    os.path.join(experiment_path,
                                 segment_file.format(test_data_filename)),
                    'w') as wp:
                wp.write(segment_result.__str__())
        event_based_results = pd.DataFrame(
            event_result.results_class_wise_average_metrics()['f_measure'],
            index=['event_based'])
        segment_based_results = pd.DataFrame(
            segment_result.results_class_wise_average_metrics()['f_measure'],
            index=['segment_based'])
        result_quick_report = pd.concat((
            event_based_results,
            segment_based_results,
        ))
        # Add two rows: frame-level and clip-level macro tagging metrics
        tagging_macro_f1, tagging_macro_pre, tagging_macro_rec = tagging_df.loc[
            tagging_df['label'] == 'macro'].values[0][1:]
        static_tagging_macro_f1, static_tagging_macro_pre, static_tagging_macro_rec = clip_tagging_df.loc[
            clip_tagging_df['label'] == 'macro'].values[0][1:]
        result_quick_report.loc['Time Tagging'] = [
            tagging_macro_f1, tagging_macro_pre, tagging_macro_rec
        ]
        result_quick_report.loc['Clip Tagging'] = [
            static_tagging_macro_f1, static_tagging_macro_pre,
            static_tagging_macro_rec
        ]
        with open(
                os.path.join(experiment_path,
                             'quick_report_{}.md'.format(test_data_filename)),
                'w') as wp:
            print(tabulate(result_quick_report,
                           headers='keys',
                           tablefmt='github'),
                  file=wp)

        print("Quick Report: \n{}".format(
            tabulate(result_quick_report, headers='keys',
                     tablefmt='github')))
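# A self-contained sketch of the double-thresholding idea used by the
# 'double' and 'triple' branches above (an illustration of the technique,
# not the repo's utils.double_threshold): frames above the high threshold
# seed events, which are then grown to every contiguous frame above the
# low threshold.
import numpy as np
from scipy.ndimage import label

def double_threshold_sketch(prob, high=0.75, low=0.2):
    """prob: 1-D array of per-frame probabilities for a single class."""
    low_mask = prob > low
    # Label contiguous runs of frames above the low threshold
    regions, n_regions = label(low_mask)
    out = np.zeros_like(prob, dtype=bool)
    for r in range(1, n_regions + 1):
        run = regions == r
        # Keep a run only if it contains at least one high-confidence frame
        if (prob[run] > high).any():
            out[run] = True
    return out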
def wcal(filename, telluric_name):
    # Load the telluric model and build a spline interpolator for it
    tel = fits.getdata(telluric_name)
    fTel = tel['trans']
    wTel = tel['lam'] * 1E3
    cs_tell = interpolate.splrep(wTel, fTel, s=0.0)

    hdul = fits.open(filename)
    no = len(hdul) - 1
    nx = len(hdul[1].data)

    # Set starting parameters: three anchor pixels along the detector
    x1 = 255
    x2 = 511
    x3 = 767

    # Initialise vectors
    for io in range(no):
        print('Processing detector {:1}'.format(io + 1))
        wlout = np.zeros(nx)
        d = hdul[io + 1].data
        wlen = d['Wavelength']

        # Identify and mask bad pixels
        spc = d['Extracted_OPT']
        ### Two-step filtering to attempt to clean the bad pixels out.
        for i in range(5):
            spc = median_filter(spc, 128, False)
            spc = median_filter(spc, 4, True)

        '''
        plt.figure(figsize=(12, 3), dpi=120)
        plt.title("Before")
        plt.plot(wlen, spc, c='r', label="input_spectrum")
        plt.plot(wTel, fTel * np.nanpercentile(spc, 70), c='k', label="telluric_model")
        plt.xlim(wlen[0], wlen[-1])
        plt.show()
        '''

        l1 = wlen[x1]
        l2 = wlen[x2]
        l3 = wlen[x3]
        pars = (l1, l2, l3)
        deltas = (1E-3, 1E-3, 1E-3)

        ### Before feeding to MCMC we do a grid search around the initial params,
        ### which helps the MCMC converge properly. This takes about as long as
        ### one of the MCMC chunks below, so it's a good tradeoff.
        print("Initialise grid search")
        dim1 = np.linspace(l1 - 0.25, l1 + 0.25, 30)
        dim2 = np.linspace(l2 - 0.25, l2 + 0.25, 30)
        dim3 = np.linspace(l3 - 0.25, l3 + 0.25, 30)
        chisq_cube = np.ones((len(dim1), len(dim2), len(dim3)))
        for i in range(len(dim1)):
            for j in range(len(dim2)):
                for k in range(len(dim3)):
                    theta = (dim1[i], dim2[j], dim3[k])
                    chisq_cube[i][j][k] = get_logL(theta, cs_tell, spc)
        # Pick the grid point with the highest log-likelihood
        i, j, k = np.unravel_index(chisq_cube.argmax(), chisq_cube.shape)
        pars = (dim1[i], dim2[j], dim3[k])

        print("MCMC running")
        ### Here we run a short set of MCMC chains, refining the solution
        ### produced by the grid search.
        for i in range(3):
            ll1, ll2, ll3 = run_emcee(pars, deltas, cs_tell, spc, plot=False)
            pars = ll1, ll2, ll3
            print(pars)

        wlout = get_wl_sol(*pars)
        d['WAVELENGTH'] = wlout
        hdul[io + 1].data = d

        '''
        plt.figure(figsize=(12, 3), dpi=120)
        plt.title("After")
        plt.plot(wlout, spc, c='r', label="input_spectrum")
        plt.plot(wTel, fTel * np.nanpercentile(spc, 70), c='k', label="telluric_model")
        plt.xlim(wlen[0], wlen[-1])
        plt.show()
        '''

    ### Write our updated spectra out to a new file.
    out_file = filename[:-5] + "_proc.fits"
    hdul.writeto(out_file, overwrite=True)
    return
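# A hedged usage sketch (file names are illustrative, not from the source):
# wcal("target_spectrum.fits", "telluric_model.fits")
# This refines the per-detector wavelength solution against the telluric
# model and writes the result to "target_spectrum_proc.fits".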