def filter(self, source_label, dest_label='processed', size=(5, 3)):
        '''
        Median filter a binary spectrogram

        Median-filters the spectrogram stored in the attribute named by
        source_label, saving the result in the attribute named by
        dest_label. For instance, when used as a step in processing,
        this call can update the self.processed spectrogram in place:

            audio.filter(source_label = 'processed', dest_label = 'processed')


        Inputs:
            source_label (str): label of the class attribute holding
                the source spectrogram, e.g. 'normalized' for self.normalized
            dest_label (str): label of the class attribute where
                the destination spectrogram should be saved.
            size (tuple of ints (x, y)): shape of the median filter footprint
        '''

        source = self.get_spect(source_label)
        new_spect = utils.median_filter(source.spect, size=size)

        self.set_spect(label=dest_label,
                       spect=new_spect,
                       freqs=source.freqs,
                       times=source.times)
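The utils.median_filter helper itself never appears on this page; a minimal sketch of what a spectrogram-oriented wrapper could look like, assuming it simply delegates to scipy.ndimage.median_filter (the wrapper and its signature are assumptions, not the project's confirmed implementation):

import numpy as np
from scipy import ndimage


def median_filter(spect, size=(5, 3)):
    """Hypothetical sketch: apply a 2-D median filter with the given footprint."""
    return ndimage.median_filter(np.asarray(spect), size=size)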
Example #2
def segmentation(F, M, Mg, L, plot=False):
    """Computes the Foote segmentator.

    Parameters
    ----------
    F : np.array((N,M))
        Features matrix of N beats x M features.
    M : int
        Median filter size for the audio features (in beats).
    Mg : int
        Gaussian kernel size (in beats).
    L : int
        Median filter size for the adaptive threshold of the peak picker.

    Returns
    -------
    bound_idx : np.array
        Array containing the indices of the boundaries.
    """
    # Filter
    F = utils.median_filter(F, M=M)

    # Self Similarity Matrix
    S = utils.compute_ssm(F)

    # Compute gaussian kernel
    G = utils.compute_gaussian_krnl(Mg)

    # Compute the novelty curve
    nc = utils.compute_nc(S, G)

    # Find peaks in the novelty curve
    return utils.pick_peaks(nc, L=L, plot=plot)
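The helpers compute_gaussian_krnl, compute_nc and pick_peaks are project functions that are not shown here; as an illustration of the idea (not the project's implementation), a self-contained sketch of Foote's novelty computation, correlating a Gaussian-tapered checkerboard kernel along the diagonal of the self-similarity matrix:

import numpy as np
from scipy.signal import windows


def checkerboard_kernel(M):
    """Gaussian-tapered checkerboard kernel of size M x M (M assumed even)."""
    g = windows.gaussian(M, std=M / 3.0)
    G = np.outer(g, g)
    half = M // 2
    sign = np.ones((M, M))
    sign[:half, half:] = -1
    sign[half:, :half] = -1
    return G * sign


def novelty_curve(S, M):
    """Slide the checkerboard kernel along the main diagonal of the SSM S."""
    K = checkerboard_kernel(M)
    N = S.shape[0]
    half = M // 2
    S_pad = np.pad(S, half, mode="constant")
    nc = np.array([np.sum(S_pad[i:i + M, i:i + M] * K) for i in range(N)])
    nc[nc < 0] = 0  # keep only positive novelty
    return nc

Boundary candidates are then the local maxima of nc, typically picked against an adaptive (median-filtered) threshold, which is what the L parameter of pick_peaks controls above.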
Example #3
def filter_activation_matrix(G, R):
    """Filters the activation matrix G, and returns a flattened copy."""
    # Winning (strongest) component for each frame
    idx = np.argmax(G, axis=1)
    max_idx = np.arange(G.shape[0])
    max_idx = (max_idx, idx.flatten())
    # Keep only the winning component per frame, encoded as a 1-based label
    G[:, :] = 0
    G[max_idx] = idx + 1
    # Collapse to a 1-D label sequence and median-filter it to remove
    # spurious, short-lived label switches
    G = np.sum(G, axis=1)
    G = utils.median_filter(G[:, np.newaxis], R)
    return G.flatten()
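A toy run of the winner-take-all step that this function performs, using scipy.ndimage.median_filter as a stand-in for utils.median_filter (illustration only):

import numpy as np
from scipy.ndimage import median_filter as nd_median_filter

# 7 frames x 3 components; frame 2 briefly switches to another component.
G = np.array([[0.9, 0.1, 0.0],
              [0.8, 0.2, 0.0],
              [0.1, 0.7, 0.2],
              [0.7, 0.2, 0.1],
              [0.6, 0.3, 0.1],
              [0.1, 0.1, 0.8],
              [0.0, 0.2, 0.8]])
winners = np.argmax(G, axis=1) + 1             # -> [1 1 2 1 1 3 3]
smoothed = nd_median_filter(winners, size=3)   # -> [1 1 1 1 1 3 3]
print(winners, smoothed)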
Example #4
def compute_ssm(wav_file, h, ssm_read_pk, is_ismir=False, tonnetz=False):
    """Computes the self similarity matrix from an audio file.

    Parameters
    ----------
    wav_file: str
        Path to the wav file to be read.
    h : float
        Hop size.
    ssm_read_pk : bool
        Whether to read the SSM from a pickle file (note: this function
        automatically saves the SSM to a pickle file when computing it).
    is_ismir : bool
        Produce the plots that appear in the ISMIR paper.
    tonnetz : bool
        Compute Tonnetz features instead of chroma features.

    Returns
    -------
    X : np.array((N, N))
        Self-similarity matrix
    """
    if not ssm_read_pk:
        # Read WAV file
        logging.info("Reading the WAV file...")
        C = utils.compute_audio_chromagram(wav_file, h)
        C = utils.median_filter(C, L=9)

        if is_ismir:
            ismir.plot_chroma(C)

        # Compute Tonnetz if needed
        F = C
        if tonnetz:
            F = utils.chroma_to_tonnetz(C)

        # Compute the self similarity matrix
        logging.info("Computing key-invariant self-similarity matrix...")
        X = utils.compute_key_inv_ssm(F, h)

        #plt.imshow(X, interpolation="nearest", aspect="auto")
        #plt.show()

        utils.write_cPickle(wav_file + "-audio-ssm.pk", X)
    else:
        X = utils.read_cPickle(wav_file + "-audio-ssm.pk")

    if is_ismir:
        #X = X**2.5
        ismir.plot_ssm(X)
        ismir.plot_score_examples(X)

    return X
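compute_key_inv_ssm is a project helper that is not reproduced here; a plain (non key-invariant) self-similarity matrix from an N x d feature matrix can be sketched as follows, purely as an illustration of the data structure the rest of the pipeline consumes:

import numpy as np
from scipy.spatial import distance


def compute_plain_ssm(F):
    """Return an N x N similarity matrix from an N x d feature matrix F."""
    D = distance.squareform(distance.pdist(F, metric="euclidean"))
    if D.max() > 0:
        D /= D.max()          # normalize distances to [0, 1]
    return 1.0 - D            # convert distance to similarity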
Example #5
def segmentation(X, rank, R, h, niter=300, seed=None):
    """
    Gets the segmentation (boundaries and labels) from the factorization
    matrices.

    Parameters
    ----------
    X: np.array()
        Features matrix (e.g. chromagram)
    rank: int
        Rank of decomposition
    R: int
        Size of the median filter for the activation matrix
    h: int
        Size of the median filter for the features matrix
    niter: int
        Number of iterations for k-means
    seed:
        Random seed for the factorization (None for no fixed seed)

    Returns
    -------
    bound_idxs: np.array
        Indices of the segment boundaries that were found.
    """

    # Filter
    X = utils.median_filter(X, M=h)
    X = X.T

    # Find the boundaries; if too few are found, increase the rank and retry
    bound_idxs = None
    while True:
        if bound_idxs is None:
            try:
                F, G = cnmf(X, rank, niter=niter, seed=seed)
            except Exception:
                # Factorization failed; bail out with an empty segmentation
                return np.empty(0), [1]

            # Filter G: keep the winning component per frame and smooth it
            G = filter_activation_matrix(G.T, R)
            if bound_idxs is None:
                # Boundaries are where the (smoothed) label sequence changes
                bound_idxs = np.where(np.diff(G) != 0)[0] + 1

        if len(np.unique(bound_idxs)) <= 2:
            rank += 1
            bound_idxs = None
        else:
            break

    return bound_idxs
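The boundary extraction inside the loop is just a change-point lookup on the smoothed label sequence; a tiny worked example:

import numpy as np

G = np.array([1, 1, 1, 2, 2, 3, 3, 3])        # frame labels after filtering
bound_idxs = np.where(np.diff(G) != 0)[0] + 1
print(bound_idxs)                              # [3 5] -> segments [0:3], [3:5], [5:8]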
Example #6
    img_list_gauss_005.append(utils.gaussian_noise(img_list[i], 0.05))
    img_list_gauss_015.append(utils.gaussian_noise(img_list[i], 0.15))
    img_list_impulsive_005.append(utils.salt_and_pepper(img_list[i], 0.05))
    img_list_impulsive_015.append(utils.salt_and_pepper(img_list[i], 0.15))
# utils.show_image_list(img_list_gauss_005, "Images with gaussian noise! 0.05")
# utils.show_image_list(img_list_gauss_015, "Images with gaussian noise! 0.15")
# utils.show_image_list(img_list_impulsive_005, "Image with salt and pepper noise! 0.05")
# utils.show_image_list(img_list_impulsive_015, "Image with salt and pepper noise! 0.15")

img_list_impulsive = img_list_impulsive + img_list_impulsive_005 + img_list_impulsive_015
img_list_impulsive_median = []
img_list_impulsive_neavf = []
for img in img_list_impulsive:
    img_list_impulsive_median.append(utils.median_filter(img))
    img_list_impulsive_neavf.append(NEAVF.NEAVF(img))
# utils.show_image_list(img_list_impulsive_median, "Image with salt and pepper after median filter")
# utils.show_image_list(img_list_impulsive_neavf, "Image with salt and pepper after NEAVF filter")

img_list_gauss = img_list_gauss + img_list_gauss_005 + img_list_gauss_015
img_list_gauss_mean = []
img_list_gauss_neavf = []
for img in img_list_gauss:
    img_list_gauss_mean.append(utils.mean_filter(img))
    img_list_gauss_neavf.append(NEAVF.NEAVF(img))
# utils.show_image_list(img_list_gauss_mean, "Image with gauss after mean filter!")
# utils.show_image_list(img_list_gauss_neavf, "Image with gauss after NEAVF filter!")
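utils.median_filter, utils.mean_filter and NEAVF are project-specific; a self-contained illustration of why a median filter suits impulsive (salt-and-pepper) noise while a mean filter merely smears it, using scipy.ndimage as a stand-in:

import numpy as np
from scipy import ndimage

rng = np.random.default_rng(0)
img = np.full((64, 64), 0.5)

# Add 5% salt-and-pepper noise
noisy = img.copy()
mask = rng.random(img.shape)
noisy[mask < 0.025] = 0.0   # pepper
noisy[mask > 0.975] = 1.0   # salt

median_out = ndimage.median_filter(noisy, size=3)   # impulses are rejected
mean_out = ndimage.uniform_filter(noisy, size=3)    # impulses are smeared

print("median MAE:", np.abs(median_out - img).mean())
print("mean   MAE:", np.abs(mean_out - img).mean())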
Example #7
import glob

import holidays
import numpy as np
import pandas as pd

import utils

if __name__ == "__main__":
    lfiles = glob.glob("data/cycling_Auckland/cycling_counts_????.csv")
    lfiles.sort()
    df_list = []
    for f in lfiles:
        d = pd.read_csv(f, index_col=0, parse_dates=True)
        df_list.append(d)
    df = pd.concat(df_list, axis=0)
    df = df.loc[:, ["Tamaki Drive EB", "Tamaki Drive WB"]]
    Tamaki = df.loc[:, "Tamaki Drive WB"] + df.loc[:, "Tamaki Drive EB"]
    Tamaki = Tamaki.loc["2013":"2018-06-01", ]
    Tamaki = Tamaki.to_frame(name="Tamaki Drive")
    dfc = Tamaki.copy()
    dfc.loc[:, "Tamaki Drive, Filtered"] = utils.median_filter(
        dfc, varname="Tamaki Drive")
    data = dfc.loc["2013":, ["Tamaki Drive, Filtered"]].resample("1D").sum()

    holidays_df = pd.DataFrame([], columns=["ds", "holiday"])
    ldates = []
    lnames = []
    for date, name in sorted(
            holidays.NZ(prov="AUK", years=np.arange(2013, 2018 + 1)).items()):
        ldates.append(date)
        lnames.append(name)
    ldates = np.array(ldates)
    lnames = np.array(lnames)
    holidays_df.loc[:, "ds"] = ldates
    holidays_df.loc[:, "holiday"] = lnames
    holidays_df.loc[:, "holiday"] = holidays_df.loc[:, "holiday"].apply(
        lambda x: x.replace(" (Observed)", ""))
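Here utils.median_filter takes a DataFrame and a column name, which suggests a rolling-median outlier cleaner for the daily counts; a hypothetical stand-in (window size and rejection rule are assumptions, not the project's implementation):

import pandas as pd


def median_filter(df, varname, window=24, n_sigmas=3.0):
    """Replace points that deviate strongly from a centred rolling median."""
    s = df[varname]
    rolling_med = s.rolling(window, center=True, min_periods=1).median()
    resid = s - rolling_med
    outliers = resid.abs() > n_sigmas * resid.std()
    return s.where(~outliers, rolling_med)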
Example #8
    def evaluate(
            self,
            experiment_path: str,
            pred_file='hard_predictions_{}.txt',
            tag_file='tagging_predictions_{}.txt',
            event_file='event_{}.txt',
            segment_file='segment_{}.txt',
            class_result_file='class_result_{}.txt',
            time_ratio=10. / 500,
            postprocessing='double',
            threshold=None,
            window_size=None,
            save_seq=False,
            sed_eval=True,  # Do evaluation on sound event detection (timestamps, segment/event based)
            **kwargs):
        """evaluate

        :param experiment_path: Path to already trained model using train
        :type experiment_path: str
        :param pred_file: Prediction output file, put into experiment dir
        :param time_resolution: Resolution in time (1. represents the model resolution)
        :param **kwargs: Overwrite standard args, please pass `data` and `label`
        """
        # Update config parameters with new kwargs

        config = torch.load(list(Path(f'{experiment_path}').glob("run_config*"))[0], map_location='cpu')
        # Use previous config, but update data such as kwargs
        config_parameters = dict(config, **kwargs)
        # Default columns to search for in data
        config_parameters.setdefault('colname', ('filename', 'encoded'))
        model_parameters = torch.load(
            glob.glob("{}/run_model*".format(experiment_path))[0],
            map_location=lambda storage, loc: storage)
        encoder = torch.load(glob.glob(
            '{}/run_encoder*'.format(experiment_path))[0],
                             map_location=lambda storage, loc: storage)
        strong_labels_df = pd.read_csv(config_parameters['label'], sep='\t')

        # Evaluation is done via the filenames, not full paths
        if not np.issubdtype(strong_labels_df['filename'].dtype, np.number):
            strong_labels_df['filename'] = strong_labels_df['filename'].apply(
                os.path.basename)
        if 'audiofilepath' in strong_labels_df.columns:  # In case of ave dataset, the audiofilepath column is the main column
            strong_labels_df['audiofilepath'] = strong_labels_df[
                'audiofilepath'].apply(os.path.basename)
            colname = 'audiofilepath'  # AVE
        else:
            colname = 'filename'  # Dcase etc.
        # Iterating over strong_labels_df directly would be ambiguous,
        # so reduce the strong labels to weak-label format to save time and resources
        weak_labels_df = strong_labels_df.groupby(
            colname)['event_label'].unique().apply(
                tuple).to_frame().reset_index()
        if "event_labels" in strong_labels_df.columns:
            assert False, "Data with the column event_labels are used to train not to evaluate"
        weak_labels_array, encoder = utils.encode_labels(
            labels=weak_labels_df['event_label'], encoder=encoder)
        dataloader = dataset.getdataloader(
            {
                'filename': weak_labels_df['filename'].values,
                'encoded': weak_labels_array,
            },
            config_parameters['data'],
            batch_size=1,
            shuffle=False,
            colname=config_parameters[
                'colname']  # For other datasets with different key names
        )
        model = getattr(models, config_parameters['model'])(
            inputdim=dataloader.dataset.datadim,
            outputdim=len(encoder.classes_),
            **config_parameters['model_args'])
        model.load_state_dict(model_parameters)
        model = model.to(DEVICE).eval()
        time_predictions, clip_predictions = [], []
        sequences_to_save = []
        mAP_pred, mAP_tar = [], []
        with torch.no_grad():
            for batch in tqdm(dataloader, unit='file', leave=False):
                _, target, filenames = batch
                clip_pred, pred, _ = self._forward(model, batch)
                clip_pred = clip_pred.cpu().detach().numpy()
                mAP_tar.append(target.numpy().squeeze(0))
                mAP_pred.append(clip_pred.squeeze(0))
                pred = pred.cpu().detach().numpy()
                if postprocessing == 'median':
                    if threshold is None:
                        thres = 0.5
                    else:
                        thres = threshold
                    if window_size is None:
                        window_size = 1
                    filtered_pred = utils.median_filter(
                        pred, window_size=window_size, threshold=thres)
                    decoded_pred = utils.decode_with_timestamps(
                        encoder, filtered_pred)


                elif postprocessing == 'cATP-SDS':
                    # cATP-SDS postprocessing uses an "optimal" per-class configuration and assumes we have a prior
                    # Values are taken from the Surface Disentange paper
                    # Classes are (DCASE2018 only)
                    # ['Alarm_bell_ringing' 'Blender' 'Cat' 'Dishes' 'Dog'
                    # 'Electric_shaver_toothbrush' 'Frying' 'Running_water' 'Speech'
                    # 'Vacuum_cleaner']
                    assert pred.shape[
                        -1] == 10, "Only supporting DCASE2018 for now"
                    if threshold is None:
                        thres = 0.5
                    else:
                        thres = threshold
                    if window_size is None:
                        window_size = [17, 42, 17, 9, 16, 74, 85, 64, 18, 87]
                    # P(y|x) > alpha
                    clip_pred = utils.binarize(clip_pred, threshold=thres)
                    pred = pred * clip_pred
                    filtered_pred = np.zeros_like(pred)

                    # class specific filtering via median filter
                    for cl in range(pred.shape[-1]):
                        # Median filtering also applies thresholding
                        filtered_pred[..., cl] = utils.median_filter(
                            pred[..., cl],
                            window_size=window_size[cl],
                            threshold=thres)
                    decoded_pred = utils.decode_with_timestamps(
                        encoder, filtered_pred)

                elif postprocessing == 'double':
                    # Double thresholding as described in
                    # https://arxiv.org/abs/1904.03841
                    if threshold is None:
                        hi_thres, low_thres = (0.75, 0.2)
                    else:
                        hi_thres, low_thres = threshold
                    filtered_pred = utils.double_threshold(pred,
                                                           high_thres=hi_thres,
                                                           low_thres=low_thres)
                    decoded_pred = utils.decode_with_timestamps(
                        encoder, filtered_pred)

                elif postprocessing == 'triple':
                    # Triple thresholding: combine clip-level and frame-level predictions
                    if threshold is None:
                        clip_thres, hi_thres, low_thres = (0.5, 0.75, 0.2)
                    else:
                        clip_thres, hi_thres, low_thres = threshold

                    clip_pred = utils.binarize(clip_pred, threshold=clip_thres)
                    # Gate the frame-level predictions with the binarized clip-level predictions
                    pred = clip_pred * pred
                    filtered_pred = utils.double_threshold(pred,
                                                           high_thres=hi_thres,
                                                           low_thres=low_thres)
                    decoded_pred = utils.decode_with_timestamps(
                        encoder, filtered_pred)

                for num_batch in range(len(decoded_pred)):
                    filename = filenames[num_batch]
                    cur_pred = pred[num_batch]
                    cur_clip = clip_pred[num_batch].reshape(1, -1)
                    # Clip predictions, independent of per-frame predictions;
                    # binarize with the default threshold of 0.5
                    bin_clips = utils.binarize(cur_clip)
                    bin_clips = encoder.inverse_transform(
                        bin_clips.reshape(1,
                                          -1))[0]  # 0 since only single sample
                    # Add each label individually into list
                    for clip_label in bin_clips:
                        clip_predictions.append({
                            'filename': filename,
                            'event_label': clip_label,
                        })
                    # Save each frame output, for later visualization
                    if save_seq:
                        labels = weak_labels_df.loc[weak_labels_df['filename']
                                                    == filename]['event_label']
                        to_save_df = pd.DataFrame(pred[num_batch],
                                                  columns=encoder.classes_)

                        # True labels
                        to_save_df.rename({'variable': 'event'},
                                          axis='columns',
                                          inplace=True)
                        to_save_df['filename'] = filename
                        to_save_df['pred_labels'] = np.array(labels).repeat(
                            len(to_save_df))
                        sequences_to_save.append(to_save_df)
                    label_prediction = decoded_pred[num_batch]
                    for event_label, onset, offset in label_prediction:
                        time_predictions.append({
                            'filename': filename,
                            'event_label': event_label,
                            'onset': onset,
                            'offset': offset
                        })

        assert len(time_predictions) > 0, "No outputs, lower threshold?"
        pred_df = pd.DataFrame(
            time_predictions,
            columns=['filename', 'event_label', 'onset', 'offset'])
        clip_pred_df = pd.DataFrame(
            clip_predictions,
            columns=['filename', 'event_label', 'probability'])
        test_data_filename = os.path.splitext(
            os.path.basename(config_parameters['label']))[0]

        if save_seq:
            pd.concat(sequences_to_save).to_csv(os.path.join(
                experiment_path, 'probabilities.csv'),
                                                index=False,
                                                sep='\t',
                                                float_format="%.4f")

        pred_df = utils.predictions_to_time(pred_df, ratio=time_ratio)
        if pred_file:
            pred_df.to_csv(os.path.join(experiment_path,
                                        pred_file.format(test_data_filename)),
                           index=False,
                           sep="\t")
        tagging_df = metrics.audio_tagging_results(strong_labels_df, pred_df)
        clip_tagging_df = metrics.audio_tagging_results(
            strong_labels_df, clip_pred_df)
        print("Tagging Classwise Result: \n{}".format(
            tabulate(clip_tagging_df,
                     headers='keys',
                     showindex=False,
                     tablefmt='github')))
        print("mAP: {}".format(
            metrics.mAP(np.array(mAP_tar), np.array(mAP_pred))))
        if tag_file:
            clip_tagging_df.to_csv(os.path.join(
                experiment_path, tag_file.format(test_data_filename)),
                                   index=False,
                                   sep='\t')

        if sed_eval:
            event_result, segment_result = metrics.compute_metrics(
                strong_labels_df, pred_df, time_resolution=1.0)
            print("Event Based Results:\n{}".format(event_result))
            event_results_dict = event_result.results_class_wise_metrics()
            class_wise_results_df = pd.DataFrame().from_dict({
                f: event_results_dict[f]['f_measure']
                for f in event_results_dict.keys()
            }).T
            class_wise_results_df.to_csv(os.path.join(
                experiment_path, class_result_file.format(test_data_filename)),
                                         sep='\t')
            print("Class wise F1-Macro:\n{}".format(
                tabulate(class_wise_results_df,
                         headers='keys',
                         tablefmt='github')))
            if event_file:
                with open(
                        os.path.join(experiment_path,
                                     event_file.format(test_data_filename)),
                        'w') as wp:
                    wp.write(event_result.__str__())
            print("=" * 100)
            print(segment_result)
            if segment_file:
                with open(
                        os.path.join(experiment_path,
                                     segment_file.format(test_data_filename)),
                        'w') as wp:
                    wp.write(segment_result.__str__())
            event_based_results = pd.DataFrame(
                event_result.results_class_wise_average_metrics()['f_measure'],
                index=['event_based'])
            segment_based_results = pd.DataFrame(
                segment_result.results_class_wise_average_metrics()
                ['f_measure'],
                index=['segment_based'])
            result_quick_report = pd.concat((
                event_based_results,
                segment_based_results,
            ))
            # Add two summary rows (time tagging and clip tagging) to the quick report

            tagging_macro_f1, tagging_macro_pre, tagging_macro_rec = tagging_df.loc[
                tagging_df['label'] == 'macro'].values[0][1:]
            static_tagging_macro_f1, static_tagging_macro_pre, static_tagging_macro_rec = clip_tagging_df.loc[
                clip_tagging_df['label'] == 'macro'].values[0][1:]
            result_quick_report.loc['Time Tagging'] = [
                tagging_macro_f1, tagging_macro_pre, tagging_macro_rec
            ]
            result_quick_report.loc['Clip Tagging'] = [
                static_tagging_macro_f1, static_tagging_macro_pre,
                static_tagging_macro_rec
            ]
            with open(
                    os.path.join(
                        experiment_path,
                        'quick_report_{}.md'.format(test_data_filename)),
                    'w') as wp:
                print(tabulate(result_quick_report,
                               headers='keys',
                               tablefmt='github'),
                      file=wp)

            print("Quick Report: \n{}".format(
                tabulate(result_quick_report,
                         headers='keys',
                         tablefmt='github')))
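utils.median_filter is called above with a window_size and a threshold, i.e. it binarizes the frame-level probabilities and then smooths them along time; a plausible stand-in, with the exact behaviour and the (..., time, classes) layout assumed rather than confirmed:

import numpy as np
from scipy import ndimage


def median_filter(prob, window_size=1, threshold=0.5):
    """Binarize frame probabilities, then median-smooth along the time axis."""
    binarized = (prob > threshold).astype(float)
    size = [1] * binarized.ndim
    size[-2] = window_size   # assumes input shaped (..., time, classes)
    return ndimage.median_filter(binarized, size=tuple(size))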
Example #9
import numpy as np
from astropy.io import fits
from scipy import interpolate


def wcal(filename, telluric_name):
    tel = fits.getdata(telluric_name)
    fTel = tel['trans']
    wTel = tel['lam'] * 1E3
    cs_tell = interpolate.splrep(wTel, fTel, s=0.0)
    hdul = fits.open(filename)
    no = len(hdul) - 1
    nx = len(hdul[1].data)
    # Set starting parameters
    x1 = 255
    x2 = 511
    x3 = 767
    # Loop over the detectors, solving the wavelength calibration for each
    for io in range(no):
        print('Processing detector {:1}'.format(io + 1))
        wlout = np.zeros(nx)
        d = hdul[io + 1].data
        wlen = d['Wavelength']
        # Identify and mask bad pixels
        spc = d['Extracted_OPT']

        ### Two-step filtering to attempt to clean the bad pixels out.
        for i in range(5):
            spc = median_filter(spc, 128, False)
            spc = median_filter(spc, 4, True)
        '''
        plt.figure(figsize=(12,3), dpi=120)
        plt.title("Before")
        plt.plot(wlen, spc, c='r', label="input_spectrum")
        plt.plot(wTel, fTel*np.nanpercentile(spc, 70), c='k', label="telluric_model")
        plt.xlim(wlen[0], wlen[-1])
        plt.show()
        '''
        l1 = wlen[x1]
        l2 = wlen[x2]
        l3 = wlen[x3]
        pars = (l1, l2, l3)
        deltas = (1E-3, 1E-3, 1E-3)

        ### Before feeding to MCMC we do a grid search around the initial params,
        ### which helps the MCMC converge properly. This takes about as long as
        ### one of the MCMC chunks below, so it's a good tradeoff.

        print("Initialise grid search")
        dim1 = np.linspace(l1 - 0.25, l1 + 0.25, 30)
        dim2 = np.linspace(l2 - 0.25, l2 + 0.25, 30)
        dim3 = np.linspace(l3 - 0.25, l3 + 0.25, 30)
        chisq_cube = np.ones((len(dim1), len(dim2), len(dim3)))

        for i in range(len(dim1)):
            for j in range(len(dim2)):
                for k in range(len(dim3)):
                    theta = (dim1[i], dim2[j], dim3[k])
                    chisq_cube[i][j][k] = get_logL(theta, cs_tell, spc)

        tup = np.unravel_index(chisq_cube.argmax(), chisq_cube.shape)
        i, j, k = tup

        pars = (dim1[i], dim2[j], dim3[k])

        print("MCMC running")

        ### Here we run a short set of MCMC chains, really refining the solution
        ### produced by the grid search.

        for i in range(3):
            ll1, ll2, ll3 = run_emcee(pars, deltas, cs_tell, spc, plot=False)
            pars = ll1, ll2, ll3

        print(pars)
        wlout = get_wl_sol(*pars)
        d['WAVELENGTH'] = wlout
        hdul[io + 1].data = d
        '''
        plt.figure(figsize=(12,3), dpi=120)
        plt.title("After")
        plt.plot(wlout, spc, c='r', label="input_spectrum")
        plt.plot(wTel, fTel*np.nanpercentile(spc, 70), c='k', label="telluric_model")
        plt.xlim(wlen[0], wlen[-1])
        plt.show()
        '''
    ### Write out our updated spectra to a new file.
    out_file = filename[:-5] + "_proc.fits"
    hdul.writeto(out_file, overwrite=True)
    return
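The median_filter(spc, width, flag) helper used above is a project function whose boolean flag is not documented here; one common, self-contained way to achieve the same goal (cleaning bad pixels by comparing each pixel to a running median) is sketched below, with the rejection threshold being an assumption:

import numpy as np
from scipy import ndimage


def clean_bad_pixels(spc, width=16, n_sigmas=5.0):
    """Replace pixels that deviate strongly from a running median."""
    spc = np.asarray(spc, dtype=float)
    smooth = ndimage.median_filter(spc, size=width)
    resid = spc - smooth
    sigma = 1.4826 * np.median(np.abs(resid - np.median(resid)))  # robust std via MAD
    bad = np.abs(resid) > n_sigmas * sigma
    cleaned = spc.copy()
    cleaned[bad] = smooth[bad]
    return cleaned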