Example #1
from ecosound.core.measurement import Measurement

# Additional imports used by the code below
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
# plot_spectrogram, plot_top_view, plot_side_view and plot_video_frame are
# assumed to be helper functions defined elsewhere in the author's project.

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## Load netcdf measurements from a folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
# outfile=r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset\dataset_noise.nc'
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
# #meas.to_netcdf(outfile)

# # ## Load netcdf measurements from a single file
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2\JASCOAMARHYDROPHONE742_20140913T115018.797Z.wav.nc'
meas = Measurement()
meas.from_netcdf(netcdf_files, verbose=True)
print(len(meas))

# # ## Load netcdf measurements from a list of files
# netcdf_files = []
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017121114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017151114.wav.nc")
# netcdf_files.append(r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2\67391492.181017181114.wav.nc")
# meas = Measurement()
# meas.from_netcdf(netcdf_files, verbose=True)
# print(len(meas))
def plot_full_figure(time_sec=None):

    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\large-array_quillback\AMAR173.4.20190920T161248Z.nc'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\AMAR173.1.20190920T161248Z.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\hydrophones_config_07-HI.csv'
    t1_sec = 1570
    t2_sec = 1587  #1590

    filter_x = [-1.5, 1.5]
    filter_y = [-1.5, 1.5]
    filter_z = [-1.5, 1.5]
    filter_x_std = 0.5
    filter_y_std = 0.5
    filter_z_std = 0.5

    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],  #0.7
        'uncertainty_width': [0.2],  #0.2
        'x_min': [-1.26],
        'x_max': [1.26],
        'y_min': [-1.26],
        'y_max': [1.26],
        'z_min': [-1.5],
        'z_max': [2.1],
    })
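    # params is a one-row DataFrame of plotting style options (markers, colors,
    # uncertainty line style, axis limits) that is passed to the plot_top_view()
    # and plot_side_view() helpers further down.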

    ## ###########################################################################

    ## load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data

    ## load hydrophone locations
    hydrophones_config = pd.read_csv(hp_config_file)

    # Filter
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                            & (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]
    # Adjust detection times
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec

    if time_sec is not None:
        loc_data = loc_data.loc[(loc_data['time_max_offset'] <= time_sec)]
    else:
        print('Static')

    # update loc object
    loc.data = loc_data

    # plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec-t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors*2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                 norm=norm,
    #                                 orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file,
                                             loc,
                                             t1_sec,
                                             t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax,
                                        cmap=cmap,
                                        norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')

    if time_sec:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # plot detection points on top of spectrogram
    #gs0 = fig_final.add_gridspec(60,1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x,
                     det_y,
                     c=loc_data['time_min_offset'],
                     cmap=cmap,
                     norm=norm,
                     s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # #pos =[left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)

    #size = fig_final.get_size_inches()

    plt.subplots_adjust(left=0.08,
                        bottom=0.1,
                        right=0.95,
                        top=0.95,
                        wspace=0,
                        hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1,len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x,det_y,'.r')
    # cax2.set_xlim(ax_spectro.get_xlim())

    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # plot localization top
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # plot localization side
    #ax_sideloc = fig_final.add_subplot(3,3,7,sharex = ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm,
                   ax_sideloc)
    ax_sideloc.set_anchor('W')

    # set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1,1)
    # frame1_sec = 152.8 # second detection -> 16:38:59.8
    # #ax_video1 = fig_final.add_subplot(3,3,5)
    # plot_video_frame(video_file,frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1,1)
    # frame2_sec = 160 # 4th detection -> 16:39:07
    # #ax_video2 = fig_final.add_subplot(3,3,6)
    # plot_video_frame(video_file,frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(8.6, 6.72)

    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
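
# Minimal usage sketch (an assumption; the input paths are hard-coded inside
# plot_full_figure and the output filename below is illustrative):
if __name__ == '__main__':
    fig = plot_full_figure(time_sec=None)  # pass a time in seconds to draw a red time cursor
    fig.savefig('plot_full_figure.png', dpi=300)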
import pandas as pd
from ecosound.core.measurement import Measurement

"""
Gathers measuremenst for all annotations and noise. Merges into a single dataset,
and re-label classes to create a 2-class dataset 'FS' vs 'NN'.

"""

# Define input and output files
annot_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only.nc'
noise_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Noise_dataset'
outfile=r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_FS-NN_modified_20201105145300.nc'

# Load measurements
meas_annot = Measurement()
meas_annot.from_netcdf(annot_file)
meas_noise = Measurement()
meas_noise.from_netcdf(noise_file)

## Label noise measurement as 'NN'
meas_noise.insert_values(label_class='NN')
print(meas_noise.summary())

## relabel annotations that are not 'FS' as 'NN'
print(meas_annot.summary())
meas_annot.data['label_class'].replace(to_replace=['', 'ANT','HS','KW','UN'], value='NN', inplace=True)
print(meas_annot.summary())

## merge the 2 datasets
meas_NN_FS = meas_noise + meas_annot
print(meas_NN_FS.summary())
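
# Save the merged 2-class dataset (a sketch; `outfile` defined above is assumed
# to be the intended destination):
meas_NN_FS.to_netcdf(outfile)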
def plot_full_figure(time_sec=None):

    #loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_1m_5cm.nc'
    loc_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_2cm_3m.nc'
    loc_file_matlab = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\results\mobile_array_copper\localizations_matlab_with_CI.csv'
    audio_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\671404070.190918222812.wav'
    video_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\large_array\2019-09-15_HornbyIsland_AMAR_07-HI\3420_FishCam01_20190920T163627.613206Z_1600x1200_awb-auto_exp-night_fr-10_q-20_sh-0_b-50_c-0_i-400_sat-0.mp4'
    hp_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\data\mobile_array\2019-09-14_HornbyIsland_Trident\hydrophones_config_HI-201909.csv'
    localization_config_file = r'C:\Users\xavier.mouy\Documents\Reports_&_Papers\Papers\10-XAVarray_2020\config_files\localization_config_mobile_array.yaml'
    t1_sec = 214  #216
    t2_sec = 224  #223

    filter_x = [-5, 5]
    filter_y = [-5, 5]
    filter_z = [-2, 5]
    filter_x_std = 6
    filter_y_std = 9
    filter_z_std = 6

    params = pd.DataFrame({
        'loc_color': ['black'],
        'loc_marker': ['o'],
        'loc_alpha': [1],
        'loc_size': [5],
        'uncertainty_color': ['black'],
        'uncertainty_style': ['-'],
        'uncertainty_alpha': [1],  #0.7
        'uncertainty_width': [0.2],  #0.2
        'x_min': [-1.5],
        'x_max': [1.5],
        'y_min': [-0.5],
        'y_max': [3],
        'z_min': [-1.5],
        'z_max': [1.5],
    })

    ## ###########################################################################
    localization_config = read_yaml(localization_config_file)
    hydrophones_config = pd.read_csv(hp_config_file)
    sound_speed_mps = localization_config['ENVIRONMENT']['sound_speed_mps']
    ref_channel = localization_config['TDOA']['ref_channel']
    hydrophone_pairs = defineReceiverPairs(len(hydrophones_config),
                                           ref_receiver=ref_channel)
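    # read_yaml and defineReceiverPairs above (and predict_tdoa / calc_loc_errors
    # in the commented-out block below) are assumed to be helpers from the
    # author's localization code; they are not defined in this snippet.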

    ## load localization results
    loc = Measurement()
    loc.from_netcdf(loc_file)
    loc_data = loc.data

    # Use the MATLAB localizations with confidence intervals instead
    # (this overrides the loc_data loaded from the netCDF file above)
    loc_data = pd.read_csv(loc_file_matlab)

    # ## recalculate data errors
    # diff=[]
    # idx = 0
    # for idx in range(len(loc_data)):
    #     m = loc_data.loc[[idx],['x','y','z']]
    #     tdoa_m = predict_tdoa(m, sound_speed_mps, hydrophones_config, hydrophone_pairs)
    #     tdoa_measured = loc_data.loc[[idx],['tdoa_sec_1','tdoa_sec_2','tdoa_sec_3']].to_numpy()
    #     #diff_temp = (tdoa_m-tdoa_measured.T)**2
    #     if idx==0:
    #         diff = (tdoa_m-tdoa_measured.T)**2
    #     else:
    #         diff = np.vstack((diff,(tdoa_m-tdoa_measured.T)**2))

    # Q = len(loc_data)
    # #M = m.size # number of dimensions of the model (here: X, Y, and Z)
    # #N = len(tdoa_sec) # number of measurements
    # #error_std = np.sqrt((1/(Q*(N-M))) * (sum((tdoa_sec-tdoa_m)**2)))
    # tdoa_errors_std = np.sqrt( (1/Q)*(sum(diff)))

    # #tdoa_errors_std = calc_data_error(tdoa_sec, m, sound_speed_mps,hydrophones_config, hydrophone_pairs)
    # for idx in range(len(loc_data)):
    #     loc_errors_std = calc_loc_errors(tdoa_errors_std, loc_data.loc[[idx],['x','y','z']] , sound_speed_mps, hydrophones_config, hydrophone_pairs)
    #     print('m')

    # Filter
    loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
    loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                            & (loc_data['x'] <= max(filter_x)) &
                            (loc_data['y'] >= min(filter_y)) &
                            (loc_data['y'] <= max(filter_y)) &
                            (loc_data['z'] >= min(filter_z)) &
                            (loc_data['z'] <= max(filter_z)) &
                            (loc_data['x_std'] <= filter_x_std) &
                            (loc_data['y_std'] <= filter_y_std) &
                            (loc_data['z_std'] <= filter_z_std)]
    # Adjust detection times
    loc_data['time_min_offset'] = loc_data['time_min_offset'] - t1_sec
    loc_data['time_max_offset'] = loc_data['time_max_offset'] - t1_sec

    if time_sec is not None:
        loc_data = loc_data.loc[(loc_data['time_max_offset'] <= time_sec)]
    else:
        print('Static')

    # update loc object
    loc.data = loc_data

    # plots
    # fig, ax = plt.subplots(figsize=(6, 1))
    # fig.subplots_adjust(bottom=0.5)
    # n_colors = t2_sec-t1_sec
    # cmap = mpl.cm.get_cmap('CMRmap', n_colors*2)
    # norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    # ax_cmap = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
    #                                 norm=norm,
    #                                 orientation='horizontal')
    # ax_cmap.set_label('Time (s)')

    # Plot spectrogram
    fig_final, ax_spectro = plot_spectrogram(audio_file,
                                             loc,
                                             t1_sec,
                                             t2_sec,
                                             geometry=(5, 1, 1))
    ax_spectro.set_title("")
    ax_spectro.get_xaxis().set_visible(False)
    n_colors = t2_sec - t1_sec
    cmap = mpl.cm.get_cmap('viridis', n_colors * 4)
    norm = mpl.colors.Normalize(vmin=0, vmax=n_colors)
    divider = make_axes_locatable(ax_spectro)
    cax = divider.append_axes('bottom', 0.1, pad=0.03)
    ax_cmap = mpl.colorbar.ColorbarBase(cax,
                                        cmap=cmap,
                                        norm=norm,
                                        orientation='horizontal')
    ax_cmap.set_label('Time (s)')

    if time_sec:
        SFreq_min, SFreq_max = ax_spectro.get_ylim()
        ax_spectro.plot([time_sec, time_sec], [SFreq_min, SFreq_max], 'r')

    # plot detection points on top of spectrogram
    #gs0 = fig_final.add_gridspec(60,1)
    ax_detec = fig_final.add_subplot(20, 1, 1)
    det_y = np.asarray(np.ones((1, len(loc_data['time_min_offset']))))[0]
    det_x = np.asarray(loc_data['time_min_offset'])
    ax_detec.scatter(det_x,
                     det_y,
                     c=loc_data['time_min_offset'],
                     cmap=cmap,
                     norm=norm,
                     s=12)
    ax_detec.set_xlim(ax_spectro.get_xlim())
    ax_detec.get_xaxis().set_visible(False)
    ax_detec.get_yaxis().set_visible(False)
    ax_detec.axis('off')

    # #pos =[left, bottom, width, height]
    # box = ax_detec.get_position()
    # box.y0 = box.y0 + 0.6
    # box.y1 = box.y1 + 0.6
    # ax_detec.set_position(box)

    #size = fig_final.get_size_inches()

    plt.subplots_adjust(left=0.08,
                        bottom=0.1,
                        right=0.95,
                        top=0.95,
                        wspace=0,
                        hspace=0)

    # divider2 = make_axes_locatable(ax_spectro)
    # cax2 = divider2.append_axes('top', size=0.2, pad=10.0)
    # det_y = np.asarray(np.ones((1,len(loc_data['time_min_offset']))))[0]
    # det_x = np.asarray(loc_data['time_min_offset'])
    # cax2.plot(det_x,det_y,'.r')
    # cax2.set_xlim(ax_spectro.get_xlim())

    # ax_cmap = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
    #                                     norm=norm,
    #                                     orientation='horizontal')

    gs = fig_final.add_gridspec(3, 2)

    # plot localization top
    ax_toploc = fig_final.add_subplot(gs[1:, 1])
    plot_top_view(hydrophones_config, loc_data, params, cmap, norm, ax_toploc)
    ax_toploc.set_anchor('E')

    # plot localization side
    #ax_sideloc = fig_final.add_subplot(3,3,7,sharex = ax_toploc)
    ax_sideloc = fig_final.add_subplot(gs[1:, 0])
    plot_side_view(hydrophones_config, loc_data, params, cmap, norm,
                   ax_sideloc)
    ax_sideloc.set_anchor('W')

    # set the spacing between subplots
    plt.subplots_adjust(wspace=0, hspace=0)

    # # plot video frame 1
    # fig_video1, ax_video1 = plt.subplots(1,1)
    # frame1_sec = 152.8 # second detection -> 16:38:59.8
    # #ax_video1 = fig_final.add_subplot(3,3,5)
    # plot_video_frame(video_file,frame1_sec, ax_video1)
    # ax_video1.get_xaxis().set_visible(False)
    # ax_video1.get_yaxis().set_visible(False)

    # # plot video frame 2
    # fig_video2, ax_video2 = plt.subplots(1,1)
    # frame2_sec = 160 # 4th detection -> 16:39:07
    # #ax_video2 = fig_final.add_subplot(3,3,6)
    # plot_video_frame(video_file,frame2_sec, ax_video2)
    # ax_video2.get_xaxis().set_visible(False)
    # ax_video2.get_yaxis().set_visible(False)

    fig_final.set_size_inches(9.08, 6.72)

    box = ax_spectro.get_position()
    box.y0 = box.y0 - 0.03
    box.y1 = box.y1 - 0.03
    ax_spectro.set_position(box)
    return fig_final
Example #5
    'uncertainty_style': ['-'],
    'uncertainty_alpha': [1],  #0.7
    'uncertainty_width': [0.2],  #0.2
    'x_min': [-1.26],
    'x_max': [1.26],
    'y_min': [-1.26],
    'y_max': [1.26],
    'z_min': [-1.5],
    'z_max': [2.1],
})

## ###########################################################################

## load localization results
loc = Measurement()
loc.from_netcdf(loc_file)
loc_data = loc.data

## load hydrophone locations
hydrophones_config = pd.read_csv(hp_config_file)

# Filter
loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                        & (loc_data['x'] <= max(filter_x)) &
                        (loc_data['y'] >= min(filter_y)) &
                        (loc_data['y'] <= max(filter_y)) &
                        (loc_data['z'] >= min(filter_z)) &
                        (loc_data['z'] <= max(filter_z)) &
                        (loc_data['x_std'] <= filter_x_std) &
                        (loc_data['y_std'] <= filter_y_std) &
                        (loc_data['z_std'] <= filter_z_std)]
Example #6
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# # ## netcdf folder from Measurements
# netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
# annot4 = Annotation()
# annot4.from_netcdf(netcdf_files, verbose=True)
# print(len(annot4))

# ## netcdf folder from Measurements
netcdf_files = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\test2'
annot4 = Measurement()
annot4.from_netcdf(netcdf_files, verbose=True)
print(len(annot4))

# import xarray as xr
# d=annot3.data
# index = range(0,len(d),1)
# d['index']=index
# #d = d.set_index(['index','entry_date', 'frequency_min','label_class'])
# d = d.set_index(['index'])

# data = d.to_xarray()

# data2=data.sel(index=0)
    'uncertainty_alpha': [1],  #0.7
    'uncertainty_width': [0.2],  #0.2
    'x_min': [-3],
    'x_max': [3],
    'y_min': [-3],
    'y_max': [3],
    'z_min': [-3],
    'z_max': [3],
    })
    
## ###########################################################################

## load localization results 0 degrees
file1 = '0_deg.nc'
loc1 = Measurement()
loc1.from_netcdf(os.path.join(indir,file1))
# loc1_data = loc1.data
# # Filter
# loc1_data = loc1_data.dropna(subset=['x', 'y','z']) # remove NaN
# loc1_data = loc1_data.loc[(loc1_data['x']>=min(filter_x)) & 
#                         (loc1_data['x']<=max(filter_x)) &
#                         (loc1_data['y']>=min(filter_y)) & 
#                         (loc1_data['y']<=max(filter_y)) &
#                         (loc1_data['z']>=min(filter_z)) & 
#                         (loc1_data['z']<=max(filter_z)) &
#                         (loc1_data['x_std']<= filter_x_std) & 
#                         (loc1_data['y_std']<= filter_y_std) &
#                         (loc1_data['z_std']<= filter_z_std)
#                         ]

# # Adjust detection times
Example #8
# filter_y=[-1.5, 1.5]
# filter_z=[-2, 2]
# filter_x_std=0.3
# filter_y_std=0.3
# filter_z_std=0.3

# load data
print('')
print('Loading dataset')
idx = 0
for infile in os.listdir(indir):
    if infile.endswith(".nc"):
        print(infile)

        locs = Measurement()
        locs.from_netcdf(os.path.join(indir, infile))
        loc_data = locs.data

        # Filter
        loc_data = loc_data.dropna(subset=['x', 'y', 'z'])  # remove NaN
        loc_data = loc_data.loc[(loc_data['x'] >= min(filter_x))
                                & (loc_data['x'] <= max(filter_x)) &
                                (loc_data['y'] >= min(filter_y)) &
                                (loc_data['y'] <= max(filter_y)) &
                                (loc_data['z'] >= min(filter_z)) &
                                (loc_data['z'] <= max(filter_z)) &
                                (loc_data['x_std'] <= filter_x_std) &
                                (loc_data['y_std'] <= filter_y_std) &
                                (loc_data['z_std'] <= filter_z_std)]

        if idx == 0:
            # assumed continuation: accumulate all files into a single DataFrame
            data = loc_data
        else:
            data = pd.concat([data, loc_data], ignore_index=True)
        idx += 1
import pandas as pd
from ecosound.core.annotation import Annotation  # assumed import path
from ecosound.core.measurement import Measurement


## Input parameters #########################################################

annotation_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\Master_annotations_dataset.nc"
detection_file = r"C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\Full_dataset_with_metadata2"
outfile=r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\results\dataset_annotations_only2.nc'

# load annotations
annot = Annotation()
annot.from_netcdf(annotation_file)

# load detections
detec = Measurement()
detec.from_netcdf(detection_file)
print(detec)

freq_ovp = True # default True
dur_factor_max = None # default None
dur_factor_min = 0.1 # default None
ovlp_ratio_min = 0.3 # default None
remove_duplicates = True # default False
inherit_metadata = True # default False
filter_deploymentID = False # default True

detec.filter_overlap_with(annot,
                          freq_ovp=freq_ovp,
                          dur_factor_max=dur_factor_max,
                          dur_factor_min=dur_factor_min,
                          ovlp_ratio_min=ovlp_ratio_min,
                          remove_duplicates=remove_duplicates,
                          inherit_metadata=inherit_metadata,
                          filter_deploymentID=filter_deploymentID)
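
# Assumed final steps (a sketch): inspect the filtered detections and save them
# to the outfile defined above.
print(detec.summary())
detec.to_netcdf(outfile)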
Example #10
deployment_file = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\deployment_info.csv'
data_dir = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Dectector\datasets\UVIC_mill-bay_2019\audio_data'

# load meta data
operator_name = platform.uname().node
dep_info = DeploymentInfo()
dep_info.read(deployment_file)

#list files
files = ecosound.core.tools.list_files(indir,
                                       ext,
                                       recursive=False,
                                       case_sensitive=True)

for idx, file in enumerate(files):
    print(str(idx) + r'/' + str(len(files)) + ': ' + file)
    meas = Measurement()
    meas.from_netcdf(file)

    meas.insert_metadata(deployment_file)

    file_name = os.path.splitext(os.path.basename(file))[0]
    meas.insert_values(
        operator_name=platform.uname().node,
        audio_file_name=os.path.splitext(os.path.basename(file_name))[0],
        audio_file_dir=data_dir,
        audio_file_extension='.wav',
        audio_file_start_date=ecosound.core.tools.filename_to_datetime(
            file_name)[0])
    meas.to_netcdf(os.path.join(outdir, file_name + '.nc'))
Example #11
# Imports used by main() below
import os
import pickle
from datetime import datetime

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from ecosound.core.measurement import Measurement
# add_class_ID, add_subclass, add_group, cross_validation, summarize_performance,
# plot_PR_curves, plot_F_curves, plot_datasets_distrib, plot_dataset_distrib,
# plot_datasets_groups, classification_train and classification_predict are
# assumed to be helper functions from the author's project code.


def main():
    # input arguments
    input_args = dict()
    input_args['positive_class_label'] = 'FS'
    input_args['train_ratio'] = 0.75
    input_args['cv_splits'] = 10  #5
    input_args['cv_repeats'] = 1
    input_args['rebalance_classes'] = True
    #input_args['data_file']= r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20201105145300.nc'
    input_args[
        'data_file'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\dataset_FS-NN_modified_20200902194334.nc'
    input_args[
        'out_dir'] = r'C:\Users\xavier.mouy\Documents\PhD\Projects\Detector\results\Classification'
    input_args['run_CV'] = False
    input_args['train_final_model'] = True
    input_args['final_model_name'] = 'RF50'

    ## DEFINITION OF CLASSIFIERS -------------------------------------------------
    models = []
    models.append(('Dummy', DummyClassifier(strategy="constant", constant=1)))
    models.append(
        ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    #models.append(('KNN', KNeighborsClassifier(n_neighbors=4, metric='euclidean')))
    models.append(('CART', DecisionTreeClassifier()))
    #models.append(('NB', GaussianNB()))
    models.append(('XGBoost', XGBClassifier()))
    #models.append(('MLP', MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=0)))
    models.append(('RF5',
                   RandomForestClassifier(n_estimators=5,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF10',
                   RandomForestClassifier(n_estimators=10,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF30',
                   RandomForestClassifier(n_estimators=30,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    models.append(('RF50',
                   RandomForestClassifier(n_estimators=50,
                                          min_samples_split=100,
                                          min_samples_leaf=50,
                                          random_state=0)))
    #models.append(('RF100', RandomForestClassifier(n_estimators=100,min_samples_split= 100, min_samples_leaf=50,random_state=0)))

    ## setup output folder
    now = datetime.now()
    now_str = now.strftime("%Y%m%dT%H%M%S")
    out_dir = os.path.join(input_args['out_dir'], now_str)
    os.mkdir(out_dir)

    ## Save input args to txt file
    text_file = open(os.path.join(out_dir, 'input_args_' + now_str + '.txt'),
                     "w")
    n = text_file.write(str(input_args))
    text_file.close()

    ## Checks that model name exists before running all the processing
    if input_args['train_final_model']:
        model_idx = [model[0]
                     for model in models].index(input_args['final_model_name'])

    ## LOAD DATASET --------------------------------------------------------------
    dataset = Measurement()
    dataset.from_netcdf(input_args['data_file'])
    print(dataset.summary())

    ## DATA PREPARATION ----------------------------------------------------------
    # features
    features = dataset.metadata['measurements_name'][
        0]  # list of features used for the classification
    # data
    data = dataset.data
    # drop FS observations at Mill Bay
    indexNames = data[(data['label_class'] == 'FS')
                      & (data['location_name'] == 'Mill bay')].index
    data.drop(indexNames, inplace=True)
    # add subclass + IDs
    data, class_encoder = add_class_ID(data,
                                       input_args['positive_class_label'])
    data, _ = add_subclass(data)
    #subclass2class_table = subclass2class_conversion(data)
    # add group ID
    data, group_encoder = add_group(data)
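    # add_class_ID, add_subclass and add_group are assumed project helpers that
    # append the encoded columns used below ('class_ID', 'subclass_ID',
    # 'subclass_label', 'group_ID') and return the corresponding encoders.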

    ## DATA CLEAN-UP -------------------------------------------------------------
    # Basic stats on all features
    data_stats = data[features].describe()
    #print(data_stats)

    # how many NaNs and Infs per column
    data = data.replace([np.inf, -np.inf], np.nan)
    Nnan = data[features].isna().sum()
    ax = Nnan.plot(kind='bar', title='Number of NaN/Inf', grid=True)
    ax.set_ylabel('Number of observations with NaNs/Infs')

    # Drop some features with too many NaNs
    features.remove('freq_flatness')
    features.remove('snr')
    features.remove('uuid')

    # drop observations/rows with NaNs
    data.dropna(subset=features, axis=0, how='any', thresh=None, inplace=True)
    data_stats2 = data[features].describe()

    # ## VISUALIZATION -------------------------------------------------------------
    # # box and whisker plots
    # data[features].plot(kind='box', subplots=True, layout=(7,7), sharex=False, sharey=False)
    # # histograms
    # data[features].hist()
    # # scatter plot matrix
    # pd.plotting.scatter_matrix(data[features])
    # scatter plot PCA
    # pca = PCA(n_components=2)
    # X  = pca.fit_transform(data[features])
    # y = data['class_ID']
    # plot_2d_space(X, y, 'Imbalanced dataset (2 PCA components)')

    ## SPLIT DATA INTO TRAIN & TEST SETS ------------------------------------------
    n_splits = round(1 / (1 - input_args['train_ratio']))
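    # With train_ratio = 0.75 this gives n_splits = 4; taking only the first
    # fold of the grouped, stratified split holds out ~25% of the data as the
    # test set while keeping groups disjoint between train and test.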
    skf = StratifiedGroupKFold(n_splits=n_splits,
                               shuffle=True,
                               random_state=None)
    for train_index, test_index in skf.split(data,
                                             data['subclass_ID'],
                                             groups=data['group_ID']):
        data_train, data_test = data.iloc[train_index], data.iloc[test_index]
        break
    # plot class repartition
    plot_datasets_distrib(data_train, data_test)
    plot_dataset_distrib(data,
                         attr_list=['subclass_label', 'label_class'],
                         title='Full dataset')
    plot_dataset_distrib(data_train,
                         attr_list=['subclass_label', 'label_class'],
                         title='Training set')
    plot_dataset_distrib(data_test,
                         attr_list=['subclass_label', 'label_class'],
                         title='Test set')
    # verify groups are not used in both datasets
    groups_intersection = plot_datasets_groups(data_train,
                                               data_test,
                                               show=True)

    ## CROSS VALIDATION ON TRAIN SET ----------------------------------------------
    if input_args['run_CV']:
        # run train/test experiments
        cv_predictions, cv_performance = cross_validation(
            data_train,
            models,
            features,
            cv_splits=input_args['cv_splits'],
            cv_repeats=input_args['cv_repeats'],
            rebalance=input_args['rebalance_classes'])
        # display summary results
        performance_report = summarize_performance(cv_performance,
                                                   threshold=0.5)
        print(performance_report)
        # plot mean Precision and Recall curves
        plot_PR_curves(cv_performance)
        plot_F_curves(cv_performance)
        # save results
        CV_results = {
            'cv_predictions': cv_predictions,
            'cv_performance': cv_performance,
            'models': models,
            'input_args': input_args,
        }
        pickle.dump(
            CV_results,
            open(os.path.join(out_dir, 'CV_' + now_str + '.sav'), 'wb'))

    ## FINAL EVALUATION ON TEST SET -----------------------------------------------
    if input_args['train_final_model']:

        print(' ')
        print('Final evaluation on test set:')
        print(' ')

        model_name = models[model_idx][0]
        model = models[model_idx][1]  # RF50
        print(model)
        X_train = data_train[features]  # features
        Y_train = data_train['class_ID']  #labels
        X_test = data_test[features]  # features
        Y_test = data_test['class_ID']  #labels
        # feature normalization
        Norm_mean = X_train.mean()
        Norm_std = X_train.std()
        X_train = (X_train - Norm_mean) / Norm_std
        X_test = (X_test - Norm_mean) / Norm_std
        # Train on entire train set
        final_model = classification_train(
            X_train, Y_train, model, rebalance=input_args['rebalance_classes'])
        # Evaluate on full test set
        pred_class, pred_prob = classification_predict(X_test, final_model)
        # Print evaluation report
        CR = classification_report(Y_test, pred_class)
        print(CR)
        # save the model to disk
        model = {
            'name': model_name,
            'model': final_model,
            'features': features,
            'normalization_mean': Norm_mean,
            'normalization_std': Norm_std,
            'classes': class_encoder,
            'input_args': input_args,
        }
        pickle.dump(
            model,
            open(
                os.path.join(out_dir,
                             model_name + '_model_' + now_str + '.sav'), 'wb'))
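
# Assumed script entry point (a minimal sketch):
if __name__ == '__main__':
    main()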
# print('-----------------')
# print('  Annotations    ')
# annot = Annotation()
# annot.from_netcdf(annotation_file)
# print(annot.summary())
# annot_perfile = annot.summary(rows='audio_file_name',columns='label_class')
# annot_perfile.rename(columns={"FS": "FS-annot"}, inplace=True)
# annot_perfile = annot_perfile['FS-annot'].to_frame()
# #annot_perfile.to_csv('annot.csv')

print(' ')
print('-----------------')
print('  Detections     ')
# load detections
detec = Measurement()
detec.from_netcdf(detec_file)
print(detec.summary())
detec_perfile = detec.summary(rows='audio_file_name', columns='label_class')
detec_perfile.rename(columns={"FS": "FS-detec"}, inplace=True)
detec_perfile = detec_perfile['FS-detec'].to_frame()

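# Note: `annot_perfile` used below is computed in the commented-out annotation
# block above; that block needs to be run for the concatenation to work.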
dd = pd.concat([annot_perfile, detec_perfile], axis=1)
dd['diff'] = dd['FS-annot'] - dd['FS-detec']
dd.plot()

# outdir=r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\detec_export'
# detec.to_pamlab(outdir, single_file=False)

# outdir=r'C:\Users\xavier.mouy\Documents\Workspace\GitHub\ecosound\tests\annot_export'
# annot.to_pamlab(outdir, single_file=False)
Example #13
min_threshold = 0.7
noise_label = 'NN'

# load names of files and the start/stop times where false alarms have been
# manually identified
df = pd.read_excel(xls_file, header=None)

for idx in range(0, len(df)):
    # file name to load
    wav_file_name = df[0][idx]
    tmin_sec = df[1][idx]
    tmax_sec = df[2][idx]
    print(wav_file_name, tmin_sec, tmax_sec)
    detec_file_path = os.path.join(in_dir, wav_file_name + '.nc')
    # load detection/measurement file
    meas = Measurement()
    meas.from_netcdf(detec_file_path)
    data_df = meas.data
    # Only keep fish detections above the confidence threshold and within the given time window
    data_df_filt = data_df[(data_df.label_class == fish_label)
                           & (data_df.confidence >= min_threshold)
                           & (data_df.time_min_offset >= tmin_sec)
                           & (data_df.time_max_offset <= tmax_sec)]
    data_df_filt.reset_index(inplace=True, drop=True)
    meas.data = data_df_filt
    # Change fish labels to noise labels
    meas.insert_values(label_class=noise_label)
    # Save to new nc file
    meas.to_netcdf(os.path.join(out_dir, wav_file_name + str(idx)))

print('done')
classif_model = pickle.load(open(classif_model_file, 'rb'))
features = classif_model['features']
model = classif_model['model']
Norm_mean = classif_model['normalization_mean']
Norm_std = classif_model['normalization_std']
classes_encoder = classif_model['classes']

# loop through each file
files_list = os.listdir(indir)  # list of files
for file in files_list:
    if os.path.isfile(os.path.join(indir, file)) & file.endswith(file_ext):
        if os.path.isfile(os.path.join(outdir, file)) is False:
            # load file
            print(file)
            meas = Measurement()
            meas.from_netcdf(os.path.join(indir, file))
            # reclassify
            data = meas.data
            n1 = len(data)
            # drop observations/rows with NaNs
            data = data.replace([np.inf, -np.inf], np.nan)
            data.dropna(subset=features,
                        axis=0,
                        how='any',
                        thresh=None,
                        inplace=True)
            n2 = len(data)
            print('Deleted observations (due to NaNs): ' + str(n1 - n2))
            # Classification - predictions
            X = data[features]
            X = (X - Norm_mean) / Norm_std
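            # The snippet ends here. A sketch of the assumed continuation,
            # mirroring the prediction step in Example #11 (classification_predict
            # is the same assumed project helper used there):
            # pred_class, pred_prob = classification_predict(X, model)
            # ... then store pred_class / pred_prob back into meas.data and save
            # with meas.to_netcdf(os.path.join(outdir, file))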