def run_detector(day_sound_path, seltab_out_path, classifier_model_path, config):
    """Run the detector on one day of sound and write a Raven selection table.

    Args:
        day_sound_path: folder holding one day's .aif (or .wav) sound files
        seltab_out_path: directory where the selection table (.txt) is written
        classifier_model_path: path to a trained Keras model, or a list/tuple
            of paths for an ensemble (per-window scores are averaged)
        config: parameter configuration object

    Returns:
        True on success; None when no sound files were found.
    """
    # A single path string means one model; a list/tuple means an ensemble.
    multi_model = not isinstance(classifier_model_path, str)
    if multi_model:
        classifier_model_list = [
            load_model(mm, custom_objects={'F1_Class': F1_Class})
            for mm in classifier_model_path
        ]
    else:
        # custom_objects is required because the model was compiled with the
        # project-specific F1_Class metric.
        classifier_model = load_model(classifier_model_path,
                                      custom_objects={'F1_Class': F1_Class})

    # Virginia data uses .aif; fall back to .wav (NOPP data).
    # TODO: determine the extension from the folder contents instead of probing.
    sound_list = sorted(glob.glob(os.path.join(day_sound_path + '/', '*.aif')))
    if len(sound_list) == 0:
        sound_list = sorted(glob.glob(os.path.join(day_sound_path + '/', '*.wav')))
        if len(sound_list) == 0:
            print('.wav files are not found, either. Leave.')
            return

    DayFile = os.path.join(seltab_out_path + '/',
                           os.path.basename(day_sound_path) + '.txt')
    # 'with' guarantees the table is closed even if preprocessing or
    # prediction raises (previously the handle leaked on error).
    with open(DayFile, 'w') as f:
        f.write('Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tScore\n')
        EventId = 0
        for ff in sound_list:
            ff2 = os.path.splitext(os.path.basename(ff))[0]
            print(ff2)
            # timestamp is encoded in the file name
            time_curr = get_time_stamp(ff2, config.TIME_FORMAT)

            samples0, sample_rate = sf.read(ff)
            if samples0.ndim == 1:
                # make single-channel audio a (num_sample, 1) array
                samples0 = np.reshape(samples0, (-1, 1))
            num_sample, num_chan = samples0.shape

            for cc in range(num_chan):
                # number of complete sliding windows in this file
                ss_last = int(np.floor(
                    (num_sample - config.FRAME_SIZE) * 1.0 / config.FRAME_STEP))
                print('#', end='')
                # Batch every window of the file into one predict() call;
                # this is ~3x faster than predicting window-by-window.
                spectro_list = []
                for ss in range(ss_last):
                    samples = samples0[ss * config.FRAME_STEP:
                                       ss * config.FRAME_STEP + config.FRAME_SIZE,
                                       cc]
                    spectro_list.append(preprocess(samples, config))
                fea_spectro = np.vstack(spectro_list)
                fea_spectro = fea_spectro.reshape(
                    fea_spectro.shape[0], config.IMG_X, config.IMG_Y, 1)

                if multi_model:
                    # Ensemble: average the positive-class scores.  sum()
                    # starts from 0, so no model's output array is mutated
                    # in place (the previous += aliased the first array).
                    score_arr_list = [mm.predict(fea_spectro)[:, 1]
                                      for mm in classifier_model_list]
                    score_arr = sum(score_arr_list) / float(len(score_arr_list))
                else:
                    score_arr = classifier_model.predict(fea_spectro)[:, 1]

                call_arr = np.where(score_arr > config.SCORE_THRE)[0]
                if call_arr.shape[0] != 0:
                    # At least one window above threshold: merge overlapping
                    # detections via non-maximum suppression.
                    call_arr_sepa = non_max_suppress(call_arr, score_arr, config)
                    print('==>> ', end='')
                    print(call_arr_sepa)
                    for jj in call_arr_sepa:
                        EventId += 1
                        # Raven selection table format: begin/end times are
                        # seconds from the start of the day.
                        Time1 = (time_curr.hour * 3600.0
                                 + time_curr.minute * 60.0
                                 + time_curr.second
                                 + jj * config.FRAME_STEP_SEC)
                        Time2 = Time1 + config.FRAME_SIZE_SEC
                        print('Found event: ' + str(EventId) + ' Time1: ' +
                              str.format("{0:=.4f}", Time1) + ' Score: ' +
                              str(score_arr[jj]))
                        f.write(str(EventId) + '\t' + 'Spectrogram' + '\t' +
                                str(cc + 1) + '\t' +
                                str.format("{0:=.4f}", Time1) + '\t' +
                                str.format("{0:<.4f}", Time2) + '\t' +
                                str.format("{0:=.1f}", config.BOX_OUT_F1) + '\t' +
                                str.format("{0:=.1f}", config.BOX_OUT_F2) + '\t' +
                                str.format("{0:<.5f}", score_arr[jj]))
                        f.write('\n')
            # newline after the per-channel '#' progress marks
            print('')
    return True
def run_detector_1day(
        stream,
        model,
        SelTabPathDetected,
        filter_args=None,
        N_read=1000,
        N_previous=1000,
        date_format="%Y%m%d_%H%M%S",
        ScoreThre=0.05,
        max_streams=40000,  # large batches make better use of the GPU
        OverlapRatioThre=0.4,
        SelectionTableName='temp',
        SNDSel=False,
        config=None):
    '''Run the model over one day of a data stream and write Raven table(s).

    Produces a score for every frame step of the stream, removes scores
    below ScoreThre, runs non-max suppression on the remainder and writes
    the surviving detections to Raven selection table(s).

    inputs:
        stream - SampleStream of files (see DSP SampleStream/MakeSampleStream)
        model - trained, loaded Keras model, or the full path to one
        SelTabPathDetected - output directory for the Raven selection table
        filter_args - preprocessing dict: FFTSize/HopSize (samples),
            filter_name (from filterlist.py), f_range (Hz cropping limits)
        N_read, N_previous - samples (combined) needed per prediction;
            2k for the basic upcall net
        date_format - date string format of the sound file names
        ScoreThre - detections below this score are dropped (default 0.05)
        max_streams - stream advances buffered before each prediction
        SelectionTableName - base name for the selection/sound table
        SNDSel - whether to produce a sound table
        config - optional configuration object; when given it overrides the
            filter/stream keyword arguments above

    A selection table is written when (1) the stream ends, (2) a new day is
    reached and SNDSel is False, or (3) a file in the stream has different
    parameters (e.g. channel count).

    Returns False when the stream is exhausted.
    '''
    #########################################################################
    # When a config object is supplied, derive the filter and streaming
    # parameters from it (overriding the keyword arguments).
    if config is not None:
        filter_args = {
            'FFTSize': config.FFT_SIZE,
            'HopSize': config.HOP_SIZE,
            'fs': config.SAMPLE_RATE,
            'filter_fx': 'ys_Preprocess'
        }
        fs = filter_args['fs']
        # Samples to advance the streamer and 'previous' samples to retain.
        # Note this is NOT the same as the STFT size and hop.
        FrameStepSec = config.FRAME_STEP_SEC
        FrameStep = int(FrameStepSec * fs)
        FrameSizeSec = config.FRAME_SIZE_SEC  # each window is 2 sec long
        FrameSize = int(FrameSizeSec * fs)
        N_read = FrameStep
        N_previous = FrameSize - N_read
        date_format = config.TIME_FORMAT
        OverlapRatioThre = config.OVERLAP_RATIO
        # Output frequencies of the upcall detector.
        # FIX: the original had a trailing comma after BOX_OUT_F1 which
        # silently turned low_f into a 1-tuple.
        low_f = config.BOX_OUT_F1
        high_f = config.BOX_OUT_F2
        max_streams = config.MAX_STREAMS

    if type(model) == str:
        try:
            model = load_model(model)
        except ValueError:
            # model was compiled with the custom F1_Class metric
            try:
                model = load_model(model,
                                   custom_objects={'F1_Class': F1_Class})
            except Exception:  # narrowed from a bare except
                print('Falure loading model')  # Need fix

    #######################################################################
    # Default bounding-box frequencies when they are absent from
    # filter_args (for the output box in the Raven selection table).
    try:
        filter_args['low_f']
    except KeyError:
        low_f = 50
    try:
        filter_args['high_f']
    except KeyError:
        high_f = 350

    ########################################################################
    # Accumulators for one table's worth of detections.
    RavenTable_out = pd.DataFrame()
    timestamp_array = []        # timestamp of each processed frame
    ScoreArr = np.array([])     # model score per frame/channel
    counter = 0                 # frames buffered since last prediction
    SpectroList = []            # preprocessed spectrograms awaiting predict
    file_array = []             # source file of each processed frame
    previous_channels = stream.stream.channels
    current_channels = stream.stream.channels
    current_date = stream.get_current_timesamp().date()

    # Location of the cached-feature file so a day's spectrograms can be
    # reused across runs (three levels up from the selection-table folder,
    # i.e. the __ExptResult directory).
    SelTabPathDetected = os.path.normpath(SelTabPathDetected)
    path_to_spectro_file = os.path.dirname(
        os.path.dirname(os.path.dirname(SelTabPathDetected)))
    if config.TEST_MODE is True:
        spectro_file_curr_day = os.path.join(
            path_to_spectro_file, 'TEST_' + str(current_date)) + '.npz'
    else:
        spectro_file_curr_day = os.path.join(path_to_spectro_file,
                                             str(current_date)) + '.npz'

    if config.USE_SAVED_FEATURE and os.path.exists(spectro_file_curr_day):
        # Fast path: features for this day were cached by a previous run.
        print('Load the saved feature files...')
        stored_data = np.load(spectro_file_curr_day)
        SpectroList = stored_data['arr_0']
        file_array = stored_data['arr_1']
        timestamp_array = stored_data['arr_2']

        print('Make prediction...')
        ScoreArr = make_predictions(model, SpectroList, previous_channels,
                                    config, streamevent=True)
        ScoreArr = ScoreArr.reshape(-1, previous_channels)
        # drop the final (partial) row of scores
        ScoreArr = ScoreArr[:-1].reshape(-1, previous_channels)

        print('Make dataframe for selection tables')
        Raven_data_frame = make_raven_SndSel_table(ScoreArr,
                                                   OverlapRatioThre,
                                                   file_array,
                                                   timestamp_array,
                                                   ScoreThre,
                                                   config,
                                                   date_format=date_format)
        print('Write to the selection table')
        write_raven_table_to_CSV(Raven_data_frame,
                                 SelTabPathDetected,
                                 SelectionTableName,
                                 SNDSel=SNDSel)
    else:
        print('Extract features from sound files...')
        # seconds spanned by one read (used to back-date the timestamp,
        # because the stream reports the time AFTER the read)
        sec_advance = float(
            (N_read + N_previous) / stream.get_all_stream_fs()[0])
        while True:
            try:
                # load the samples
                samps = stream.read(N=N_read, previousN=N_previous)[0]
                counter += 1
                timestamp = stream.get_current_timesamp() - \
                    datetime.timedelta(seconds=sec_advance)
                current_file = stream.stream.filename
                timestamp_array.append(timestamp)
                file_array.append(current_file)
                # preprocess every channel of this frame
                for ii in range(samps.shape[1]):
                    Spectro = preprocess(samps[:, ii], config)
                    SpectroList.append([Spectro])
                # Predict in large batches to use the GPU efficiently.
                if counter == config.MAX_STREAMS:
                    preds = make_predictions(model, SpectroList,
                                             current_channels, config,
                                             streamevent=False)
                    ScoreArr = np.append(ScoreArr, preds)
                    SpectroList = []
                    counter = 0
            except (StreamGap, StreamEnd) as S:
                # A stream event was thrown after a read: predict whatever
                # is still buffered before deciding what to do next.
                print('Make prediction')
                preds = make_predictions(model, SpectroList,
                                         previous_channels, config,
                                         streamevent=True)
                ScoreArr = np.append(ScoreArr, preds)
                ScoreArr = ScoreArr.reshape(-1, previous_channels)

                # Cache the day's features for later runs.
                if config.USE_SAVED_FEATURE is True:
                    print('Save features to numpy binary files...')
                    np.savez(spectro_file_curr_day, SpectroList, file_array,
                             timestamp_array)

                if type(S).__name__ == 'StreamEnd':
                    # End of stream: build and write the selection table.
                    print('Make dataframe for selection tables')
                    Raven_data_frame = make_raven_SndSel_table(
                        ScoreArr, OverlapRatioThre, file_array,
                        timestamp_array, ScoreThre, config,
                        date_format=date_format)
                    print('Write to the selection table')
                    write_raven_table_to_CSV(Raven_data_frame,
                                             SelTabPathDetected,
                                             SelectionTableName,
                                             SNDSel=SNDSel)
                    return False
                elif type(S).__name__ == 'StreamGap':
                    # Gap in the recording: drop buffered state and continue
                    # with the next contiguous segment.
                    SpectroList = []
                    file_array = []
                    timestamp_array = []
                    counter = 0
                    ScoreArr = []
                    continue
def run_detector_days_dsp_speedy(day_sound_path_list, seltab_out_path,
                                 classifier_model_path, config):
    """Run the detector over a list of day folders; main detection engine.

    Args:
        day_sound_path_list: paths of deployment sound folders
        seltab_out_path: directory where the selection tables are written
        classifier_model_path: path to the trained classifier model
        config: parameter configuration object

    Returns:
        False when the stream is exhausted.

    NOTE(review): this function was work-in-progress — several names it
    used were never defined and raised NameError on first use.  They are
    initialized below from `config` and the arguments, mirroring the
    sibling run_detector functions; confirm the intended values.
    """
    # custom_objects is required for the project-specific F1_Class metric
    classifier_model = load_model(classifier_model_path,
                                  custom_objects={'F1_Class': F1_Class})

    stream = MakeSoundStream(day_sound_path_list)

    ############################################
    # True right after a table write: suppresses a duplicate prediction if
    # a stream event is thrown before new data accumulates.
    IS_MAX_STREAMS = False
    previous_channels = stream.stream.channels
    previous_date = stream.get_current_timesamp().date()
    score_arr = np.array([])   # model score per frame/channel
    fea_spectro = []           # preprocessed spectrograms awaiting predict
    count_stream = 0           # stream advances since the last prediction

    # --- initializations missing in the original (raised NameError) ------
    timestamp_array = []                # timestamp of each processed frame
    file_array = []                     # source file of each processed frame
    RavenTable_out = pd.DataFrame()     # accumulated detections
    OverlapRatioThre = config.OVERLAP_RATIO
    date_format = config.TIME_FORMAT
    SelTabPathDetected = seltab_out_path
    SelectionTableName = 'temp'  # TODO confirm the desired table name
    SNDSel = False               # TODO confirm whether a sound table is wanted

    while True:
        try:
            timestamp = stream.get_current_timesamp()
            current_file = stream.stream.filename
            # number of channels in the current file
            current_channels = stream.stream.channels
            current_date = stream.get_current_timesamp().date()
            # load the samples
            samps = stream.read(N=config.N_READ, previousN=config.N_PREV)[0]

            if (current_channels != previous_channels) or (
                    current_date != previous_date):
                # New channel format or a new day: flush the buffered data
                # into a selection table before continuing.
                print(
                    'New Channel Format or sound selection for day writing table'
                )
                preds = classifier_model.predict(fea_spectro)[:, 1]
                score_arr = np.append(score_arr, preds)
                score_arr = score_arr.reshape(-1, previous_channels)

                # Threshold(s): either the configured value or a sweep over
                # 20 quantile values of the observed scores.
                if config.ScoreThre is None:
                    aa = pd.Series(score_arr.flatten())
                    thresholds = aa.quantile(np.linspace(min(aa), max(aa), 20))
                    del aa
                else:
                    thresholds = [config.ScoreThre]

                # Create a selection table entry for every threshold.
                for threshold in thresholds:
                    print(threshold)
                    Raven_data_frame = Make_raven_SndSel_table(
                        current_channels, score_arr, OverlapRatioThre,
                        file_array, timestamp_array, threshold,
                        date_format=date_format)
                    RavenTable_out = RavenTable_out.append(Raven_data_frame)

                SelectionTableName_out = SelectionTableName
                # Export the raven table
                write_raven_table_to_CSV(RavenTable_out, SelTabPathDetected,
                                         SelectionTableName_out,
                                         SNDSel=SNDSel)

                # Reset the accumulators for the next table.
                RavenTable_out = pd.DataFrame()
                score_arr = []
                fea_spectro = []  # FIX: original reset the unused name SpectroList
                timestamp_array = []
                file_array = []
                previous_channels = current_channels
                previous_date = current_date
                # Tell the handler below not to predict again on the next
                # stream event.
                IS_MAX_STREAMS = True
            else:
                # advance the counter
                count_stream += 1
                timestamp_array.append(timestamp)
                file_array.append(current_file)
                # preprocess every channel of this frame
                for cc in range(samps.shape[1]):
                    Spectro = preprocess(samps[:, cc], config)
                    fea_spectro.append([Spectro])
                # Predict in large batches to use the GPU efficiently.
                # (The tail shorter than MAX_STREAMS is handled by the
                # stream-event handler below.)
                if count_stream == config.MAX_STREAMS:
                    preds = classifier_model.predict(fea_spectro)[:, 1]
                    score_arr = np.append(score_arr, preds)
                    fea_spectro = []
                    count_stream = 0
                previous_channels = current_channels
                previous_date = current_date
                IS_MAX_STREAMS = False
        except (StreamGap, StreamEnd) as S:  # FIX: narrowed from bare Exception
            if IS_MAX_STREAMS is not True:
                # Predict whatever is still buffered.
                preds = classifier_model.predict(fea_spectro)[:, 1]
                # FIX: original appended the undefined name `pred`
                score_arr = np.append(score_arr, preds)
                score_arr = score_arr.reshape(-1, current_channels)
                Raven_data_frame = Make_raven_SndSel_table(
                    current_channels, score_arr, OverlapRatioThre,
                    file_array, timestamp_array, config.ScoreThre,
                    date_format=date_format)
                RavenTable_out = RavenTable_out.append(Raven_data_frame)
            # End of stream: write out everything accumulated so far.
            # (A redundant second prediction with undefined names was
            # removed here; the buffered data was already predicted above.)
            if type(S).__name__ == 'StreamEnd':
                write_raven_table_to_CSV(RavenTable_out, SelTabPathDetected,
                                         SelectionTableName + '',
                                         SNDSel=SNDSel)
                return False
            elif type(S).__name__ == 'StreamGap':
                # Gap in the recording: drop buffered state and continue.
                fea_spectro = []
                file_array = []
                timestamp_array = []
                count_stream = 0
                score_arr = []
                IS_MAX_STREAMS = False
                print('I passed a stream gap!!')
def run_detector(
        stream,
        model,
        SelTabPathDetected,
        filter_args=None,
        N_read=1000,
        N_previous=1000,
        date_format="%Y%m%d_%H%M%S",
        ScoreThre=0.05,
        max_streams=40000,  # large batches make better use of the GPU
        OverlapRatioThre=0.4,
        SelectionTableName='temp',
        SNDSel=False,
        config=None):
    '''Run the model over the data stream and write Raven table(s).

    Produces a score for every frame step of the stream, removes scores
    below ScoreThre, runs non-max suppression on the remainder and writes
    the surviving detections to Raven selection table(s).

    inputs:
        stream - SampleStream of files (see DSP SampleStream/MakeSampleStream)
        model - trained, loaded Keras model, or the full path to one
        SelTabPathDetected - output directory for the Raven selection table
        filter_args - preprocessing dict: FFTSize/HopSize (samples),
            filter_name (from filterlist.py), f_range (Hz cropping limits)
        N_read, N_previous - samples (combined) needed per prediction;
            2k for the basic upcall net
        date_format - date string format of the sound file names
        ScoreThre - detections below this score are dropped (default 0.05)
        max_streams - stream advances buffered before each prediction
        SelectionTableName - base name for the selection/sound table
        SNDSel - whether to produce a sound table
        config - optional configuration object; when given it overrides the
            filter/stream keyword arguments above

    A selection table is written when (1) the stream ends, (2) a new day is
    reached and SNDSel is False, or (3) a file in the stream has different
    parameters (e.g. channel count).

    Returns False when the stream is exhausted.
    '''
    #########################################################################
    # When a config object is supplied, derive the filter and streaming
    # parameters from it (overriding the keyword arguments).
    if config is not None:
        filter_args = {
            'FFTSize': config.FFT_SIZE,
            'HopSize': config.HOP_SIZE,
            'fs': config.SAMPLE_RATE,
            'filter_fx': 'ys_Preprocess'
        }
        fs = filter_args['fs']
        # Samples to advance the streamer and 'previous' samples to retain.
        # Note this is NOT the same as the STFT size and hop.
        FrameStepSec = config.FRAME_STEP_SEC
        FrameStep = int(FrameStepSec * fs)
        FrameSizeSec = config.FRAME_SIZE_SEC  # each window is 2 sec long
        FrameSize = int(FrameSizeSec * fs)
        N_read = FrameStep
        N_previous = FrameSize - N_read
        date_format = config.TIME_FORMAT
        OverlapRatioThre = config.OVERLAP_RATIO
        # Output frequencies of the upcall detector.
        # FIX: the original had a trailing comma after BOX_OUT_F1 which
        # silently turned low_f into a 1-tuple.
        low_f = config.BOX_OUT_F1
        high_f = config.BOX_OUT_F2
        max_streams = config.MAX_STREAMS

    if type(model) == str:
        try:
            model = load_model(model)
        except ValueError:
            # model was compiled with the custom F1_Class metric
            try:
                model = load_model(model,
                                   custom_objects={'F1_Class': F1_Class})
            except Exception:  # narrowed from a bare except
                print('Falure loading model')

    #######################################################################
    # Default bounding-box frequencies when they are absent from
    # filter_args (for the output box in the Raven selection table).
    try:
        filter_args['low_f']
    except KeyError:
        low_f = 50
    try:
        filter_args['high_f']
    except KeyError:
        high_f = 350

    ########################################################################
    # Accumulators for one table's worth of detections.
    RavenTable_out = pd.DataFrame()
    timestamp_array = []        # timestamp of each processed frame
    ScoreArr = np.array([])     # model score per frame/channel
    counter = 0                 # frames buffered since last prediction
    SpectroList = []            # preprocessed spectrograms awaiting predict
    file_array = []             # source file of each processed frame
    previous_channels = stream.stream.channels
    current_channels = stream.stream.channels
    previous_date = stream.get_current_timesamp().date()
    current_date = stream.get_current_timesamp().date()
    # FIX: PredictAgain was read before any assignment (NameError when a
    # stream event fired before the first batch prediction).  True matches
    # the state after a normal read: buffered data still needs predicting.
    PredictAgain = True

    # Stream through the files, preprocess and apply the detector.
    while True:
        timestamp = stream.get_current_timesamp()
        current_file = stream.stream.filename
        try:
            # load the samples
            samps = stream.read(N=N_read, previousN=N_previous)[0]
        except (StreamGap, StreamEnd) as S:
            # Event thrown after a read (as opposed to right after writing
            # a table): predict the buffered samples first.
            if PredictAgain:
                ScoreArr = np.append(
                    ScoreArr,
                    make_predictions(model, SpectroList, previous_channels,
                                     config, streamevent=True))
                ScoreArr = ScoreArr.reshape(-1, previous_channels)
                # drop the final (partial) row of scores
                ScoreArr = ScoreArr[:-1].reshape(-1, previous_channels)
                # Make the raven table
                Raven_data_frame = make_raven_SndSel_table(
                    ScoreArr, OverlapRatioThre, file_array, timestamp_array,
                    ScoreThre, config, date_format=date_format)

            if type(S).__name__ == 'StreamEnd':
                # End of stream: write the dataframe once, then return on
                # the next pass.  (The append is guarded so an unbound
                # Raven_data_frame is never touched on the second pass.)
                if PredictAgain:
                    RavenTable_out = RavenTable_out.append(Raven_data_frame)
                    write_raven_table_to_CSV(RavenTable_out,
                                             SelTabPathDetected,
                                             SelectionTableName,
                                             SNDSel=SNDSel)
                    PredictAgain = False
                    continue
                return False
            elif type(S).__name__ == 'StreamGap':
                # Gap in the recording: drop buffered state and continue.
                SpectroList = []
                file_array = []
                timestamp_array = []
                counter = 0
                ScoreArr = []
                continue

        # advance the counter
        counter += 1
        # parameters of the file we just read from
        current_channels = stream.stream.channels
        current_date = stream.get_current_timesamp().date()
        timestamp_array.append(timestamp)
        file_array.append(current_file)
        # preprocess every channel of this frame
        for ii in range(samps.shape[1]):
            Spectro = preprocess(samps[:, ii], config)
            SpectroList.append([Spectro])

        if (current_channels != previous_channels) or \
                (SNDSel is False and current_date != previous_date):
            # New channel format or (when writing daily tables) a new day:
            # flush the buffered data into a selection table.
            if PredictAgain:
                preds = make_predictions(model, SpectroList,
                                         previous_channels, config,
                                         streamevent=False)
                ScoreArr = np.append(ScoreArr, preds)
                ScoreArr = ScoreArr.reshape(-1, previous_channels)
                Raven_data_frame = make_raven_SndSel_table(
                    ScoreArr, OverlapRatioThre, file_array, timestamp_array,
                    ScoreThre, config, date_format=date_format)
                RavenTable_out = RavenTable_out.append(Raven_data_frame)
                # Export the raven table
                write_raven_table_to_CSV(RavenTable_out, SelTabPathDetected,
                                         SelectionTableName, SNDSel=SNDSel)
            # Reset everything
            RavenTable_out = pd.DataFrame()
            ScoreArr = []
            SpectroList = []
            timestamp_array = []
            file_array = []
            previous_channels = current_channels
            previous_date = current_date
            counter = 0
            # tell the reader not to write a dataframe on the next
            # stream gap
            PredictAgain = False
        else:
            # Predict in large batches to use the GPU efficiently.
            if counter == max_streams:
                preds = make_predictions(model, SpectroList,
                                         current_channels, config,
                                         streamevent=False)
                ScoreArr = np.append(ScoreArr, preds)
                # reset the list
                SpectroList = []
                counter = 0
            previous_channels = current_channels
            previous_date = current_date
            PredictAgain = True