def get_float_mask(wave, channelmask, channelgraph, sdfactor): ''' Input arguments are: wave An array of shape (nsamples, nchannels) giving the aligned wave on all channels channelmask A boolean array of length nchannels giving the unmasked channels returned for the connected component channelgraph The graph of the channels, a dictionary with keys the channel indices and values a set of neighbouring channels sdfactor The standard deviation, so that wave/sdfactor is dimensionless Should return an array of floats between 0 and 1 of length nchannels. ''' # wavemax should be the maximum (or minimum in case of negative thresholds) # value of the wave for each channel, we use this to construct the mask if Parameters['DETECT_POSITIVE']: wavemax = amax(abs(wave), axis=0) else: wavemax = amax(-wave, axis=0) # z score is this value normalised by the standard deviation z = wavemax / sdfactor zmin, zmax = Parameters['FLOAT_MASK_THRESH_SD'] # x score is between 0 and 1, 0 at the minimum threshold in SD, and 1 at the # maximum threshold in SD x = clip((z - zmin) / (zmax - zmin), 0, 1) # x = (z-zmin)/(zmax-zmin) #For use when actual values are desired (use # together with a high value of ADDITIONAL_FLOAT_PENUMBRA) if Parameters['USE_INTERPOLATION']: # the interpolation function should use the channelmask channelmask = add_penumbra(channelmask, channelgraph, Parameters['ADDITIONAL_FLOAT_PENUMBRA']) # and this function varies from 0 to 1 for x varying from 0 to 1 return eval(Parameters['FLOAT_MASK_INTERPOLATION']) * channelmask else: newchannelmask = channelmask.astype(float32) channelmaskdifference = {} for j in range(Parameters['ADDITIONAL_FLOAT_PENUMBRA']): channelmaskdifference[j] = ( add_penumbra(channelmask, channelgraph, j + 1) * 1 - add_penumbra(channelmask, channelgraph, j) * 1) channelmaskdifference[j] = channelmaskdifference[j].astype(float32) channelmaskdifference[j] = channelmaskdifference[j] / \ (2 ** (j + 1)) newchannelmask = newchannelmask + channelmaskdifference[j] return newchannelmask
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat, ChannelsToUse, ChannelGraph, max_spikes=None): # some global variables we use CHUNK_SIZE = Parameters['CHUNK_SIZE'] CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH'] DTYPE = Parameters['DTYPE'] CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP'] N_CH = Parameters['N_CH'] S_JOIN_CC = Parameters['S_JOIN_CC'] S_BEFORE = Parameters['S_BEFORE'] S_AFTER = Parameters['S_AFTER'] THRESH_SD = Parameters['THRESH_SD'] THRESH_SD_LOWER = Parameters['THRESH_SD_LOWER'] # filter coefficents for the high pass filtering filter_params = get_filter_params() print filter_params progress_bar = ProgressReporter() #m A code that writes out a high-pass filtered version of the raw data (.fil file) fil_writer = FilWriter(DatFileNames, n_ch_dat) # Just use first dat file for getting the thresholding data with open(DatFileNames[0], 'rb') as fd: # Use 5 chunks to figure out threshold DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse, num_samples(DatFileNames[0], n_ch_dat)) FilteredChunk = apply_filtering(filter_params, DatChunk) # get the STD of the beginning of the filtered data if Parameters['USE_HILBERT']: first_chunks_std = np.std(FilteredChunk) print 'first_chunks_std', first_chunks_std, '\n' else: if Parameters['USE_SINGLE_THRESHOLD']: ThresholdSDFactor = np.median(np.abs(FilteredChunk))/.6745 else: ThresholdSDFactor = np.median(np.abs(FilteredChunk), axis=0)/.6745 Threshold = ThresholdSDFactor*THRESH_SD print 'Threshold = ', Threshold, '\n' Parameters['THRESHOLD'] = Threshold #Record the absolute Threshold used # set the high and low thresholds do_pickle = False if Parameters['USE_HILBERT']: ThresholdStrong = Parameters['THRESH_STRONG'] ThresholdWeak = Parameters['THRESH_WEAK'] do_pickle = True elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']:#to be used with a single threshold only ThresholdStrong = Threshold ThresholdWeak = ThresholdSDFactor*THRESH_SD_LOWER do_pickle = True if do_pickle: picklefile = open("threshold.p","wb") pickle.dump([ThresholdStrong,ThresholdWeak], picklefile) threshold_outputstring = 'Threshold strong = ' + repr(ThresholdStrong) + '\n' + 'Threshold weak = ' + repr(ThresholdWeak) log_message(threshold_outputstring) n_samples = num_samples(DatFileNames, n_ch_dat) spike_count = 0 for (DatChunk, s_start, s_end, keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse): ############## FILTERING ######################################## FilteredChunk = apply_filtering(filter_params, DatChunk) # write filtered output to file if Parameters['WRITE_FIL_FILE']: fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end) ############## THRESHOLDING ##################################### # NEW: HILBERT TRANSFORM if Parameters['USE_HILBERT']: FilteredChunkHilbert = np.abs(signal.hilbert(FilteredChunk, axis=0) / first_chunks_std) ** 2 BinaryChunkWeak = FilteredChunkHilbert > ThresholdWeak BinaryChunkStrong = FilteredChunkHilbert > ThresholdStrong BinaryChunkWeak = BinaryChunkWeak.astype(np.int8) BinaryChunkStrong = BinaryChunkStrong.astype(np.int8) #elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']: else: # Usual method #FilteredChunk = apply_filtering(filter_params, DatChunk) Why did you filter twice!!!??? if Parameters['USE_COMPONENT_ALIGNFLOATMASK']: if Parameters['DETECT_POSITIVE']: BinaryChunkWeak = FilteredChunk > ThresholdWeak BinaryChunkStrong = FilteredChunk > ThresholdStrong else: BinaryChunkWeak = FilteredChunk < -ThresholdWeak BinaryChunkStrong = FilteredChunk < -ThresholdStrong BinaryChunkWeak = BinaryChunkWeak.astype(np.int8) BinaryChunkStrong = BinaryChunkStrong.astype(np.int8) else: if Parameters['DETECT_POSITIVE']: BinaryChunk = np.abs(FilteredChunk)>Threshold else: BinaryChunk = (FilteredChunk<-Threshold) BinaryChunk = BinaryChunk.astype(np.int8) # write filtered output to file #if Parameters['WRITE_FIL_FILE']: # fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end) # print 'I am here at line 313' ############### FLOOD FILL ###################################### ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH) if (Parameters['USE_HILBERT'] or Parameters['USE_COMPONENT_ALIGNFLOATMASK']): if Parameters['USE_OLD_CC_CODE']: IndListsChunkOld = connected_components(BinaryChunkWeak, ChannelGraphToUse, S_JOIN_CC) IndListsChunk = [] #Final list of connected components. Go through all \weak' connected components # and only include in final list if there are some samples that also exceed the strong threshold # This method works better than connected_components_twothresholds. for IndListWeak in IndListsChunkOld: # embed() # if sum(BinaryChunkStrong[zip(*IndListWeak)]) != 0: i,j = np.array(IndListWeak).transpose() if sum(BinaryChunkStrong[i,j]) != 0: IndListsChunk.append(IndListWeak) else: IndListsChunk = connected_components_twothresholds(BinaryChunkWeak, BinaryChunkStrong, ChannelGraphToUse, S_JOIN_CC) BinaryChunk = 1 * BinaryChunkWeak + 1 * BinaryChunkStrong else: IndListsChunk = connected_components(BinaryChunk, ChannelGraphToUse, S_JOIN_CC) if Parameters['DEBUG']: #TO DO: Change plot_diagnostics for the HILBERT case if Parameters['USE_HILBERT']: plot_diagnostics_twothresholds(s_start,IndListsChunk,BinaryChunkWeak, BinaryChunkStrong,BinaryChunk,DatChunk,FilteredChunk,FilteredChunkHilbert,ThresholdStrong,ThresholdWeak) elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']: plot_diagnostics_twothresholds(s_start,IndListsChunk,BinaryChunkWeak,BinaryChunkStrong,BinaryChunk,DatChunk,FilteredChunk,-FilteredChunk,ThresholdStrong,ThresholdWeak)#TODO: change HIlbert in plot_diagnostics_twothresholds else: plot_diagnostics(s_start,IndListsChunk,BinaryChunk,DatChunk,FilteredChunk,Threshold) if Parameters['WRITE_BINFIL_FILE']: fil_writer.write_bin(BinaryChunk, s_start, s_end, keep_start, keep_end) #print len(IndListsChunk), 'len(IndListsChunk)' ############## ALIGN AND INTERPOLATE WAVES ####################### nextbits = [] if Parameters['USE_HILBERT']: for IndList in IndListsChunk: try: wave, s_peak, sf_peak, cm, fcm = extract_wave_hilbert_new(IndList, FilteredChunk, FilteredChunkHilbert, S_BEFORE, S_AFTER, N_CH, s_start, ThresholdStrong, ThresholdWeak) s_offset = s_start + s_peak sf_offset = s_start + sf_peak if keep_start<=s_offset<keep_end: spike_count += 1 nextbits.append((wave, s_offset, sf_offset, cm, fcm)) except np.linalg.LinAlgError: s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) except InterpolationError: s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) # and return them in time sorted order nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac) for wave, s, s_frac, cm, fcm in nextbits: uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start, int(s)+S_AFTER-s_start).astype(np.int32) # cm = add_penumbra(cm, ChannelGraphToUse, # Parameters['PENUMBRA_SIZE']) # fcm = get_float_mask(wave, cm, ChannelGraphToUse, # 1.) yield uwave, wave, s, s_frac, cm, fcm # unfiltered wave,wave, s_peak, ChMask, FloatChMask elif Parameters['USE_COMPONENT_ALIGNFLOATMASK']: for IndList in IndListsChunk: try: if Parameters['DETECT_POSITIVE']: wave, s_peak, sf_peak, cm, fcm, comp_normalised, comp_normalised_power = extract_wave_twothresholds(IndList, FilteredChunk, FilteredChunk, S_BEFORE, S_AFTER, N_CH, s_start, ThresholdStrong, ThresholdWeak) else: wave, s_peak, sf_peak, cm, fcm,comp_normalised, comp_normalised_power = extract_wave_twothresholds(IndList, FilteredChunk, -FilteredChunk, S_BEFORE, S_AFTER, N_CH, s_start, ThresholdStrong, ThresholdWeak) s_offset = s_start+s_peak sf_offset = s_start + sf_peak if keep_start<=s_offset<keep_end: spike_count += 1 nextbits.append((wave, s_offset, sf_offset, cm, fcm)) except np.linalg.LinAlgError: s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) except InterpolationError: s = '*** WARNING *** Interpolation error in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) # and return them in time sorted order nextbits.sort(key=lambda (wave, s, s_frac, cm, fcm): s_frac) for wave, s, s_frac, cm, fcm in nextbits: uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start, int(s)+S_AFTER-s_start).astype(np.int32) # cm = add_penumbra(cm, ChannelGraphToUse, # Parameters['PENUMBRA_SIZE']) # fcm = get_float_mask(wave, cm, ChannelGraphToUse, # 1.) yield uwave, wave, s, s_frac, cm, fcm # unfiltered wave,wave, s_peak, ChMask, FloatChMask else: #Original SpikeDetekt. This code duplication is regretable but probably easier to deal with for IndList in IndListsChunk: try: wave, s_peak, sf_peak, cm = extract_wave(IndList, FilteredChunk, S_BEFORE, S_AFTER, N_CH, s_start,Threshold) s_offset = s_start+s_peak sf_offset = s_start + sf_peak if keep_start<=s_offset<keep_end: spike_count += 1 nextbits.append((wave, s_offset, sf_offset, cm)) except np.linalg.LinAlgError: s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) # and return them in time sorted order nextbits.sort(key=lambda (wave, s, s_frac, cm): s_frac) for wave, s, s_frac, cm in nextbits: uwave = get_padded(DatChunk, int(s)-S_BEFORE-s_start, int(s)+S_AFTER-s_start).astype(np.int32) cm = add_penumbra(cm, ChannelGraphToUse, Parameters['PENUMBRA_SIZE']) fcm = get_float_mask(wave, cm, ChannelGraphToUse, ThresholdSDFactor) yield uwave, wave, s, s_frac, cm, fcm # unfiltered wave,wave, s_peak, ChMask, FloatChMask progress_bar.update(float(s_end)/n_samples, '%d/%d samples, %d spikes found'%(s_end, n_samples, spike_count)) if max_spikes is not None and spike_count>=max_spikes: break progress_bar.finish()
def extract_spikes(h5s, basename, DatFileNames, n_ch_dat, ChannelsToUse, ChannelGraph, max_spikes=None): # some global variables we use CHUNK_SIZE = Parameters['CHUNK_SIZE'] CHUNKS_FOR_THRESH = Parameters['CHUNKS_FOR_THRESH'] DTYPE = Parameters['DTYPE'] CHUNK_OVERLAP = Parameters['CHUNK_OVERLAP'] N_CH = Parameters['N_CH'] S_JOIN_CC = Parameters['S_JOIN_CC'] S_BEFORE = Parameters['S_BEFORE'] S_AFTER = Parameters['S_AFTER'] THRESH_SD = Parameters['THRESH_SD'] # filter coefficents for the high pass filtering filter_params = get_filter_params() progress_bar = ProgressReporter() # m A code that writes out a high-pass filtered version of the raw data # (.fil file) fil_writer = FilWriter(DatFileNames, n_ch_dat) # Just use first dat file for getting the thresholding data with open(DatFileNames[0], 'rb') as fd: # Use 5 chunks to figure out threshold DatChunk = get_chunk_for_thresholding(fd, n_ch_dat, ChannelsToUse, num_samples(DatFileNames[0], n_ch_dat)) FilteredChunk = apply_filtering(filter_params, DatChunk) # .6745 converts median to standard deviation if Parameters['USE_SINGLE_THRESHOLD']: ThresholdSDFactor = np.median(np.abs(FilteredChunk)) / .6745 else: ThresholdSDFactor = np.median( np.abs(FilteredChunk), axis=0) / .6745 Threshold = ThresholdSDFactor * THRESH_SD print 'Threshold = ', Threshold, '\n' # Record the absolute Threshold used Parameters['THRESHOLD'] = Threshold n_samples = num_samples(DatFileNames, n_ch_dat) spike_count = 0 for (DatChunk, s_start, s_end, keep_start, keep_end) in chunks(DatFileNames, n_ch_dat, ChannelsToUse): ############## FILTERING ######################################## FilteredChunk = apply_filtering(filter_params, DatChunk) # write filtered output to file # if Parameters['WRITE_FIL_FILE']: fil_writer.write(FilteredChunk, s_start, s_end, keep_start, keep_end) ############## THRESHOLDING ##################################### if Parameters['DETECT_POSITIVE']: BinaryChunk = np.abs(FilteredChunk) > Threshold else: BinaryChunk = (FilteredChunk < -Threshold) BinaryChunk = BinaryChunk.astype(np.int8) # write binary chunk filtered output to file if Parameters['WRITE_BINFIL_FILE']: fil_writer.write_bin( BinaryChunk, s_start, s_end, keep_start, keep_end) ############### FLOOD FILL ###################################### ChannelGraphToUse = complete_if_none(ChannelGraph, N_CH) IndListsChunk = connected_components(BinaryChunk, ChannelGraphToUse, S_JOIN_CC) if Parameters['DEBUG']: plot_diagnostics( s_start, IndListsChunk, BinaryChunk, DatChunk, FilteredChunk, Threshold) fil_writer.write_bin( BinaryChunk, s_start, s_end, keep_start, keep_end) ############## ALIGN AND INTERPOLATE WAVES ####################### nextbits = [] for IndList in IndListsChunk: try: wave, s_peak, cm = extract_wave(IndList, FilteredChunk, S_BEFORE, S_AFTER, N_CH, s_start, Threshold) s_offset = s_start + s_peak if keep_start <= s_offset < keep_end: spike_count += 1 nextbits.append((wave, s_offset, cm)) except np.linalg.LinAlgError: s = '*** WARNING *** Unalignable spike discarded in chunk {chunk}.'.format( chunk=(s_start, s_end)) log_warning(s) # and return them in time sorted order nextbits.sort(key=lambda wave_s_cm: wave_s_cm[1]) for wave, s, cm in nextbits: uwave = get_padded(DatChunk, int(s) - S_BEFORE - s_start, int(s) + S_AFTER - s_start).astype(np.int32) cm = add_penumbra(cm, ChannelGraphToUse, Parameters['PENUMBRA_SIZE']) fcm = get_float_mask(wave, cm, ChannelGraphToUse, ThresholdSDFactor) yield uwave, wave, s, cm, fcm progress_bar.update(float(s_end) / n_samples, '%d/%d samples, %d spikes found' % (s_end, n_samples, spike_count)) if max_spikes is not None and spike_count >= max_spikes: break progress_bar.finish()