def slice_clusters(params, result, to_remove=[], to_merge=[], extension='', light=False):
    """Remove clusters from ``result`` and rewrite the clusters HDF5 file.

    Only the MPI process with ``comm.rank == 0`` does the work; every process
    then meets at the final ``comm.Barrier()``.

    Arguments:
        params
            Configuration object; ``file_out_suff`` and ``N_e`` are read from
            its ``data`` section.
        result: dict
            Clustering results, updated in place. Holds an ``electrodes``
            array plus, for every electrode ``e``, the entries
            ``clusters_e``, ``data_e``, ``times_e`` and ``peaks_e``.
        to_remove: list (optional)
            Indices into ``result['electrodes']`` of the templates to drop.
            This argument is never modified.
        to_merge: list | numpy.ndarray (optional)
            Sequence of pairs; the second element of each pair is dropped as
            well.
        extension: string (optional)
            Inserted in the output name
            ``<file_out_suff>.clusters<extension>.hdf5``.
        light: boolean (optional)
            If True, ``data_e``, ``times_e`` and ``peaks_e`` are read back from
            ``<file_out_suff>.clusters.hdf5`` instead of from ``result``.
    """
    import h5py, shutil

    file_out_suff = params.get('data', 'file_out_suff')
    N_e = params.getint('data', 'N_e')

    if comm.rank == 0:

        print_and_log(['Node 0 is slicing clusters'], 'debug', logger)

        # Work on a private copy: appending to ``to_remove`` itself would
        # modify the caller's list and, worse, the shared ``[]`` default, so
        # that indices from one call would leak into the next one.
        to_delete = list(to_remove)
        to_delete += [pair[1] for pair in to_merge]
        # Force an integer dtype: ``numpy.unique`` of an empty list is a float
        # array, which is not a valid index array for ``numpy.delete``.
        to_delete = numpy.unique(numpy.asarray(to_delete, dtype=numpy.int64))

        # For every electrode, the positions of the spikes that belong to a
        # deleted cluster.
        all_elements = [[] for _ in xrange(N_e)]
        for target in to_delete:
            elec = result['electrodes'][target]
            labels = result['clusters_' + str(elec)]
            # Rank of the target among the templates of its electrode.
            nic = target - numpy.where(result['electrodes'] == elec)[0][0]
            valid_labels = numpy.unique(labels[labels > -1])
            all_elements[elec] += list(numpy.where(labels == valid_labels[nic])[0])

        # In light mode the bulky arrays come from the file on disk; it is
        # opened once and always closed, even if slicing fails.
        source = None
        if light:
            source = h5py.File(file_out_suff + '.clusters.hdf5', 'r', libver='earliest')
        try:
            for elec in xrange(N_e):
                suffix = str(elec)
                dropped = all_elements[elec]
                if light:
                    data = source.get('data_' + suffix)[:]
                    times = source.get('times_' + suffix)[:]
                    peaks = source.get('peaks_' + suffix)[:]
                else:
                    data = result['data_' + suffix]
                    times = result['times_' + suffix]
                    peaks = result['peaks_' + suffix]
                result['clusters_' + suffix] = numpy.delete(result['clusters_' + suffix], dropped)
                result['data_' + suffix] = numpy.delete(data, dropped, axis=0)
                result['times_' + suffix] = numpy.delete(times, dropped)
                result['peaks_' + suffix] = numpy.delete(peaks, dropped)
        finally:
            if source is not None:
                source.close()

        result['electrodes'] = numpy.delete(result['electrodes'], to_delete)

        # Write to a temporary name first, then swap it into place.
        temporary_path = file_out_suff + '.clusters-new.hdf5'
        output_path = file_out_suff + '.clusters%s.hdf5' % extension
        with h5py.File(temporary_path, 'w', libver='earliest') as cfile:
            to_write = ['data_', 'clusters_', 'times_', 'peaks_']
            for ielec in xrange(N_e):
                write_datasets(cfile, to_write, result, ielec)
            write_datasets(cfile, ['electrodes'], result)

        if os.path.exists(output_path):
            os.remove(output_path)
        shutil.move(temporary_path, output_path)

    comm.Barrier()
def generate_matlab_mapping(probe):
    """Write the probe layout to a temporary HDF5 file and return its path.

    Arguments:
        probe: dict
            Probe description. ``probe['channel_groups']`` maps group keys to
            dicts with a ``geometry`` mapping (channel -> position) and a
            ``channels`` list; ``probe['total_nb_channels']`` is stored as is.

    Returns:
        string
            Path of the HDF5 file, which contains ``positions`` (channel
            positions ordered by channel id and divided by 10),
            ``permutation`` (the sorted channel ids) and ``nb_total``.
            The file is not deleted by this function.
    """
    p = {}
    positions = []
    nodes = []
    for group in probe['channel_groups'].values():
        p.update(group['geometry'])
        nodes += group['channels']
        positions += [p[channel] for channel in group['channels']]
    idx = numpy.argsort(nodes)
    positions = numpy.array(positions)[idx]

    # Reserve the final path itself. Appending '.hdf5' to the name of an
    # already discarded temporary file yields a path nobody has reserved.
    with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=False) as handle:
        t = handle.name

    to_write = {
        'positions': positions / 10.,
        'permutation': numpy.sort(nodes),
        'nb_total': numpy.array([probe['total_nb_channels']]),
    }
    # Mode 'w' truncates the empty placeholder created above.
    cfile = h5py.File(t, 'w')
    try:
        write_datasets(cfile, list(to_write.keys()), to_write)
    finally:
        cfile.close()
    return t
def slice_clusters(params, result, to_remove=[], to_merge=[], extension='',
                   input_extension='', light=False, method='safe'):
    """Slice clusters in HDF5 templates.

    Only the MPI process with ``comm.rank == 0`` does any work.

    Arguments:
        params
            Configuration object; ``file_out_suff``, ``N_e`` and
            ``hdf5_compress`` are read from its ``data`` section.
        result: dict
            Clustering results, updated in place. Holds an ``electrodes``
            array plus, for every electrode ``e``, the entries
            ``clusters_e``, ``data_e``, ``times_e`` and ``peaks_e``.
        to_remove: list (optional)
            Indices into ``result['electrodes']`` of the templates to drop.
            This argument is never modified.
        to_merge: list | numpy.ndarray (optional)
            Sequence of pairs; the second element of each pair is dropped as
            well.
        extension: string (optional)
            Inserted in the output name
            ``<file_out_suff>.clusters<extension>.hdf5``.
            The default value is ''.
        input_extension: string (optional)
            Extension of the templates that are loaded and, in light mode, of
            the clusters file that is read.
            The default value is ''.
        light: boolean (optional)
            If True, ``data_e``, ``times_e`` and ``peaks_e`` are read back from
            the input clusters file instead of from ``result``.
        method: string (optional)
            How ``result['electrodes']`` is updated: 'safe' deletes the
            dropped indices, 'new' selects the kept template indices.

    Raises:
        ValueError
            If ``method`` is neither 'safe' nor 'new' (raised on rank 0 only,
            before ``result`` or any file is modified).
    """
    file_out_suff = params.get('data', 'file_out_suff')
    N_e = params.getint('data', 'N_e')
    hdf5_compress = params.getboolean('data', 'hdf5_compress')

    if comm.rank == 0:

        print_and_log(['Node 0 is slicing clusters'], 'debug', logger)

        # Fail before touching ``result`` rather than after slicing it.
        if method not in ('safe', 'new'):
            raise ValueError("Unexpected method value: {}".format(method))

        old_templates = load_data(params, 'templates', extension=input_extension)
        _, N_tm = old_templates.shape

        # Determine the template indices to delete. ``len`` is used instead
        # of ``!= []`` because ``to_merge`` may be a numpy array.
        to_delete = list(to_remove)
        if len(to_merge) > 0:
            to_delete += [pair[1] for pair in to_merge]
        # Force an integer dtype: ``numpy.unique`` of an empty list is a float
        # array, which is not a valid index array.
        to_delete = numpy.unique(numpy.asarray(to_delete, dtype=numpy.int64))

        # For every electrode, the positions of the spikes that belong to a
        # deleted cluster.
        all_elements = [[] for _ in xrange(N_e)]
        for target in to_delete:
            elec = result['electrodes'][target]
            labels = result['clusters_' + str(elec)]
            # Rank of the target among the templates of its electrode.
            nic = target - numpy.where(result['electrodes'] == elec)[0][0]
            valid_labels = numpy.unique(labels[labels > -1])
            all_elements[elec] += list(numpy.where(labels == valid_labels[nic])[0])

        # The input file is only needed in light mode; it is always closed,
        # even if slicing fails.
        source = None
        if light:
            myfilename = file_out_suff + '.clusters{}.hdf5'.format(input_extension)
            source = h5py.File(myfilename, 'r', libver='earliest')
        try:
            for elec in xrange(N_e):
                suffix = str(elec)
                dropped = all_elements[elec]
                if light:
                    data = source.get('data_' + suffix)[:]
                    times = source.get('times_' + suffix)[:]
                    peaks = source.get('peaks_' + suffix)[:]
                else:
                    data = result['data_' + suffix]
                    times = result['times_' + suffix]
                    peaks = result['peaks_' + suffix]
                result['clusters_' + suffix] = numpy.delete(result['clusters_' + suffix], dropped)
                result['data_' + suffix] = numpy.delete(data, dropped, axis=0)
                result['times_' + suffix] = numpy.delete(times, dropped)
                result['peaks_' + suffix] = numpy.delete(peaks, dropped)
        finally:
            if source is not None:
                source.close()

        if method == 'safe':
            result['electrodes'] = numpy.delete(result['electrodes'], to_delete)
        else:
            # Sorted integer indices of the templates to keep; a plain set
            # difference guarantees neither the order nor an integer dtype.
            to_keep = numpy.setdiff1d(numpy.arange(N_tm // 2), to_delete)
            result['electrodes'] = result['electrodes'][to_keep]

        # Write to a temporary name first, then swap it into place.
        temporary_path = file_out_suff + '.clusters{}.hdf5'.format('-new')
        output_path = file_out_suff + '.clusters{}.hdf5'.format(extension)
        with h5py.File(temporary_path, 'w', libver='earliest') as cfile:
            to_write = ['data_', 'clusters_', 'times_', 'peaks_']
            for ielec in xrange(N_e):
                write_datasets(cfile, to_write, result, ielec, compression=hdf5_compress)
            write_datasets(cfile, ['electrodes'], result)

        if os.path.exists(output_path):
            os.remove(output_path)
        shutil.move(temporary_path, output_path)
def _median_absolute_deviations(chunk, nb_channels):
    """Return the per-channel median absolute deviation of ``chunk``.

    ``chunk`` is indexed as ``chunk[:, channel]``; the result is a float32
    array of length ``nb_channels``.
    """
    mads = numpy.zeros(nb_channels, dtype=numpy.float32)
    for i in range(nb_channels):
        center = numpy.median(chunk[:, i], 0)
        mads[i] = numpy.median(numpy.abs(chunk[:, i] - center), 0)
    return mads


def main(params, nb_cpu, nb_gpu, use_gpu):
    """Compute whitening matrices and thresholds, then build the PCA basis.

    Results are stored in ``<file_out_suff>.basis.hdf5``:

    * Part 1 writes ``thresholds`` (per-electrode median absolute deviation
      of one chunk per process, averaged over processes) and, when enabled,
      the ``temporal`` and ``spatial`` whitening entries; the thresholds are
      then recomputed on whitened data.
    * Part 2 collects waveforms around detected peaks, fits a PCA per peak
      sign and writes the projection/reconstruction matrices, the median
      waveform and a sample of waveforms. With matched filtering enabled,
      ``matched_thresholds`` / ``matched_thresholds_pos`` are written too.

    The process exits through ``sys.exit(0)`` when no waveform was found.

    Arguments:
        params
            Configuration object (sections ``data``, ``detection``,
            ``whitening`` and ``triggers`` are read).
        nb_cpu, nb_gpu
            Only used to choose the GPU device id when ``use_gpu`` is set.
        use_gpu: boolean
            If True, spatial whitening is applied through ``cudamat``.
    """
    # Part 1: Whitening
    numpy.random.seed(420)
    _ = init_logging(params.logfile)
    logger = logging.getLogger('circus.whitening')
    #################################################################
    data_file = params.data_file
    N_e = params.getint('data', 'N_e')
    hdf5_compress = params.getboolean('data', 'hdf5_compress')
    N_total = params.nb_channels
    N_t = params.getint('detection', 'N_t')
    dist_peaks = params.getint('detection', 'dist_peaks')
    template_shift = params.getint('detection', 'template_shift')
    file_out_suff = params.get('data', 'file_out_suff')
    spike_width = params.getfloat('detection', 'spike_width')
    matched_filter = params.getboolean('detection', 'matched-filter')
    fudge = params.getfloat('whitening', 'fudge')
    sign_peaks = params.get('detection', 'peaks')
    do_temporal_whitening = params.getboolean('whitening', 'temporal')
    do_spatial_whitening = params.getboolean('whitening', 'spatial')
    ignore_spikes = params.getboolean('whitening', 'ignore_spikes')
    chunk_size = detect_memory(params, whitening=True)
    nodes, edges = get_nodes_and_edges(params)
    safety_time = params.getint('whitening', 'safety_time')
    safety_space = params.getboolean('whitening', 'safety_space')
    sort_waveforms = params.getboolean('whitening', 'sort_waveforms')
    nb_temp_white = min(max(20, comm.size), N_e)
    max_silence_2 = 5000
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.arange(len(nodes))
    jitter_range = params.getint('detection', 'jitter_range')
    template_shift_2 = template_shift + jitter_range
    use_hanning = params.getboolean('detection', 'hanning')
    rejection_threshold = params.getfloat('detection', 'rejection_threshold')
    noise_window = params.getint('detection', 'noise_time')
    basis_path = file_out_suff + '.basis.hdf5'
    data_file.open()
    #################################################################

    if use_hanning:
        hanning_filter = numpy.hanning(N_t)

    if comm.rank == 0:
        print_and_log(
            ["Analyzing data to get whitening matrices and thresholds..."],
            'default', logger)

    # For every electrode, the (re-indexed) electrodes connected to it.
    nodes_indices = {}
    for elec in numpy.arange(N_e):
        nodes_indices[elec] = inv_nodes[edges[nodes[elec]]]

    if use_gpu:
        import cudamat as cmt
        # Need to properly handle multi GPU per MPI nodes?
        if nb_gpu > nb_cpu:
            gpu_id = int(comm.rank // nb_cpu)
        else:
            gpu_id = 0
        cmt.cuda_set_device(gpu_id)
        cmt.init()
        cmt.cuda_sync_threads()

    nb_chunks, _ = data_file.analyze(chunk_size)

    if nb_chunks < comm.size:
        res = io.data_stats(params, show=False)
        chunk_size = int(res * params.rate // comm.size)
        if comm.rank == 0:
            print_and_log(
                ["Too much cores, automatically resizing the data chunks"],
                'debug', logger)
        nb_chunks, _ = data_file.analyze(chunk_size)

    # I guess this is more relevant, to take signals from all over the recordings.
    if nb_chunks > comm.size:
        all_chunks = numpy.random.permutation(
            numpy.arange(nb_chunks - 1, dtype=numpy.int32))
    else:
        all_chunks = numpy.random.permutation(
            numpy.arange(nb_chunks, dtype=numpy.int32))

    all_electrodes = numpy.random.permutation(N_e)

    # From here on every process draws its own random numbers.
    numpy.random.seed(comm.rank)

    # Each process analyzes exactly one chunk.
    for gidx in [all_chunks[comm.rank]]:

        local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)
        local_shape = len(local_chunk)

        # Median absolute deviations in the chunk, averaged over processes.
        thresholds = _median_absolute_deviations(local_chunk, N_e)
        gdata = gather_array(thresholds, comm)
        if comm.rank == 0:
            gdata = gdata.reshape((comm.size, N_e))
            thresholds = numpy.mean(gdata, 0)
            with h5py.File(basis_path, 'w', libver='earliest') as bfile:
                io.write_datasets(bfile, ['thresholds'],
                                  {'thresholds': thresholds},
                                  compression=hdf5_compress)
        comm.Barrier()
        thresholds = io.load_data(params, 'thresholds')

        local_borders = (template_shift, local_shape - template_shift)
        found_peaktimes = []

        if ignore_spikes:
            # Extracting the peaks.
            for i in range(N_e):
                peaktimes = scipy.signal.find_peaks(
                    numpy.abs(local_chunk[:, i]), height=thresholds[i],
                    width=spike_width, wlen=N_t)[0]
                peaktimes = peaktimes.astype(numpy.uint32)

                # Removing the useless borders.
                idx = (peaktimes >= local_borders[0]) & (peaktimes < local_borders[1])
                peaktimes = numpy.compress(idx, peaktimes)

                found_peaktimes.append(peaktimes)
        else:
            for i in range(N_e):
                found_peaktimes.append(numpy.zeros(0, dtype=numpy.uint32))

        all_peaktimes = numpy.concatenate(found_peaktimes)
        local_peaktimes = numpy.unique(all_peaktimes)

        if len(local_peaktimes) > 0:

            diff_times = local_peaktimes[-1] - local_peaktimes[0]
            # ``bool`` instead of the ``numpy.bool`` alias, which NumPy 1.24
            # removed.
            all_times = numpy.zeros((N_e, diff_times + 1), dtype=bool)
            padded_peaks = (local_peaktimes - local_peaktimes[0]).astype(numpy.int32)
            min_times = numpy.maximum(padded_peaks - safety_time, 0)
            max_times = numpy.minimum(padded_peaks + safety_time + 1, diff_times + 1)

            test_extremas = numpy.zeros((N_e, diff_times + 1), dtype=bool)
            for i in range(N_e):
                test_extremas[i, found_peaktimes[i] - local_peaktimes[0]] = True

            argmax_peak = numpy.random.permutation(numpy.arange(len(local_peaktimes)))
            all_idx = numpy.take(local_peaktimes, argmax_peak)

            # Selection of the peaks with spatio-temporal masks.
            for idx, peak in zip(argmax_peak, all_idx):
                all_elecs = numpy.where(test_extremas[:, peak - local_peaktimes[0]])[0]
                data = local_chunk[peak, all_elecs]
                elec = all_elecs[numpy.argmax(numpy.abs(data))]
                indices = nodes_indices[elec]
                if safety_space:
                    all_times[indices, min_times[idx]:max_times[idx]] = True
                else:
                    all_times[elec, min_times[idx]:max_times[idx]] = True
        else:
            all_times = numpy.zeros((N_e, len(local_chunk)), dtype=bool)

    if do_temporal_whitening:

        local_res_temp = []

        for elec in all_electrodes[numpy.arange(comm.rank, nb_temp_white, comm.size)]:
            res = numpy.zeros((0, N_t), dtype=numpy.float32)
            scount = 0
            indices = nodes_indices[elec]
            all_times_elec = numpy.any(numpy.take(all_times, indices, axis=0), 0)
            esubset = numpy.where(all_times_elec == False)[0]
            bound = len(esubset) - N_t
            # Collect windows of N_t consecutive unmasked samples.
            while (scount < bound) and (len(res) < max_silence_2):
                myslice = esubset[scount:scount + N_t]
                if numpy.all((myslice - esubset[scount]) == numpy.arange(N_t)):
                    scount += N_t
                    res = numpy.vstack((res, local_chunk[myslice, elec]))
                else:
                    scount += 1
            if len(res) > 5:
                local_res_temp += [numpy.cov(res.T)]

        nb_elecs = numpy.array([len(local_res_temp)], dtype=numpy.float32)
        local_res_temp = numpy.array(local_res_temp, dtype=numpy.float32)
        if len(local_res_temp) == 0:
            local_res_temp = numpy.zeros(0, dtype=numpy.float32)
        else:
            local_res_temp = numpy.sum(local_res_temp, 0)
        all_res_temp = gather_array(local_res_temp.ravel(), comm, 0, 1)
        all_elecs = gather_array(nb_elecs, comm, 0, 1)

    if do_spatial_whitening:

        local_res_spac = numpy.zeros((N_e, N_e), dtype=numpy.float32)
        local_silences = []

        for elec in numpy.arange(comm.rank, N_e, comm.size):
            indices = nodes_indices[elec]
            all_times_elec = numpy.any(numpy.take(all_times, indices, axis=0), 0)
            esubset = numpy.where(all_times_elec == False)[0]
            local_data = local_chunk[esubset][:, indices]
            local_whitening = get_whitening_matrix(
                local_data, fudge=fudge).astype(numpy.float32)
            pos = numpy.where(elec == indices)[0]
            local_res_spac[elec, indices] = local_whitening[pos]
            local_silences += [len(esubset)]

        all_res_spac = gather_array(local_res_spac.ravel(), comm, 0, 1)
        all_silences = gather_array(
            numpy.array(local_silences, dtype=numpy.int32), comm, 0, 1, 'uint32')

    if comm.rank == 0:

        to_write = {}

        if do_temporal_whitening:
            try:
                nb_silences = numpy.sum(all_elecs > 0)
                all_res_temp = all_res_temp.reshape((nb_silences, N_t**2))
            except Exception:
                print_and_log([
                    "No silent periods detected: something wrong with the parameters?"
                ], 'error', logger)
            all_res_temp = numpy.sum(all_res_temp, 0)
            all_res_temp = all_res_temp.reshape((N_t, N_t)) / numpy.sum(all_elecs)
            temporal_whitening = get_whitening_matrix(
                all_res_temp.astype(numpy.double),
                fudge=1e-3)[template_shift].astype(numpy.float32)
            temporal_whitening /= temporal_whitening.sum()
            to_write['temporal'] = temporal_whitening
            have_nans = numpy.sum(numpy.isnan(temporal_whitening))

            if have_nans > 0:
                # Fall back to a filter with a single unit coefficient.
                temporal_whitening = numpy.zeros(N_t, dtype=numpy.float32)
                temporal_whitening[N_t // 2] = 1
                to_write['temporal'] = temporal_whitening
                print_and_log(
                    ["Disabling temporal whitening because of NaNs found"],
                    'info', logger)

        if do_spatial_whitening:
            all_res_spac = all_res_spac.reshape(comm.size, N_e, N_e)
            spatial_whitening = numpy.sum(all_res_spac, 0)
            to_write['spatial'] = spatial_whitening

            if ignore_spikes:
                print_and_log([
                    "Found %gs without spikes to compute the whitening matrix..."
                    % (numpy.mean(all_silences) / params.rate)
                ], 'default', logger)
            else:
                print_and_log([
                    "Found %gs to compute the whitening matrix..."
                    % (numpy.mean(all_silences) / params.rate)
                ], 'default', logger)

            have_nans = numpy.sum(numpy.isnan(spatial_whitening))

            if have_nans > 0:
                # Fall back to the identity matrix.
                spatial_whitening = numpy.eye(spatial_whitening.shape[0],
                                              dtype=numpy.float32)
                to_write['spatial'] = spatial_whitening
                print_and_log(
                    ["Disabling spatial whitening because of NaNs found"],
                    'info', logger)

        with h5py.File(basis_path, 'r+', libver='earliest') as bfile:
            io.write_datasets(bfile, list(to_write.keys()), to_write,
                              compression=hdf5_compress)

    comm.Barrier()

    if do_spatial_whitening or do_temporal_whitening:

        if comm.rank == 0:
            print_and_log(
                ["Because of whitening, need to recompute the thresholds..."],
                'default', logger)

        if do_spatial_whitening:
            spatial_whitening = io.load_data(params, 'spatial_whitening')
            if use_gpu:
                spatial_whitening = cmt.CUDAMatrix(spatial_whitening, copy_on_host=False)
        if do_temporal_whitening:
            temporal_whitening = io.load_data(params, 'temporal_whitening')

        for gidx in [all_chunks[comm.rank]]:
            local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)
            local_shape = len(local_chunk)

            if do_spatial_whitening:
                if use_gpu:
                    local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False)
                    local_chunk = local_chunk.dot(spatial_whitening).asarray()
                else:
                    local_chunk = numpy.dot(local_chunk, spatial_whitening)
            if do_temporal_whitening:
                # ``scipy.ndimage.filters`` is a deprecated namespace; the
                # function itself lives in ``scipy.ndimage``.
                local_chunk = scipy.ndimage.convolve1d(
                    local_chunk, temporal_whitening, axis=0, mode='constant')

            thresholds = _median_absolute_deviations(local_chunk, N_e)
            gdata = gather_array(thresholds, comm)
            if comm.rank == 0:
                gdata = gdata.reshape((comm.size, N_e))
                thresholds = numpy.mean(gdata, 0)
                with h5py.File(basis_path, 'r+', libver='earliest') as bfile:
                    # Replace the thresholds computed on raw data.
                    bfile.pop('thresholds')
                    io.write_datasets(bfile, ['thresholds'],
                                      {'thresholds': thresholds},
                                      compression=hdf5_compress)
            comm.Barrier()

    # Part 2: Basis
    numpy.random.seed(422)

    SHARED_MEMORY = get_shared_memory_flag(params)
    #################################################################
    alignment = params.getboolean('detection', 'alignment')
    over_factor = params.getint('detection', 'oversampling_factor')
    nb_jitter = params.getint('detection', 'nb_jitter')
    nodes, edges = get_nodes_and_edges(params)
    _, positions = get_nodes_and_positions(params)
    do_temporal_whitening = params.getboolean('whitening', 'temporal')
    do_spatial_whitening = params.getboolean('whitening', 'spatial')
    use_barycenter = params.getboolean('detection', 'use_barycenter')
    if matched_filter:
        chunk_size = detect_memory(params, whitening=True)
    else:
        chunk_size = detect_memory(params)
    safety_time = params.getint('whitening', 'safety_time')
    max_elts_elec = params.getint('whitening', 'max_elts')
    output_dim = params.getfloat('whitening', 'output_dim')
    smoothing_factor = params.getfloat('detection', 'smoothing_factor')
    if sign_peaks == 'both':
        max_elts_elec *= 2
    nb_elts = int(params.getfloat('whitening', 'nb_elts') * N_e * max_elts_elec)

    weird_thresh = params.get('detection', 'weird_thresh')
    if weird_thresh != '':
        ignore_artefacts = True
        weird_thresh = io.load_data(params, 'weird-thresholds')
    else:
        ignore_artefacts = False

    ignore_dead_times = params.getboolean('triggers', 'ignore_times')
    if ignore_dead_times:
        if SHARED_MEMORY:
            all_dead_times, mpi_memory_3 = get_dead_times(params)
        else:
            all_dead_times = get_dead_times(params)
    data_file.open()
    #################################################################

    if comm.rank == 0:
        print_and_log(["Searching spikes to construct the PCA basis..."],
                      'default', logger)

    nb_chunks, _ = data_file.analyze(chunk_size)

    if nb_chunks < comm.size:
        res = io.data_stats(params, show=False)
        chunk_size = int(res * params.rate // comm.size)
        if comm.rank == 0:
            print_and_log(
                ["Too much cores, automatically resizing the data chunks"],
                'debug', logger)
        nb_chunks, _ = data_file.analyze(chunk_size)

    # Number of waveforms already collected on every electrode.
    groups = {}
    for i in range(N_e):
        groups[i] = 0

    # I guess this is more relevant, to take signals from all over the recordings
    all_chunks = numpy.random.permutation(
        numpy.arange(nb_chunks, dtype=numpy.int32))
    # The quotas are shared between the processes.
    max_elts_elec //= comm.size
    nb_elts //= comm.size

    elt_count_pos = 0
    elt_count_neg = 0

    if sign_peaks in ['positive', 'both']:
        times_pos = numpy.zeros(nb_elts, dtype=numpy.int32)
        electrodes_pos = numpy.zeros(nb_elts, dtype=numpy.int32)
        extremum_pos = numpy.zeros(nb_elts, dtype=numpy.float32)
        elts_pos = numpy.zeros((N_t, nb_elts), dtype=numpy.float32)
    if sign_peaks in ['negative', 'both']:
        times_neg = numpy.zeros(nb_elts, dtype=numpy.int32)
        electrodes_neg = numpy.zeros(nb_elts, dtype=numpy.int32)
        extremum_neg = numpy.zeros(nb_elts, dtype=numpy.float32)
        elts_neg = numpy.zeros((N_t, nb_elts), dtype=numpy.float32)

    thresholds = io.load_data(params, 'thresholds')
    mads = io.load_data(params, 'mads')
    stds = io.load_data(params, 'stds')

    if alignment:
        cdata = numpy.linspace(-jitter_range, +jitter_range, nb_jitter)
        xdata = numpy.arange(-template_shift_2, template_shift_2 + 1)
        xoff = len(cdata) / 2.0
        snippet_duration = template_shift_2
        # Spline smoothing factor per electrode, scaled by the snippet size.
        local_factors = (2 * template_shift_2 + 1) * ((smoothing_factor * mads)**2)
    else:
        snippet_duration = template_shift
        xdata = numpy.arange(-template_shift, template_shift + 1)

    if rejection_threshold > 0:
        reject_noise = True
        noise_levels = stds * (2 * noise_window + 1)
    else:
        reject_noise = False

    to_explore = all_chunks[comm.rank::comm.size]

    upper_bounds = max_elts_elec

    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(params, to_explore)

    for gidx in to_explore:

        if (elt_count_pos + elt_count_neg) < nb_elts:
            local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)
            local_shape = len(local_chunk)

            if do_spatial_whitening:
                if use_gpu:
                    local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False)
                    local_chunk = local_chunk.dot(spatial_whitening).asarray()
                else:
                    local_chunk = numpy.dot(local_chunk, spatial_whitening)
            if do_temporal_whitening:
                local_chunk = scipy.ndimage.convolve1d(
                    local_chunk, temporal_whitening, axis=0, mode='constant')

            local_borders = (snippet_duration, local_shape - snippet_duration)

            if ignore_dead_times:
                dead_indices = numpy.searchsorted(
                    all_dead_times, [t_offset, t_offset + local_shape])

            # Extracting the peaks.
            found_peaktimes = []
            found_peak_amplitudes = []
            for i in range(N_e):
                height = thresholds[i]
                if sign_peaks == 'negative':
                    peaktimes = scipy.signal.find_peaks(
                        -local_chunk[:, i], height=height, distance=dist_peaks)[0]
                elif sign_peaks == 'positive':
                    peaktimes = scipy.signal.find_peaks(
                        local_chunk[:, i], height=height, distance=dist_peaks)[0]
                elif sign_peaks == 'both':
                    peaktimes = scipy.signal.find_peaks(
                        numpy.abs(local_chunk[:, i]), height=height,
                        distance=dist_peaks)[0]
                else:
                    peaktimes = numpy.empty(0, dtype=numpy.uint32)

                if ignore_artefacts:
                    artetimes = scipy.signal.find_peaks(
                        numpy.abs(local_chunk[:, i]), height=weird_thresh[i])[0]
                    to_keep = numpy.logical_not(numpy.in1d(peaktimes, artetimes))
                    peaktimes = peaktimes[to_keep]

                idx = (peaktimes >= local_borders[0]) & (peaktimes < local_borders[1])
                peaktimes = peaktimes[idx]

                if ignore_dead_times:
                    if dead_indices[0] != dead_indices[1]:
                        is_included = numpy.in1d(
                            peaktimes + t_offset,
                            all_dead_times[dead_indices[0]:dead_indices[1]])
                        peaktimes = peaktimes[~is_included]

                peaktimes = peaktimes.astype(numpy.uint32)
                found_peaktimes.append(peaktimes)

                peak_amplitudes = local_chunk[peaktimes, i]
                found_peak_amplitudes.append(peak_amplitudes)

            # Concatenate once for efficiency.
            all_peaktimes = numpy.concatenate(found_peaktimes)
            all_peak_amplitudes = numpy.concatenate(found_peak_amplitudes)
            local_peaktimes, local_indices = numpy.unique(all_peaktimes,
                                                          return_inverse=True)

            if len(local_peaktimes) > 0:

                diff_times = (local_peaktimes[-1] - local_peaktimes[0]) + 1
                all_times = numpy.zeros((N_e, diff_times), dtype=bool)
                padded_peaks = (local_peaktimes - local_peaktimes[0]).astype(numpy.int32)
                min_times = numpy.maximum(padded_peaks - safety_time, 0)
                max_times = numpy.minimum(padded_peaks + safety_time + 1, diff_times + 1)
                test_extremas = numpy.zeros((N_e, diff_times + 1), dtype=bool)
                for i in range(N_e):
                    test_extremas[i, found_peaktimes[i] - local_peaktimes[0]] = True

                if sort_waveforms:
                    # Consider the peaks by decreasing extremum.
                    order = numpy.argsort(-numpy.abs(all_peak_amplitudes))
                    all_idx = numpy.take(all_peaktimes, order)
                    argmax_peak = local_indices[order]
                else:
                    n_times = len(all_peaktimes)
                    shuffling = numpy.random.permutation(numpy.arange(n_times))
                    all_idx = numpy.take(all_peaktimes, shuffling)
                    argmax_peak = local_indices[shuffling]

                # Selection of the peaks with spatio-temporal masks.
                for midx, peak in zip(argmax_peak, all_idx):
                    if (elt_count_neg + elt_count_pos) == nb_elts:
                        break

                    all_elecs = numpy.where(
                        test_extremas[:, peak - local_peaktimes[0]])[0]
                    data = local_chunk[peak, all_elecs]

                    if sign_peaks == 'negative':
                        if N_e > 1:
                            if use_barycenter:
                                weighed_position = data[:, numpy.newaxis] * positions[all_elecs]
                                barycenter = weighed_position.sum(0) / data.sum()
                                elec = numpy.argmin(
                                    numpy.linalg.norm(
                                        barycenter - positions[all_elecs], axis=1))
                            else:
                                elec = numpy.argmin(data)
                        else:
                            elec = 0
                        negative_peak = True
                    elif sign_peaks == 'positive':
                        if N_e > 1:
                            if use_barycenter:
                                weighed_position = data[:, numpy.newaxis] * positions[all_elecs]
                                barycenter = weighed_position.sum(0) / data.sum()
                                # The norm is a distance, so the electrode
                                # nearest to the barycenter is its argmin for
                                # both peak signs (argmax selected the
                                # farthest electrode).
                                elec = numpy.argmin(
                                    numpy.linalg.norm(
                                        barycenter - positions[all_elecs], axis=1))
                            else:
                                elec = numpy.argmax(data)
                        else:
                            elec = 0
                        negative_peak = False
                    elif sign_peaks == 'both':
                        if N_e == 1:
                            if data < 0:
                                negative_peak = True
                            elif data > 0:
                                negative_peak = False
                            elec = 0
                        else:
                            if numpy.abs(numpy.max(data)) > numpy.abs(numpy.min(data)):
                                elec = numpy.argmax(data)
                                negative_peak = False
                            else:
                                elec = numpy.argmin(data)
                                negative_peak = True

                    # From a position within ``all_elecs`` to an electrode id.
                    elec = all_elecs[elec]

                    if groups[elec] < upper_bounds:

                        indices = nodes_indices[elec]
                        myslice = all_times[indices, min_times[midx]:max_times[midx]]

                        # Skip peaks falling in an already used neighbourhood.
                        if not myslice.any():

                            sub_mat = local_chunk[peak - snippet_duration:
                                                  peak + snippet_duration + 1, elec]

                            if reject_noise:
                                slice_window = sub_mat[
                                    snippet_duration - noise_window:
                                    snippet_duration + noise_window + 1]
                                value = numpy.linalg.norm(slice_window) / noise_levels[elec]
                                is_noise = value < rejection_threshold
                            else:
                                is_noise = False

                            if not is_noise:

                                extrema = sub_mat[snippet_duration]

                                if alignment:
                                    try:
                                        f = scipy.interpolate.UnivariateSpline(
                                            xdata, sub_mat, s=local_factors[elec], k=3)
                                    except Exception:
                                        # Fall back to an interpolating spline.
                                        f = scipy.interpolate.UnivariateSpline(
                                            xdata, sub_mat, k=3, s=0)

                                    if negative_peak:
                                        rmin = (numpy.argmin(f(cdata)) - xoff) / over_factor
                                    else:
                                        rmin = (numpy.argmax(f(cdata)) - xoff) / over_factor

                                    ddata = numpy.linspace(rmin - template_shift,
                                                           rmin + template_shift, N_t)

                                    # The spline fitted above is reused:
                                    # fitting it again with the same arguments
                                    # gives the same function.
                                    sub_mat = f(ddata).astype(numpy.float32)

                                if negative_peak:
                                    times_neg[elt_count_neg] = peak + t_offset
                                    electrodes_neg[elt_count_neg] = elec
                                    extremum_neg[elt_count_neg] = extrema
                                    elts_neg[:, elt_count_neg] = sub_mat
                                    elt_count_neg += 1
                                else:
                                    times_pos[elt_count_pos] = peak + t_offset
                                    electrodes_pos[elt_count_pos] = elec
                                    extremum_pos[elt_count_pos] = extrema
                                    elts_pos[:, elt_count_pos] = sub_mat
                                    elt_count_pos += 1

                                groups[elec] += 1
                                all_times[indices, min_times[midx]:max_times[midx]] = True
                                test_extremas[elec, peak - local_peaktimes[0]] = False

    sys.stderr.flush()

    print_and_log([
        "Node %d has collected %d waveforms"
        % (comm.rank, elt_count_pos + elt_count_neg)
    ], 'debug', logger)

    if sign_peaks in ['negative', 'both']:
        times_neg = gather_array(times_neg[:elt_count_neg], comm, 0, 1, dtype='int32')
        electrodes_neg = gather_array(electrodes_neg[:elt_count_neg], comm, 0, 1,
                                      dtype='int32')
        extremum_neg = gather_array(extremum_neg[:elt_count_neg], comm, 0, 1)
        gdata_neg = gather_array(elts_neg[:, :elt_count_neg].T, comm, 0, 1)
    if sign_peaks in ['positive', 'both']:
        times_pos = gather_array(times_pos[:elt_count_pos], comm, 0, 1, dtype='int32')
        electrodes_pos = gather_array(electrodes_pos[:elt_count_pos], comm, 0, 1,
                                      dtype='int32')
        extremum_pos = gather_array(extremum_pos[:elt_count_pos], comm, 0, 1)
        gdata_pos = gather_array(elts_pos[:, :elt_count_pos].T, comm, 0, 1)

    nb_waveforms = 0

    if comm.rank == 0:
        if sign_peaks in ['negative', 'both']:
            nb_waveforms += gdata_neg.shape[0]
        if sign_peaks in ['positive', 'both']:
            nb_waveforms += gdata_pos.shape[0]

    # Share the total so that every process takes the same decision below.
    nb_waveforms = all_gather_array(
        numpy.array([nb_waveforms], dtype=numpy.float32), comm, 0)[0]

    if comm.rank == 0:
        print_and_log([
            "Found %d waveforms over %d requested"
            % (nb_waveforms, int(nb_elts * comm.size))
        ], 'default', logger)

        if nb_waveforms == 0:
            print_and_log(
                ['No waveforms found! Are the data properly loaded??'],
                'error', logger)

    if nb_waveforms == 0:
        sys.exit(0)

    if comm.rank == 0:
        # Do PCA on the waveforms and store the basis obtained.
        res = {}
        pca_pos = None
        pca_neg = None
        if sign_peaks in ['negative', 'both']:
            res['times'] = times_neg
            res['electrodes'] = electrodes_neg
            res['extremum'] = extremum_neg
            if len(gdata_neg) > 0:
                pca = PCA(output_dim)
                if use_hanning:
                    pca.fit(gdata_neg * hanning_filter)
                else:
                    pca.fit(gdata_neg)
                res['proj'] = pca.components_.T.astype(numpy.float32)
                pca_neg = numpy.sum(pca.explained_variance_ratio_)
            else:
                res['proj'] = numpy.identity(int(output_dim), dtype=numpy.float32)
            res['rec'] = res['proj'].T
            res['waveform'] = numpy.median(gdata_neg, 0)
            # Keep a random sample of at most 2500 waveforms.
            idx = numpy.random.permutation(numpy.arange(gdata_neg.shape[0]))[:2500]
            res['waveforms'] = gdata_neg[idx, :]
        if sign_peaks in ['positive', 'both']:
            res['times_pos'] = times_pos
            res['electrodes_pos'] = electrodes_pos
            res['extremum_pos'] = extremum_pos
            if len(gdata_pos) > 0:
                pca = PCA(output_dim)
                if use_hanning:
                    pca.fit(gdata_pos * hanning_filter)
                else:
                    pca.fit(gdata_pos)
                res['proj_pos'] = pca.components_.T.astype(numpy.float32)
                pca_pos = numpy.sum(pca.explained_variance_ratio_)
            else:
                res['proj_pos'] = numpy.identity(int(output_dim), dtype=numpy.float32)
            res['rec_pos'] = res['proj_pos'].T
            res['waveform_pos'] = numpy.median(gdata_pos, 0)
            # Keep a random sample of at most 2500 waveforms.
            idx = numpy.random.permutation(numpy.arange(gdata_pos.shape[0]))[:2500]
            res['waveforms_pos'] = gdata_pos[idx, :]

        with h5py.File(basis_path, 'r+', libver='earliest') as bfile:
            io.write_datasets(bfile, list(res.keys()), res,
                              compression=hdf5_compress)

        if sign_peaks == 'positive':
            print_and_log([
                "A basis with %s dimensions has been built"
                % res['proj_pos'].shape[1]
            ], 'info', logger)
        elif sign_peaks == 'negative':
            print_and_log([
                "A basis with %s dimensions has been built"
                % res['proj'].shape[1]
            ], 'info', logger)
        elif sign_peaks == 'both':
            print_and_log([
                "Two basis with %s dimensions has been built"
                % res['proj'].shape[1]
            ], 'debug', logger)
        if pca_pos is not None:
            print_and_log([
                "The percentage of variance explained is %s for positive spikes"
                % pca_pos
            ], 'debug', logger)
        if pca_neg is not None:
            print_and_log([
                "The percentage of variance explained is %s for negative spikes"
                % pca_neg
            ], 'debug', logger)

    comm.Barrier()

    if matched_filter:

        if comm.rank == 0:
            print_and_log([
                "Because of matched filters, need to recompute the thresholds..."
            ], 'default', logger)

        if do_spatial_whitening:
            spatial_whitening = io.load_data(params, 'spatial_whitening')
            if use_gpu:
                spatial_whitening = cmt.CUDAMatrix(spatial_whitening, copy_on_host=False)
        if do_temporal_whitening:
            temporal_whitening = io.load_data(params, 'temporal_whitening')

        # The stored median waveforms are reversed and normalized before
        # being used as convolution kernels.
        if sign_peaks in ['negative', 'both']:
            waveform_neg = io.load_data(params, 'waveform')[::-1]
            waveform_neg /= (numpy.abs(numpy.sum(waveform_neg)) * len(waveform_neg))
        if sign_peaks in ['positive', 'both']:
            waveform_pos = io.load_data(params, 'waveform-pos')[::-1]
            waveform_pos /= (numpy.abs(numpy.sum(waveform_pos)) * len(waveform_pos))

        for gidx in [all_chunks[comm.rank]]:
            local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)
            local_shape = len(local_chunk)

            if do_spatial_whitening:
                if use_gpu:
                    local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False)
                    local_chunk = local_chunk.dot(spatial_whitening).asarray()
                else:
                    local_chunk = numpy.dot(local_chunk, spatial_whitening)
            if do_temporal_whitening:
                local_chunk = scipy.ndimage.convolve1d(
                    local_chunk, temporal_whitening, axis=0, mode='constant')

            local_chunk /= thresholds

            if sign_peaks in ['negative', 'both']:
                tmp_chunk = scipy.ndimage.convolve1d(
                    local_chunk, waveform_neg, axis=0, mode='constant')
                thresholds = _median_absolute_deviations(tmp_chunk, N_e)
                gdata = gather_array(thresholds, comm)
                if comm.rank == 0:
                    gdata = gdata.reshape((comm.size, N_e))
                    thresholds = numpy.mean(gdata, 0)
                    with h5py.File(basis_path, 'r+', libver='earliest') as bfile:
                        io.write_datasets(bfile, ['matched_thresholds'],
                                          {'matched_thresholds': thresholds},
                                          compression=hdf5_compress)
                comm.Barrier()

            if sign_peaks in ['positive', 'both']:
                tmp_chunk = scipy.ndimage.convolve1d(
                    local_chunk, waveform_pos, axis=0, mode='constant')
                thresholds = _median_absolute_deviations(tmp_chunk, N_e)
                gdata = gather_array(thresholds, comm)
                if comm.rank == 0:
                    gdata = gdata.reshape((comm.size, N_e))
                    thresholds = numpy.mean(gdata, 0)
                    with h5py.File(basis_path, 'r+', libver='earliest') as bfile:
                        io.write_datasets(bfile, ['matched_thresholds_pos'],
                                          {'matched_thresholds_pos': thresholds},
                                          compression=hdf5_compress)
                comm.Barrier()

    data_file.close()

    if SHARED_MEMORY and ignore_dead_times:
        mpi_memory_3.Free()
def main(params, nb_cpu, nb_gpu, use_gpu):
    """Extract templates from already-found clusters and write them to HDF5.

    The function
      1. loads thresholds, the waveform basis and the (cluster, spike time)
         assignment through ``io.load_data``;
      2. visits the recording chunk by chunk, in a random order, and collects
         up to ``max_elts`` projected snippets per cluster (a cluster ``c`` is
         handled by the MPI rank ``c % comm.size``);
      3. builds two components per cluster (the median snippet and a second
         component obtained from a 1-component PCA of the residuals) and
         stores them as sparse ``temp_x`` / ``temp_y`` / ``temp_data``
         triplets in ``<file_out_suff>.templates.hdf5``;
      4. calls ``algo.merging_cc``, optionally ``algo.delete_mixtures`` and
         finally ``io.get_overlaps``.

    Parameters
    ----------
    params
        Configuration object; the code reads it through ``get``, ``getint``,
        ``getfloat`` and ``getboolean`` and uses its ``data_file``,
        ``nb_channels`` and ``logfile`` attributes.
    nb_cpu, nb_gpu
        Only used to pick the GPU device id when ``use_gpu`` is true.
    use_gpu
        When true, ``cudamat`` is imported and used for spatial whitening.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): several names used below (``tmpidx``, ``shift``,
    ``to_write``, ``ielec``, ``loc_norms``, ``n_clusters``,
    ``remove_mixture``) are never assigned inside this function.  Unless they
    exist at module level the corresponding statements raise ``NameError``.
    They are left untouched here because their intended definitions cannot be
    established from this function alone.
    """
    numpy.random.seed(426236)
    # params = detect_memory(params)
    parallel_hdf5 = get_parallel_hdf5_flag(params)
    _ = init_logging(params.logfile)
    logger = logging.getLogger('circus.extracting')
    #################################################################
    data_file = params.data_file
    N_e = params.getint('data', 'N_e')
    N_t = params.getint('detection', 'N_t')
    N_total = params.nb_channels
    template_shift = params.getint('detection', 'template_shift')
    chunk_size = detect_memory(params)
    file_out = params.get('data', 'file_out')
    file_out_suff = params.get('data', 'file_out_suff')
    do_temporal_whitening = params.getboolean('whitening', 'temporal')
    do_spatial_whitening = params.getboolean('whitening', 'spatial')
    nodes, edges = get_nodes_and_edges(params)
    safety_time = params.getint('extracting', 'safety_time')
    max_elts_temp = params.getint('extracting', 'max_elts')
    output_dim = params.getfloat('extracting', 'output_dim')
    noise_thr = params.getfloat('extracting', 'noise_thr')
    hdf5_compress = params.getboolean('data', 'hdf5_compress')
    blosc_compress = params.getboolean('data', 'blosc_compress')
    tmp_limits = params.get('fitting', 'amp_limits').replace('(', '').replace(')', '').split(',')
    # Materialise the values: ``amp_limits[1]`` is indexed further down and
    # ``map`` returns a non-subscriptable iterator on Python 3.
    amp_limits = [float(value) for value in tmp_limits]
    elt_count = 0
    # Map a channel id to its position inside ``nodes``.
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.arange(len(nodes))
    data_file.open()
    #################################################################

    if comm.rank == 0:
        print_and_log(["Extracting templates from already found clusters..."], 'default', logger)

    thresholds = io.load_data(params, 'thresholds')
    basis_proj, basis_rec = io.load_data(params, 'basis')
    clusters, spiketimes, N_clusters = io.load_data(params, 'spike-cluster')
    # Map a raw cluster label to a dense index.
    inv_clusters = numpy.zeros(clusters.max() + 1, dtype=numpy.int32)
    inv_clusters[numpy.unique(clusters)] = numpy.argsort(
        numpy.unique(clusters))

    if use_gpu:
        import cudamat as cmt
        # # Need to properly handle multi GPU per MPI nodes?
        if nb_gpu > nb_cpu:
            gpu_id = int(comm.rank // nb_cpu)
        else:
            gpu_id = 0
        cmt.cuda_set_device(gpu_id)
        cmt.init()
        cmt.cuda_sync_threads()

    if do_spatial_whitening:
        spatial_whitening = io.load_data(params, 'spatial_whitening')
    else:
        spatial_whitening = None  # default assignment (PyCharm code inspection)
    if do_temporal_whitening:
        temporal_whitening = io.load_data(params, 'temporal_whitening')
    else:
        temporal_whitening = None  # default assignment (PyCharm code inspection)

    if use_gpu and do_spatial_whitening:
        spatial_whitening = cmt.CUDAMatrix(spatial_whitening, copy_on_host=False)

    # One (initially empty) snippet matrix and spike-time vector per cluster.
    result = {}
    for i in range(N_clusters):
        result['data_tmp_' + str(i)] = numpy.zeros(
            (0, N_e * basis_proj.shape[1]), dtype=numpy.float32)
        result['times_' + str(i)] = numpy.zeros(0, dtype=numpy.int32)

    nb_chunks, last_chunk_len = data_file.analyze(chunk_size)

    # I guess this is more relevant, to take signals from all over the recordings.
    all_chunks = numpy.random.permutation(numpy.arange(nb_chunks))

    # Number of clusters owned by this rank, and the matching snippet budget.
    nb_templates = numpy.sum(
        comm.rank == numpy.mod(numpy.arange(N_clusters), comm.size))
    nb_elts = max_elts_temp * nb_templates

    to_explore = all_chunks
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(params, to_explore)

    # Iterate over ``to_explore`` (not ``all_chunks``) so that the progress
    # bar created for rank 0 is actually advanced.
    for gidx in to_explore:

        if elt_count < nb_elts:
            # print "Node", comm.rank, "is analyzing chunk", gidx, "/", nb_chunks, " ..."
            local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes)
            local_shape = len(local_chunk)

            if do_spatial_whitening:
                if use_gpu:
                    local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False)
                    local_chunk = local_chunk.dot(spatial_whitening).asarray()
                else:
                    local_chunk = numpy.dot(local_chunk, spatial_whitening)
            if do_temporal_whitening:
                local_chunk = scipy.ndimage.filters.convolve1d(
                    local_chunk, temporal_whitening, axis=0, mode='constant')

            # print "Extracting the peaks..."
            chunk_idx = numpy.where((spiketimes >= gidx * chunk_size)
                                    & (spiketimes < (gidx + 1) * chunk_size))[0]
            local_offset = t_offset
            local_peaktimes = spiketimes[chunk_idx] - local_offset

            # print "Removing the useless borders..."
            local_borders = (template_shift, chunk_size - template_shift)
            keep = ((local_peaktimes >= local_borders[0])
                    & (local_peaktimes < local_borders[1]))
            local_peaktimes = local_peaktimes[keep]
            # ``keep`` is relative to the spikes of this chunk, so it has to be
            # applied to ``clusters[chunk_idx]`` and not to the global array;
            # this keeps ``local_clusters`` aligned with ``local_peaktimes``.
            local_clusters = inv_clusters[clusters[chunk_idx][keep]]

            if len(local_peaktimes) > 0:
                # Boolean occupancy mask (channel x time) used to enforce the
                # safety window between collected spikes.
                all_times = numpy.zeros(
                    (N_e, local_peaktimes[-1] - local_peaktimes[0] + 1),
                    dtype=numpy.bool_)
                min_times = numpy.maximum(
                    local_peaktimes - local_peaktimes[0] - safety_time, 0)
                max_times = numpy.minimum(
                    local_peaktimes - local_peaktimes[0] + safety_time + 1,
                    local_peaktimes[-1] - local_peaktimes[0])

                n_times = len(local_peaktimes)
                argmax_peak = numpy.random.permutation(numpy.arange(n_times))
                clusters_id = local_clusters[argmax_peak]
                local_peaktimes = local_peaktimes[argmax_peak]
                # The windows were computed in the pre-shuffle order; permute
                # them too so that index ``idx`` refers to the same spike in
                # every array used by the loop below.
                min_times = min_times[argmax_peak]
                max_times = max_times[argmax_peak]

                # print "Selection of the peaks with spatio-temporal masks..."
                for idx in range(len(local_peaktimes)):

                    if elt_count == nb_elts:
                        break

                    temp = clusters_id[idx]

                    # Only handle the clusters owned by this rank.
                    if numpy.mod(temp, comm.size) == comm.rank:

                        elec = numpy.argmin(local_chunk[local_peaktimes[idx]])
                        indices = inv_nodes[edges[nodes[elec]]]
                        myslice = all_times[indices, min_times[idx]:max_times[idx]]
                        peak = local_peaktimes[idx]
                        if not myslice.any():
                            if len(result['data_tmp_' + str(temp)]) < max_elts_temp:
                                elt_count += 1
                                sub_mat = local_chunk[peak - template_shift:peak + template_shift + 1, :]
                                sub_mat = numpy.dot(basis_rec, sub_mat)
                                nx, ny = sub_mat.shape
                                sub_mat = sub_mat.reshape((1, nx * ny))
                                result['data_tmp_' + str(temp)] = numpy.vstack(
                                    (result['data_tmp_' + str(temp)], sub_mat))
                                to_add = numpy.array([peak + local_offset], dtype=numpy.int32)
                                result['times_' + str(temp)] = numpy.concatenate(
                                    (result['times_' + str(temp)], to_add))
                            # NOTE(review): nesting level reconstructed from a
                            # whitespace-collapsed source - confirm that the
                            # window must be marked even when the cluster is
                            # already full.
                            all_times[indices, min_times[idx]:max_times[idx]] = True

    total_nb_elts = 0
    for temp in range(N_clusters):
        total_nb_elts += len(result['data_tmp_' + str(temp)])

    gdata = gather_array(numpy.array([total_nb_elts], dtype=numpy.float32), comm, 0)
    if comm.rank == 0:
        print_and_log([
            "Found %d spikes over %d requested" % (int(numpy.sum(gdata)), int(nb_elts))
        ], 'default', logger)

    # print "Spikes extracted in", time.time() - t_start, "s"

    comm.Barrier()

    # Count the clusters of this rank for which at least one spike was found.
    local_nb_clusters = 0
    for temp in range(comm.rank, N_clusters, comm.size):
        if len(result['data_tmp_' + str(temp)]) > 0:
            local_nb_clusters += 1

    # print total_nb_clusters, "found in", time.time() - t_start, "s"
    gdata3 = gather_array(
        numpy.array([local_nb_clusters], dtype=numpy.float32), comm, 0)

    comm.Barrier()
    if comm.rank == 0:
        print_and_log(["Extracting the templates..."], 'default', logger)

    total_nb_clusters = int(
        comm.bcast(numpy.array([int(numpy.sum(gdata3))], dtype=numpy.int32), root=0)[0])
    offsets = numpy.zeros(comm.size, dtype=numpy.int32)
    for i in range(comm.size - 1):
        # Take the scalar explicitly instead of relying on the implicit
        # (deprecated) conversion of a 1-element array.
        offsets[i + 1] = comm.bcast(
            numpy.array([local_nb_clusters], dtype=numpy.int32), root=i)[0]

    if parallel_hdf5:
        # All ranks write into one shared file; ``node_pad`` is the index of
        # the first template of this rank.
        node_pad = numpy.sum(offsets[:comm.rank + 1])
        hfile = h5py.File(file_out_suff + '.templates.hdf5', 'w',
                          driver='mpio', comm=comm, libver='earliest')
        norms = hfile.create_dataset('norms', shape=(2 * total_nb_clusters, ),
                                     dtype=numpy.float32, chunks=True)
        electrodes = hfile.create_dataset('electrodes', shape=(total_nb_clusters, ),
                                          dtype=numpy.int32, chunks=True)
        amps_lims = hfile.create_dataset('limits', shape=(total_nb_clusters, 2),
                                         dtype=numpy.float32, chunks=True)
        g_count = node_pad
        g_offset = total_nb_clusters
    else:
        # One temporary templates file per rank, merged by rank 0 later on.
        node_pad = 0
        hfile = h5py.File(file_out_suff + '.templates-%d.hdf5' % comm.rank, 'w',
                          libver='earliest')
        electrodes = hfile.create_dataset('electrodes', shape=(local_nb_clusters, ),
                                          dtype=numpy.int32, chunks=True)
        norms = hfile.create_dataset('norms', shape=(2 * local_nb_clusters, ),
                                     dtype=numpy.float32, chunks=True)
        amps_lims = hfile.create_dataset('limits', shape=(local_nb_clusters, 2),
                                         dtype=numpy.float32, chunks=True)
        g_count = 0
        g_offset = local_nb_clusters

    cfile = h5py.File(file_out_suff + '.clusters-%d.hdf5' % comm.rank, 'w', libver='earliest')
    count_templates = node_pad

    # Sparse (row, column, value) description of the templates.
    temp_x = numpy.zeros(0, dtype=numpy.int32)
    temp_y = numpy.zeros(0, dtype=numpy.int32)
    temp_data = numpy.zeros(0, dtype=numpy.float32)

    # Bound up-front so that the ``del`` below does not fail on a rank that
    # did not build any template.
    templates = None

    to_explore = range(comm.rank, N_clusters, comm.size)
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(params, to_explore)

    for temp in to_explore:

        n_data = len(result['data_tmp_' + str(temp)])
        if n_data > 0:
            data = result['data_tmp_' + str(temp)].reshape(
                n_data, basis_proj.shape[1], N_e)
            first_component = numpy.median(data, axis=0)
            tmp_templates = numpy.dot(first_component.T, basis_rec)
            # NOTE(review): ``tmpidx`` and ``shift`` are not defined in this
            # function, and ``indices`` still holds whatever the spike
            # collection loop left behind; ``electrodes[-1]`` also reads the
            # last slot of the dataset rather than ``g_count``.  Left as-is -
            # the intended definitions must be confirmed.
            electrodes[g_count] = indices[tmpidx[0][0]]
            indices = inv_nodes[edges[nodes[electrodes[-1]]]]
            templates = numpy.zeros((N_e, N_t), dtype=numpy.float32)
            if shift > 0:
                templates[indices, shift:] = tmp_templates[:, :-shift]
            elif shift < 0:
                templates[indices, :shift] = tmp_templates[:, -shift:]
            else:
                templates[indices, :] = tmp_templates

            templates = templates.flatten()
            dx = templates.nonzero()[0].astype(numpy.int32)

            temp_x = numpy.concatenate((temp_x, dx))
            temp_y = numpy.concatenate(
                (temp_y, count_templates * numpy.ones(len(dx), dtype=numpy.int32)))
            temp_data = numpy.concatenate((temp_data, templates[dx]))

            norms[g_count] = numpy.sqrt(
                numpy.sum(templates.flatten()**2) / (N_e * N_t))

            # Project every snippet on the first component and remove that
            # contribution (``data_flat`` then holds the residuals).
            x, y, z = data.shape
            data_flat = data.reshape(x, y * z)
            first_flat = first_component.reshape(y * z, 1)
            amplitudes = numpy.dot(data_flat, first_flat)
            amplitudes /= numpy.sum(first_flat**2)
            for i in range(x):
                data_flat[i, :] -= amplitudes[i] * first_flat[:, 0]

            # Amplitude bounds: median +/- 10 * MAD, with the lower bound
            # floored by ``physical_limit`` and the upper one capped by
            # ``amp_limits[1]``.
            variations = 10 * numpy.median(
                numpy.abs(amplitudes - numpy.median(amplitudes)))
            physical_limit = noise_thr * (
                -thresholds[indices[tmpidx[0][0]]]) / tmp_templates.min()
            amp_min = max(physical_limit, numpy.median(amplitudes) - variations)
            amp_max = min(amp_limits[1], numpy.median(amplitudes) + variations)
            amps_lims[g_count] = [amp_min, amp_max]

            if len(data_flat) > 1:
                pca = PCA(1)
                res_pca = pca.fit_transform(data_flat.astype(numpy.double))
                second_component = pca.components_.T.astype(
                    numpy.float32).reshape(y, z)
            else:
                second_component = data_flat.reshape(y, z) / numpy.sum(
                    data_flat**2)

            tmp_templates = numpy.dot(second_component.T, basis_rec)
            # Second components are stored after all the first components.
            offset = total_nb_clusters + count_templates
            sub_templates = numpy.zeros((N_e, N_t), dtype=numpy.float32)
            if shift > 0:
                sub_templates[indices, shift:] = tmp_templates[:, :-shift]
            elif shift < 0:
                sub_templates[indices, :shift] = tmp_templates[:, -shift:]
            else:
                sub_templates[indices, :] = tmp_templates

            sub_templates = sub_templates.flatten()
            dx = sub_templates.nonzero()[0].astype(numpy.int32)

            temp_x = numpy.concatenate((temp_x, dx))
            temp_y = numpy.concatenate(
                (temp_y, offset * numpy.ones(len(dx), dtype=numpy.int32)))
            temp_data = numpy.concatenate((temp_data, sub_templates[dx]))

            norms[g_count + g_offset] = numpy.sqrt(
                numpy.sum(sub_templates.flatten()**2) / (N_e * N_t))

            count_templates += 1
            g_count += 1

        # NOTE(review): ``to_write`` and ``ielec`` are not defined in this
        # function; the nesting level of this call was reconstructed from a
        # whitespace-collapsed source - confirm both.
        io.write_datasets(cfile, to_write, result, ielec, compress=hdf5_compress)

    # At the end we should have a templates variable to store.
    cfile.close()
    del result, templates, amps_lims

    comm.Barrier()

    # We need to gather the sparse arrays.
    temp_x = gather_array(temp_x, comm, dtype='int32', compress=blosc_compress)
    temp_y = gather_array(temp_y, comm, dtype='int32', compress=blosc_compress)
    temp_data = gather_array(temp_data, comm, compress=blosc_compress)

    if parallel_hdf5:
        if comm.rank == 0:
            # Merge the per-rank cluster files into a single one.
            rs = [
                h5py.File(file_out_suff + '.clusters-%d.hdf5' % i, 'r', libver='earliest')
                for i in range(comm.size)
            ]
            cfile = h5py.File(file_out_suff + '.clusters.hdf5', 'w', libver='earliest')
            io.write_datasets(cfile, ['electrodes'], {'electrodes': electrodes[:]},
                              compress=hdf5_compress)
            for i in range(comm.size):
                for j in range(i, N_e, comm.size):
                    io.write_datasets(cfile, to_write, rs[i], j, compress=hdf5_compress)
                rs[i].close()
                os.remove(file_out_suff + '.clusters-%d.hdf5' % i)
            cfile.close()
        hfile.close()
    else:
        hfile.close()
        if comm.rank == 0:
            # Merge the per-rank template and cluster files.
            ts = [
                h5py.File(file_out_suff + '.templates-%d.hdf5' % i, 'r', libver='earliest')
                for i in range(comm.size)
            ]
            rs = [
                h5py.File(file_out_suff + '.clusters-%d.hdf5' % i, 'r', libver='earliest')
                for i in range(comm.size)
            ]
            result = {}
            hfile = h5py.File(file_out_suff + '.templates.hdf5', 'w', libver='earliest')
            cfile = h5py.File(file_out_suff + '.clusters.hdf5', 'w', libver='earliest')
            electrodes = hfile.create_dataset('electrodes', shape=(total_nb_clusters, ),
                                              dtype=numpy.int32, chunks=True)
            norms = hfile.create_dataset('norms', shape=(2 * total_nb_clusters, ),
                                         dtype=numpy.float32, chunks=True)
            amplitudes = hfile.create_dataset('limits', shape=(total_nb_clusters, 2),
                                              dtype=numpy.float32, chunks=True)
            count = 0
            for i in range(comm.size):
                # NOTE(review): no 'templates' dataset is written to the
                # per-rank files above, and ``loc_norms`` / ``n_clusters`` are
                # not defined in this function - this branch needs review.
                loc_temp = ts[i].get('templates')
                middle = loc_temp.shape[2] // 2
                norms[count:count + middle] = loc_norms[:middle]
                norms[n_clusters + count:n_clusters + count + middle] = loc_norms[middle:]
                electrodes[count:count + middle] = ts[i].get('electrodes')
                amplitudes[count:count + middle] = ts[i].get('limits')
                count += middle
                for j in range(i, N_e, comm.size):
                    io.write_datasets(cfile, to_write, rs[i], j, compress=hdf5_compress)
                ts[i].close()
                rs[i].close()
                os.remove(file_out_suff + '.templates-%d.hdf5' % i)
                os.remove(file_out_suff + '.clusters-%d.hdf5' % i)
            io.write_datasets(cfile, ['electrodes'], {'electrodes': electrodes[:]},
                              compress=hdf5_compress)
            hfile.close()
            cfile.close()

    if comm.rank == 0:
        # Append the gathered sparse template description.
        hfile = h5py.File(file_out_suff + '.templates.hdf5', 'r+', libver='earliest')
        hfile.create_dataset('temp_x', data=temp_x)
        hfile.create_dataset('temp_y', data=temp_y)
        hfile.create_dataset('temp_data', data=temp_data)
        hfile.create_dataset('temp_shape', data=numpy.array(
            [N_e, N_t, 2 * total_nb_clusters], dtype=numpy.int32))
        hfile.close()

    comm.Barrier()

    if comm.rank == 0:
        print_and_log(["Merging similar templates..."], 'default', logger)

    merged1 = algo.merging_cc(params, parallel_hdf5)

    comm.Barrier()

    # NOTE(review): ``remove_mixture`` is not defined in this function.
    if remove_mixture:
        if comm.rank == 0:
            print_and_log(["Removing mixtures..."], 'default', logger)
        merged2 = algo.delete_mixtures(params, parallel_hdf5)
    else:
        merged2 = [0, 0]

    if comm.rank == 0:
        lines = [
            "Number of global merges    : %d" % merged1[1],
            "Number of mixtures removed : %d" % merged2[1],
        ]
        print_and_log(lines, 'info', logger)

    comm.Barrier()
    io.get_overlaps(params, erase=True, parallel_hdf5=parallel_hdf5)

    data_file.close()