def main(argv=None): if argv is None: argv = sys.argv[1:] parallel_hdf5 = h5py.get_config().mpi user_path = pjoin(os.path.expanduser('~'), 'spyking-circus') tasks_list = None if not os.path.exists(user_path): os.makedirs(user_path) try: import cudamat as cmt cmt.init() HAVE_CUDA = True except Exception: HAVE_CUDA = False all_steps = [ 'whitening', 'clustering', 'fitting', 'gathering', 'extracting', 'filtering', 'converting', 'deconverting', 'benchmarking', 'merging', 'validating', 'thresholding' ] config_file = os.path.abspath(pkg_resources.resource_filename('circus', 'config.params')) header = get_colored_header() header += Fore.GREEN + 'Local CPUs : ' + Fore.CYAN + str(psutil.cpu_count()) + '\n' # header += Fore.GREEN + 'GPU detected : ' + Fore.CYAN + str(HAVE_CUDA) + '\n' header += Fore.GREEN + 'Parallel HDF5 : ' + Fore.CYAN + str(parallel_hdf5) + '\n' do_upgrade = '' if not SHARED_MEMORY: do_upgrade = Fore.WHITE + ' [please consider upgrading MPI]' header += Fore.GREEN + 'Shared memory : ' + Fore.CYAN + str(SHARED_MEMORY) + do_upgrade + '\n' header += '\n' header += Fore.GREEN + "##################################################################" header += Fore.RESET method_help = '''by default, all steps are performed, but a subset x,y can be done. Steps are: - filtering - whitening - clustering - fitting - merging [with or without a GUI for meta merging] - (extra) converting [export results to phy format] - (extra) thresholding [to get MUA activity only] - (extra) deconverting [import results from phy format] - (extra) gathering [force collection of results] - (extra) extracting [get templates from spike times] - (extra) benchmarking [with -o and -t] - (extra) validating [to compare performance with GT neurons]''' parser = argparse.ArgumentParser(description=header, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('datafile', help='data file (or a list of commands if batch mode)') parser.add_argument('-i', '--info', help='list the file formats supported by SpyKING CIRCUS', action='store_true') parser.add_argument('-m', '--method', default='filtering,whitening,clustering,fitting,merging', help=method_help) parser.add_argument('-c', '--cpu', type=int, default=max(1, int(psutil.cpu_count()/2)), help='number of CPU') # parser.add_argument('-g', '--gpu', type=int, default=0, help='number of GPU') parser.add_argument('-H', '--hostfile', help='hostfile for MPI', default=pjoin(user_path, 'circus.hosts')) parser.add_argument('-b', '--batch', help='datafile is a list of commands to launch, in a batch mode', action='store_true') parser.add_argument('-p', '--preview', help='GUI to display the first second filtered with thresholds', action='store_true') parser.add_argument('-r', '--result', help='GUI to display the results on top of raw data', action='store_true') parser.add_argument('-s', '--second', type=int, default=0, help='If preview mode, begining of the preview [in s]') parser.add_argument('-e', '--extension', help='extension to consider for merging, converting and deconverting', default='None') parser.add_argument('-o', '--output', help='output file [for generation of synthetic benchmarks]') parser.add_argument('-t', '--type', help='benchmark type', choices=['fitting', 'clustering', 'synchrony']) if len(argv) == 0: parser.print_help() sys.exit(0) args = parser.parse_args(argv) steps = args.method.split(',') for step in steps: if step not in all_steps: print_error(['The method "%s" is not recognized' % step]) sys.exit(0) # To save some typing later nb_gpu = 0 (nb_cpu, hostfile, batch, preview, result, extension, output, benchmark, info, second) = \ (args.cpu, args.hostfile, args.batch, args.preview, args.result, args.extension, args.output, args.type, args.info, args.second) filename = os.path.abspath(args.datafile) real_file = filename f_next, extens = os.path.splitext(filename) if info: if args.datafile.lower() in __supported_data_files__: filename = 'tmp' if len(__supported_data_files__[args.datafile.lower()].extension) > 0: filename += __supported_data_files__[args.datafile.lower()].extension[0] __supported_data_files__[args.datafile.lower()](filename, {}, is_empty=True)._display_requirements_() else: print_and_log([ '', 'To get info on any particular file format, do:', '>> spyking-circus file_format -i', '' ], 'default') print_and_log(list_all_file_format()) sys.exit(0) if extens == '.params': print_error(['You should launch the code on the data file!']) sys.exit(0) file_params = f_next + '.params' if not os.path.exists(file_params) and not batch: print(Fore.RED + 'The parameter file %s is not present!' % file_params) create_params = query_yes_no(Fore.WHITE + "Do you want SpyKING CIRCUS to create a parameter file?") if create_params: print(Fore.WHITE + "Creating %s" % file_params) print(Fore.WHITE + "Fill it properly before launching the code! (see documentation)") print_info(['Keep in mind that filtering is performed on site, so please', 'be sure to keep a copy of your data elsewhere']) shutil.copyfile(config_file, file_params) sys.exit(0) elif batch: tasks_list = filename if not batch: file_params = f_next + '.params' if not os.path.exists(file_params): print_and_log(["%s does not exist" % file_params], 'error') sys.exit(0) import ConfigParser as configparser parser = configparser.ConfigParser() myfile = open(file_params, 'r') lines = myfile.readlines() myfile.close() myfile = open(file_params, 'w') for l in lines: myfile.write(l.replace('\t', '')) myfile.close() parser.read(file_params) for section in CircusParser.__all_sections__: if parser.has_section(section): for (key, value) in parser.items(section): parser.set(section, key, value.split('#')[0].rstrip()) else: parser.add_section(section) try: use_output_dir = parser.get('data', 'output_dir') != '' except Exception: use_output_dir = False if use_output_dir: path = os.path.abspath(os.path.expanduser(parser.get('data', 'output_dir'))) file_out = os.path.join(path, os.path.basename(f_next)) if not os.path.exists(file_out): os.makedirs(file_out) else: file_out = f_next logfile = file_out + '.log' if os.path.exists(logfile): os.remove(logfile) logger = init_logging(logfile) params = CircusParser(filename) data_file = params.get_data_file(source=True, has_been_created=False) overwrite = params.getboolean('data', 'overwrite') file_format = params.get('data', 'file_format') if overwrite: support_parallel_write = data_file.parallel_write is_writable = data_file.is_writable else: support_parallel_write = __supported_data_files__['raw_binary'].parallel_write is_writable = __supported_data_files__['raw_binary'].is_writable if preview: print_and_log(['Preview mode, showing only seconds [%d-%d] of the recording' % (second, second+1)], 'info', logger) tmp_path_loc = os.path.join(os.path.abspath(params.get('data', 'file_out')), 'tmp') if not os.path.exists(tmp_path_loc): os.makedirs(tmp_path_loc) filename = os.path.join(tmp_path_loc, 'preview.dat') f_next, extens = os.path.splitext(filename) preview_params = f_next + '.params' shutil.copyfile(file_params, preview_params) steps = ['filtering', 'whitening'] chunk_size = int(params.rate) data_file.open() nb_chunks, _ = data_file.analyze(chunk_size) if nb_chunks <= (second + 1): print_and_log(['Recording is too short to display seconds [%d-%d]' % (second, second+1)]) sys.exit(0) local_chunk = data_file.get_snippet(int(second*params.rate), int(1.2*chunk_size)) description = data_file.get_description() data_file.close() new_params = CircusParser(filename, create_folders=False) new_params.write('data', 'chunk_size', '1') new_params.write('data', 'file_format', 'raw_binary') new_params.write('data', 'data_dtype', 'float32') new_params.write('data', 'data_offset', '0') new_params.write('data', 'dtype_offset', '0') new_params.write('data', 'stream_mode', 'None') new_params.write('data', 'overwrite', 'True') new_params.write('triggers', 'ignore_times', 'False') new_params.write('data', 'sampling_rate', str(params.rate)) new_params.write('whitening', 'safety_time', '0') new_params.write('clustering', 'safety_time', '0') new_params.write('whitening', 'chunk_size', '1') new_params.write('data', 'preview_path', params.file_params) new_params.write('data', 'output_dir', '') description['data_dtype'] = 'float32' description['dtype_offset'] = 0 description['data_offset'] = 0 description['gain'] = 1. new_params = CircusParser(filename) data_file_out = new_params.get_data_file(is_empty=True, params=description) support_parallel_write = data_file_out.parallel_write is_writable = data_file_out.is_writable data_file_out.allocate(shape=local_chunk.shape, data_dtype=numpy.float32) data_file_out.open('r+') data_file_out.set_data(0, local_chunk) data_file_out.close() if tasks_list is not None: with open(tasks_list, 'r') as f: for line in f: if len(line) > 0: subprocess.check_call(['spyking-circus'] + line.replace('\n', '').split(" ")) else: print_and_log(['Config file: %s' % (f_next + '.params')], 'debug', logger) print_and_log(['Data file : %s' % filename], 'debug', logger) print(get_colored_header()) print(Fore.GREEN + "File : " + Fore.CYAN + real_file) if preview: print(Fore.GREEN + "Steps : " + Fore.CYAN + "preview mode") elif result: print(Fore.GREEN + "Steps : " + Fore.CYAN + "result mode") else: print(Fore.GREEN + "Steps : " + Fore.CYAN + ", ".join(steps)) # print Fore.GREEN + "GPU detected : ", Fore.CYAN + str(HAVE_CUDA) print(Fore.GREEN + "Number of CPU : " + Fore.CYAN + str(nb_cpu) + "/" + str(psutil.cpu_count())) # if HAVE_CUDA: # print Fore.GREEN + "Number of GPU : ", Fore.CYAN + str(nb_gpu) print(Fore.GREEN + "Parallel HDF5 : " + Fore.CYAN + str(parallel_hdf5)) do_upgrade = '' use_shared_memory = get_shared_memory_flag(params) if not SHARED_MEMORY: do_upgrade = Fore.WHITE + ' [please consider upgrading MPI]' print(Fore.GREEN + "Shared memory : " + Fore.CYAN + str(use_shared_memory) + do_upgrade) print(Fore.GREEN + "Hostfile : " + Fore.CYAN + hostfile) print("") print(Fore.GREEN + "##################################################################") print("") print(Fore.RESET) # Launch the subtasks subtasks = [('filtering', 'mpirun'), ('whitening', 'mpirun'), ('clustering', 'mpirun'), ('fitting', 'mpirun'), ('extracting', 'mpirun'), ('gathering', 'python'), ('converting', 'mpirun'), ('deconverting', 'mpirun'), ('benchmarking', 'mpirun'), ('merging', 'mpirun'), ('validating', 'mpirun'), ('thresholding', 'mpirun')] # if HAVE_CUDA and nb_gpu > 0: # use_gpu = 'True' # else: use_gpu = 'False' time = data_stats(params) / 60.0 if preview: params = new_params if nb_cpu < psutil.cpu_count(): if use_gpu != 'True' and not result: print_and_log(['Using only %d out of %d local CPUs available (-c to change)' % (nb_cpu, psutil.cpu_count())], 'info', logger) if params.getboolean('detection', 'matched-filter') and not params.getboolean('clustering', 'smart_search'): print_and_log(['Smart Search should be activated for matched filtering'], 'info', logger) if time > 30 and not params.getboolean('clustering', 'smart_search'): print_and_log(['Smart Search should be activated for long recordings'], 'info', logger) n_edges = get_averaged_n_edges(params) if n_edges > 100 and not params.getboolean('clustering', 'compress'): print_and_log(['Template compression is highly recommended based on parameters'], 'info', logger) if not result: for subtask, command in subtasks: if subtask in steps: if command == 'python': # Directly call the launcher try: circus.launch(subtask, filename, nb_cpu, nb_gpu, use_gpu) except: print_and_log(['Step "%s" failed!' % subtask], 'error', logger) sys.exit(0) elif command == 'mpirun': # Use mpirun to make the call mpi_args = gather_mpi_arguments(hostfile, params) one_cpu = False if subtask in ['filtering', 'benchmarking'] and not is_writable: if not preview and overwrite: print_and_log(['The file format %s is read only!' % file_format, 'You should set overwite to False, to create a copy of the data.', 'However, note that if you have streams, informations on times', 'will be discarded'], 'info', logger) sys.exit(0) if subtask in ['filtering'] and not support_parallel_write and (args.cpu > 1): print_and_log(['No parallel writes for %s: only 1 node used for %s' %(file_format, subtask)], 'info', logger) nb_tasks = str(1) one_cpu = True else: if subtask != 'fitting': nb_tasks = str(args.cpu) else: # if use_gpu == 'True': # nb_tasks = str(args.gpu) # else: nb_tasks = str(args.cpu) if subtask == 'benchmarking': if (output is None) or (benchmark is None): print_and_log(["To generate synthetic datasets, you must provide output and type"], 'error', logger) sys.exit(0) mpi_args += [ '-np', nb_tasks, 'spyking-circus-subtask', subtask, filename, str(nb_cpu), str(nb_gpu), use_gpu, output, benchmark ] elif subtask in ['merging', 'converting']: mpi_args += [ '-np', nb_tasks, 'spyking-circus-subtask', subtask, filename, str(nb_cpu), str(nb_gpu), use_gpu, extension ] elif subtask in ['deconverting']: nb_tasks = str(1) nb_cpu = 1 mpi_args += [ '-np', nb_tasks, 'spyking-circus-subtask', subtask, filename, str(nb_cpu), str(nb_gpu), use_gpu, extension ] else: mpi_args += [ '-np', nb_tasks, 'spyking-circus-subtask', subtask, filename, str(nb_cpu), str(nb_gpu), use_gpu, str(one_cpu) ] print_and_log(['Launching task %s' % subtask], 'debug', logger) print_and_log(['Command: %s' % str(mpi_args)], 'debug', logger) try: subprocess.check_call(mpi_args) except subprocess.CalledProcessError as e: print_and_log(['Step "%s" failed for reason %s!' % (subtask, e)], 'error', logger) sys.exit(0) if preview or result: from circus.shared import gui import pylab try: from PyQt5.QtWidgets import QApplication except ImportError: from matplotlib.backends import qt_compat use_pyside = qt_compat.QT_API == qt_compat.QT_API_PYSIDE if use_pyside: from PySide.QtGui import QApplication else: from PyQt4.QtGui import QApplication app = QApplication([]) try: pylab.style.use('ggplot') except Exception: pass if preview: print_and_log(['Launching the preview GUI...'], 'debug', logger) mygui = gui.PreviewGUI(new_params) shutil.rmtree(tmp_path_loc) elif result: data_file = params.get_data_file() print_and_log(['Launching the result GUI...'], 'debug', logger) mygui = gui.PreviewGUI(params, show_fit=True) sys.exit(app.exec_())
def main(params, nb_cpu, nb_gpu, use_gpu): # Part 1: Whitening numpy.random.seed(420) # params = detect_memory(params) _ = init_logging(params.logfile) logger = logging.getLogger('circus.whitening') ################################################################# data_file = params.data_file N_e = params.getint('data', 'N_e') hdf5_compress = params.getboolean('data', 'hdf5_compress') N_total = params.nb_channels N_t = params.getint('detection', 'N_t') dist_peaks = params.getint('detection', 'dist_peaks') template_shift = params.getint('detection', 'template_shift') file_out_suff = params.get('data', 'file_out_suff') spike_thresh = params.getfloat('detection', 'spike_thresh') spike_width = params.getfloat('detection', 'spike_width') matched_filter = params.getboolean('detection', 'matched-filter') matched_thresh = params.getfloat('detection', 'matched_thresh') fudge = params.getfloat('whitening', 'fudge') sign_peaks = params.get('detection', 'peaks') do_temporal_whitening = params.getboolean('whitening', 'temporal') do_spatial_whitening = params.getboolean('whitening', 'spatial') ignore_spikes = params.getboolean('whitening', 'ignore_spikes') chunk_size = detect_memory(params, whitening=True) plot_path = os.path.join(params.get('data', 'file_out_suff'), 'plots') nodes, edges = get_nodes_and_edges(params) safety_time = params.getint('whitening', 'safety_time') safety_space = params.getboolean('whitening', 'safety_space') sort_waveforms = params.getboolean('whitening', 'sort_waveforms') nb_temp_white = min(max(20, comm.size), N_e) max_silence_1 = int(20 * params.rate // comm.size) max_silence_2 = 5000 inv_nodes = numpy.zeros(N_total, dtype=numpy.int32) inv_nodes[nodes] = numpy.arange(len(nodes)) jitter_range = params.getint('detection', 'jitter_range') template_shift_2 = template_shift + jitter_range use_hanning = params.getboolean('detection', 'hanning') rejection_threshold = params.getfloat('detection', 'rejection_threshold') noise_window = params.getint('detection', 'noise_time') data_file.open() ################################################################# if use_hanning: hanning_filter = numpy.hanning(N_t) if comm.rank == 0: print_and_log( ["Analyzing data to get whitening matrices and thresholds..."], 'default', logger) nodes_indices = {} for elec in numpy.arange(N_e): nodes_indices[elec] = inv_nodes[edges[nodes[elec]]] if use_gpu: import cudamat as cmt # # Need to properly handle multi GPU per MPI nodes? if nb_gpu > nb_cpu: gpu_id = int(comm.rank // nb_cpu) else: gpu_id = 0 cmt.cuda_set_device(gpu_id) cmt.init() cmt.cuda_sync_threads() nb_chunks, last_chunk_len = data_file.analyze(chunk_size) if nb_chunks < comm.size: res = io.data_stats(params, show=False) chunk_size = int(res * params.rate // comm.size) if comm.rank == 0: print_and_log( ["Too much cores, automatically resizing the data chunks"], 'debug', logger) nb_chunks, last_chunk_len = data_file.analyze(chunk_size) # I guess this is more relevant, to take signals from all over the recordings. if nb_chunks > comm.size: all_chunks = numpy.random.permutation( numpy.arange(nb_chunks - 1, dtype=numpy.int32)) else: all_chunks = numpy.random.permutation( numpy.arange(nb_chunks, dtype=numpy.int32)) all_electrodes = numpy.random.permutation(N_e) numpy.random.seed(comm.rank) for gidx in [all_chunks[comm.rank]]: # print "Node", comm.rank, "is analyzing chunk", gidx, "/", nb_chunks, " ..." local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes) local_shape = len(local_chunk) # print "Node", comm.rank, "computes the median absolute deviations in a random chunk" thresholds = numpy.zeros(N_e, dtype=numpy.float32) for i in range(N_e): u = numpy.median(local_chunk[:, i], 0) thresholds[i] = numpy.median(numpy.abs(local_chunk[:, i] - u), 0) gdata = gather_array(thresholds, comm) if comm.rank == 0: gdata = gdata.reshape((comm.size, N_e)) thresholds = numpy.mean(gdata, 0) bfile = h5py.File(file_out_suff + '.basis.hdf5', 'w', libver='earliest') io.write_datasets(bfile, ['thresholds'], {'thresholds': thresholds}, compression=hdf5_compress) bfile.close() comm.Barrier() thresholds = io.load_data(params, 'thresholds') local_borders = (template_shift, local_shape - template_shift) found_peaktimes = [] if ignore_spikes: # Extracting the peaks. local_peaktimes = [np.empty(0, dtype=numpy.uint32)] for i in range(N_e): peaktimes = scipy.signal.find_peaks(numpy.abs(local_chunk[:, i]), height=thresholds[i], width=spike_width, wlen=N_t)[0] peaktimes = peaktimes.astype(numpy.uint32) # print "Removing the useless borders..." idx = (peaktimes >= local_borders[0]) & (peaktimes < local_borders[1]) peaktimes = numpy.compress(idx, peaktimes) found_peaktimes.append(peaktimes) else: for i in range(N_e): found_peaktimes.append(numpy.zeros(0, dtype=numpy.uint32)) all_peaktimes = numpy.concatenate(found_peaktimes) local_peaktimes = numpy.unique(all_peaktimes) if len(local_peaktimes) > 0: diff_times = local_peaktimes[-1] - local_peaktimes[0] all_times = numpy.zeros((N_e, diff_times + 1), dtype=numpy.bool) padded_peaks = (local_peaktimes - local_peaktimes[0]).astype( numpy.int32) min_times = numpy.maximum(padded_peaks - safety_time, 0) max_times = numpy.minimum(padded_peaks + safety_time + 1, diff_times + 1) test_extremas = numpy.zeros((N_e, diff_times + 1), dtype=numpy.bool) for i in range(N_e): test_extremas[i, found_peaktimes[i] - local_peaktimes[0]] = True argmax_peak = numpy.random.permutation( numpy.arange(len(local_peaktimes))) all_idx = numpy.take(local_peaktimes, argmax_peak) # print "Selection of the peaks with spatio-temporal masks..." for idx, peak in zip(argmax_peak, all_idx): all_elecs = numpy.where(test_extremas[:, peak - local_peaktimes[0]])[0] data = local_chunk[peak, all_elecs] elec = all_elecs[numpy.argmax(numpy.abs(data))] indices = nodes_indices[elec] if safety_space: all_times[indices, min_times[idx]:max_times[idx]] = True else: all_times[elec, min_times[idx]:max_times[idx]] = True else: all_times = numpy.zeros((N_e, len(local_chunk)), dtype=numpy.bool) if do_temporal_whitening: local_res_temp = [] for elec in all_electrodes[numpy.arange(comm.rank, nb_temp_white, comm.size)]: res = numpy.zeros((0, N_t), dtype=numpy.float32) scount = 0 indices = nodes_indices[elec] all_times_elec = numpy.any(numpy.take(all_times, indices, axis=0), 0) esubset = numpy.where(all_times_elec == False)[0] bound = len(esubset) - N_t while (scount < bound) and (len(res) < max_silence_2): myslice = esubset[scount:scount + N_t] if numpy.all((myslice - esubset[scount]) == numpy.arange(N_t)): scount += N_t res = numpy.vstack((res, local_chunk[myslice, elec])) else: scount += 1 if len(res) > 5: local_res_temp += [numpy.cov(res.T)] nb_elecs = numpy.array([len(local_res_temp)], dtype=numpy.float32) local_res_temp = numpy.array(local_res_temp, dtype=numpy.float32) if len(local_res_temp) == 0: local_res_temp = numpy.zeros(0, dtype=numpy.float32) else: local_res_temp = numpy.sum(local_res_temp, 0) all_res_temp = gather_array(local_res_temp.ravel(), comm, 0, 1) all_elecs = gather_array(nb_elecs, comm, 0, 1) if do_spatial_whitening: local_res_spac = numpy.zeros((N_e, N_e), dtype=numpy.float32) local_silences = [] for elec in numpy.arange(comm.rank, N_e, comm.size): indices = nodes_indices[elec] all_times_elec = numpy.any(numpy.take(all_times, indices, axis=0), 0) esubset = numpy.where(all_times_elec == False)[0] local_data = local_chunk[esubset][:, indices] local_whitening = get_whitening_matrix( local_data, fudge=fudge).astype(numpy.float32) pos = numpy.where(elec == indices)[0] local_res_spac[elec, indices] = local_whitening[pos] local_silences += [len(esubset)] all_res_spac = gather_array(local_res_spac.ravel(), comm, 0, 1) all_silences = gather_array( numpy.array(local_silences, dtype=numpy.int32), comm, 0, 1, 'uint32') if comm.rank == 0: to_write = {} if do_temporal_whitening: try: nb_silences = numpy.sum(all_elecs > 0) all_res_temp = all_res_temp.reshape((nb_silences, N_t**2)) except Exception: print_and_log([ "No silent periods detected: something wrong with the parameters?" ], 'error', logger) all_res_temp = numpy.sum(all_res_temp, 0) all_res_temp = all_res_temp.reshape( (N_t, N_t)) / numpy.sum(all_elecs) temporal_whitening = get_whitening_matrix( all_res_temp.astype(numpy.double), fudge=1e-3)[template_shift].astype(numpy.float32) temporal_whitening /= temporal_whitening.sum() to_write['temporal'] = temporal_whitening have_nans = numpy.sum(numpy.isnan(temporal_whitening)) if have_nans > 0: temporal_whitening = numpy.zeros(N_t, dtype=numpy.float32) temporal_whitening[N_t // 2] = 1 to_write['temporal'] = temporal_whitening print_and_log( ["Disabling temporal whitening because of NaNs found"], 'info', logger) if do_spatial_whitening: all_res_spac = all_res_spac.reshape(comm.size, N_e, N_e) spatial_whitening = numpy.sum(all_res_spac, 0) to_write['spatial'] = spatial_whitening if ignore_spikes: print_and_log([ "Found %gs without spikes to compute the whitening matrix..." % (numpy.mean(all_silences) / params.rate) ], 'default', logger) else: print_and_log([ "Found %gs to compute the whitening matrix..." % (numpy.mean(all_silences) / params.rate) ], 'default', logger) have_nans = numpy.sum(numpy.isnan(spatial_whitening)) if have_nans > 0: spatial_whitening = numpy.eye(spatial_whitening.shape[0], dtype=numpy.float32) to_write['spatial'] = spatial_whitening print_and_log( ["Disabling spatial whitening because of NaNs found"], 'info', logger) bfile = h5py.File(file_out_suff + '.basis.hdf5', 'r+', libver='earliest') io.write_datasets(bfile, list(to_write.keys()), to_write, compression=hdf5_compress) bfile.close() comm.Barrier() if do_spatial_whitening or do_temporal_whitening: if comm.rank == 0: print_and_log( ["Because of whitening, need to recompute the thresholds..."], 'default', logger) if do_spatial_whitening: spatial_whitening = io.load_data(params, 'spatial_whitening') if use_gpu: spatial_whitening = cmt.CUDAMatrix(spatial_whitening, copy_on_host=False) if do_temporal_whitening: temporal_whitening = io.load_data(params, 'temporal_whitening') for gidx in [all_chunks[comm.rank]]: local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes) local_shape = len(local_chunk) if do_spatial_whitening: if use_gpu: local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False) local_chunk = local_chunk.dot(spatial_whitening).asarray() else: local_chunk = numpy.dot(local_chunk, spatial_whitening) if do_temporal_whitening: local_chunk = scipy.ndimage.filters.convolve1d( local_chunk, temporal_whitening, axis=0, mode='constant') thresholds = numpy.zeros(N_e, dtype=numpy.float32) for i in range(N_e): u = numpy.median(local_chunk[:, i], 0) thresholds[i] = numpy.median(numpy.abs(local_chunk[:, i] - u), 0) gdata = gather_array(thresholds, comm) if comm.rank == 0: gdata = gdata.reshape((comm.size, N_e)) thresholds = numpy.mean(gdata, 0) bfile = h5py.File(file_out_suff + '.basis.hdf5', 'r+', libver='earliest') bfile.pop('thresholds') io.write_datasets(bfile, ['thresholds'], {'thresholds': thresholds}, compression=hdf5_compress) bfile.close() comm.Barrier() # if comm.rank == 0: # if not os.path.exists(plot_path): # os.makedirs(plot_path) # N_elec = min(int(numpy.sqrt(data_file.N_e)), 5) # plot.view_fit(filename, t_start=0, t_stop=1, fit_on=False, square=True, # n_elec=N_elec, save=[plot_path, 'electrodes']) # Part 2: Basis numpy.random.seed(422) SHARED_MEMORY = get_shared_memory_flag(params) ################################################################# file_out = params.get('data', 'file_out') alignment = params.getboolean('detection', 'alignment') over_factor = params.getint('detection', 'oversampling_factor') nb_jitter = params.getint('detection', 'nb_jitter') spike_thresh = params.getfloat('detection', 'spike_thresh') nodes, edges = get_nodes_and_edges(params) _, positions = get_nodes_and_positions(params) do_temporal_whitening = params.getboolean('whitening', 'temporal') do_spatial_whitening = params.getboolean('whitening', 'spatial') use_barycenter = params.getboolean('detection', 'use_barycenter') if matched_filter: chunk_size = detect_memory(params, whitening=True) else: chunk_size = detect_memory(params) safety_time = params.getint('whitening', 'safety_time') max_elts_elec = params.getint('whitening', 'max_elts') output_dim = params.getfloat('whitening', 'output_dim') inv_nodes = numpy.zeros(N_total, dtype=numpy.int32) inv_nodes[nodes] = numpy.arange(len(nodes)) smoothing_factor = params.getfloat('detection', 'smoothing_factor') if sign_peaks == 'both': max_elts_elec *= 2 nb_elts = int( params.getfloat('whitening', 'nb_elts') * N_e * max_elts_elec) weird_thresh = params.get('detection', 'weird_thresh') if weird_thresh != '': ignore_artefacts = True weird_thresh = io.load_data(params, 'weird-thresholds') else: ignore_artefacts = False ignore_dead_times = params.getboolean('triggers', 'ignore_times') if ignore_dead_times: if SHARED_MEMORY: all_dead_times, mpi_memory_3 = get_dead_times(params) else: all_dead_times = get_dead_times(params) data_file.open() ################################################################# if comm.rank == 0: print_and_log(["Searching spikes to construct the PCA basis..."], 'default', logger) nb_chunks, last_chunk_len = data_file.analyze(chunk_size) if nb_chunks < comm.size: res = io.data_stats(params, show=False) chunk_size = int(res * params.rate // comm.size) if comm.rank == 0: print_and_log( ["Too much cores, automatically resizing the data chunks"], 'debug', logger) nb_chunks, last_chunk_len = data_file.analyze(chunk_size) groups = {} for i in range(N_e): groups[i] = 0 # I guess this is more relevant, to take signals from all over the recordings all_chunks = numpy.random.permutation( numpy.arange(nb_chunks, dtype=numpy.int32)) max_elts_elec //= comm.size nb_elts //= comm.size elt_count_pos = 0 elt_count_neg = 0 if sign_peaks in ['positive', 'both']: times_pos = numpy.zeros(nb_elts, dtype=numpy.int32) electrodes_pos = numpy.zeros(nb_elts, dtype=numpy.int32) extremum_pos = numpy.zeros(nb_elts, dtype=numpy.float32) elts_pos = numpy.zeros((N_t, nb_elts), dtype=numpy.float32) if sign_peaks in ['negative', 'both']: times_neg = numpy.zeros(nb_elts, dtype=numpy.int32) electrodes_neg = numpy.zeros(nb_elts, dtype=numpy.int32) extremum_neg = numpy.zeros(nb_elts, dtype=numpy.float32) elts_neg = numpy.zeros((N_t, nb_elts), dtype=numpy.float32) thresholds = io.load_data(params, 'thresholds') mads = io.load_data(params, 'mads') stds = io.load_data(params, 'stds') if alignment: cdata = numpy.linspace(-jitter_range, +jitter_range, nb_jitter) xdata = numpy.arange(-template_shift_2, template_shift_2 + 1) xoff = len(cdata) / 2.0 snippet_duration = template_shift_2 m_size = 2 * template_shift_2 + 1 align_factor = m_size local_factors = align_factor * ((smoothing_factor * mads)**2) else: snippet_duration = template_shift xdata = numpy.arange(-template_shift, template_shift + 1) if rejection_threshold > 0: reject_noise = True noise_levels = stds * (2 * noise_window + 1) else: reject_noise = False to_explore = all_chunks[comm.rank::comm.size] upper_bounds = max_elts_elec if comm.rank == 0: to_explore = get_tqdm_progressbar(params, to_explore) for gcount, gidx in enumerate(to_explore): if (elt_count_pos + elt_count_neg) < nb_elts: # print "Node", comm.rank, "is analyzing chunk", gidx, "/", nb_chunks, " ..." local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes) local_shape = len(local_chunk) if do_spatial_whitening: if use_gpu: local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False) local_chunk = local_chunk.dot(spatial_whitening).asarray() else: local_chunk = numpy.dot(local_chunk, spatial_whitening) if do_temporal_whitening: local_chunk = scipy.ndimage.filters.convolve1d( local_chunk, temporal_whitening, axis=0, mode='constant') local_borders = (snippet_duration, local_shape - snippet_duration) if ignore_dead_times: dead_indices = numpy.searchsorted( all_dead_times, [t_offset, t_offset + local_shape]) # Extracting the peaks. all_peaktimes = [numpy.empty(0, dtype=numpy.uint32)] found_peaktimes = [] found_peak_amplitudes = [] for i in range(N_e): height = thresholds[i] if sign_peaks == 'negative': peaktimes = scipy.signal.find_peaks(-local_chunk[:, i], height=height, distance=dist_peaks)[0] elif sign_peaks == 'positive': peaktimes = scipy.signal.find_peaks(local_chunk[:, i], height=height, distance=dist_peaks)[0] elif sign_peaks == 'both': peaktimes = scipy.signal.find_peaks(numpy.abs( local_chunk[:, i]), height=height, distance=dist_peaks)[0] else: peaktimes = numpy.empty(0, dtype=numpy.uint32) if ignore_artefacts: artetimes = scipy.signal.find_peaks( numpy.abs(local_chunk[:, i]), height=weird_thresh[i])[0] to_keep = numpy.logical_not( numpy.in1d(peaktimes, artetimes)) peaktimes = peaktimes[to_keep] idx = (peaktimes >= local_borders[0]) & (peaktimes < local_borders[1]) peaktimes = peaktimes[idx] if ignore_dead_times: if dead_indices[0] != dead_indices[1]: is_included = numpy.in1d( peaktimes + t_offset, all_dead_times[dead_indices[0]:dead_indices[1]]) peaktimes = peaktimes[~is_included] peaktimes = peaktimes.astype(numpy.uint32) found_peaktimes.append(peaktimes) peak_amplitudes = local_chunk[peaktimes, i] found_peak_amplitudes.append(peak_amplitudes) all_peaktimes = numpy.concatenate( found_peaktimes) # i.e. concatenate once for efficiency all_peak_amplitudes = numpy.concatenate(found_peak_amplitudes) local_peaktimes, local_indices = numpy.unique(all_peaktimes, return_inverse=True) if len(local_peaktimes) > 0: diff_times = (local_peaktimes[-1] - local_peaktimes[0]) + 1 all_times = numpy.zeros((N_e, diff_times), dtype=numpy.bool) padded_peaks = (local_peaktimes - local_peaktimes[0]).astype( numpy.int32) min_times = numpy.maximum(padded_peaks - safety_time, 0) max_times = numpy.minimum(padded_peaks + safety_time + 1, diff_times + 1) test_extremas = numpy.zeros((N_e, diff_times + 1), dtype=numpy.bool) for i in range(N_e): test_extremas[i, found_peaktimes[i] - local_peaktimes[0]] = True # Consider the peaks by decreasing extremum. if sort_waveforms: order = numpy.argsort(-np.abs(all_peak_amplitudes)) all_idx = numpy.take(all_peaktimes, order) argmax_peak = local_indices[order] else: n_times = len(all_peaktimes) shuffling = numpy.random.permutation(numpy.arange(n_times)) all_idx = numpy.take(all_peaktimes, shuffling) argmax_peak = local_indices[shuffling] # print "Selection of the peaks with spatio-temporal masks..." for midx, peak in zip(argmax_peak, all_idx): if (elt_count_neg + elt_count_pos) == nb_elts: break all_elecs = numpy.where( test_extremas[:, peak - local_peaktimes[0]])[0] data = local_chunk[peak, all_elecs] #target_area = test_extremas[:, min_times[midx]:max_times[midx]].sum(1) #all_elecs = numpy.where(target_area)[0] #data = local_chunk[peak, all_elecs] if sign_peaks == 'negative': if N_e > 1: if use_barycenter: weighed_position = data[:, numpy. newaxis] * positions[ all_elecs] barycenter = weighed_position.sum( 0) / data.sum() elec = numpy.argmin( numpy.linalg.norm(barycenter - positions[all_elecs], axis=1)) else: elec = numpy.argmin(data) else: elec = 0 negative_peak = True elif sign_peaks == 'positive': if N_e > 1: if use_barycenter: weighed_position = data[:, numpy. newaxis] * positions[ all_elecs] barycenter = weighed_position.sum( 0) / data.sum() elec = numpy.argmax( numpy.linalg.norm(barycenter - positions[all_elecs], axis=1)) else: elec = numpy.argmax(data) else: elec = 0 negative_peak = False elif sign_peaks == 'both': if N_e == 1: if data < 0: negative_peak = True elif data > 0: negative_peak = False elec = 0 else: if numpy.abs(numpy.max(data)) > numpy.abs( numpy.min(data)): elec = numpy.argmax(data) negative_peak = False else: elec = numpy.argmin(data) negative_peak = True elec = all_elecs[elec] if groups[elec] < upper_bounds: indices = nodes_indices[elec] myslice = all_times[indices, min_times[midx]:max_times[midx]] if not myslice.any(): sub_mat = local_chunk[peak - snippet_duration:peak + snippet_duration + 1, elec] if reject_noise: slice_window = sub_mat[ snippet_duration - noise_window:snippet_duration + noise_window + 1] value = numpy.linalg.norm( slice_window) / noise_levels[elec] is_noise = value < rejection_threshold else: is_noise = False if not is_noise: extrema = sub_mat[snippet_duration] if alignment: smoothed = True try: f = scipy.interpolate.UnivariateSpline( xdata, sub_mat, s=local_factors[elec], k=3) except Exception: smoothed = False f = scipy.interpolate.UnivariateSpline( xdata, sub_mat, k=3, s=0) if negative_peak: rmin = (numpy.argmin(f(cdata)) - xoff) / over_factor else: rmin = (numpy.argmax(f(cdata)) - xoff) / over_factor ddata = numpy.linspace( rmin - template_shift, rmin + template_shift, N_t) if smoothed: f = scipy.interpolate.UnivariateSpline( xdata, sub_mat, s=local_factors[elec], k=3) else: f = scipy.interpolate.UnivariateSpline( xdata, sub_mat, s=0, k=3) sub_mat = f(ddata).astype(numpy.float32) if negative_peak: times_neg[elt_count_neg] = peak + t_offset electrodes_neg[elt_count_neg] = elec extremum_neg[elt_count_neg] = extrema elts_neg[:, elt_count_neg] = sub_mat elt_count_neg += 1 else: times_pos[elt_count_pos] = peak + t_offset electrodes_pos[elt_count_pos] = elec extremum_pos[elt_count_pos] = extrema elts_pos[:, elt_count_pos] = sub_mat elt_count_pos += 1 groups[elec] += 1 all_times[ indices, min_times[midx]:max_times[midx]] = True test_extremas[elec, peak - local_peaktimes[0]] = False sys.stderr.flush() print_and_log([ "Node %d has collected %d waveforms" % (comm.rank, elt_count_pos + elt_count_neg) ], 'debug', logger) if sign_peaks in ['negative', 'both']: times_neg = gather_array(times_neg[:elt_count_neg], comm, 0, 1, dtype='int32') electrodes_neg = gather_array(electrodes_neg[:elt_count_neg], comm, 0, 1, dtype='int32') extremum_neg = gather_array(extremum_neg[:elt_count_neg], comm, 0, 1) gdata_neg = gather_array(elts_neg[:, :elt_count_neg].T, comm, 0, 1) if sign_peaks in ['positive', 'both']: times_pos = gather_array(times_pos[:elt_count_pos], comm, 0, 1, dtype='int32') electrodes_pos = gather_array(electrodes_pos[:elt_count_pos], comm, 0, 1, dtype='int32') extremum_pos = gather_array(extremum_pos[:elt_count_pos], comm, 0, 1) gdata_pos = gather_array(elts_pos[:, :elt_count_pos].T, comm, 0, 1) nb_waveforms = 0 if comm.rank == 0: # DO PCA on elts and store the basis obtained. if sign_peaks in ['negative', 'both']: nb_waveforms += gdata_neg.shape[0] if sign_peaks in ['positive', 'both']: nb_waveforms += gdata_pos.shape[0] nb_waveforms = all_gather_array( numpy.array([nb_waveforms], dtype=numpy.float32), comm, 0)[0] if comm.rank == 0: print_and_log([ "Found %d waveforms over %d requested" % (nb_waveforms, int(nb_elts * comm.size)) ], 'default', logger) if nb_waveforms == 0: print_and_log( ['No waveforms found! Are the data properly loaded??'], 'error', logger) if nb_waveforms == 0: sys.exit(0) if comm.rank == 0: res = {} pca = None pca_pos = None pca_neg = None warning_n_t = False if sign_peaks in ['negative', 'both']: res['times'] = times_neg res['electrodes'] = electrodes_neg res['extremum'] = extremum_neg if len(gdata_neg) > 0: pca = PCA(output_dim) if use_hanning: pca.fit(gdata_neg * hanning_filter) else: pca.fit(gdata_neg) res['proj'] = pca.components_.T.astype(numpy.float32) pca_neg = numpy.sum(pca.explained_variance_ratio_) else: res['proj'] = numpy.identity(int(output_dim), dtype=numpy.float32) res['rec'] = res['proj'].T res['waveform'] = numpy.median(gdata_neg, 0) # dispersion = numpy.std(gdata_neg, 0) / numpy.median(stds) # ratio = numpy.sum(dispersion > 1.1) / float(len(dispersion)) # if ratio < 0.25: # print_and_log(["Time window N_t in [detection] seems too large!"], 'info', logger) # warning_n_t = True # elif ratio == 1: # print_and_log(["Time window N_t in [detection] seems too small!"], 'info', logger) # warning_n_t = True idx = numpy.random.permutation(numpy.arange( gdata_neg.shape[0]))[:2500] res['waveforms'] = gdata_neg[idx, :] if sign_peaks in ['positive', 'both']: res['times_pos'] = times_pos res['electrodes_pos'] = electrodes_pos res['extremum_pos'] = extremum_pos if len(gdata_pos) > 0: pca = PCA(output_dim) if use_hanning: pca.fit(gdata_pos * hanning_filter) else: pca.fit(gdata_pos) res['proj_pos'] = pca.components_.T.astype(numpy.float32) pca_pos = numpy.sum(pca.explained_variance_ratio_) else: res['proj_pos'] = numpy.identity(int(output_dim), dtype=numpy.float32) res['rec_pos'] = res['proj_pos'].T res['waveform_pos'] = numpy.median(gdata_pos, 0) # dispersion = numpy.std(gdata_pos, 0) / numpy.median(stds) # ratio = numpy.sum(dispersion > 1.1) / float(len(dispersion)) # if ratio < 0.25 and not warning_n_t: # print_and_log(["Time window N_t in [detection] seems too large!"], 'info', logger) # elif ratio == 1 and not warning_n_t: # print_and_log(["Time window N_t in [detection] seems too small!"], 'info', logger) idx = numpy.random.permutation(numpy.arange( gdata_pos.shape[0]))[:2500] res['waveforms_pos'] = gdata_pos[idx, :] bfile = h5py.File(file_out_suff + '.basis.hdf5', 'r+', libver='earliest') io.write_datasets(bfile, list(res.keys()), res, compression=hdf5_compress) if sign_peaks == 'positive': print_and_log([ "A basis with %s dimensions has been built" % res['proj_pos'].shape[1] ], 'info', logger) elif sign_peaks == 'negative': print_and_log([ "A basis with %s dimensions has been built" % res['proj'].shape[1] ], 'info', logger) elif sign_peaks == 'both': print_and_log([ "Two basis with %s dimensions has been built" % res['proj'].shape[1] ], 'debug', logger) if pca_pos is not None: print_and_log([ "The percentage of variance explained is %s for positive spikes" % pca_pos ], 'debug', logger) if pca_neg is not None: print_and_log([ "The percentage of variance explained is %s for negative spikes" % pca_neg ], 'debug', logger) bfile.close() comm.Barrier() if matched_filter: if comm.rank == 0: print_and_log([ "Because of matched filters, need to recompute the thresholds..." ], 'default', logger) if do_spatial_whitening: spatial_whitening = io.load_data(params, 'spatial_whitening') if use_gpu: spatial_whitening = cmt.CUDAMatrix(spatial_whitening, copy_on_host=False) if do_temporal_whitening: temporal_whitening = io.load_data(params, 'temporal_whitening') if sign_peaks in ['negative', 'both']: waveform_neg = io.load_data(params, 'waveform')[::-1] waveform_neg /= (numpy.abs(numpy.sum(waveform_neg)) * len(waveform_neg)) if sign_peaks in ['positive', 'both']: waveform_pos = io.load_data(params, 'waveform-pos')[::-1] waveform_pos /= (numpy.abs(numpy.sum(waveform_pos)) * len(waveform_pos)) for gidx in [all_chunks[comm.rank]]: local_chunk, t_offset = data_file.get_data(gidx, chunk_size, nodes=nodes) local_shape = len(local_chunk) if do_spatial_whitening: if use_gpu: local_chunk = cmt.CUDAMatrix(local_chunk, copy_on_host=False) local_chunk = local_chunk.dot(spatial_whitening).asarray() else: local_chunk = numpy.dot(local_chunk, spatial_whitening) if do_temporal_whitening: local_chunk = scipy.ndimage.filters.convolve1d( local_chunk, temporal_whitening, axis=0, mode='constant') local_chunk /= thresholds if sign_peaks in ['negative', 'both']: tmp_chunk = scipy.ndimage.filters.convolve1d(local_chunk, waveform_neg, axis=0, mode='constant') thresholds = numpy.zeros(N_e, dtype=numpy.float32) for i in range(N_e): u = numpy.median(tmp_chunk[:, i], 0) thresholds[i] = numpy.median( numpy.abs(tmp_chunk[:, i] - u), 0) gdata = gather_array(thresholds, comm) if comm.rank == 0: gdata = gdata.reshape((comm.size, N_e)) thresholds = numpy.mean(gdata, 0) bfile = h5py.File(file_out_suff + '.basis.hdf5', 'r+', libver='earliest') io.write_datasets(bfile, ['matched_thresholds'], {'matched_thresholds': thresholds}, compression=hdf5_compress) bfile.close() comm.Barrier() if sign_peaks in ['positive', 'both']: tmp_chunk = scipy.ndimage.filters.convolve1d(local_chunk, waveform_pos, axis=0, mode='constant') thresholds = numpy.zeros(N_e, dtype=numpy.float32) for i in range(N_e): u = numpy.median(tmp_chunk[:, i], 0) thresholds[i] = numpy.median( numpy.abs(tmp_chunk[:, i] - u), 0) gdata = gather_array(thresholds, comm) if comm.rank == 0: gdata = gdata.reshape((comm.size, N_e)) thresholds = numpy.mean(gdata, 0) bfile = h5py.File(file_out_suff + '.basis.hdf5', 'r+', libver='earliest') io.write_datasets(bfile, ['matched_thresholds_pos'], {'matched_thresholds_pos': thresholds}, compression=hdf5_compress) bfile.close() comm.Barrier() data_file.close() if SHARED_MEMORY and ignore_dead_times: mpi_memory_3.Free()