Example #1
0
def preprocess_array(array_idx, output_dir, total_duration):
    """Run the signal-preprocessing chain for one electrode array.

    Band-pass filters (250-3000 Hz) the channel group ``array_idx`` stored
    under ``output_dir``, with common-reference removal, after estimating
    the signal noise on a short initial snippet.

    NOTE(review): ``DataIO`` and ``CatalogueConstructor`` are presumably
    imported from tridesclous elsewhere in this file — confirm.
    """
    dataio = DataIO(dirname=output_dir, ch_grp=array_idx)

    preprocessor_params = dict(
        highpass_freq=250.,
        lowpass_freq=3000.,
        smooth_size=0,
        common_ref_removal=True,
        chunksize=32768,
        lostfront_chunksize=0,
        signalpreprocessor_engine='numpy',
    )

    catalogue = CatalogueConstructor(dataio=dataio, chan_grp=array_idx)
    catalogue.set_preprocessor_params(**preprocessor_params)

    # TODO offer noise estimation duration somewhere
    # Cap the noise-estimation window at 10 s, the requested duration, and
    # just under the first segment's length (99% to stay inside the data).
    noise_duration = min(
        10., total_duration,
        dataio.get_segment_length(seg_num=0) / dataio.sample_rate * .99)
    # ~ print('noise_duration', noise_duration)

    start = time.perf_counter()
    catalogue.estimate_signals_noise(seg_num=0, duration=noise_duration)
    print('estimate_signals_noise', time.perf_counter() - start)

    start = time.perf_counter()
    catalogue.run_signalprocessor(duration=total_duration, detect_peak=False)
    print('run_signalprocessor', time.perf_counter() - start)
Example #2
0
def compute_array_catalogue(array_idx, preprocess_dir, subject, recording_date,
                            data_files, cluster_merge_threshold):
    """Build a per-channel spike-sorting catalogue for one electrode array.

    For every channel of array ``array_idx`` this runs the cataloguing
    chain on the already-preprocessed signals found under ``preprocess_dir``:
    noise estimation, signal processing with peak detection, waveform
    extraction and cleaning, PCA-by-channel feature projection,
    'sawchaincut' clustering, and finally ``make_catalogue_for_peeler``.
    Diagnostic figures (waveforms, noise, clusters) are saved in a
    ``figures`` subdirectory of the output folder.

    Parameters
    ----------
    array_idx : int
        Index of the electrode array to process.
    preprocess_dir : str
        Directory expected to contain
        ``channel_group_<array_idx>/segment_<n>/processed_signals.raw``.
    subject, recording_date : str
        Used to build the output path under
        ``cfg['single_unit_spike_sorting_dir']``.
    data_files : sequence of dict
        One entry per recording segment; each entry must provide a
        ``'duration'`` key (presumably seconds — confirm against caller).
    cluster_merge_threshold : float
        Currently unused: ``auto_merge_threshold`` is hard-coded to 2.0
        below, and the commented-out params show where this argument was
        previously wired in.

    NOTE(review): relies on module-level ``cfg``, ``DataIO``,
    ``CatalogueConstructor`` and the ``plot_*`` helpers defined elsewhere
    in this file — confirm they are in scope.
    """
    # Only proceed if preprocessed data exists for this array.
    if os.path.exists(
            os.path.join(preprocess_dir, 'channel_group_%d' % array_idx,
                         'catalogue_constructor')):
        output_dir = os.path.join(cfg['single_unit_spike_sorting_dir'],
                                  subject, recording_date,
                                  'array_%d' % array_idx)
        if os.path.exists(output_dir):
            # Remove stale output from a previous run before rebuilding.
            shutil.rmtree(output_dir)
        # Collect the processed-signal file of every segment
        # (want to use all data for clustering).
        data_file_names = []
        for seg in range(len(data_files)):
            data_file_names.append(
                os.path.join(preprocess_dir, 'channel_group_%d' % array_idx,
                             'segment_%d' % seg, 'processed_signals.raw'))

        # Register the raw float32 files as the data source; each channel
        # becomes its own single-channel group so it is catalogued alone.
        dataio = DataIO(dirname=output_dir)
        dataio.set_data_source(type='RawData',
                               filenames=data_file_names,
                               dtype='float32',
                               sample_rate=cfg['intan_srate'],
                               total_channel=cfg['n_channels_per_array'])
        # Intan amplifier scale factor (0.195 µV per bit).
        dataio.datasource.bit_to_microVolt = 0.195
        for ch_grp in range(cfg['n_channels_per_array']):
            dataio.add_one_channel_group(channels=[ch_grp], chan_grp=ch_grp)

        # Total recording duration across all segments.
        total_duration = np.sum([x['duration'] for x in data_files])

        figure_out_dir = os.path.join(output_dir, 'figures')
        os.mkdir(figure_out_dir)
        for ch_grp in range(cfg['n_channels_per_array']):
            print(ch_grp)
            cc = CatalogueConstructor(dataio=DataIO(dirname=output_dir,
                                                    ch_grp=ch_grp),
                                      chan_grp=ch_grp)

            # No extra filtering here: the signals were already band-pass
            # filtered in the preprocessing stage, hence highpass/lowpass
            # are None and common_ref_removal is False.
            fullchain_kargs = {
                'duration': total_duration,
                'preprocessor': {
                    'highpass_freq': None,
                    'lowpass_freq': None,
                    'smooth_size': 0,
                    'common_ref_removal': False,
                    'chunksize': 32768,
                    'lostfront_chunksize': 0,
                    'signalpreprocessor_engine': 'numpy',
                },
                'peak_detector': {
                    'peakdetector_engine': 'numpy',
                    'peak_sign': '-',
                    'relative_threshold': 2.,
                    'peak_span': 0.0002,
                },
                'noise_snippet': {
                    'nb_snippet': 300,
                },
                'extract_waveforms': {
                    'n_left': -20,
                    'n_right': 30,
                    'mode': 'all',
                    'nb_max': 2000000,
                    'align_waveform': False,
                },
                'clean_waveforms': {
                    'alien_value_threshold': 100.,
                },
            }
            feat_method = 'pca_by_channel'
            feat_kargs = {'n_components_by_channel': 5}
            clust_method = 'sawchaincut'
            clust_kargs = {
                'max_loop': 1000,
                'nb_min': 20,
                'break_nb_remain': 30,
                'kde_bandwith': 0.01,
                # NOTE(review): hard-coded 2.0 instead of the
                # cluster_merge_threshold argument — see commented-out
                # params below; confirm this is intentional.
                'auto_merge_threshold': 2.,
                'print_debug': False
                # 'max_loop': 1000,
                # 'nb_min': 20,
                # 'break_nb_remain': 30,
                # 'kde_bandwith': 0.01,
                # 'auto_merge_threshold': cluster_merge_threshold,
                # 'print_debug': False
            }

            # Preprocessor and peak-detector params are passed together.
            p = {}
            p.update(fullchain_kargs['preprocessor'])
            p.update(fullchain_kargs['peak_detector'])
            cc.set_preprocessor_params(**p)

            # Noise window: at most 10 s, the total duration, or just under
            # the first segment's length.
            noise_duration = min(
                10., fullchain_kargs['duration'],
                dataio.get_segment_length(seg_num=0) / dataio.sample_rate *
                .99)
            # ~ print('noise_duration', noise_duration)
            t1 = time.perf_counter()
            cc.estimate_signals_noise(seg_num=0, duration=noise_duration)
            t2 = time.perf_counter()
            print('estimate_signals_noise', t2 - t1)

            t1 = time.perf_counter()
            cc.run_signalprocessor(duration=fullchain_kargs['duration'])
            t2 = time.perf_counter()
            print('run_signalprocessor', t2 - t1)

            t1 = time.perf_counter()
            cc.extract_some_waveforms(**fullchain_kargs['extract_waveforms'])
            t2 = time.perf_counter()
            print('extract_some_waveforms', t2 - t1)

            # Save a figure of the raw extracted waveforms for inspection.
            fname = 'chan_%d_init_waveforms.png' % ch_grp
            fig = plot_waveforms(np.squeeze(cc.some_waveforms).T)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            t1 = time.perf_counter()
            # ~ duration = d['duration'] if d['limit_duration'] else None
            # ~ d['clean_waveforms']
            cc.clean_waveforms(**fullchain_kargs['clean_waveforms'])
            t2 = time.perf_counter()
            print('clean_waveforms', t2 - t1)

            # Figure of waveforms after alien-value cleaning.
            fname = 'chan_%d_clean_waveforms.png' % ch_grp
            fig = plot_waveforms(np.squeeze(cc.some_waveforms).T)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # ~ t1 = time.perf_counter()
            # ~ n_left, n_right = cc.find_good_limits(mad_threshold = 1.1,)
            # ~ t2 = time.perf_counter()
            # ~ print('find_good_limits', t2-t1)

            t1 = time.perf_counter()
            cc.extract_some_noise(**fullchain_kargs['noise_snippet'])
            t2 = time.perf_counter()
            print('extract_some_noise', t2 - t1)

            # Plot noise snippets.
            fname = 'chan_%d_noise.png' % ch_grp
            fig = plot_noise(cc)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # Project waveforms into feature space (PCA per channel).
            t1 = time.perf_counter()
            cc.extract_some_features(method=feat_method, **feat_kargs)
            t2 = time.perf_counter()
            print('project', t2 - t1)

            t1 = time.perf_counter()
            cc.find_clusters(method=clust_method, **clust_kargs)
            t2 = time.perf_counter()
            print('find_clusters', t2 - t1)

            # Remove empty clusters (n=0 trashes only zero-spike clusters).
            cc.trash_small_cluster(n=0)

            # Centroids may not have been computed by the clustering step.
            if cc.centroids_median is None:
                cc.compute_all_centroid()

            # Order clusters by waveform RMS.
            cc.order_clusters(by='waveforms_rms')

            # Figure of the initial per-cluster mean waveforms.
            fname = 'chan_%d_init_clusters.png' % ch_grp
            cluster_labels = cc.clusters['cluster_label']
            fig = plot_cluster_waveforms(cc, cluster_labels)
            fig.savefig(os.path.join(figure_out_dir, fname))
            fig.clf()
            plt.close()

            # Persist the catalogue so the Peeler stage can consume it.
            cc.make_catalogue_for_peeler()

            # Free per-channel memory before the next channel group.
            gc.collect()