Example #1
0
def test_can_read_waveform(path_to_data_folder):
    """Reading a waveform at time 0 from filtered.bin raises ValueError."""
    path_to_recordings = os.path.join(path_to_data_folder, 'filtered.bin')
    explorer_options = dict(spike_size=15,
                            dtype='float32',
                            n_channels=10,
                            data_order='channels',
                            loader='array')
    explorer = RecordingExplorer(path_to_recordings, **explorer_options)

    # NOTE(review): presumably time=0 leaves no room for the samples that
    # precede the spike, hence the error - confirm against RecordingExplorer
    with pytest.raises(ValueError):
        explorer.read_waveform(time=0)
Example #2
0
def test_can_read_waveform(path_to_standardized_data):
    """A waveform read at time 100 has 2 * spike_size + 1 entries."""
    half_window = 15
    expected_length = 2 * half_window + 1

    explorer = RecordingExplorer(path_to_standardized_data,
                                 spike_size=half_window,
                                 dtype='float32',
                                 n_channels=10,
                                 data_order='channels',
                                 loader='array')
    waveform = explorer.read_waveform(time=100)

    assert len(waveform) == expected_length
Example #3
0
def test_error_raised_if_cannot_read_complete_waveform(path_to_data_folder):
    """Check the length of the waveform read at time 100 from filtered.bin.

    NOTE(review): the test name talks about an error being raised, but the
    body only asserts on the length of a successfully read waveform -
    confirm which of the two is intended.
    """
    half_window = 15
    path_to_recordings = os.path.join(path_to_data_folder, 'filtered.bin')

    explorer = RecordingExplorer(path_to_recordings,
                                 spike_size=half_window,
                                 dtype='float32',
                                 n_channels=10,
                                 data_order='channels',
                                 loader='array')
    waveform = explorer.read_waveform(time=100)

    assert len(waveform) == 2 * half_window + 1
Example #4
0
def test_returns_empty_if_cannot_get_complete_wf(path_to_standarized_data):
    """Reading a waveform at time 0 yields a result of length zero."""
    explorer_options = {
        'spike_size': 15,
        'dtype': 'float32',
        'n_channels': 10,
        'data_order': 'channels',
        'loader': 'array',
    }
    explorer = RecordingExplorer(path_to_standarized_data, **explorer_options)

    waveform = explorer.read_waveform(time=0)

    # len() is used on purpose: the result may be an array, whose truth
    # value is not a reliable emptiness check
    assert len(waveform) == 0
Example #5
0
def test_can_read_waveform(path_to_standarized_data):
    """Reading a waveform at time 0 raises ValueError."""
    explorer_options = {
        'spike_size': 15,
        'dtype': 'float32',
        'n_channels': 10,
        'data_order': 'channels',
        'loader': 'array',
    }
    explorer = RecordingExplorer(path_to_standarized_data, **explorer_options)

    with pytest.raises(ValueError):
        explorer.read_waveform(time=0)
Example #6
0
def test_can_use_detect_and_triage_after_reload(path_to_tests,
                                                path_to_sample_pipeline_folder,
                                                tmp_folder,
                                                path_to_standarized_data):
    """Fit, reload and run the detector and triage networks end to end.

    The detector is fitted, reloaded from its checkpoint and used to predict
    on the recordings; the triage network is fitted, reloaded and fed the
    waveforms returned by the detector. The test passes if nothing raises.
    """
    yass.set_config(path.join(path_to_tests, 'config_nnet.yaml'))
    config = yass.read_config()

    # spike_train, chosen_templates, min_amplitude, n_spikes and filters are
    # not defined in this function - presumably module-level names
    training_sets = make_training_data(config, spike_train, chosen_templates,
                                       min_amplitude, n_spikes,
                                       path_to_sample_pipeline_folder)
    (x_detect, y_detect,
     _x_triage, _y_triage,
     _x_ae, _y_ae) = training_sets

    _, waveform_length, n_neighbors = x_detect.shape

    checkpoint = path.join(tmp_folder, 'detect-net.ckpt')
    threshold = 0.5

    # detector: fit, then replace it with the instance reloaded from disk
    detector = NeuralNetDetector(checkpoint,
                                 filters,
                                 waveform_length,
                                 n_neighbors,
                                 threshold=threshold,
                                 channel_index=config.channel_index,
                                 n_iter=10)
    detector.fit(x_detect, y_detect)
    detector = NeuralNetDetector.load(checkpoint,
                                      threshold=threshold,
                                      channel_index=config.channel_index)

    # triage: same fit/reload cycle
    # NOTE(review): triage reuses the detector checkpoint path and is fitted
    # on the detection data rather than the triage data - confirm intended
    triage = NeuralNetTriage(checkpoint,
                             filters,
                             waveform_length,
                             n_neighbors,
                             threshold=threshold,
                             n_iter=10)
    triage.fit(x_detect, y_detect)
    triage = NeuralNetTriage.load(checkpoint, threshold=threshold)

    explorer = RecordingExplorer(path_to_standarized_data)
    recordings = explorer.reader.data

    requested = ('spike_index', 'waveform', 'probability')
    prediction = detector.predict(recordings, output_names=requested)
    _spike_index, waveform, _proba = prediction

    triage.predict(waveform[:, :, :n_neighbors])
Example #7
0
def test_can_use_detector_after_fit(path_to_config,
                                    path_to_sample_pipeline_folder,
                                    make_tmp_folder,
                                    path_to_standardized_data):
    """Fit a detector on generated training data and run both predictors.

    The test passes if fitting, ``predict_recording`` on the recordings and
    ``predict`` on the training data all complete without raising.
    """
    yass.set_config(path_to_config, make_tmp_folder)
    config = yass.read_config()

    # parameters used to generate the training data
    min_amplitude, max_amplitude = 4, 60
    n_spikes_to_make = 100

    path_to_spike_train = path.join(path_to_sample_pipeline_folder,
                                    'spike_train.npy')
    spike_train = np.load(path_to_spike_train)
    chosen_templates = np.unique(spike_train[:, 1])

    templates = make.load_templates(path_to_sample_pipeline_folder,
                                    spike_train, config, chosen_templates)

    path_to_standardized = path.join(path_to_sample_pipeline_folder,
                                     'preprocess', 'standarized.bin')

    training_sets = make.training_data(config, templates,
                                       min_amplitude, max_amplitude,
                                       n_spikes_to_make,
                                       path_to_standardized)
    (x_detect, y_detect,
     _x_triage, _y_triage,
     _x_ae, _y_ae) = training_sets

    _, waveform_length, n_neighbors = x_detect.shape

    checkpoint = path.join(make_tmp_folder, 'detect-net.ckpt')
    detector = NeuralNetDetector(checkpoint, [8, 4],
                                 waveform_length, n_neighbors,
                                 threshold=0.5,
                                 channel_index=config.channel_index,
                                 n_iter=10)
    detector.fit(x_detect, y_detect)

    explorer = RecordingExplorer(path_to_standardized_data)
    recordings = explorer.reader.data

    requested = ('spike_index', 'waveform', 'probability')
    prediction = detector.predict_recording(recordings,
                                            output_names=requested)
    _spike_index, _waveform, _proba = prediction

    detector.predict(x_detect)
Example #8
0
def run(config,
        logger_level='INFO',
        clean=False,
        output_dir='tmp/',
        complete=False,
        set_zero_seed=False):
    """Run YASS built-in pipeline

    Runs preprocess, detect, cluster and deconvolve in sequence, saves the
    resulting spike train and templates in the output directory and logs a
    timing summary.

    Parameters
    ----------
    config: str or mapping (such as dictionary)
        Path to YASS configuration file or mapping object

    logger_level: str
        Logger level

    clean: bool, optional
        Delete CONFIG.data.root_folder/output_dir/ before running

    output_dir: str, optional
        Output directory (if relative, it makes it relative to
        CONFIG.data.root_folder) to store the output data, defaults to tmp/.
        If absolute, it leaves it as it is.

    complete: bool, optional
        Generates extra files (needed to generate phy files)

    set_zero_seed: bool, optional
        Accepted for backwards compatibility; it is not read anywhere in
        this function

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/:

    * ``config.yaml`` - Copy of the configuration file
    * ``metadata.yaml`` - Experiment metadata
    * ``filtered.bin`` - Filtered recordings (from preprocess)
    * ``filtered.yaml`` - Filtered recordings metadata (from preprocess)
    * ``standardized.bin`` - Standardized recordings (from preprocess)
    * ``standardized.yaml`` - Standardized recordings metadata (from
      preprocess)
    * ``whitening.npy`` - Whitening filter (from preprocess)


    Returns
    -------
    numpy.ndarray
        Spike train
    """

    # load yass configuration parameters
    set_config(config, output_dir)
    CONFIG = read_config()
    TMP_FOLDER = CONFIG.path_to_output_directory

    # remove tmp folder if needed
    if clean and os.path.exists(TMP_FOLDER):
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file and point the file handler to the output dir
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(
        TMP_FOLDER, 'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger
    logger = logging.getLogger(__name__)

    # print yass version
    logger.info('YASS version: %s', yass.__version__)

    # **********************************************
    # ******** SET ENVIRONMENT VARIABLES ***********
    # **********************************************
    os.environ["OPENBLAS_NUM_THREADS"] = "1"
    os.environ["MKL_NUM_THREADS"] = "1"
    os.environ["GIO_EXTRA_MODULES"] = "/usr/lib/x86_64-linux-gnu/gio/modules/"

    # **********************************************
    # ************** PREPROCESS ********************
    # **********************************************
    start = time.time()
    (standardized_path, standardized_params, whiten_filter) = preprocess.run(
        if_file_exists=CONFIG.preprocess.if_file_exists)
    time_preprocess = time.time() - start

    # **********************************************
    # ************** DETECT EVENTS *****************
    # **********************************************
    # Cat: This code now runs with open tensorflow calls
    start = time.time()
    spike_index_all = detect.run(standardized_path,
                                 standardized_params,
                                 whiten_filter,
                                 if_file_exists=CONFIG.detect.if_file_exists,
                                 save_results=CONFIG.detect.save_results)
    # detect.run only returns one spike index here; the clustering step
    # still takes a "clear" index as first argument, so None is passed
    spike_index_clear = None
    time_detect = time.time() - start

    # **********************************************
    # ***************** CLUSTER ********************
    # **********************************************
    start = time.time()
    path_to_spike_train_cluster = path.join(TMP_FOLDER,
                                            'spike_train_cluster.npy')

    # clustering is skipped when its output is already in the output dir
    if not os.path.exists(path_to_spike_train_cluster):
        cluster.run(spike_index_clear, spike_index_all)
    else:
        logger.info('Clustering completed previously...')

    # results are read back from disk, not taken from cluster.run's return
    spike_train_cluster = np.load(path_to_spike_train_cluster)
    templates_cluster = np.load(
        os.path.join(TMP_FOLDER, 'templates_cluster.npy'))

    time_cluster = time.time() - start

    # **********************************************
    # ************** DECONVOLUTION *****************
    # **********************************************
    start = time.time()
    spike_train, postdeconv_templates = deconvolve.run(spike_train_cluster,
                                                       templates_cluster)
    time_deconvolution = time.time() - start

    # save spike train
    path_to_spike_train = path.join(TMP_FOLDER,
                                    'spike_train_post_deconv_post_merge.npy')
    np.save(path_to_spike_train, spike_train)
    logger.info('Spike train saved in: %s', path_to_spike_train)

    # save template
    path_to_templates = path.join(TMP_FOLDER,
                                  'templates_post_deconv_post_merge.npy')
    np.save(path_to_templates, postdeconv_templates)
    logger.info('Templates saved in: %s', path_to_templates)

    # **********************************************
    # ************** POST PROCESSING ***************
    # **********************************************

    # save metadata in tmp
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logger.info('Saving metadata in %s', path_to_metadata)
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')

    if isinstance(config, Mapping):
        # config was passed as a mapping: serialize it
        with open(path_to_config_copy, 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
    else:
        # config was passed as a path: copy the file as it is
        shutil.copy2(config, path_to_config_copy)

    logger.info('Saving copy of config: %s in %s', config,
                path_to_config_copy)

    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARIZED_PATH = path.join(TMP_FOLDER, 'standardized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standardized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARIZED_PATH,
                                     spike_size=CONFIG.spike_size,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_order=PARAMS['data_order'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in %s...',
                    path_to_waveforms)

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in %s...',
                    path_to_main_channels)

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info('Saved all scores in %s...', path_to_waveforms_score)

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        # NOTE(review): this used to read an undefined name (`templates`);
        # the post-deconvolution templates are the ones matching the spike
        # train scored above - confirm their layout still requires .T
        templates_ = postdeconv_templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in %s...',
                    path_to_templates_main_c)

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in %s...',
                    path_to_templates_score)

    logger.info('Finished YASS execution. Timing summary:')
    timings = (('Preprocess', time_preprocess),
               ('Detection', time_detect),
               ('Clustering', time_cluster),
               ('Deconvolution', time_deconvolution))
    total = sum(elapsed for _, elapsed in timings)

    for stage, elapsed in timings:
        # guard against a zero total so the summary can never fail the run
        percentage = elapsed / total * 100 if total else 0.0
        logger.info('\t %s: %s (%.2f %%)', stage,
                    human_readable_time(elapsed), percentage)

    return spike_train
Example #9
0
def _run_pipeline(config,
                  output_file,
                  logger_level='INFO',
                  clean=True,
                  output_dir='tmp/',
                  complete=False):
    """
    Run the entire pipeline given a path to a config file
    and output path

    Parameters
    ----------
    config: str
        Path to the configuration file, it is also copied to the output
        folder as config.yaml

    output_file: str
        File name (inside the output folder) where the spike train is saved

    logger_level: str, optional
        Level set on the root logger

    clean: bool, optional
        Delete the output folder before running if it already exists

    output_dir: str, optional
        Output folder, joined to CONFIG.data.root_folder

    complete: bool, optional
        Also save waveforms, main channels and scores for every spike in the
        spike train and for the templates

    Returns
    -------
    None
        All results are written to the output folder
    """
    # load yass configuration parameters
    set_config(config)
    CONFIG = read_config()
    ROOT_FOLDER = CONFIG.data.root_folder
    TMP_FOLDER = path.join(ROOT_FOLDER, output_dir)

    # remove tmp folder if needed
    if clean and os.path.exists(TMP_FOLDER):
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file and point the file handler to the output dir
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(
        TMP_FOLDER, 'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger
    logger = logging.getLogger(__name__)

    # run preprocessor
    (score, spike_index_clear,
     spike_index_collision) = preprocess.run(output_directory=output_dir)

    # run processor (it returns an updated collision spike index)
    (spike_train_clear, templates,
     spike_index_collision) = process.run(score,
                                          spike_index_clear,
                                          spike_index_collision,
                                          output_directory=output_dir)

    # run deconvolution
    spike_train = deconvolute.run(spike_train_clear,
                                  templates,
                                  spike_index_collision,
                                  output_directory=output_dir)

    # save metadata in tmp
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logger.info('Saving metadata in {}'.format(path_to_metadata))
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')
    shutil.copy2(config, path_to_config_copy)
    logger.info('Saving copy of config: {} in {}'.format(
        config, path_to_config_copy))

    # save templates
    path_to_templates = path.join(TMP_FOLDER, 'templates.npy')
    logger.info('Saving templates in {}'.format(path_to_templates))
    np.save(path_to_templates, templates)

    path_to_spike_train = path.join(TMP_FOLDER, output_file)
    np.save(path_to_spike_train, spike_train)
    logger.info('Spike train saved in: {}'.format(path_to_spike_train))

    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARIZED_PATH = path.join(TMP_FOLDER, 'standarized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standarized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARIZED_PATH,
                                     spike_size=CONFIG.spikeSize,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_format=PARAMS['data_format'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in {}...'.format(
            path_to_waveforms))

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in {}...'.format(
            path_to_main_channels))

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neighChannels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info(
            'Saved all scores in {}...'.format(path_to_waveforms_score))

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        templates_ = templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in {}...'.format(
            path_to_templates_main_c))

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neighChannels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in {}...'.format(
            path_to_templates_score))
Example #10
0
def _neural_network_detection(standarized_path, standarized_params,
                              n_observations, output_directory):
    """Run neural network detection and autoencoder dimensionality reduction

    If ``score_clear.npy``, ``spike_index_clear.npy`` and
    ``spike_index_collision.npy`` all exist in the output folder they are
    loaded and returned; otherwise everything is computed and saved.

    Parameters
    ----------
    standarized_path: str
        Path to the standarized recordings, handed to BatchProcessor

    standarized_params: mapping
        Must provide the 'dtype', 'n_channels' and 'data_format' keys

    n_observations: int
        Forwarded to detect.remove_incomplete_waveforms (presumably the
        number of observations in the recordings - confirm)

    output_directory: str
        Folder, joined to CONFIG.data.root_folder, where results are
        looked up and saved

    Returns
    -------
    tuple
        (scores, clear spike index, collision spike index)
    """
    logger = logging.getLogger(__name__)

    CONFIG = read_config()
    OUTPUT_DTYPE = CONFIG.preprocess.dtype
    TMP_FOLDER = os.path.join(CONFIG.data.root_folder, output_directory)

    # detect spikes
    bp = BatchProcessor(standarized_path,
                        standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory,
                        buffer_size=0)

    # check if all scores, clear and collision spikes exist..
    path_to_score = os.path.join(TMP_FOLDER, 'score_clear.npy')
    path_to_spike_index_clear = os.path.join(TMP_FOLDER,
                                             'spike_index_clear.npy')
    path_to_spike_index_collision = os.path.join(TMP_FOLDER,
                                                 'spike_index_collision.npy')
    cached_files = (path_to_score, path_to_spike_index_clear,
                    path_to_spike_index_collision)

    if all(os.path.exists(cached) for cached in cached_files):
        logger.info('Loading "{}", "{}" and "{}"'.format(*cached_files))

        scores = np.load(path_to_score)
        clear = np.load(path_to_spike_index_clear)
        collision = np.load(path_to_spike_index_collision)

        return scores, clear, collision

    logger.info('One or more of "{}", "{}" or "{}" files were missing, '
                'computing...'.format(*cached_files))

    # run the neural network detector on standarized data
    autoencoder_filename = CONFIG.neural_network_autoencoder.filename
    detector_filename = CONFIG.neural_network_detector.filename
    res = bp.multi_channel_apply(
        neuralnetwork.nn_detection,
        mode='memory',
        cleanup_function=neuralnetwork.fix_indexes,
        neighbors=CONFIG.neighChannels,
        geom=CONFIG.geom,
        temporal_features=CONFIG.spikes.temporal_features,
        # FIXME: what is this?
        temporal_window=3,
        th_detect=CONFIG.neural_network_detector.threshold_spike,
        th_triage=CONFIG.neural_network_triage.threshold_collision,
        detector_filename=detector_filename,
        autoencoder_filename=autoencoder_filename,
        triage_filename=CONFIG.neural_network_triage.filename)

    # every element in res is indexed as (scores, clear, collision)

    # margin handed to remove_incomplete_waveforms for both spike indexes
    waveform_margin = CONFIG.spikeSize + CONFIG.templatesMaxShift

    # save clear spikes
    clear = np.concatenate([element[1] for element in res], axis=0)
    logger.info('Removing clear indexes outside the allowed range to '
                'draw a complete waveform...')
    clear, idx = detect.remove_incomplete_waveforms(
        clear, waveform_margin, n_observations)
    np.save(path_to_spike_index_clear, clear)
    logger.info('Saved spike index clear in {}...'.format(
        path_to_spike_index_clear))

    # save collided spikes
    collision = np.concatenate([element[2] for element in res], axis=0)
    logger.info('Removing collision indexes outside the allowed range to '
                'draw a complete waveform...')
    collision, _ = detect.remove_incomplete_waveforms(
        collision, waveform_margin, n_observations)
    np.save(path_to_spike_index_collision, collision)
    logger.info('Saved spike index collision in {}...'.format(
        path_to_spike_index_collision))

    if CONFIG.clustering.clustering_method == 'location':
        #######################
        # Waveform extraction #
        #######################

        # TODO: what should the behaviour be for spike indexes that are
        # when starting/ending the recordings and it is not possible to
        # draw a complete waveform?
        logger.info('Computing whitening matrix...')
        bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                            standarized_params['n_channels'],
                            standarized_params['data_format'],
                            CONFIG.resources.max_memory)
        batches = bp.multi_channel()
        # the whitening matrix is estimated from the first batch only
        first_batch, _, _ = next(batches)
        Q = whiten.matrix(first_batch, CONFIG.neighChannels,
                          CONFIG.spikeSize)

        path_to_whitening_matrix = os.path.join(TMP_FOLDER, 'whitening.npy')
        np.save(path_to_whitening_matrix, Q)
        logger.info('Saved whitening matrix in {}'.format(
            path_to_whitening_matrix))

        # apply whitening to every batch
        whitened_path, _ = bp.multi_channel_apply(
            np.matmul,
            mode='disk',
            output_path=os.path.join(TMP_FOLDER, 'whitened.bin'),
            if_file_exists='skip',
            cast_dtype=OUTPUT_DTYPE,
            b=Q)

        # load and dump waveforms from clear spikes
        path_to_waveforms_clear = os.path.join(TMP_FOLDER,
                                               'waveforms_clear.npy')

        if os.path.exists(path_to_waveforms_clear):
            logger.info(
                'Found clear waveforms in {}, loading them...'.format(
                    path_to_waveforms_clear))
            waveforms_clear = np.load(path_to_waveforms_clear)
        else:
            logger.info(
                'Did not find clear waveforms in {}, reading them from {}'.
                format(path_to_waveforms_clear, whitened_path))
            explorer = RecordingExplorer(whitened_path,
                                         spike_size=CONFIG.spikeSize)
            waveforms_clear = explorer.read_waveforms(clear[:, 0], 'all')
            np.save(path_to_waveforms_clear, waveforms_clear)
            logger.info('Saved waveform from clear spikes in: {}'.format(
                path_to_waveforms_clear))

        # second column of the clear spike index is used as main channel
        main_channel = clear[:, 1]

        # save rotation
        rotation = neuralnetwork.load_rotation(detector_filename,
                                               autoencoder_filename)
        path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
        logger.info("rotation_matrix_shape = {}".format(rotation.shape))
        np.save(path_to_rotation, rotation)
        logger.info(
            'Saved rotation matrix in {}...'.format(path_to_rotation))

        logger.info('Denoising...')
        path_to_denoised_waveforms = os.path.join(
            TMP_FOLDER, 'denoised_waveforms.npy')
        if os.path.exists(path_to_denoised_waveforms):
            logger.info(
                'Found denoised waveforms in {}, loading them...'.format(
                    path_to_denoised_waveforms))
            denoised_waveforms = np.load(path_to_denoised_waveforms)
        else:
            logger.info(
                'Did not find denoised waveforms in {}, evaluating them '
                'from {}'.format(path_to_denoised_waveforms,
                                 path_to_waveforms_clear))
            # waveforms_clear is already in memory at this point (either
            # loaded or just saved above), no need to read the file again
            denoised_waveforms = dim_red.denoise(waveforms_clear, rotation,
                                                 CONFIG)
            logger.info('Saving denoised waveforms to {}'.format(
                path_to_denoised_waveforms))
            np.save(path_to_denoised_waveforms, denoised_waveforms)

        isolated_index, x, y = get_isolated_spikes_and_locations(
            denoised_waveforms, main_channel, CONFIG)
        # standardize locations
        x = (x - np.mean(x)) / np.std(x)
        y = (y - np.mean(y)) / np.std(y)

        # clear spikes that are not isolated are moved to collision
        corrupted_index = np.logical_not(
            np.in1d(np.arange(clear.shape[0]), isolated_index))
        collision = np.concatenate([collision, clear[corrupted_index]],
                                   axis=0)
        clear = clear[isolated_index]
        waveforms_clear = waveforms_clear[isolated_index]

        #################################################
        # Dimensionality reduction (Isolated Waveforms) #
        #################################################

        scores = dim_red.main_channel_scores(waveforms_clear, rotation,
                                             clear, CONFIG)
        # NOTE(review): the mean is taken along axis 0 but the standard
        # deviation over the whole array - confirm this is intended
        scores = (scores - np.mean(scores, axis=0)) / np.std(scores)
        # stack x, y and the scores along axis 1
        scores = np.concatenate([
            x[:, np.newaxis, np.newaxis],
            y[:, np.newaxis, np.newaxis],
            scores[:, :, np.newaxis],
        ], axis=1)

    else:
        # collect scores
        scores = np.concatenate([element[0] for element in res], axis=0)

        logger.info(
            'Removing scores for indexes outside the allowed range to '
            'draw a complete waveform...')
        # idx comes from filtering the clear spike index above
        scores = scores[idx]

        # compute Q for whitening
        logger.info('Computing whitening matrix...')
        bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                            standarized_params['n_channels'],
                            standarized_params['data_format'],
                            CONFIG.resources.max_memory)
        batches = bp.multi_channel()
        # the whitening matrix is estimated from the first batch only
        first_batch, _, _ = next(batches)
        Q = whiten.matrix_localized(first_batch, CONFIG.neighChannels,
                                    CONFIG.geom, CONFIG.spikeSize)

        path_to_whitening_matrix = os.path.join(TMP_FOLDER, 'whitening.npy')
        np.save(path_to_whitening_matrix, Q)
        logger.info('Saved whitening matrix in {}'.format(
            path_to_whitening_matrix))

        scores = whiten.score(scores, clear[:, 1], Q)

        # save rotation
        rotation = neuralnetwork.load_rotation(detector_filename,
                                               autoencoder_filename)
        path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
        np.save(path_to_rotation, rotation)
        logger.info(
            'Saved rotation matrix in {}...'.format(path_to_rotation))

    # scores are saved once here, for both clustering methods
    np.save(path_to_score, scores)
    logger.info('Saved spike scores in {}...'.format(path_to_score))

    return scores, clear, collision
Example #11
0
def _threshold_detection(standarized_path, standarized_params, n_observations,
                         output_directory):
    """Run threshold detector and dimensionality reduction using PCA

    Parameters
    ----------
    standarized_path: str
        Path to the standarized recordings, it is handed to BatchProcessor
        and RecordingExplorer

    standarized_params: dict
        Parameters of the standarized recordings, the 'dtype', 'n_channels'
        and 'data_format' keys are read

    n_observations: int
        Passed to detect.remove_incomplete_waveforms to drop the spikes
        for which a complete waveform cannot be drawn

    output_directory: str
        Folder (relative to CONFIG.data.root_folder) where intermediate
        results are loaded from and saved to

    Returns
    -------
    scores
        Scores for the clear spikes. When CONFIG.clustering.clustering_method
        is 'location' they are the standarized x, y locations concatenated
        with the main channel scores, otherwise the output of dim_red.score

    spike_index_clear
        Clear spikes (restricted to the isolated ones when clustering by
        location)

    spike_index_collision
        Collided spikes, empty (0, 2) array unless clustering by location,
        in which case the non-isolated clear spikes are appended to it

    Raises
    ------
    ValueError
        If an existing spike_index_collision.npy file is not empty

    Notes
    -----
    The following files are written in the output folder (files that
    already exist are loaded instead of recomputed where noted in the code):
    ``whitening.npy``, ``whitened.bin``, ``spike_index_clear.npy``,
    ``spike_index_collision.npy``, ``waveforms_clear.npy``, ``rotation.npy``,
    ``denoised_waveforms.npy`` (only when clustering by location) and
    ``score_clear.npy``
    """
    logger = logging.getLogger(__name__)

    CONFIG = read_config()
    OUTPUT_DTYPE = CONFIG.preprocess.dtype
    TMP_FOLDER = os.path.join(CONFIG.data.root_folder, output_directory)

    ###############
    # Whiten data #
    ###############

    # compute Q for whitening, only the first batch is used
    logger.info('Computing whitening matrix...')
    bp = BatchProcessor(standarized_path, standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory)
    batches = bp.multi_channel()
    first_batch, _, _ = next(batches)
    Q = whiten.matrix(first_batch, CONFIG.neighChannels, CONFIG.spikeSize)

    path_to_whitening_matrix = os.path.join(TMP_FOLDER, 'whitening.npy')
    np.save(path_to_whitening_matrix, Q)
    logger.info(
        'Saved whitening matrix in {}'.format(path_to_whitening_matrix))

    # apply whitening to every batch, the call is kept for the file it
    # writes (whitened.bin), its return value is not needed here
    bp.multi_channel_apply(
        np.matmul,
        mode='disk',
        output_path=os.path.join(TMP_FOLDER, 'whitened.bin'),
        if_file_exists='skip',
        cast_dtype=OUTPUT_DTYPE,
        b=Q)

    ###################
    # Spike detection #
    ###################

    path_to_spike_index_clear = os.path.join(TMP_FOLDER,
                                             'spike_index_clear.npy')

    # new batch processor without buffer, used for detection and PCA
    bp = BatchProcessor(standarized_path,
                        standarized_params['dtype'],
                        standarized_params['n_channels'],
                        standarized_params['data_format'],
                        CONFIG.resources.max_memory,
                        buffer_size=0)

    # clear spikes
    if os.path.exists(path_to_spike_index_clear):
        # if it exists, load it...
        logger.info('Found file in {}, loading it...'.format(
            path_to_spike_index_clear))
        spike_index_clear = np.load(path_to_spike_index_clear)
    else:
        # if it doesn't, detect spikes...
        logger.info('Did not find file in {}, finding spikes using threshold'
                    ' detector...'.format(path_to_spike_index_clear))

        # apply threshold detector on standarized data
        spikes = bp.multi_channel_apply(detect.threshold,
                                        mode='memory',
                                        cleanup_function=detect.fix_indexes,
                                        neighbors=CONFIG.neighChannels,
                                        spike_size=CONFIG.spikeSize,
                                        std_factor=CONFIG.stdFactor)
        spike_index_clear = np.vstack(spikes)

        logger.info('Removing clear indexes outside the allowed range to '
                    'draw a complete waveform...')
        spike_index_clear, _ = (detect.remove_incomplete_waveforms(
            spike_index_clear, CONFIG.spikeSize + CONFIG.templatesMaxShift,
            n_observations))

        logger.info('Saving spikes in {}...'.format(path_to_spike_index_clear))
        np.save(path_to_spike_index_clear, spike_index_clear)

    path_to_spike_index_collision = os.path.join(TMP_FOLDER,
                                                 'spike_index_collision.npy')

    # collided spikes
    if os.path.exists(path_to_spike_index_collision):
        # if it exists, load it...
        logger.info('Found collided spikes in {}, loading them...'.format(
            path_to_spike_index_collision))
        spike_index_collision = np.load(path_to_spike_index_collision)

        # a non-empty file cannot come from the threshold detector
        if spike_index_collision.shape[0] != 0:
            raise ValueError('Found non-empty collision spike index in {}, '
                             'but threshold detector is selected, collision '
                             'detection is not implemented for threshold '
                             'detector so array must have dimensions (0, 2) '
                             'but had ({}, {})'.format(
                                 path_to_spike_index_collision,
                                 *spike_index_collision.shape))
    else:
        # triage is not implemented on threshold detector, return empty array
        logger.info('Creating empty array for'
                    ' collided spikes (collision detection is not implemented'
                    ' with threshold detector). Saving them in {}'.format(
                        path_to_spike_index_collision))
        spike_index_collision = np.zeros((0, 2), 'int32')
        np.save(path_to_spike_index_collision, spike_index_collision)

    #######################
    # Waveform extraction #
    #######################

    # load and dump waveforms from clear spikes
    path_to_waveforms_clear = os.path.join(TMP_FOLDER, 'waveforms_clear.npy')

    if os.path.exists(path_to_waveforms_clear):
        logger.info('Found clear waveforms in {}, loading them...'.format(
            path_to_waveforms_clear))
        waveforms_clear = np.load(path_to_waveforms_clear)
    else:
        logger.info(
            'Did not find clear waveforms in {}, reading them from {}'.format(
                path_to_waveforms_clear, standarized_path))
        explorer = RecordingExplorer(standarized_path,
                                     spike_size=CONFIG.spikeSize)
        # first column of the spike index is used as the spike time
        waveforms_clear = explorer.read_waveforms(spike_index_clear[:, 0])
        np.save(path_to_waveforms_clear, waveforms_clear)
        logger.info('Saved waveform from clear spikes in: {}'.format(
            path_to_waveforms_clear))

    #########################
    # PCA - rotation matrix #
    #########################

    # compute per-batch sufficient statistics for PCA on standarized data
    logger.info('Computing PCA sufficient statistics...')
    stats = bp.multi_channel_apply(dim_red.suff_stat,
                                   mode='memory',
                                   spike_index=spike_index_clear,
                                   spike_size=CONFIG.spikeSize)

    # every batch returns a pair, add each component up across batches
    suff_stats = reduce(np.add, [e[0] for e in stats])
    spikes_per_channel = reduce(np.add, [e[1] for e in stats])

    # compute rotation matrix
    logger.info('Computing PCA projection matrix...')
    rotation = dim_red.project(suff_stats, spikes_per_channel,
                               CONFIG.spikes.temporal_features,
                               CONFIG.neighChannels)
    path_to_rotation = os.path.join(TMP_FOLDER, 'rotation.npy')
    np.save(path_to_rotation, rotation)
    logger.info('Saved rotation matrix in {}...'.format(path_to_rotation))

    # second column of the spike index, taken before any filtering below
    main_channel = spike_index_clear[:, 1]

    ###########################################
    # PCA - waveform dimensionality reduction #
    ###########################################
    if CONFIG.clustering.clustering_method == 'location':
        logger.info('Denoising...')
        path_to_denoised_waveforms = os.path.join(TMP_FOLDER,
                                                  'denoised_waveforms.npy')
        if os.path.exists(path_to_denoised_waveforms):
            logger.info(
                'Found denoised waveforms in {}, loading them...'.format(
                    path_to_denoised_waveforms))
            denoised_waveforms = np.load(path_to_denoised_waveforms)
        else:
            logger.info(
                'Did not find denoised waveforms in {}, evaluating them'
                ' from {}'.format(path_to_denoised_waveforms,
                                  path_to_waveforms_clear))
            # NOTE(review): waveforms_clear is already in memory at this
            # point, this reload looks redundant - confirm before removing
            waveforms_clear = np.load(path_to_waveforms_clear)
            denoised_waveforms = dim_red.denoise(waveforms_clear, rotation,
                                                 CONFIG)
            logger.info('Saving denoised waveforms to {}'.format(
                path_to_denoised_waveforms))
            np.save(path_to_denoised_waveforms, denoised_waveforms)

        isolated_index, x, y = get_isolated_spikes_and_locations(
            denoised_waveforms, main_channel, CONFIG)

        # standarize locations
        x = (x - np.mean(x)) / np.std(x)
        y = (y - np.mean(y)) / np.std(y)

        # clear spikes that are not isolated are moved to the collision
        # spike index
        corrupted_index = np.logical_not(
            np.in1d(np.arange(spike_index_clear.shape[0]), isolated_index))
        spike_index_collision = np.concatenate(
            [spike_index_collision, spike_index_clear[corrupted_index]],
            axis=0)
        spike_index_clear = spike_index_clear[isolated_index]
        waveforms_clear = waveforms_clear[isolated_index]

        #################################################
        # Dimensionality reduction (Isolated Waveforms) #
        #################################################

        scores = dim_red.main_channel_scores(waveforms_clear, rotation,
                                             spike_index_clear, CONFIG)
        # NOTE(review): the mean is taken per column (axis=0) but the
        # standard deviation over the whole array - confirm this is intended
        scores = (scores - np.mean(scores, axis=0)) / np.std(scores)

        # stack locations and scores along the second axis
        scores = np.concatenate([
            x[:, np.newaxis, np.newaxis],
            y[:, np.newaxis, np.newaxis],
            scores[:, :, np.newaxis],
        ], axis=1)
    else:
        logger.info('Reducing spikes dimensionality with PCA matrix...')
        scores = dim_red.score(waveforms_clear, rotation, main_channel,
                               CONFIG.neighChannels, CONFIG.geom)

    # save scores
    path_to_score = os.path.join(TMP_FOLDER, 'score_clear.npy')
    np.save(path_to_score, scores)
    logger.info('Saved spike scores in {}...'.format(path_to_score))

    return scores, spike_index_clear, spike_index_collision
Example #12
0
def run(config, logger_level='INFO', clean=False, output_dir='tmp/',
        complete=False, set_zero_seed=False):
    """Run YASS built-in pipeline

    Runs preprocess, detect, cluster, get_templates and deconvolute in
    sequence, saves metadata and a copy of the configuration and logs a
    timing summary

    Parameters
    ----------
    config: str or mapping (such as dictionary)
        Path to YASS configuration file or mapping object

    logger_level: str
        Logger level

    clean: bool, optional
        Delete CONFIG.data.root_folder/output_dir/ before running

    output_dir: str, optional
        Output directory (relative to CONFIG.data.root_folder) to store the
        output data, defaults to tmp/

    complete: bool, optional
        Generates extra files (needed to generate phy files)

    set_zero_seed: bool, optional
        If True, sets the numpy random seed to zero before running the
        pipeline (a warning is logged), defaults to False

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/:

    * ``config.yaml`` - Copy of the configuration file
    * ``metadata.yaml`` - Experiment metadata
    * ``filtered.bin`` - Filtered recordings (from preprocess)
    * ``filtered.yaml`` - Filtered recordings metadata (from preprocess)
    * ``standarized.bin`` - Standarized recordings (from preprocess)
    * ``standarized.yaml`` - Standarized recordings metadata (from preprocess)
    * ``whitening.npy`` - Whitening filter (from preprocess)


    Returns
    -------
    numpy.ndarray
        Spike train
    """
    # load yass configuration parameters
    set_config(config)
    CONFIG = read_config()
    ROOT_FOLDER = CONFIG.data.root_folder
    TMP_FOLDER = path.join(ROOT_FOLDER, output_dir)

    # remove tmp folder if needed
    if os.path.exists(TMP_FOLDER) and clean:
        shutil.rmtree(TMP_FOLDER)

    # create TMP_FOLDER if needed
    if not os.path.exists(TMP_FOLDER):
        os.makedirs(TMP_FOLDER)

    # load logging config file
    logging_config = load_logging_config_file()
    logging_config['handlers']['file']['filename'] = path.join(TMP_FOLDER,
                                                               'yass.log')
    logging_config['root']['level'] = logger_level

    # configure logging
    logging.config.dictConfig(logging_config)

    # instantiate logger and start coloredlogs
    logger = logging.getLogger(__name__)
    coloredlogs.install(logger=logger)

    if set_zero_seed:
        logger.warning('Set numpy seed to zero')
        np.random.seed(0)

    # print yass version
    logger.info('YASS version: %s', yass.__version__)

    # preprocess
    start = time.time()
    (standarized_path,
     standarized_params,
     channel_index,
     whiten_filter) = (preprocess
                       .run(output_directory=output_dir,
                            if_file_exists=CONFIG.preprocess.if_file_exists))
    time_preprocess = time.time() - start

    # detect
    start = time.time()
    (score, spike_index_clear,
     spike_index_all) = detect.run(standarized_path,
                                   standarized_params,
                                   channel_index,
                                   whiten_filter,
                                   output_directory=output_dir,
                                   if_file_exists=CONFIG.detect.if_file_exists,
                                   save_results=CONFIG.detect.save_results)
    time_detect = time.time() - start

    # cluster
    start = time.time()
    spike_train_clear, tmp_loc, vbParam = cluster.run(
        score,
        spike_index_clear,
        output_directory=output_dir,
        if_file_exists=CONFIG.cluster.if_file_exists,
        save_results=CONFIG.cluster.save_results)
    time_cluster = time.time() - start

    # get templates
    start = time.time()
    (templates,
     spike_train_clear_after_templates,
     groups,
     idx_good_templates) = get_templates.run(
        spike_train_clear, tmp_loc,
        output_directory=output_dir,
        if_file_exists=CONFIG.templates.if_file_exists,
        save_results=CONFIG.templates.save_results)
    time_templates = time.time() - start

    # run deconvolution
    start = time.time()
    spike_train = deconvolute.run(spike_index_all, templates,
                                  output_directory=output_dir)
    time_deconvolution = time.time() - start

    # save metadata in tmp, logged through the module logger like every
    # other message in this function
    path_to_metadata = path.join(TMP_FOLDER, 'metadata.yaml')
    logger.info('Saving metadata in {}'.format(path_to_metadata))
    save_metadata(path_to_metadata)

    # save config.yaml copy in tmp/
    path_to_config_copy = path.join(TMP_FOLDER, 'config.yaml')

    if isinstance(config, Mapping):
        # config was passed as a mapping, dump it as YAML
        with open(path_to_config_copy, 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
    else:
        # config was passed as a path, copy the file
        shutil.copy2(config, path_to_config_copy)

    logger.info('Saving copy of config: {} in {}'.format(config,
                                                         path_to_config_copy))

    # TODO: complete flag saves other files needed for integrating phy
    # with yass, the integration hasn't been completed yet
    # this part loads waveforms for all spikes in the spike train and scores
    # them, this data is needed to later generate phy files
    if complete:
        STANDARIZED_PATH = path.join(TMP_FOLDER, 'standarized.bin')
        PARAMS = load_yaml(path.join(TMP_FOLDER, 'standarized.yaml'))

        # load waveforms for all spikes in the spike train
        logger.info('Loading waveforms from all spikes in the spike train...')
        explorer = RecordingExplorer(STANDARIZED_PATH,
                                     spike_size=CONFIG.spike_size,
                                     dtype=PARAMS['dtype'],
                                     n_channels=PARAMS['n_channels'],
                                     data_order=PARAMS['data_order'])
        waveforms = explorer.read_waveforms(spike_train[:, 0])

        path_to_waveforms = path.join(TMP_FOLDER, 'spike_train_waveforms.npy')
        np.save(path_to_waveforms, waveforms)
        logger.info('Saved all waveforms from the spike train in {}...'
                    .format(path_to_waveforms))

        # score all waveforms
        logger.info('Scoring waveforms from all spikes in the spike train...')
        path_to_rotation = path.join(TMP_FOLDER, 'rotation.npy')
        rotation = np.load(path_to_rotation)

        main_channels = explorer.main_channel_for_waveforms(waveforms)
        path_to_main_channels = path.join(TMP_FOLDER,
                                          'waveforms_main_channel.npy')
        np.save(path_to_main_channels, main_channels)
        logger.info('Saved all waveforms main channels in {}...'
                    .format(path_to_main_channels))

        waveforms_score = dim_red.score(waveforms, rotation, main_channels,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_waveforms_score = path.join(TMP_FOLDER, 'waveforms_score.npy')
        np.save(path_to_waveforms_score, waveforms_score)
        logger.info('Saved all scores in {}...'
                    .format(path_to_waveforms_score))

        # score templates
        # TODO: templates should be returned in the right shape to avoid .T
        templates_ = templates.T
        main_channels_tmpls = explorer.main_channel_for_waveforms(templates_)
        path_to_templates_main_c = path.join(TMP_FOLDER,
                                             'templates_main_channel.npy')
        np.save(path_to_templates_main_c, main_channels_tmpls)
        logger.info('Saved all templates main channels in {}...'
                    .format(path_to_templates_main_c))

        templates_score = dim_red.score(templates_, rotation,
                                        main_channels_tmpls,
                                        CONFIG.neigh_channels, CONFIG.geom)
        path_to_templates_score = path.join(TMP_FOLDER, 'templates_score.npy')
        np.save(path_to_templates_score, templates_score)
        logger.info('Saved all templates scores in {}...'
                    .format(path_to_templates_score))

    # report how long every stage took and its share of the total
    logger.info('Finished YASS execution. Timing summary:')
    total = (time_preprocess + time_detect + time_cluster + time_templates
             + time_deconvolution)
    logger.info('\t Preprocess: %s (%.2f %%)',
                human_readable_time(time_preprocess),
                time_preprocess/total*100)
    logger.info('\t Detection: %s (%.2f %%)',
                human_readable_time(time_detect),
                time_detect/total*100)
    logger.info('\t Clustering: %s (%.2f %%)',
                human_readable_time(time_cluster),
                time_cluster/total*100)
    logger.info('\t Templates: %s (%.2f %%)',
                human_readable_time(time_templates),
                time_templates/total*100)
    logger.info('\t Deconvolution: %s (%.2f %%)',
                human_readable_time(time_deconvolution),
                time_deconvolution/total*100)

    return spike_train
Example #13
0
"""
Checking results from threshold detector
"""
import matplotlib.pyplot as plt

from yass.explore import SpikeTrainExplorer, RecordingExplorer

# hard-coded locations of the pipeline output to inspect
STANDARIZED_RECORDINGS = '/Users/Edu/data/yass/tmp/standarized.bin'
SPIKE_TRAIN = '/Users/Edu/data/yass/tmp/spike_train.npy'

# the spike train explorer is built on top of a recording explorer
recordings = RecordingExplorer(STANDARIZED_RECORDINGS, spike_size=15)
spike_train_explorer = SpikeTrainExplorer(SPIKE_TRAIN, recordings)

# plot the templates for group ids 0 to 9 and display the figure
first_groups = range(10)
spike_train_explorer.plot_templates(group_ids=first_groups)
plt.show()
Example #14
0
# paths to the whitened recordings, the geometry file and the spike train,
# all of them located under ~/data/yass
ROOT = path.join(path.expanduser('~'), 'data/yass')
path_to_data = path.join(ROOT, 'tmp/whitened.bin')
path_to_geom = path.join(ROOT, 'ej49_geometry1.txt')
path_to_spike_train = path.join(ROOT, 'middle_spike_train.npy')

# In[ ]:

# initialize the recordings explorer: this is another helper class
# to explore recordings and it is needed to initialize the spike
# train explorer
rce = RecordingExplorer(path_to_data,
                        path_to_geom,
                        spike_size=15,
                        neighbor_radius=70,
                        dtype='float64',
                        n_channels=49,
                        data_format='long')

# In[ ]:

# we now initialize the spike train explorer from the spike train file
# and the recordings explorer created above
spe = SpikeTrainExplorer(path_to_spike_train, rce)

# In[ ]:

# we can take a look at the spike train: the first column is the spike
# index, the second column is the spike ID
spe.spike_train