Example 1
def run(standardized_path,
        standardized_params,
        whiten_filter,
        output_directory='tmp/',
        if_file_exists='skip',
        save_results=False):
    """Execute detect step

    Cat: THIS CODE KEEPS TENSORFLOW OPEN FOR DETECTION AND THEN COMPUTES
         corrections post-detection

    Parameters
    ----------
    standardized_path: str or pathlib.Path
        Path to standardized data binary file

    standardized_params: dict, str or pathlib.Path
        Dictionary with standardized data parameters or path to a yaml file

    whiten_filter: numpy.ndarray, str or pathlib.Path
        Whiten matrix or path to a npy file

    output_directory: str, optional
      Location to store partial results, relative to CONFIG.data.root_folder,
      defaults to tmp/

    if_file_exists: str, optional
      One of 'overwrite', 'abort', 'skip'. Controls the behavior for every
      generated file. If 'overwrite' it replaces the files if any exist,
      if 'abort' it raises a ValueError exception if any file exists,
      if 'skip' it skips the operation if any file exists

    save_results: bool, optional
        Whether to save results to disk, defaults to False

    Returns
    -------
    clear_scores: numpy.ndarray (n_spikes, n_features, n_channels)
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels

    spike_index_clear: numpy.ndarray (n_clear_spikes, 2)
        2D array with indexes for clear spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    spike_index_all: numpy.ndarray (n_collided_spikes, 2)
        2D array with indexes for all spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/ (if save_results is
    True):

    * ``spike_index_clear.npy`` - Same as spike_index_clear returned
    * ``spike_index_all.npy`` - Same as spike_index_all returned
    * ``rotation.npy`` - Rotation matrix for dimensionality reduction
    * ``scores_clear.npy`` - Scores for clear spikes

    Threshold detector runs on CPU, neural network detector runs CPU and GPU,
    depending on how tensorflow is configured.

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/detect.py
    """
    CONFIG = read_config()

    # load files in case they are strings or Path objects
    standardized_params = file_loader(standardized_params)
    whiten_filter = file_loader(whiten_filter)

    # run detection
    if CONFIG.detect.method == 'threshold':
        return run_threshold(standardized_path, standardized_params,
                             whiten_filter, output_directory, if_file_exists,
                             save_results)
    elif CONFIG.detect.method == 'nn':
        return run_neural_network(standardized_path, standardized_params,
                                  whiten_filter, output_directory,
                                  if_file_exists, save_results)
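
A minimal usage sketch for this detect entry point (the file names below are hypothetical and assume the preprocess step already produced a standardized recording, its parameter yaml and the whitening filter; paths to npy/yaml files are accepted as documented above):

    scores, spike_index_clear, spike_index_all = run(
        'tmp/preprocess/standardized.bin',   # standardized recording (hypothetical path)
        'tmp/preprocess/standardized.yaml',  # standardized params (hypothetical path)
        'tmp/preprocess/whitening.npy',      # whitening filter (hypothetical path)
        output_directory='tmp/',
        if_file_exists='skip',
        save_results=True)
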
Example 2
def run(spike_index,
        templates,
        output_directory='tmp/',
        recordings_filename='standarized.bin'):
    """Deconvolute spikes

    Parameters
    ----------

    spike_index: numpy.ndarray (n_data, 2), str or pathlib.Path
        A 2D array for all potential spikes whose first column indicates the
        spike time and the second column the principal channels. Or path to
        npy file

    templates: numpy.ndarray (n_channels, waveform_size, n_templates), str
    or pathlib.Path
        A 3D array with the templates. Or path to npy file

    output_directory: str, optional
        Output directory (relative to CONFIG.data.root_folder) used to load
        the recordings to generate templates, defaults to tmp/

    recordings_filename: str, optional
        Recordings filename (relative to CONFIG.data.root_folder/
        output_directory) used to draw the waveforms from, defaults to
        standarized.bin

    Returns
    -------
    spike_train: numpy.ndarray (n_clear_spikes, 2)
        A 2D array with the spike train, first column indicates the spike
        time and the second column the neuron ID

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/deconvolute.py
    """

    spike_index = file_loader(spike_index)
    templates = file_loader(templates)

    logger = logging.getLogger(__name__)

    # read config file
    CONFIG = read_config()

    # read recording
    recording_path = os.path.join(CONFIG.data.root_folder, output_directory,
                                  'preprocess', recordings_filename)
    bp = BatchProcessor(recording_path, buffer_size=templates.shape[1])

    logger.debug('Starting deconvolution. templates.shape: {}, '
                  'spike_index.shape: {}'.format(templates.shape,
                                                 spike_index.shape))

    # run deconvolution algorithm
    n_rf = int(CONFIG.deconvolution.n_rf * CONFIG.recordings.sampling_rate /
               1000)

    # run deconvolution batch-wise
    mc = bp.multi_channel_apply
    res = mc(deconvolve,
             mode='memory',
             cleanup_function=fix_indexes,
             pass_batch_info=True,
             templates=templates,
             spike_index=spike_index,
             spike_size=CONFIG.spike_size,
             n_explore=CONFIG.deconvolution.n_explore,
             n_rf=n_rf,
             upsample_factor=CONFIG.deconvolution.upsample_factor,
             threshold_a=CONFIG.deconvolution.threshold_a,
             threshold_dd=CONFIG.deconvolution.threshold_dd)

    spike_train = np.concatenate([element for element in res], axis=0)

    logger.debug('spike_train.shape: {}'.format(spike_train.shape))

    # sort spikes by time
    spike_train = spike_train[np.argsort(spike_train[:, 0])]

    # save spike train
    path_to_spike_train = os.path.join(CONFIG.data.root_folder,
                                       output_directory, 'spike_train.npy')
    file_saver(spike_train, path_to_spike_train)
    logger.info('Spike train saved in %s', path_to_spike_train)

    return spike_train
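
A usage sketch for this deconvolution step (the npy files are hypothetical placeholders for outputs of earlier pipeline steps):

    spike_train = run(
        'tmp/spike_index_all.npy',  # (n_spikes, 2): spike time, main channel (hypothetical path)
        'tmp/templates.npy',        # (n_channels, waveform_size, n_templates) (hypothetical path)
        output_directory='tmp/',
        recordings_filename='standarized.bin')
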
Example 3
def run(scores,
        spike_index,
        output_directory='tmp/',
        if_file_exists='skip',
        save_results=False):
    """Spike clustering

    Parameters
    ----------
    scores: numpy.ndarray (n_spikes, n_features, n_channels), str or Path
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels. Or path to a npy file

    spike_index: numpy.ndarray (n_clear_spikes, 2), str or Path
        2D array with indexes for spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum). Or path to an npy file

    output_directory: str, optional
        Location to store/look for the generated spike train, relative to
        CONFIG.data.root_folder

    if_file_exists: str, optional
      One of 'overwrite', 'abort', 'skip'. Controls the behavior for the
      spike_train_cluster.npy file. If 'overwrite' it replaces the file if it
      exists, if 'abort' it raises a ValueError exception if it exists,
      if 'skip' it skips the operation if the file exists (and returns the
      stored file)

    save_results: bool, optional
        Whether to save spike train to disk
        (in CONFIG.data.root_folder/relative_to/spike_train_cluster.npy),
        defaults to False

    Returns
    -------
    spike_train: (TODO add documentation)

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/cluster.py

    """
    # load files in case they are strings or Path objects
    scores = file_loader(scores)
    spike_index = file_loader(spike_index)

    CONFIG = read_config()

    startTime = datetime.datetime.now()

    Time = {'t': 0, 'c': 0, 'm': 0, 's': 0, 'e': 0}

    logger = logging.getLogger(__name__)

    scores_all = np.copy(scores)
    spike_index_all = np.copy(spike_index)

    ##########
    # Triage #
    ##########

    _b = datetime.datetime.now()
    logger.info("Randomly subsampling...")
    scores, spike_index = random_subsample(scores, spike_index,
                                           CONFIG.cluster.max_n_spikes)
    logger.info("Triaging...")
    scores, spike_index = triage(scores, spike_index,
                                 CONFIG.cluster.triage.nearest_neighbors,
                                 CONFIG.cluster.triage.percent,
                                 CONFIG.cluster.method == 'location')
    Time['t'] += (datetime.datetime.now() - _b).total_seconds()

    if CONFIG.cluster.method == 'location':
        ##############
        # Clustering #
        ##############
        _b = datetime.datetime.now()
        logger.info("Clustering...")
        vbParam, tmp_loc, scores, spike_index = run_cluster_location(
            scores, spike_index, CONFIG.cluster.min_spikes, CONFIG)
        Time['s'] += (datetime.datetime.now() - _b).total_seconds()

    else:
        ###########
        # Coreset #
        ###########
        _b = datetime.datetime.now()
        logger.info("Coresetting...")
        groups = coreset(scores, spike_index, CONFIG.cluster.coreset.clusters,
                         CONFIG.cluster.coreset.threshold)
        Time['c'] += (datetime.datetime.now() - _b).total_seconds()

        ###########
        # Masking #
        ###########
        _b = datetime.datetime.now()
        logger.info("Masking...")
        masks = getmask(scores, spike_index, groups,
                        CONFIG.cluster.masking_threshold)
        Time['m'] += (datetime.datetime.now() - _b).total_seconds()

        ##############
        # Clustering #
        ##############
        _b = datetime.datetime.now()
        logger.info("Clustering...")
        vbParam, tmp_loc, scores, spike_index = run_cluster(
            scores, masks, groups, spike_index, CONFIG.cluster.min_spikes,
            CONFIG)
        Time['s'] += (datetime.datetime.now() - _b).total_seconds()

    vbParam.rhat = calculate_sparse_rhat(vbParam, tmp_loc, scores_all,
                                         spike_index_all,
                                         CONFIG.neigh_channels)
    idx_keep = get_core_data(vbParam, scores_all, np.inf, 2)
    spike_train = vbParam.rhat[idx_keep]
    spike_train[:, 0] = spike_index_all[spike_train[:, 0].astype('int32'), 0]

    # report timing
    currentTime = datetime.datetime.now()
    logger.info("Mainprocess done in {0} seconds.".format(
        (currentTime - startTime).seconds))
    logger.info("\ttriage:\t{0} seconds".format(Time['t']))
    logger.info("\tcoreset:\t{0} seconds".format(Time['c']))
    logger.info("\tmasking:\t{0} seconds".format(Time['m']))
    logger.info("\tclustering:\t{0} seconds".format(Time['s']))

    return spike_train, tmp_loc, vbParam
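
A usage sketch for the clustering step, chained to the detect step (scores and spike_index_clear stand for the arrays returned by detection; npy paths would work as well):

    spike_train, tmp_loc, vbParam = run(
        scores,             # clear-spike scores from the detect step
        spike_index_clear,  # clear-spike index from the detect step
        output_directory='tmp/',
        if_file_exists='skip',
        save_results=True)
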
Example 4
def run(spike_train,
        tmp_loc,
        output_directory='tmp/',
        recordings_filename='standarized.bin',
        if_file_exists='skip',
        save_results=True):
    """Compute templates

    Parameters
    ----------
    spike_train: numpy.ndarray, str or pathlib.Path
        Spike train from cluster step or path to npy file

    tmp_loc: np.array(n_templates)
        Channel at which the clustering was done for each template

    output_directory: str, optional
        Output directory (relative to CONFIG.data.root_folder) used to load
        the recordings to generate templates, defaults to tmp/

    recordings_filename: str, optional
        Recordings filename (relative to CONFIG.data.root_folder/
        output_directory) used to generate the templates, defaults to
        standarized.bin

    if_file_exists: str, optional
      One of 'overwrite', 'abort', 'skip'. Controls the behavior for the
      templates.npy file. If 'overwrite' it replaces the file if it exists,
      if 'abort' it raises a ValueError exception if it exists,
      if 'skip' it skips the operation if the file exists (and returns the
      stored file)

    save_results: bool, optional
        Whether to save templates to disk
        (in CONFIG.data.root_folder/relative_to/templates.npy),
        defaults to True

    Returns
    -------
    templates: numpy.ndarray
        The computed templates

    spike_train: np.array(n_data, 3)
        The 3 columns represent spike time, unit id,
        weight (from soft assignment)

    groups: list(n_units)
        After template merging, indicates which templates were merged together

    idx_good_templates: np.array
        Indexes of the templates kept after clean up

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/templates.py
    """
    spike_train = file_loader(spike_train)

    CONFIG = read_config()

    startTime = datetime.datetime.now()

    Time = {'t': 0, 'c': 0, 'm': 0, 's': 0, 'e': 0}

    logger = logging.getLogger(__name__)

    _b = datetime.datetime.now()

    logger.info("Getting Templates...")

    path_to_recordings = os.path.join(CONFIG.data.root_folder,
                                      output_directory, 'preprocess',
                                      recordings_filename)

    # relevant parameters
    merge_threshold = CONFIG.templates.merge_threshold
    spike_size = CONFIG.spike_size
    template_max_shift = CONFIG.templates.max_shift
    neighbors = CONFIG.neigh_channels
    geometry = CONFIG.geom

    # make templates
    templates, weights = get_templates(spike_train, path_to_recordings,
                                       CONFIG.resources.max_memory,
                                       2 * (spike_size + template_max_shift))

    # clean up bad templates
    snr_threshold = 2
    spread_threshold = 100
    templates, weights, spike_train, idx_good_templates = clean_up_templates(
        templates, weights, spike_train, tmp_loc, geometry, neighbors,
        snr_threshold, spread_threshold)

    # align templates
    templates = align_templates(templates, template_max_shift)

    # merge templates
    templates, spike_train, groups = merge_templates(templates, weights,
                                                     spike_train, neighbors,
                                                     template_max_shift,
                                                     merge_threshold)

    # remove the edge since it is bad
    templates = templates[:, template_max_shift:(template_max_shift +
                                                 (4 * spike_size + 1))]

    Time['e'] += (datetime.datetime.now() - _b).total_seconds()

    # report timing
    currentTime = datetime.datetime.now()
    logger.info("Templates done in {0} seconds.".format(
        (currentTime - startTime).seconds))

    return templates, spike_train, groups, idx_good_templates
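
A usage sketch for the templates step, chained to the clustering step (spike_train and tmp_loc are the values returned by the cluster run above):

    templates, spike_train, groups, idx_good_templates = run(
        spike_train,   # spike train from the cluster step
        tmp_loc,       # clustering channel per template, from the cluster step
        output_directory='tmp/',
        recordings_filename='standarized.bin',
        if_file_exists='skip',
        save_results=True)
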
Example 5
def run(standarized_path,
        standarized_params,
        channel_index,
        whiten_filter,
        output_directory='tmp/',
        if_file_exists='skip',
        save_results=False,
        gmm_params=None):
    """Execute detect step

    Parameters
    ----------
    standarized_path: str or pathlib.Path
        Path to standarized data binary file

    standarized_params: dict, str or pathlib.Path
        Dictionary with standarized data parameters or path to a yaml file

    channel_index: numpy.ndarray, str or pathlib.Path
        Channel index or path to a npy file

    whiten_filter: numpy.ndarray, str or pathlib.Path
        Whiten matrix or path to a npy file

    output_directory: str, optional
      Location to store partial results, relative to CONFIG.data.root_folder,
      defaults to tmp/

    if_file_exists: str, optional
      One of 'overwrite', 'abort', 'skip'. Control de behavior for every
      generated file. If 'overwrite' it replaces the files if any exist,
      if 'abort' it raises a ValueError exception if any file exists,
      if 'skip' if skips the operation if any file exists

    save_results: bool, optional
        Whether to save results to disk, defaults to False

    Returns
    -------
    clear_scores: numpy.ndarray (n_spikes, n_features, n_channels)
        3D array with the scores for the clear spikes, first dimension is
        the number of spikes, second is the number of features and third the
        number of channels

    spike_index_clear: numpy.ndarray (n_clear_spikes, 2)
        2D array with indexes for clear spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    spike_index_all: numpy.ndarray (n_collided_spikes, 2)
        2D array with indexes for all spikes, first column contains the
        spike location in the recording and the second the main channel
        (channel whose amplitude is maximum)

    gmm_params is a dictionary with the following keys (pass None to use the
    default values):
        gmtype: str
         One of 'idist', 'iinf' or 'ipeak'. Controls which separability metric to 
         use. See [1] for further details.
        max_samples: int, optional
         Number of samples used to compute the Gaussian mixture model (GMM)
        replicates: int, optional
         Number of replicates of the GMM
        max_iter: int, optional
         Maximum number of iterations of the GMMs
        n_components: int, optional
         Number of Gaussians in the model
        use_channel_features: bool, optional
         If True, computes the GMMs using only the main samples for each
         channel, as implemented in the original PCA. If False, computes the
         GMMs using all the samples.
         
    Notes
    -----
    Running the preprocessor will generate the following files in
    CONFIG.data.root_folder/output_directory/ (if save_results is
    True):

    * ``spike_index_clear.npy`` - Same as spike_index_clear returned
    * ``spike_index_all.npy`` - Same as spike_index_all returned
    * ``rotation.npy`` - Rotation matrix for dimensionality reduction
    * ``scores_clear.npy`` - Scores for clear spikes

    Threshold detector runs on CPU, neural network detector runs CPU and GPU,
    depending on how tensorflow is configured.

    Examples
    --------

    .. literalinclude:: ../../examples/pipeline/detect.py
    """
    if gmm_params is None:
        gmm_params = {'gmtype': 'idist'}

    #logger = logging.getLogger('yass.__main__')

    CONFIG = read_config()

    # load files in case they are strings or Path objects
    standarized_params = file_loader(standarized_params)
    channel_index = file_loader(channel_index)
    whiten_filter = file_loader(whiten_filter)

    # run detection
    if CONFIG.detect.method == 'threshold':
        return run_threshold(standarized_path, standarized_params,
                             channel_index, whiten_filter, output_directory,
                             if_file_exists, save_results, gmm_params)
    elif CONFIG.detect.method == 'nn':
        return run_neural_network(standarized_path, standarized_params,
                                  channel_index, whiten_filter,
                                  output_directory, if_file_exists,
                                  save_results)
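
A usage sketch for this variant of the detect step, which additionally takes a channel index and an optional gmm_params dictionary (file names and the gmm_params values shown are illustrative, not recommended settings):

    gmm_params = {'gmtype': 'idist',          # separability metric
                  'max_samples': 10000,       # samples used to fit the GMMs
                  'replicates': 3,
                  'max_iter': 100,
                  'n_components': 5,
                  'use_channel_features': True}

    scores, spike_index_clear, spike_index_all = run(
        'tmp/preprocess/standarized.bin',     # standarized recording (hypothetical path)
        'tmp/preprocess/standarized.yaml',    # standarized params (hypothetical path)
        'tmp/preprocess/channel_index.npy',   # channel index (hypothetical path)
        'tmp/preprocess/whitening.npy',       # whitening filter (hypothetical path)
        output_directory='tmp/',
        gmm_params=gmm_params)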