Esempio n. 1
0
def _make_cluster_generator(params):
    """Instantiate the cluster generator named by ``params['cluster_generator']``.

    Raises:
        ValueError: if the name is neither "MFM" nor "CRP".
    """
    # in CRP, the number of clusters vary with different N
    # in MFM, the number of clusters does not depend on N, which is more consistent with spike data
    # thus, MFM is currently a better generative model than CRP
    kind = params['cluster_generator']
    if kind == "MFM":
        return MFM_generator(
            Nmin=params['Nmin'],
            Nmax=params['Nmax'],
            maxK=params['maxK'],
            poisson_lambda=params['poisson_lambda'],
            dirichlet_alpha=params['dirichlet_alpha'])
    if kind == "CRP":
        return CRP_generator(
            Nmin=params['Nmin'],
            Nmax=params['Nmax'],
            maxK=params['maxK'],
            alpha=params['crp_alpha'])
    # ValueError is still an Exception, so existing `except Exception`
    # handlers keep working; the message now names the offending value.
    raise ValueError("unknown cluster generator: {!r}".format(kind))


def get_data_generator(params, data_config):
    """Build a ``SpikeGeneratorFromTemplatesCorrNoise`` from config dicts.

    Loads the templates, the probe geometry and a noise recording from the
    paths in ``data_config``, restricts the templates to channel
    neighbourhoods, upsamples them, creates the cluster generator selected by
    ``params`` and passes everything to the spike generator.

    Args:
        params: dict of generator settings. Keys read here: ``n_channels``
            (used as the number of neighbours per channel),
            ``cluster_generator`` ("MFM" or "CRP"), ``Nmin``, ``Nmax``,
            ``maxK``, ``n_timesteps``, and either ``poisson_lambda`` +
            ``dirichlet_alpha`` (MFM) or ``crp_alpha`` (CRP). The presence of
            a ``channels_exclude`` key switches channel exclusion on.
        data_config: dict of data settings. Keys read here:
            ``template_file``, ``geom_file``, ``nbr_dist``,
            ``temp_upsample``, ``noise_recordinng_file`` (spelled this way in
            the config), ``noise_geom_file``, and ``channels_exclude`` when
            exclusion is switched on.

    Returns:
        The configured ``SpikeGeneratorFromTemplatesCorrNoise`` instance.

    Raises:
        ValueError: if ``params['cluster_generator']`` is not "MFM" or "CRP".
    """
    # loading spiking data
    templates = np.load(data_config['template_file']).transpose(2, 1, 0)
    geom = np.loadtxt(data_config['geom_file'])
    nbr_dist, n_nbrs = data_config['nbr_dist'], params['n_channels']

    # exclude channels (e.g. for testing)
    # NOTE(review): the switch is looked up in `params` but the file path is
    # read from `data_config`; confirm this split is intentional.
    if 'channels_exclude' in params:
        chans_exclude = np.load(data_config['channels_exclude'])
        print("channels to exclude:", chans_exclude)
    else:
        chans_exclude = None

    # find channels with neighbours (only the channel -> neighbours map is used)
    _, chan_to_nbrs = get_chan_nbrs(
        geom, nbr_dist, n_nbrs, chans_exclude=chans_exclude)

    # maps templates to channels; the third return value
    # (chan_id => templates on that channel) is not needed here
    idx_templates, selected_chans, _ = select_template_channels(
        templates, chan_to_nbrs)

    # upsample/resample the templates at higher frequency (shift and downsample later)
    templates_upsampled = create_upsampled_templates(
        templates,
        idx_templates,
        selected_chans,
        upsample=data_config['temp_upsample'])
    # shape (n_templates, n_channels, n_timesteps, n_shifts). multiple shifts in the last dimension

    print("number of channels used:", len(np.unique(selected_chans[:, 0])))

    # load a recording for noise simulation (stored as a .npy array; an
    # earlier variant read the raw binary through load_bin instead)
    noise_recording = np.load(data_config['noise_recordinng_file'])

    # geometry of noise recording used to compute noise covariance;
    noise_geom = np.loadtxt(data_config['noise_geom_file'])
    _, noise_chan_to_nbrs = get_chan_nbrs(noise_geom, nbr_dist, n_nbrs)
    noise_channels = np.stack(list(noise_chan_to_nbrs.values()))
    print("noise_recording", noise_recording.shape)

    cluster_generator = _make_cluster_generator(params)
    print("Using {} cluster generator.".format(params['cluster_generator']))

    # spike generator
    return SpikeGeneratorFromTemplatesCorrNoise(
        templates_upsampled,
        cluster_generator=cluster_generator,
        n_timesteps=params['n_timesteps'],  # width of waveform
        noise_recording=noise_recording,  # raw data for generating noise covariance
        noise_channels=noise_channels,  # same as above
        noise_thres=3,
        permute_nbrs=True,  # whether can rotate/flip neighbor channels
        keep_nbr_order=True)
Esempio n. 2
0
    # Fragment of a larger function: `args`, `infer_params` and (presumably)
    # an initial `N_default` are defined before this point.
    # Any index window other than (0, -1) switches N_default to 'all' and tags
    # the output name with the window bounds.
    if args.global_start_idx != 0 or args.global_end_idx != -1:
        N_default = 'all'
        infer_params['data_name'] += "_global-{}-{}".format(
            args.global_start_idx, args.global_end_idx)

    infer_params['N_default'] = N_default

    # spike time
    spike_time_channel_arr = np.load(infer_params["spike_time_raw_no_triage"])
    # NOTE(review): when global_end_idx is -1 (the value treated as "no
    # window" above) this slice still drops the last entry — confirm intended.
    spike_time_channel_arr = spike_time_channel_arr[args.global_start_idx:args.
                                                    global_end_idx]

    # geom and neighbors
    geom = np.loadtxt(infer_params['geom_file'])
    nbr_dist, n_nbrs = infer_params['nbr_dist'], infer_params['n_nbr']
    chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
        geom, nbr_dist, n_nbrs, keep_less_nbrs=args.do_corner_padding)
    print("{} channels used:".format(len(chans_with_nbrs)))
    print(chans_with_nbrs)
    # Stored as plain Python ints, presumably so the json.dump below can
    # serialise them.
    infer_params['chans_with_nbrs'] = [int(x) for x in chans_with_nbrs]

    # Output layout: <data_name>/ and <data_name>/data_input/.
    # os.mkdir creates one level only, so the parent of output_dir must exist.
    output_dir = infer_params['data_name']
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    data_dir = os.path.join(output_dir, "data_input")
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    # Save the parameters used for this run next to its outputs.
    with open(os.path.join(output_dir, "data_params.json"), "w") as f:
        json.dump(infer_params, f, indent=2)

    # One integer seed per requested run: 0 .. n_seeds - 1.
    seeds = np.arange(args.n_seeds)
Esempio n. 3
0
    # Fragment of a larger function: `args` and `infer_params` are defined
    # before this point. Tag the output name with the requested N.
    infer_params['data_name'] += '_N-{}'.format(args.N)

    N = args.N

    # Load templates and reverse their axis order; load the probe geometry.
    templates = np.load(infer_params['template_file']).transpose(2, 1, 0)
    geom = np.loadtxt(infer_params['geom_file'])
    nbr_dist, n_nbrs = infer_params['nbr_dist'], infer_params['n_nbr']

    # Optional list of channels to leave out, loaded from the configured file.
    if 'channels_exclude' in infer_params:
        chans_exclude = np.load(infer_params['channels_exclude'])
        print("channels to exclude:", chans_exclude)
    else:
        chans_exclude = None

    # Channel neighbourhoods, honouring the exclusion list when present.
    chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(geom,
                                                  nbr_dist,
                                                  n_nbrs,
                                                  chans_exclude=chans_exclude)

    # Map templates onto channel neighbourhoods.
    idx_templates, selected_chans, chan_to_template = select_template_channels(
        templates, chan_to_nbrs)

    # Upsample the selected templates by the configured factor.
    templates_upsampled = \
        create_upsampled_templates(
            templates, idx_templates, selected_chans, upsample=infer_params['temp_upsample'])
    # selected_chans: [n_temp, 7]
    print("number of channels used:", len(np.unique(selected_chans[:, 0])))

    # presumably samples per second, making chunk_len 60 seconds of samples
    # — TODO confirm units
    samplerate = 20000
    chunk_len = 60 * samplerate
    noise_recording = load_bin(infer_params['noise_recordinng_file'],
                               start_time=0,
    # Fragment of a larger function: `infer_params`, `args`, `output_dir` and
    # `min_cls_size` are defined before this point.
    # No template set is used here; the literal "None" ends up in the figure
    # directory names below.
    templates_name = None
    infer_params['nbr_dist'] = 70
    infer_params['n_nbr'] = 7

    print("parameters:\n", json.dumps(infer_params, indent=2))

    # Hard-coded geometry: 7 rows of 2 coordinates each (presumably x, y
    # positions of a 7-channel layout — TODO confirm units/orientation).
    geom = np.array([
        [-585.0, 270.0],
        [-645.0, 270.0],
        [-525.0, 270.0],
        [-615.0, 210.0],
        [-555.0, 210.0],
        [-615.0, 330.0],
        [-555.0, 330.0]]
    )
    chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(geom, infer_params['nbr_dist'], infer_params['n_nbr'], keep_less_nbrs=False)
    print("{} channels used:".format(len(chans_with_nbrs)))
    print(chans_with_nbrs)

    topn = args.topn

    # data_dir is only computed here; the figure directories are created if
    # missing. os.mkdir creates one level only, so output_dir must exist.
    data_dir = os.path.join(output_dir, "data_ncp")
    # fig_dir_by_row = os.path.join(output_dir, "figures_by_row")
    # if not os.path.isdir(fig_dir_by_row): os.mkdir(fig_dir_by_row)
    fig_dir_overlay = os.path.join(output_dir, "figs_overlay_min-cls-{}_temp-{}".format(min_cls_size, templates_name))
    if not os.path.isdir(fig_dir_overlay): os.mkdir(fig_dir_overlay)
    fig_dir_vert_overlay = os.path.join(output_dir, "figs_overlay_vertical_min-cls-{}_temp-{}".format(min_cls_size, templates_name))
    if not os.path.isdir(fig_dir_vert_overlay): os.mkdir(fig_dir_vert_overlay)

    if args.plot_mfm:
        mfm_dir = os.path.join(infer_params['data_name'], "cluster_mfm", "data_mfm")
Esempio n. 5
0
        infer_params['template_file'] = os.path.expanduser(
            "data/data_49ch/non-triaged_190430/kilosort2/templates_reloaded.npy"
        )
        templates = np.load(infer_params['template_file']).transpose(2, 1, 0)
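        # template: (61, 49, 151) [n_times, n_channels, n_templates]
        # (this line originally said 143 templates, which disagrees with the
        # 151 in the transposed shape — confirm against the file)
        # transpose to (151, 49, 61) [n_templates, n_channels, n_times]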
        infer_params['template_offset'] = 4
    else:
        raise ValueError("unknown template name")

    # Fragment of a larger function: `infer_params`, `templates`,
    # `do_corner_padding` and `args` are defined before this point.
    print("parameters:\n", json.dumps(infer_params, indent=2))

    # Probe geometry and channel neighbourhoods; keep_less_nbrs follows the
    # local do_corner_padding setting.
    geom = np.loadtxt(infer_params['geom_file'])
    chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
        geom,
        infer_params['nbr_dist'],
        infer_params['n_nbr'],
        keep_less_nbrs=do_corner_padding)
    print("{} channels used:".format(len(chans_with_nbrs)))
    print(chans_with_nbrs)

    # Map templates onto channel neighbourhoods.
    idx_templates, selected_chans, chan_to_template = select_template_channels(
        templates, chan_to_nbrs)

    # Swap the last two axes, then window the result using the configured
    # n_timesteps and template_offset.
    templates_use = templates.transpose(0, 2, 1)
    # (151, 61, 49) [n_templates, n_times, n_channels]
    templates_use = template_window(templates_use, infer_params['n_timesteps'],
                                    infer_params['template_offset'])

    # One integer seed per requested run: 0 .. n_seeds - 1.
    seeds = np.arange(args.n_seeds)
    topn = args.topn