def get_data_generator(params, data_config):
    """Build a synthetic spike-data generator from templates plus correlated noise.

    Parameters
    ----------
    params : dict
        Run parameters: 'n_channels', 'n_timesteps', 'cluster_generator'
        ("MFM" or "CRP") and the generator hyper-parameters ('Nmin', 'Nmax',
        'maxK', 'poisson_lambda', 'dirichlet_alpha', 'crp_alpha').
    data_config : dict
        File paths and data options: 'template_file', 'geom_file', 'nbr_dist',
        'temp_upsample', 'noise_recordinng_file' (sic), 'noise_geom_file',
        and optionally 'channels_exclude'.

    Returns
    -------
    SpikeGeneratorFromTemplatesCorrNoise
        Generator that emits clustered spike waveforms with correlated noise.

    Raises
    ------
    ValueError
        If params['cluster_generator'] is neither "MFM" nor "CRP".
    """
    # Templates on disk are (n_times, n_channels, n_templates); transpose to
    # (n_templates, n_channels, n_times).
    templates = np.load(data_config['template_file']).transpose(2, 1, 0)
    geom = np.loadtxt(data_config['geom_file'])
    # NOTE(review): n_nbrs is read from params['n_channels'] here, while the
    # sibling scripts in this file read a dedicated 'n_nbr' key — confirm
    # this is intentional.
    nbr_dist, n_nbrs = data_config['nbr_dist'], params['n_channels']

    # Optionally exclude channels (e.g. held out for testing).
    # FIX: the guard previously tested `'channels_exclude' in params` but the
    # path is read from `data_config`, so a key present in one dict but not
    # the other raised KeyError. Test the dict we actually read from (this
    # also matches the parallel inference-script code in this file).
    if 'channels_exclude' in data_config:
        chans_exclude = np.load(data_config['channels_exclude'])
        print("channels to exclude:", chans_exclude)
    else:
        chans_exclude = None

    # find channels with neighbours
    chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
        geom, nbr_dist, n_nbrs, chans_exclude=chans_exclude)

    # map templates to channels;
    # chan_to_template: chan_id => templates on that channel
    idx_templates, selected_chans, chan_to_template = select_template_channels(
        templates, chan_to_nbrs)

    # Upsample the templates at a higher frequency (shifted and downsampled
    # later). Result shape: (n_templates, n_channels, n_timesteps, n_shifts)
    # — multiple shifts in the last dimension.
    templates_upsampled = create_upsampled_templates(
        templates,
        idx_templates,
        selected_chans,
        upsample=data_config['temp_upsample'])
    print("number of channels used:", len(np.unique(selected_chans[:, 0])))

    # Load a recording for noise simulation (key 'noise_recordinng_file' is
    # misspelled in the config schema; kept as-is for compatibility).
    noise_recording = np.load(data_config['noise_recordinng_file'])
    # geometry of the noise recording, used to compute noise covariance
    noise_geom = np.loadtxt(data_config['noise_geom_file'])
    _, noise_chan_to_nbrs = get_chan_nbrs(noise_geom, nbr_dist, n_nbrs)
    noise_channels = np.stack(list(noise_chan_to_nbrs.values()))
    print("noise_recording", noise_recording.shape)

    # In CRP the number of clusters varies with N; in MFM it does not, which
    # is more consistent with spike data — thus MFM is currently a better
    # generative model than CRP.
    if params['cluster_generator'] == "MFM":
        cluster_generator = MFM_generator(
            Nmin=params['Nmin'],
            Nmax=params['Nmax'],
            maxK=params['maxK'],
            poisson_lambda=params['poisson_lambda'],
            dirichlet_alpha=params['dirichlet_alpha'])
    elif params['cluster_generator'] == "CRP":
        cluster_generator = CRP_generator(
            Nmin=params['Nmin'],
            Nmax=params['Nmax'],
            maxK=params['maxK'],
            alpha=params['crp_alpha'])
    else:
        # FIX: raise a specific exception type and include the offending
        # value (ValueError subclasses Exception, so existing handlers that
        # catch Exception still work).
        raise ValueError("unknown cluster generator: {}".format(
            params['cluster_generator']))
    print("Using {} cluster generator.".format(params['cluster_generator']))

    # spike generator
    data_generator = SpikeGeneratorFromTemplatesCorrNoise(
        templates_upsampled,
        cluster_generator=cluster_generator,
        n_timesteps=params['n_timesteps'],  # width of waveform
        noise_recording=noise_recording,    # raw data for noise covariance
        noise_channels=noise_channels,      # same as above
        noise_thres=3,
        permute_nbrs=True,                  # whether neighbor channels may rotate/flip
        keep_nbr_order=True)
    return data_generator
# --- Select a global slice of spikes and prepare the output directories. ---
# NOTE(review): this chunk is part of a larger script; `args`, `infer_params`,
# `os`, `json`, and `np` are defined/imported above this view.
if args.global_start_idx != 0 or args.global_end_idx != -1:
    # a non-default slice was requested: process all spikes in the slice and
    # tag the output name with the slice bounds
    N_default = 'all'
    infer_params['data_name'] += "_global-{}-{}".format(
        args.global_start_idx, args.global_end_idx)
# NOTE(review): if the condition above is false, this line needs N_default to
# have been assigned earlier in the script — confirm against the full file.
infer_params['N_default'] = N_default

# spike time (raw, no triage), restricted to the requested global slice
spike_time_channel_arr = np.load(infer_params["spike_time_raw_no_triage"])
spike_time_channel_arr = spike_time_channel_arr[args.global_start_idx:args.global_end_idx]

# geom and neighbors
geom = np.loadtxt(infer_params['geom_file'])
nbr_dist, n_nbrs = infer_params['nbr_dist'], infer_params['n_nbr']
chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
    geom, nbr_dist, n_nbrs, keep_less_nbrs=args.do_corner_padding)
print("{} channels used:".format(len(chans_with_nbrs)))
print(chans_with_nbrs)
# store as plain ints so the dict below is JSON-serializable
infer_params['chans_with_nbrs'] = [int(x) for x in chans_with_nbrs]

# output directories: <data_name>/ and <data_name>/data_input/
output_dir = infer_params['data_name']
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)
data_dir = os.path.join(output_dir, "data_input")
if not os.path.isdir(data_dir):
    os.mkdir(data_dir)

# persist the effective parameters next to the generated data
with open(os.path.join(output_dir, "data_params.json"), "w") as f:
    json.dump(infer_params, f, indent=2)

# one seed per requested run
seeds = np.arange(args.n_seeds)
# --- Per-N run setup: load templates, pick channels, upsample templates. ---
# NOTE(review): chunk of a larger script; `args`, `infer_params`, `np`,
# `load_bin`, and the template helpers are defined above this view.
infer_params['data_name'] += '_N-{}'.format(args.N)
N = args.N

# templates on disk are (n_times, n_channels, n_templates); transpose to
# (n_templates, n_channels, n_times)
templates = np.load(infer_params['template_file']).transpose(2, 1, 0)
geom = np.loadtxt(infer_params['geom_file'])
nbr_dist, n_nbrs = infer_params['nbr_dist'], infer_params['n_nbr']

# optionally exclude channels (e.g. held out for testing)
if 'channels_exclude' in infer_params:
    chans_exclude = np.load(infer_params['channels_exclude'])
    print("channels to exclude:", chans_exclude)
else:
    chans_exclude = None

chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
    geom, nbr_dist, n_nbrs, chans_exclude=chans_exclude)
# map templates to their channel neighborhoods
idx_templates, selected_chans, chan_to_template = select_template_channels(
    templates, chan_to_nbrs)
# upsampled templates allow sub-sample shifts (downsampled later)
templates_upsampled = \
    create_upsampled_templates(
        templates,
        idx_templates,
        selected_chans,
        upsample=infer_params['temp_upsample'])
# selected_chans: [n_temp, 7]
print("number of channels used:", len(np.unique(selected_chans[:, 0])))

# read a 60-second chunk of raw recording for noise estimation
samplerate = 20000
chunk_len = 60 * samplerate
# NOTE(review): the load_bin(...) call below is cut off at this chunk
# boundary — remaining keyword arguments are outside this view.
noise_recording = load_bin(infer_params['noise_recordinng_file'], start_time=0,
# --- Plotting setup: fixed 7-channel geometry and figure directories. ---
# NOTE(review): chunk of a larger script; `args`, `infer_params`,
# `output_dir`, and `min_cls_size` are defined above this view.
templates_name = None
infer_params['nbr_dist'] = 70
infer_params['n_nbr'] = 7
print("parameters:\n", json.dumps(infer_params, indent=2))

# hard-coded geometry of a single 7-channel neighborhood (center + 6 nbrs)
geom = np.array([
    [-585.0, 270.0],
    [-645.0, 270.0],
    [-525.0, 270.0],
    [-615.0, 210.0],
    [-555.0, 210.0],
    [-615.0, 330.0],
    [-555.0, 330.0]]
)
chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
    geom, infer_params['nbr_dist'], infer_params['n_nbr'],
    keep_less_nbrs=False)
print("{} channels used:".format(len(chans_with_nbrs)))
print(chans_with_nbrs)

topn = args.topn
data_dir = os.path.join(output_dir, "data_ncp")
# fig_dir_by_row = os.path.join(output_dir, "figures_by_row")
# if not os.path.isdir(fig_dir_by_row): os.mkdir(fig_dir_by_row)

# figure output directories, tagged with the cluster-size cutoff and
# template-set name
fig_dir_overlay = os.path.join(
    output_dir,
    "figs_overlay_min-cls-{}_temp-{}".format(min_cls_size, templates_name))
if not os.path.isdir(fig_dir_overlay):
    os.mkdir(fig_dir_overlay)
fig_dir_vert_overlay = os.path.join(
    output_dir,
    "figs_overlay_vertical_min-cls-{}_temp-{}".format(min_cls_size, templates_name))
if not os.path.isdir(fig_dir_vert_overlay):
    os.mkdir(fig_dir_vert_overlay)

if args.plot_mfm:
    # NOTE(review): the body of this branch continues past this view
    mfm_dir = os.path.join(infer_params['data_name'], "cluster_mfm", "data_mfm")
# NOTE(review): this chunk begins inside an if/elif chain over the template
# name whose opening lines are above this view — the first statements below
# belong to one of its branches.
    infer_params['template_file'] = os.path.expanduser(
        "data/data_49ch/non-triaged_190430/kilosort2/templates_reloaded.npy"
    )
    templates = np.load(infer_params['template_file']).transpose(2, 1, 0)
    # template: (61, 49, 143) [n_times, n_channels, n_templates]
    # transpose to (151, 49, 61) [n_templates, n_channels, n_times]
    infer_params['template_offset'] = 4
else:
    raise ValueError("unknown template name")

print("parameters:\n", json.dumps(infer_params, indent=2))

geom = np.loadtxt(infer_params['geom_file'])
chans_with_nbrs, chan_to_nbrs = get_chan_nbrs(
    geom, infer_params['nbr_dist'], infer_params['n_nbr'],
    keep_less_nbrs=do_corner_padding)
print("{} channels used:".format(len(chans_with_nbrs)))
print(chans_with_nbrs)

# map templates to channel neighborhoods
idx_templates, selected_chans, chan_to_template = select_template_channels(
    templates, chan_to_nbrs)
# reorder to (151, 61, 49) [n_templates, n_times, n_channels], then crop each
# template to n_timesteps starting at template_offset
templates_use = templates.transpose(0, 2, 1)
templates_use = template_window(
    templates_use, infer_params['n_timesteps'], infer_params['template_offset'])

seeds = np.arange(args.n_seeds)
topn = args.topn