def compare_ground_truth(*, firings, firings_true, json_out, opts={}):
    """Compare a sorting against ground truth and save the result as JSON.

    Both firings files are resolved through ``mlp.realizeFile`` and read with
    ``mdaio.readmda``. Row 1 of each array is used as the event times and
    row 2 as the event labels. The comparison itself is delegated to
    ``compare_ground_truth_helper``.

    Parameters
    ----------
    firings : str
        Location of the firings file produced by the sorter.
    firings_true : str
        Location of the ground-truth firings file.
    json_out : str
        Path of the JSON file to write (indented by 4 spaces).
    opts : dict
        Accepted but not read by this function.
    """
    # NOTE(review): `opts` has a mutable default; this is harmless only as
    # long as the function never reads or mutates it.
    truth = mdaio.readmda(mlp.realizeFile(firings_true))
    sorted_events = mdaio.readmda(mlp.realizeFile(firings))

    comparison = compare_ground_truth_helper(
        truth[1, :],          # ground-truth times
        truth[2, :],          # ground-truth labels
        sorted_events[1, :],  # sorted times
        sorted_events[2, :],  # sorted labels
    )

    with open(json_out, 'w') as json_file:
        json.dump(comparison, json_file, indent=4)
def read_dataset_params(params_fname):
    """Load a dataset parameter file and return its parsed JSON content.

    Parameters
    ----------
    params_fname : str
        Location of the parameter file; it is resolved to a local path with
        ``mlp.realizeFile`` before being opened.

    Returns
    -------
    The object produced by ``json.load`` for the file.

    Raises
    ------
    Exception
        If the resolved path does not exist on disk.
    """
    local_path = mlp.realizeFile(params_fname)
    if os.path.exists(local_path):
        with open(local_path) as params_file:
            return json.load(params_file)
    raise Exception('Dataset parameter file does not exist: ' + local_path)
def validate_sorting_results(*, dataset_dir, sorting_output_dir, output_dir):
    """Validate a sorting result against the ground truth of a dataset.

    Steps, in order:

    1. Ensure ``output_dir`` exists.
    2. Call ``compare_ground_truth`` on ``firings.mda`` from the sorting
       output and ``firings_true.mda`` from the dataset.
    3. Call ``compute_templates`` on the raw timeseries and the true firings.
    4. Call ``mlp.runPipeline()``.
    5. Read the true templates and the accuracies back from ``output_dir``.

    Parameters
    ----------
    dataset_dir : str
        Location containing ``raw.mda`` and ``firings_true.mda``.
    sorting_output_dir : str
        Location containing the sorter's ``firings.mda``.
    output_dir : str
        Local directory receiving the intermediate and result files.

    Returns
    -------
    dict
        ``accuracies`` (as returned by ``get_accuracies``) and
        ``amplitudes_true`` (peak absolute template value after reducing
        over axis 1 and then axis 0 of the true templates array).
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    # and also creates missing parent directories.
    os.makedirs(output_dir, exist_ok=True)

    # Locations are built with plain '/' concatenation on purpose: the inputs
    # are passed on as given rather than normalised as local filesystem paths.
    comparison_json = output_dir + '/compare_ground_truth.json'
    true_firings = dataset_dir + '/firings_true.mda'

    compare_ground_truth(
        firings=sorting_output_dir + '/firings.mda',
        firings_true=true_firings,
        json_out=comparison_json,
    )
    compute_templates(
        timeseries=dataset_dir + '/raw.mda',
        firings=true_firings,
        templates_out=output_dir + '/templates_true.mda.prv',
    )
    mlp.runPipeline()

    # NOTE(review): the templates are written as '.mda.prv' but read back as
    # '.mda'; presumably mlp.realizeFile resolves the .prv companion - confirm.
    templates_true = mdaio.readmda(
        mlp.realizeFile(output_dir + '/templates_true.mda'))
    amplitudes_true = np.max(np.max(np.abs(templates_true), axis=1), axis=0)
    accuracies = get_accuracies(comparison_json)

    return dict(
        accuracies=accuracies,
        amplitudes_true=amplitudes_true,
    )
def __init__(self, firings_fname):
    """Load a firings file and expose its times, labels and unit count.

    Parameters
    ----------
    firings_fname : str
        Location of the firings file; resolved to a local path with
        ``mlp.realizeFile`` (progress is printed before and after).

    Row 1 of the loaded array is stored as the event times and row 2 as the
    event labels; the number of units is taken as the largest label.
    """
    OutputExtractor.__init__(self)

    print('Downloading file if needed: ' + firings_fname)
    self._firings_path = mlp.realizeFile(firings_fname)
    print('Done.')

    firings = mdaio.readmda(self._firings_path)
    self._firings = firings
    self._times, self._labels = firings[1, :], firings[2, :]
    self._num_units = np.max(self._labels)
def __init__(self, *, dataset_directory, download=True):
    """Set up an input extractor for a dataset directory.

    Parameters
    ----------
    dataset_directory : str
        Location containing ``raw.mda``, ``geom.csv`` and the dataset
        parameters read by ``read_dataset_params``.
    download : bool
        When true, ``raw.mda`` is resolved with ``mlp.realizeFile``;
        otherwise it is only looked up with ``mlp.locateFile``.

    Raises
    ------
    Exception
        If the number of rows in ``geom.csv`` differs from the first
        dimension of the timeseries file.
    """
    InputExtractor.__init__(self)
    self._dataset_directory = dataset_directory
    raw_location = dataset_directory + '/raw.mda'

    # Parameters are read first, so a missing parameter file fails before
    # any timeseries download is attempted.
    self._dataset_params = read_dataset_params(dataset_directory)
    self._samplerate = self._dataset_params['samplerate']

    if not download:
        self._timeseries_path = mlp.locateFile(raw_location)
    else:
        print('Downloading file if needed: ' + raw_location)
        self._timeseries_path = mlp.realizeFile(raw_location)
        print('Done.')

    self._geom_fname = mlp.realizeFile(dataset_directory + '/geom.csv')
    self._geom = np.genfromtxt(self._geom_fname, delimiter=',')

    reader = mdaio.DiskReadMda(self._timeseries_path)
    geom_rows, num_channels = self._geom.shape[0], reader.N1()
    if geom_rows != num_channels:
        raise Exception(
            'Incompatible dimensions between geom.csv and timeseries file {} <> {}'
            .format(geom_rows, num_channels))

    self._num_channels = num_channels
    self._num_timepoints = reader.N2()
def initialize(self):
    """Resolve the configured input files and load them into memory.

    Reads ``self._timeseries``, ``self._firings`` and ``self._geom``
    (timeseries and geom may be ``None``) and populates:

    - ``self._G``: geometry read from CSV, transposed and flipped along
      axis 0, or ``None`` when no geom is configured;
    - ``self._X``: the timeseries array, or ``None`` when not configured;
    - ``self._F``: the firings array, with ``self._times`` (row 1) and
      ``self._labels`` (row 2);
    - ``self._K``: the largest label as an ``int``.
    """
    has_timeseries = self._timeseries is not None

    # The timeseries message is printed even when no timeseries is set.
    print('Downloading timeseries (if needed): {}'.format(self._timeseries))
    timeseries_path = (
        mlp.realizeFile(self._timeseries) if has_timeseries else None
    )

    print('Downloading firings (if needed): {}'.format(self._firings))
    firings_path = mlp.realizeFile(self._firings)

    if self._geom is None:
        self._G = None
    else:
        print('Downloading geom (if needed): {}'.format(self._geom))
        geom_path = mlp.realizeFile(self._geom)
        geometry = np.genfromtxt(geom_path, delimiter=',').T
        self._G = np.flip(geometry, axis=0)

    print('Reading arrays into memory...')
    self._X = mdaio.readmda(timeseries_path) if has_timeseries else None

    firings = mdaio.readmda(firings_path)
    self._F = firings
    self._times, self._labels = firings[1, :], firings[2, :]
    self._K = int(self._labels.max())
def __init__(self, dataset_directory, download=True):
    """Set up an input extractor for the ``raw.mda`` of a dataset directory.

    Parameters
    ----------
    dataset_directory : str
        Location containing ``raw.mda`` and the dataset parameters read by
        ``read_dataset_params``.
    download : bool
        When true, ``raw.mda`` is resolved with ``mlp.realizeFile``;
        otherwise it is only looked up with ``mlp.locateFile``.
    """
    InputExtractor.__init__(self)
    self._dataset_directory = dataset_directory
    raw_location = dataset_directory + '/raw.mda'

    if not download:
        self._timeseries_path = mlp.locateFile(raw_location)
    else:
        print('Downloading file if needed: ' + raw_location)
        self._timeseries_path = mlp.realizeFile(raw_location)
        print('Done.')

    # Only the header dimensions are queried from the disk-backed reader.
    reader = mdaio.DiskReadMda(self._timeseries_path)
    self._num_channels = reader.N1()
    self._num_timepoints = reader.N2()

    params = read_dataset_params(dataset_directory)
    self._dataset_params = params
    self._samplerate = params['samplerate']
def gen_recording(*, templates, output_extractor, noise_level, samplerate,
                  duration):
    """Synthesize a recording from a template library and a set of units.

    One template is drawn at random (without replacement) for every unit id
    reported by ``output_extractor``. The chosen templates are upsampled
    along axis 2 and handed to ``synthesize_timeseries``.

    Parameters
    ----------
    templates : str
        Location of the HDF5 templates file (resolved with
        ``mlp.realizeFile`` for reading, and passed as given to
        ``mr.load_templates`` for the electrode layout).
    output_extractor
        Object providing ``getUnitIds()``; forwarded to
        ``synthesize_timeseries``.
    noise_level, samplerate, duration
        Forwarded unchanged to ``synthesize_timeseries``.

    Returns
    -------
    tuple
        ``(X, geom)``: the synthesized timeseries and the electrode
        positions (columns 1-2) for the first ``X.shape[0]`` electrodes.
    """
    num_units = len(output_extractor.getUnitIds())

    templates_path = mlp.realizeFile(templates)
    with h5py.File(templates_path, 'r') as h5file:
        templates_data = {
            'info': json.loads(str(h5file['info'][()])),
            'celltypes': np.array(h5file.get('celltypes')),
            'locations': np.array(h5file.get('locations')),
            'rotations': np.array(h5file.get('rotations')),
            'templates': np.array(h5file.get('templates')),
        }

    # Pick one distinct template per unit.
    library = templates_data['templates']
    chosen = np.random.choice(range(library.shape[0]), num_units,
                              replace=False)
    unit_templates = library[chosen, :, :]

    upsample_factor = 13
    upsampled = signal.resample_poly(unit_templates, up=upsample_factor,
                                     down=1, axis=2)
    # Move the template index to the last axis.
    waveforms = upsampled.transpose([1, 2, 0])

    # The centre sample is placed at the same fraction of the waveform as
    # cut_out[0] is of the total cut-out, expressed at the original rate.
    cut_out = templates_data['info']['params']['cut_out']
    frac = cut_out[0] / (cut_out[0] + cut_out[1])
    waveforms_tcenter = int(frac * waveforms.shape[1] / upsample_factor)

    X = synthesize_timeseries(
        output_extractor=output_extractor,
        waveforms=waveforms,
        waveforms_tcenter=waveforms_tcenter,
        samplerate=samplerate,
        duration=duration,
        waveform_upsamplefac=upsample_factor,
        noise_level=noise_level,
    )

    template_generator = mr.load_templates(templates)
    electrode_array = mu.return_mea(
        info=template_generator.info['electrodes'])
    # Make sure the geom matches the first dimension of X.
    geom = electrode_array.positions[:X.shape[0], 1:3]

    return X, geom
def read_dataset_params(dataset_dir):
    '''
    Load and parse the ``params.json`` file of a dataset directory.

    Parameters
    ----------
    dataset_dir : str
        Dataset directory; ``params.json`` inside it is resolved to a local
        path with ``mlp.realizeFile``.

    Returns
    -------
    params
        The parsed JSON content of the parameter file, as returned by
        ``json.load`` (not the raw file text).

    Raises
    ------
    FileNotFoundError
        If the resolved parameter file does not exist on disk.
    '''
    params_fname = mlp.realizeFile(os.path.join(dataset_dir, 'params.json'))
    if os.path.exists(params_fname):
        with open(params_fname) as params_file:
            return json.load(params_file)
    raise FileNotFoundError(
        f'Dataset parameter file does not exist: {params_fname}')
def run(self):
    """Generate a recording from templates and spike trains and save it.

    Reads the processor attributes (``templates``, ``spiketrains``, ``seed``,
    ``overlap``, ``recording_out`` and the generation parameters copied into
    ``params_dict`` below), builds a ``RecordingGenerator`` and writes its
    results to the HDF5 file ``self.recording_out``.

    When ``self.templates`` is empty, a default templates file is resolved
    with ``mlp.realizeFile``. When ``self.seed`` is not positive, it is
    replaced in place by a random integer in ``[1, 10000)``.

    Returns
    -------
    bool
        Always ``True`` on completion.
    """
    if not self.templates:
        print('Downloading templates (if needed)...')
        default_templates_url = 'kbucket://b5ecdf1474c5/MEArec/templates/templates_30_Neuronexus-32.h5'
        templates_path = mlp.realizeFile(default_templates_url)
        print('Done downloading templates (if needed)')
    else:
        templates_path = self.templates
    print('Using templates file: ' + templates_path)

    if self.seed <= 0:
        # Store a plain integer. A trailing comma on this assignment used to
        # turn the seed into a 1-tuple, which then leaked into the
        # parameters and the saved metadata.
        self.seed = np.random.randint(1, 10000)

    params_dict = dict(
        min_dist=self.min_dist,
        min_amp=self.min_amp,
        noise_level=self.noise_level,
        noise_mode='uncorrelated',
        modulation=self.modulation,
        chunk_duration=self.chunk_duration,
        filter=self.filter,
        seed=self.seed,
        cutoff=self.cutoff,
        overlap_threshold=self.overlap_threshold,
        n_jitters=self.n_jitters,
        upsample=self.upsample,
        pad_len=self.pad_len,
        mrand=self.mrand,
        sdrand=self.sdrand,
        fs=None,
        depth_lim=None,
    )
    params_dict['excitatory'] = ['STPC', 'TTPC1', 'TTPC2', 'UTPC']
    params_dict['inhibitory'] = [
        'BP', 'BTC', 'ChC', 'DBC', 'LBC', 'MC', 'NBC', 'NGC', 'SBC'
    ]

    templates_data = {}
    with h5py.File(templates_path, 'r') as templates_file:
        templates_data['info'] = json.loads(str(templates_file['info'][()]))
        templates_data['celltypes'] = np.array(templates_file.get('celltypes'))
        templates_data['locations'] = np.array(templates_file.get('locations'))
        templates_data['rotations'] = np.array(templates_file.get('rotations'))
        templates_data['templates'] = np.array(templates_file.get('templates'))

    spiketrains_data = {'spiketrains': np.load(self.spiketrains)}

    recgen = RecordingGenerator(templates_data, spiketrains_data,
                                params_dict, self.overlap)
    info = recgen.info

    # The context manager closes the output file even if a write fails.
    with h5py.File(self.recording_out, 'w') as out:
        # Includes spiketrain_folder, fs, template_folder, duration,
        # n_neurons, seed, electrode_name.
        for key in info['General']:
            out.attrs[key] = info['General'][key]

        out.create_dataset('info', data=json.dumps(info))
        out.create_dataset('peaks', data=recgen.peaks)
        out.create_dataset('positions', data=recgen.positions)
        out.create_dataset('recordings', data=recgen.recordings)
        out.create_dataset('sources', data=recgen.sources)

        for ii, st in enumerate(recgen.spiketrains):
            out.create_dataset('spiketrains/{}/times'.format(ii),
                               data=st.times.rescale('s').magnitude)
            out.create_dataset('spiketrains/{}/t_stop'.format(ii),
                               data=st.t_stop)

            # Keep only annotations whose exact type is str, int or float.
            # An exact type match (rather than isinstance) is deliberate so
            # that subclasses such as bool stay excluded, as before.
            annotations = {}
            for key in st.annotations:
                value = st.annotations[key]
                if type(value) in (str, int, float):
                    annotations[key] = value
            out.create_dataset('spiketrains/{}/annotations'.format(ii),
                               data=json.dumps(annotations))

        out.create_dataset('templates', data=recgen.templates)
        out.create_dataset('times', data=recgen.times)

    return True