def test_signal_save_load(signal, signal_tmpdir):
    '''
    Round-trip a signal through save/load and check the data is unchanged.
    '''
    target_dir = str(signal_tmpdir)
    signal.save(target_dir, fmt='%1.3e')

    # Exactly one signal is expected in the freshly written directory.
    found = RasterizedSignal.list_signals(target_dir)
    assert len(found) == 1

    reloaded = RasterizedSignal.load(os.path.join(target_dir, found[0]))
    original_data = signal.as_continuous()
    reloaded_data = reloaded.as_continuous()
    assert np.all(original_data == reloaded_data)
def signal(signal_name='dummy_signal', recording_name='dummy_recording', fs=50,
           nchans=3, ntimes=200):
    '''
    Generates a dummy signal with a predictable structure (every element
    increases by 1) that is useful for testing.

    Arguments:
    ----------
    signal_name : str
        Passed to RasterizedSignal as `name`.
    recording_name : str
        Passed to RasterizedSignal as `recording`.
    fs : int
        Sampling rate passed to RasterizedSignal. The hard-coded epoch
        boundaries below are also divided by this value.
    nchans : int
        Number of channels (rows) in the generated data.
    ntimes : int
        Number of time bins (columns) in the generated data.

    Returns:
    --------
    RasterizedSignal built from the generated data, channel names
    'chan0', 'chan1', ..., three epochs and some dummy metadata.
    '''
    # Generate a numpy array that's incrementally increasing across channels,
    # then across timepoints, by 1: data[c, t] == c + t * nchans.
    # NOTE: `np.float` was removed in NumPy 1.24; the builtin `float` yields
    # the same float64 dtype on every NumPy version.
    c = np.arange(nchans, dtype=float)
    t = np.arange(ntimes, dtype=float)
    data = c[..., np.newaxis] + t*nchans

    # Epoch boundaries are written as integers and then divided by fs
    # (presumably bin indices being converted to seconds).
    epochs = pd.DataFrame({
        'start': [3, 15, 150],
        'end': [200, 60, 190],
        'name': ['trial', 'pupil_closed', 'pupil_closed']
    })
    epochs['start'] /= fs
    epochs['end'] /= fs

    kwargs = {
        'data': data,
        'name': signal_name,
        'recording': recording_name,
        'chans': ['chan' + str(n) for n in range(nchans)],
        'epochs': epochs,
        'fs': fs,
        'meta': {
            'for_testing': True,
            'date': "2018-01-10",
            'animal': "Donkey Hotey",
            'windmills': 'tilting'
        },
    }
    return RasterizedSignal(**kwargs)
def test_epoch_save_load(signal, signal_tmpdir):
    '''
    Round-trip a signal through save/load and check its epochs are unchanged.
    '''
    target_dir = str(signal_tmpdir)
    before = signal.epochs
    signal.save(target_dir, fmt='%1.3e')

    found = RasterizedSignal.list_signals(target_dir)
    reloaded = RasterizedSignal.load(os.path.join(target_dir, found[0]))
    after = reloaded.epochs

    # Print both frames so a failing comparison is easy to diagnose.
    report = ("Dataframes equal?\n"
              "Before:\n{0}\n"
              "After:\n{1}\n")
    print(report.format(before, after))
    assert before.equals(after)
def test_extract_channels(signal):
    '''
    Split the signal by channel, then check that re-joining the pieces
    reproduces the original data.
    '''
    first_two = signal.extract_channels(['chan0', 'chan1'])
    assert first_two.shape == (2, 200)

    last_one = signal.extract_channels(['chan2'])
    assert last_one.shape == (1, 200)

    rejoined = RasterizedSignal.concatenate_channels([first_two, last_one])
    assert np.array_equal(signal.as_continuous(), rejoined.as_continuous())
def load_sadagopan(cellid='MS_u0004_f0025', recname='MS_u0004',
                   stimfile=None, respfile=None, epochsfile=None,
                   fs=50, channel_num=0, **context):
    """
    example file from Sadagopan lab

    Loads a gzipped stimulus CSV, a gzipped response CSV and an epochs CSV,
    and packs them into a recording with a 'stim' and a 'resp' signal.

    :param cellid: used as the response channel name and to build the
        default file names
    :param recname: recording name given to both signals
    :param stimfile: path to the gzipped stimulus CSV; defaults to
        signals_dir / (cellid + '_stim.csv.gz')
    :param respfile: path to the gzipped response CSV; defaults to
        signals_dir / (cellid + '_resp.csv.gz')
    :param epochsfile: path to the epochs CSV; defaults to
        signals_dir / (cellid + '_epochs.csv')
    :param fs: sampling rate passed to both signals
    :param channel_num: accepted but not used by this function
    :param context: extra keyword arguments are accepted and ignored
    :return: dict {'rec': Recording}
    """
    if stimfile is None:
        stimfile = signals_dir / (cellid + '_stim.csv.gz')
    if respfile is None:
        respfile = signals_dir / (cellid + '_resp.csv.gz')
    if epochsfile is None:
        epochsfile = signals_dir / (cellid + '_epochs.csv')

    # Open the archives in context managers so the file handles are closed
    # as soon as the data has been parsed (they used to be leaked).
    with gzip.open(stimfile, mode='rb') as stim_fh:
        X = np.loadtxt(stim_fh, delimiter=",", skiprows=0)
    with gzip.open(respfile, mode='rb') as resp_fh:
        Y = np.loadtxt(resp_fh, delimiter=",", skiprows=0)

    # get list of stimuli with start and stop times (in sec)
    epochs = pd.read_csv(epochsfile)

    # create NEMS-format recording objects from the raw data
    resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid],
                            epochs=epochs.loc[:])
    stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs.loc[:])

    # create the recording object from the signals
    signals = {'resp': resp, 'stim': stim}
    rec = recording.Recording(signals)

    return {'rec': rec}
lv1 /= lv1.std() g2 = np.random.normal(0.5, 0.1, (nCells, 1)) gain2 = np.matmul(lv1.T, g2.T).T resp = u * np.exp(gain1 + gain2) psth = np.tile(resp.mean(axis=-1), (T, 1)).T # get rid of any "cells" that never fired idx = (resp==0).sum(axis=-1) == T resp = resp[~idx, :] psth = psth[~idx, :] nCells = resp.shape[0] # pack into nems recording resp_sig = RasterizedSignal(fs=4, data=resp, name='resp', recording='simulation') psth_sig = RasterizedSignal(fs=4, data=psth, name='psth', recording='simulation') pupil_sig = RasterizedSignal(fs=4, data=pupil, name='pupil', recording='simulation') bm = pupil > pupil.mean() big_mask = RasterizedSignal(fs=4, data=bm, name='big_mask', recording='simulation') sm = pupil < pupil.mean() small_mask = RasterizedSignal(fs=4, data=sm, name='small_mask', recording='simulation') rec = Recording({'resp': resp_sig, 'psth': psth_sig, 'pupil': pupil_sig}) # fit the GLM for different hyperparameters x0 = np.zeros(3 * nCells) nLV = 1 alpha1 = np.arange(0, 0.5, 0.05) results = dict.fromkeys(alpha1) for i, a2 in tqdm(enumerate(alpha1)):
# figure out data and results paths: signals_dir = Path(nems.NEMS_PATH) / 'recordings' modelspecs_dir = Path(nems.NEMS_PATH) / 'modelspecs' # download demo data recording.get_demo_recordings(signals_dir) datafile = signals_dir / 'TAR010c-18-1.pkl' # LOAD AND FORMAT RECORDING DATA with open(datafile, 'rb') as f: #cellid, recname, fs, X, Y, X_val, Y_val = pickle.load(f) cellid, recname, fs, X, Y, epochs = pickle.load(f) # create NEMS-format recording objects from the raw data resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid]) stim = RasterizedSignal(fs, X, 'stim', recname) # create the recording object from the signals signals = {'resp': resp, 'stim': stim} est = recording.Recording(signals) val_signals = { 'resp': RasterizedSignal(fs, Y_val, 'resp', recname, chans=[cellid]), 'stim': RasterizedSignal(fs, X_val, 'stim', recname) } val = recording.Recording(val_signals) # INITIALIZE MODELSPEC log.info('Initializing modelspec...')
def load_recording_from_arrays(arrays, rec_name, fs, sig_names=None,
                               signal_kwargs=None):
    '''
    Generates a recording object, and the signal objects it contains,
    from a list of array-like structures of the form channels x time
    (see signal.py for more details about how arrays are represented
    by signals).

    If any of the arrays are not exactly 2-dimensional,
    an error will be thrown. Also pay close attention to any
    RuntimeWarnings from the signal class regarding improperly-shaped
    arrays, which may indicate that an array was passed as
    time x channels instead of the reverse.

    Arguments:
    ----------
    arrays : list of array-like
        The data to be converted to a recording of signal objects.
        Each item should be 2-dimensional and convertible to a
        numpy ndarray via np.array(x). No constraints are enforced
        on the dtype of the arrays, but in general float values
        are expected by most native NEMS functions.

    rec_name : str
        The name to be given to the new recording object. This will
        also be assigned as the recording attribute of each new signal.

    fs : number or list of numbers
        The frequency of sampling of the data arrays - used to
        interconvert between real time and time bins (see signal.py).
        If a scalar (int, float or numpy scalar), the same fs will be
        assigned to each signal. If list, the length must match the
        length of arrays.

    sig_names : list of strings (optional)
        Name to attach to the signal created from
        each array. The length of this list should match that of
        arrays.
        If not specified, the signals will be given the generic
        names: ['sig0', 'sig1', ...].

    signal_kwargs : list of dicts (optional)
        Keyword arguments to be passed through to
        each signal object. The length of this list should
        match the length of arrays, and may be padded with empty
        dictionaries to ensure this constraint.
        For example:
            [{'chans': ['1 kHz', '3 kHz']}, {'chans': ['one', 'two']}, {}]
        Would assign channel names '1 kHz' and '3 kHz' to the signal
        for the first array, 'one' and 'two' for the second array,
        and no channel names (or any other arguments) for the third array.

        Valid keyword arguments are: chans, epochs, meta,
                                     and safety_checks

    Returns:
    --------
    rec : recording object
        New recording containing a signal for each array.

    Raises:
    -------
    ValueError
        If an array is not 2-dimensional, or if sig_names, fs (when given
        as a list) or signal_kwargs do not have one entry per array.
    '''
    # Assemble and validate lists for signal construction
    arrays = [np.array(a) for a in arrays]
    for i, a in enumerate(arrays):
        if a.ndim != 2:
            raise ValueError("Arrays should have shape chans x time. "
                             "Array {} had shape: {}".format(i, a.shape))
    n = len(arrays)
    recs = [rec_name] * n

    if sig_names:
        if len(sig_names) != n:
            raise ValueError("Length of sig_names must match "
                             "the length of arrays.\n"
                             "Got sig_names: {} and arrays: {}".format(
                                 len(sig_names), n))
    else:
        sig_names = ['sig%s' % i for i in range(n)]

    # Any scalar (int, float, numpy scalar) is broadcast to every array;
    # checking only for `int` used to make e.g. fs=50.0 fail in len(fs).
    if np.ndim(fs) == 0:
        fs = [fs] * n
    elif len(fs) != n:
        raise ValueError("Length of fs must match "
                         "the length of arrays.\n"
                         "Got fs: {} and arrays: {}".format(len(fs), n))

    if not signal_kwargs:
        # One independent dict per signal, so nothing is shared between them.
        signal_kwargs = [{} for _ in range(n)]
    elif len(signal_kwargs) != n:
        raise ValueError("Length of signal_kwargs must match "
                         "the length of arrays.\n"
                         "Got signal_kwargs: {} and arrays: {}".format(
                             len(signal_kwargs), n))

    # Construct the signals
    signals = [
        RasterizedSignal(sig_fs, a, name, rec, **kw)
        for sig_fs, a, name, rec, kw
        in zip(fs, arrays, sig_names, recs, signal_kwargs)
    ]
    signals = {s.name: s for s in signals}
    # Combine into recording and return
    return Recording(signals)
fs = 50
cellid = 'MS_u0004_f0025'
recname = 'MS_u0004'
stimfile = signals_dir / 'MS_u0004_f0025_stim.csv.gz'
respfile = signals_dir / 'MS_u0004_f0025_resp.csv.gz'
epochsfile = signals_dir / 'MS_u0004_f0025_epochs.csv'

# Read the gzipped CSVs inside context managers so the file handles are
# closed once parsed (they used to be left open).
with gzip.open(stimfile, mode='rb') as stim_fh:
    X = np.loadtxt(stim_fh, delimiter=",", skiprows=0)
with gzip.open(respfile, mode='rb') as resp_fh:
    Y = np.loadtxt(resp_fh, delimiter=",", skiprows=0)

# get list of stimuli with start and stop times (in sec)
epochs = pd.read_csv(epochsfile)

# create NEMS-format recording objects from the raw data
resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid],
                        epochs=epochs.loc[:])
stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs.loc[:])

# create the recording object from the signals
signals = {'resp': resp, 'stim': stim}
rec = recording.Recording(signals)

#generate est/val set_sets
#nfolds=10
#est = rec.jackknife_masks_by_time(njacks=nfolds, invert=False) #VATSUN - doesnt work
#val = rec.jackknife_masks_by_time(njacks=nfolds, invert=True)

# VATSUN: Fraction=0.1 I think specifies the validation set
est, val = rec.split_at_time(fraction=0.1)
# Locations of the demo recordings and of saved modelspecs, built as plain
# strings under nems.NEMS_PATH.
signals_dir = nems.NEMS_PATH + '/recordings'
modelspecs_dir = nems.NEMS_PATH + '/modelspecs'

recording.get_demo_recordings(signals_dir)
datafile = signals_dir + "/TAR010c-18-1.pkl"

# ----------------------------------------------------------------------------
# LOAD AND FORMAT RECORDING DATA

# NOTE(review): pickle.load can execute arbitrary code; this is only safe as
# long as the demo file comes from a trusted source.
with open(datafile, 'rb') as f:
    cellid, recname, fs, X, Y, epochs = pickle.load(f)

# One channel name per row of X: '0', '1', ...
stimchans = [str(x) for x in range(X.shape[0])]
# borrowed from recording.load_recording_from_arrays
resp = RasterizedSignal(fs, Y, 'resp', recname, epochs=epochs, chans=[cellid])
stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
rec = recording.Recording(signals)

# est and val come from the same jackknife call; val passes invert=True.
epoch_name = "REFERENCE"
nfolds = 5
est = rec.jackknife_masks_by_epoch(nfolds, epoch_name, tiled=True)
val = rec.jackknife_masks_by_epoch(nfolds, epoch_name, tiled=True, invert=True)

# Alternative est/val splits, currently disabled:
#est, val = rec.split_at_time(0.2)
#est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
#est = preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_")
#val = preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_")

# ----------------------------------------------------------------------------
def test_concatenate_channels(signal):
    '''
    Stacking a signal with a jackknifed copy of itself doubles the number
    of channels and leaves the number of time bins unchanged.
    '''
    original = signal
    jackknifed = original.jackknife_by_time(20, 2)
    stacked = RasterizedSignal.concatenate_channels([original, jackknifed])

    assert original.as_continuous().shape == (3, 200)
    assert stacked.as_continuous().shape == (6, 200)
recording.get_demo_recordings(signals_dir)
# signals_dir is concatenated with a string here, so it is expected to be a
# str at this point.
pkl_file = signals_dir + "/TAR010c-18-1.pkl"

# ----------------------------------------------------------------------------
# LOAD AND FORMAT RECORDING DATA

# NOTE(review): pickle.load can execute arbitrary code; this is only safe as
# long as the demo file comes from a trusted source.
with open(pkl_file, 'rb') as f:  # Python 3: open(..., 'rb')
    cellid, recname, fs, X_est, Y_est, X_val, Y_val = pickle.load(f)

# No epochs are loaded from this file; the signals below are built without
# an epochs argument.
epochs = None
# One channel name per row of X_est: '0', '1', ...
stimchans = [str(x) for x in range(X_est.shape[0])]
# borrowed from recording.load_recording_from_arrays

# est recording - for model fitting
resp = RasterizedSignal(fs, Y_est, 'resp', recname, chans=[cellid])
stim = RasterizedSignal(fs, X_est, 'stim', recname, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
est = recording.Recording(signals)

# val recording - for testing predictions
# (resp, stim and signals are re-bound here; after this point they refer to
# the validation data, not the estimation data)
resp = RasterizedSignal(fs, Y_val, 'resp', recname, chans=[cellid])
stim = RasterizedSignal(fs, X_val, 'stim', recname, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
val = recording.Recording(signals)

# ----------------------------------------------------------------------------
# INITIALIZE MODELSPEC
#
# GOAL: Define the model that you wish to test
def from_nwb_pupil(nwb_file, nwb_format, fs=20, with_pupil=False,
                   running_speed=False, as_dict=True):
    #def from_nwb(cls, nwb_file, nwb_format,with_pupil=False,fs=20):
    """
    The NWB (Neurodata Without Borders) format is a unified data format developed by the Allen Brain Institute.
    Data is stored as an HDF5 file, with the format varying depending how the data was saved.

    References:
      - https://nwb.org
      - https://pynwb.readthedocs.io/en/latest/index.html

    :param nwb_file: path to the nwb file
    :param nwb_format: specifier for how the data is saved in the container
        (only 'neuropixel' is supported)
    :param int fs: sampling rate used for every signal that is created
    :param bool with_pupil: whether to add a 'pupil' signal to the recording(s).
        If the session has no pupil data a message is printed and the
        recording(s) are returned without it.
    :param bool running_speed: whether to add a 'running' signal to the recording(s)
    :param bool as_dict: return a dictionary of recording objects, each corresponding to a single unit/neuron
                         else a single recording object w/ each unit corresponding to a channel in pointprocess signal

    :return: a dict mapping each unit's channel name to its recording
        (as_dict=True), or a single recording object
    :raises AssertionError: if nwb_format is not a supported format
    :raises FileNotFoundError: if nwb_file does not exist
    :raises ImportError: if pynwb / allensdk cannot be imported
    """
    #log.info(f'Loading NWB file with format "{nwb_format}" from "{nwb_file}".')

    # add in supported nwb formats here
    # (kept as an assert so callers still see AssertionError for a bad format)
    assert nwb_format in ['neuropixel'], f'"{nwb_format}" not a supported NWB file format.'

    nwb_filepath = Path(nwb_file)
    if not nwb_filepath.exists():
        raise FileNotFoundError(f'"{nwb_file}" could not be found.')

    if nwb_format == 'neuropixel':
        # In neuropixel ecephys nwb files, data is stored in several attributes of the container:
        #   - units: individual cell metadata, a dataframe
        #   - epochs: timing of the stimuli, series of arrays
        #   - lab_meta_data: metadata about the experiment, such as specimen details
        #
        # Spike times are saved as arrays in the 'spike_times' column of the units dataframe as xarrays.
        #
        # Refs:
        #   - https://allensdk.readthedocs.io/en/latest/visual_coding_neuropixels.html
        #   - https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_quickstart.html
        #   - https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_data_access.html
        try:
            from pynwb import NWBHDF5IO
            from allensdk.brain_observatory.ecephys import nwb  # needed for ecephys format compat
        except ImportError as err:
            m = 'The "allensdk" library is required to work with neuropixel nwb formats, available on PyPI.'
            #log.error(m)
            raise ImportError(m) from err

        session_name = nwb_filepath.stem

        # Everything below stays inside the `with` block so the file is
        # still open while its (possibly lazily loaded) columns are read.
        with NWBHDF5IO(str(nwb_filepath), 'r') as nwb_io:
            nwbfile = nwb_io.read()

            units = nwbfile.units
            epochs = nwbfile.epochs

            spike_times = dict(zip(units.id[:].astype(str), units['spike_times'][:]))

            # extract the metadata and convert to dict
            metadata = nwbfile.lab_meta_data['metadata'].to_dict()
            metadata['uri'] = str(nwb_filepath)  # add in uri

            #add invalid times data to meta as df if exist - includes times and probe id?
            if nwbfile.invalid_times is not None:
                invalid_times = nwbfile.invalid_times
                invalid_times = np.array([invalid_times[col][:] for col in invalid_times.colnames])
                metadata['invalid_times'] = pd.DataFrame(
                    invalid_times.transpose(),
                    columns=['start_time', 'stop_time', 'tags'])

            # build the units metadata
            units_data = {
                col.name: col.data for col in units.columns
                if col.name not in ['spike_times', 'spike_times_index', 'spike_amplitudes',
                                    'spike_amplitudes_index', 'waveform_mean', 'waveform_mean_index']
            }
            units_meta = pd.DataFrame(units_data, index=units.id[:])

            #add electrode info to units meta
            electrodes = nwbfile.electrodes
            e_data = {col.name: col.data for col in electrodes.columns}
            e_meta = pd.DataFrame(e_data, index=electrodes.id[:])
            units_meta = pd.merge(
                units_meta, e_meta,
                left_on=units_meta.peak_channel_id, right_index=True,
                suffixes=('_unit', '_channel'),
            ).drop(['key_0', 'group'], axis=1).to_dict('index')  # needs to be a dict

            # build the epoch dataframe
            epoch_data = {
                col.name: col.data for col in epochs.columns
                if col.name not in ['tags', 'timeseries', 'tags_index', 'timeseries_index']
            }
            epoch_df = pd.DataFrame(epoch_data, index=epochs.id[:]).rename({
                'start_time': 'start',
                'stop_time': 'end',
                'stimulus_name': 'name'
            }, axis='columns')

            #rename epochs to correspond to different nat scene/movie frames -
            # rows with a frame number get '_<frame>' appended to their name
            has_frame = epoch_df['frame'].notna()
            epoch_df.loc[has_frame, 'name'] = (
                epoch_df.loc[has_frame, 'name'] + '_'
                + epoch_df.loc[has_frame, 'frame'].astype(int).astype(str)
            )

            metadata['epochs'] = epoch_df  #save extra stim info to meta
            #drop extra columns
            epoch_df = epoch_df.drop(
                [col for col in epoch_df.columns if col not in ['start', 'end', 'name']],
                axis=1)

            #rename natural scene epochs to work w/demo: every natural_scene
            # epoch is duplicated under the name 'REFERENCE'
            df_copy = epoch_df[epoch_df['name'].str.contains('natural_scene')].copy()
            df_copy.loc[:, 'name'] = 'REFERENCE'
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent that works on old and new versions.
            epoch_df = pd.concat([epoch_df, df_copy], ignore_index=True)

            #expand epoch bounds epochs will overlap to test evoked potential
            # to_adjust=epoch_df.loc[:,['start','end']].to_numpy()
            # epoch_df.loc[:,['start','end']] = nems.epoch.adjust_epoch_bounds(to_adjust,-0.1,0.1)

            # save the spike times as a point process signal frequency set to match other signals
            pp = PointProcess(fs, spike_times, name='resp', recording=session_name, epochs=epoch_df,
                              chans=[str(c) for c in nwbfile.units.id[:]], meta=units_meta)
            # log.info('Successfully loaded nwb file.')

            from scipy.interpolate import interp1d

            # Optional signals; they stay None when not requested or not
            # available, and only non-None ones are attached below.
            pupil_signal = None
            running_signal = None

            #save pupil data as rasterized signal
            if with_pupil:
                try:
                    pupil = nwbfile.modules['eye_tracking'].data_interfaces['pupil_ellipse_fits']
                    t = pupil['timestamps'][:]
                    pupil = pupil['width'][:].reshape(1, -1)  #only 1 dimension - or get 'height'
                except KeyError:
                    #if no pupil data for session - still get spike data
                    # (pupil_signal stays None; previously it was left unbound
                    # and add_signal(pupil_signal) below raised)
                    print(session_name + ' has no pupil data.')
                else:
                    #interpolate to set sampling rate
                    f = interp1d(t, pupil, bounds_error=False, fill_value=np.nan)
                    #pupil data starting at timepoint 0.0 (nan filler)
                    new_t = np.arange(0.0, (t.max() + 1 / fs), 1 / fs)
                    pupil = f(new_t)
                    pupil_signal = RasterizedSignal(fs=fs, data=pupil, recording=session_name,
                                                    name='pupil', epochs=epoch_df, chans=['pupil'])
                    #for all data list(pupil_data.colnames[0:5])

            if running_speed:
                running = nwbfile.modules['running'].data_interfaces['running_speed']
                t = running.timestamps[:][1]  #data has start and end timestamps, here only end used
                running = running.data[:].reshape(1, -1)
                # NOTE(review): unlike the pupil interpolation this one has no
                # bounds_error=False, so new_t values outside t would raise.
                f = interp1d(t, running)
                #new_t = np.arange(np.min(t),np.max(t),1/fs)
                new_t = np.arange(epoch_df.start.min(), epoch_df.end.max(), 1 / fs)
                running = f(new_t)
                running_signal = RasterizedSignal(fs=fs, data=running, name='running',
                                                  recording=session_name, epochs=epoch_df)

            # pupil first, then running -- same order as before
            extra_signals = [s for s in (pupil_signal, running_signal) if s is not None]

            if as_dict:
                #each unit has seperate recording in dict
                rec_dict = {}
                for c in pp.chans:
                    unit_signal = pp.extract_channels([c])
                    rec = Recording({'resp': unit_signal}, meta=metadata)
                    for extra in extra_signals:
                        rec.add_signal(extra)
                    rec_dict[c] = rec
                return rec_dict

            rec = Recording({'resp': pp}, meta=metadata)
            for extra in extra_signals:
                rec.add_signal(extra)
            return rec
rec['resp']=rec['resp'].extract_channels([cellid]) est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_") est=preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_") val=preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_") elif load_method==1: # method 1: load from a pkl datafile that contains full stim+response data # along with metadata (fs, stimulus epoch list) datafile = signals_dir / 'TAR010c-18-1.pkl' with open(datafile, 'rb') as f: #cellid, recname, fs, X, Y, X_val, Y_val = pickle.load(f) cellid, recname, fs, X, Y, epochs = pickle.load(f) # create NEMS-format recording objects from the raw data resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid], epochs=epochs) stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs) # create the recording object from the signals signals = {'resp': resp, 'stim': stim} rec = recording.Recording(signals) est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_") est=preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_") val=preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_") elif load_method==2: # method 2: load from CSV files - one per response, stimulus, epochs # X is a frequency X time spectrgram, sampled at 100 Hz # Y is a neuron X time PSTH, aligned with X. Ie, same number of time bins # epochs is a list of STIM events with start and stop time of each event # in seconds