Example #1
def test_signal_save_load(signal, signal_tmpdir):
    '''
    Test that signals save and load properly
    '''
    #    if not os.path.exists(signal_tmpdir):
    #        os.mkdir(signal_tmpdir)
    signal.save(str(signal_tmpdir), fmt='%1.3e')

    signals_found = RasterizedSignal.list_signals(str(signal_tmpdir))
    assert len(signals_found) == 1

    save_directory = os.path.join(str(signal_tmpdir), signals_found[0])
    signal_loaded = RasterizedSignal.load(save_directory)

    assert np.all(signal.as_continuous() == signal_loaded.as_continuous())
Example #2
def signal(signal_name='dummy_signal', recording_name='dummy_recording', fs=50,
           nchans=3, ntimes=200):
    '''
    Generates a dummy signal with a predictable structure (every element
    increases by 1) that is useful for testing.
    '''
    # Generate a numpy array that increases by 1 across channels, then across
    # timepoints: data[c, t] = c + t * nchans.
    c = np.arange(nchans, dtype=np.float64)
    t = np.arange(ntimes, dtype=np.float64)
    data = c[..., np.newaxis] + t*nchans

    epochs = pd.DataFrame({
        'start': [3, 15, 150],
        'end': [200, 60, 190],
        'name': ['trial', 'pupil_closed', 'pupil_closed']
        })
    epochs['start'] /= fs
    epochs['end'] /= fs
    kwargs = {
        'data': data,
        'name': signal_name,
        'recording': recording_name,
        'chans': ['chan' + str(n) for n in range(nchans)],
        'epochs': epochs,
        'fs': fs,
        'meta': {
            'for_testing': True,
            'date': "2018-01-10",
            'animal': "Donkey Hotey",
            'windmills': 'tilting'
        },
    }
    return RasterizedSignal(**kwargs)
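
A minimal sketch (not from the source) verifying the fixture's layout, treating the fixture as a plain function; with the defaults above the array is 3 channels x 200 time bins:

s = signal()
data = s.as_continuous()
assert data.shape == (3, 200)
assert data[0, 0] == 0.0  # data[c, t] == c + t * nchans: +1 per channel step,
assert data[1, 0] == 1.0  # +nchans per time step, i.e. every element increases
assert data[0, 1] == 3.0  # by 1 when read column by column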
Example #3
def test_epoch_save_load(signal, signal_tmpdir):
    '''
    Test that epochs save and load properly
    '''

    before = signal.epochs

    signal.save(str(signal_tmpdir), fmt='%1.3e')
    signals_found = RasterizedSignal.list_signals(str(signal_tmpdir))
    save_directory = os.path.join(str(signal_tmpdir), signals_found[0])
    signal_loaded = RasterizedSignal.load(save_directory)

    after = signal_loaded.epochs
    print("Dataframes equal?\n"
          "Before:\n{0}\n"
          "After:\n{1}\n".format(before, after))
    assert before.equals(after)
Example #4
def test_extract_channels(signal):
    two_sig = signal.extract_channels(['chan0', 'chan1'])
    assert two_sig.shape == (2, 200)
    one_sig = signal.extract_channels(['chan2'])
    assert one_sig.shape == (1, 200)
    recombined = RasterizedSignal.concatenate_channels([two_sig, one_sig])
    before = signal.as_continuous()
    after = recombined.as_continuous()
    assert np.array_equal(before, after)
Example #5
File: loaders.py Project: LBHB/NEMS
def load_sadagopan(cellid='MS_u0004_f0025',
                   recname='MS_u0004',
                   stimfile=None,
                   respfile=None,
                   epochsfile=None,
                   fs=50,
                   channel_num=0,
                   **context):
    """
    example file from Sadagopan lab
    """

    if stimfile is None:
        stimfile = signals_dir / (cellid + '_stim.csv.gz')
    if respfile is None:
        respfile = signals_dir / (cellid + '_resp.csv.gz')
    if epochsfile is None:
        epochsfile = signals_dir / (cellid + '_epochs.csv')

    X = np.loadtxt(gzip.open(stimfile, mode='rb'), delimiter=",", skiprows=0)
    Y = np.loadtxt(gzip.open(respfile, mode='rb'), delimiter=",", skiprows=0)
    # get list of stimuli with start and stop times (in sec)
    epochs = pd.read_csv(epochsfile)

    # create NEMS-format recording objects from the raw data
    resp = RasterizedSignal(fs,
                            Y,
                            'resp',
                            recname,
                            chans=[cellid],
                            epochs=epochs.loc[:])
    stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs.loc[:])

    # create the recording object from the signals
    signals = {'resp': resp, 'stim': stim}
    rec = recording.Recording(signals)

    return {'rec': rec}
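
Hypothetical usage of the loader above, assuming signals_dir points at the demo recordings (all file names follow the defaults in the signature):

ctx = load_sadagopan()
rec = ctx['rec']
print(rec['resp'].shape, rec['stim'].shape)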
Example #6
lv1 /= lv1.std()
g2 = np.random.normal(0.5, 0.1, (nCells, 1))

gain2 = np.matmul(lv1.T, g2.T).T

resp = u * np.exp(gain1 + gain2)
psth = np.tile(resp.mean(axis=-1), (T, 1)).T

# get rid of any "cells" that never fired
idx = (resp == 0).sum(axis=-1) == T
resp = resp[~idx, :]
psth = psth[~idx, :]
nCells = resp.shape[0]

# pack into nems recording
resp_sig = RasterizedSignal(fs=4, data=resp, name='resp', recording='simulation')
psth_sig = RasterizedSignal(fs=4, data=psth, name='psth', recording='simulation')
pupil_sig = RasterizedSignal(fs=4, data=pupil, name='pupil', recording='simulation')
bm = pupil > pupil.mean()
big_mask = RasterizedSignal(fs=4, data=bm, name='big_mask', recording='simulation')
sm = pupil < pupil.mean()
small_mask = RasterizedSignal(fs=4, data=sm, name='small_mask', recording='simulation')

rec = Recording({'resp': resp_sig, 'psth': psth_sig, 'pupil': pupil_sig})

# fit the GLM for different hyperparameters
x0 = np.zeros(3 * nCells)
nLV = 1
alpha1 = np.arange(0, 0.5, 0.05)
results = dict.fromkeys(alpha1)
for i, a2 in tqdm(enumerate(alpha1)):
Example #7
# figure out data and results paths:
signals_dir = Path(nems.NEMS_PATH) / 'recordings'
modelspecs_dir = Path(nems.NEMS_PATH) / 'modelspecs'

# download demo data
recording.get_demo_recordings(signals_dir)
datafile = signals_dir / 'TAR010c-18-1.pkl'

# LOAD AND FORMAT RECORDING DATA

with open(datafile, 'rb') as f:
    # this snippet needs the est/val variant of the pickle, since X_val and
    # Y_val are used below; the alternative unpacking yields epochs instead:
    #cellid, recname, fs, X, Y, epochs = pickle.load(f)
    cellid, recname, fs, X, Y, X_val, Y_val = pickle.load(f)
# create NEMS-format recording objects from the raw data
resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid])
stim = RasterizedSignal(fs, X, 'stim', recname)

# create the recording object from the signals
signals = {'resp': resp, 'stim': stim}
est = recording.Recording(signals)

val_signals = {
    'resp': RasterizedSignal(fs, Y_val, 'resp', recname, chans=[cellid]),
    'stim': RasterizedSignal(fs, X_val, 'stim', recname)
}
val = recording.Recording(val_signals)

# INITIALIZE MODELSPEC

log.info('Initializing modelspec...')
Example #8
def load_recording_from_arrays(arrays,
                               rec_name,
                               fs,
                               sig_names=None,
                               signal_kwargs=None):
    '''
    Generates a recording object, and the signal objects it contains,
    from a list of array-like structures of the form channels x time
    (see signal.py for more details about how arrays are represented
     by signals).

    If any of the arrays are more than 2-dimensional,
    an error will be thrown. Also pay close attention to any
    RuntimeWarnings from the signal class regarding improperly-shaped
    arrays, which may indicate that an array was passed as
    time x channels instead of the reverse.

    Arguments:
    ----------
    arrays : list of array-like
        The data to be converted to a recording of signal objects.
        Each item should be 2-dimensional and convertible to a
        numpy ndarray via np.array(x). No constraints are enforced
        on the dtype of the arrays, but in general float values
        are expected by most native NEMS functions.

    rec_name : str
        The name to be given to the new recording object. This will
        also be assigned as the recording attribute of each new signal.

    fs : int or list of ints
        The frequency of sampling of the data arrays - used to
        interconvert between real time and time bins (see signal.py).
        If int, the same fs will be assigned to each signal.
        If list, the length must match the length of arrays.

    sig_names : list of strings (optional)
        Name to attach to the signal created from
        each array. The length of this list should match that of
        arrays.
        If not specified, the signals will be given the generic
        names: ['sig0', 'sig1', ...].

    signal_kwargs : list of dicts
        Keyword arguments to be passed through to
        each signal object. The length of this list should
        match the length of arrays, and may be padded with empty
        dictionaries to ensure this constraint.
        For example:
            [{'chans': ['1 kHz', '3 kHz']}, {'chans': ['one', 'two']}, {}]
        Would assign channel names '1 kHz' and '3 kHz' to the signal
        for the first array, 'one' and 'two' for the second array,
        and no channel names (or any other arguments) for the third array.

        Valid keyword arguments are: chans, epochs, meta,
                                     and safety_checks

    Returns:
    --------
    rec : recording object
        New recording containing a signal for each array.
    '''
    # Assemble and validate lists for signal construction
    arrays = [np.array(a) for a in arrays]
    for i, a in enumerate(arrays):
        if len(a.shape) != 2:
            raise ValueError("Arrays should have shape chans x time. "
                             "Array {} had shape: {}".format(i, a.shape))
    n = len(arrays)
    recs = [rec_name] * len(arrays)
    if sig_names:
        if not len(sig_names) == n:
            raise ValueError("Length of sig_names must match "
                             "the length of arrays.\n"
                             "Got sig_names: {} and arrays: {}".format(
                                 len(sig_names), n))
    else:
        sig_names = ['sig%s' % i for i in range(n)]
    if isinstance(fs, int):
        fs = [fs] * n
    else:
        if not len(fs) == n:
            raise ValueError("Length of fs must match "
                             "the length of arrays.\n"
                             "Got fs: {} and arrays: {}".format(len(fs), n))
    if not signal_kwargs:
        signal_kwargs = [{}] * n
    else:
        if not len(signal_kwargs) == n:
            raise ValueError("Length of signal_kwargs must match "
                             "the length of arrays.\n"
                             "Got signal_kwargs: {} and arrays: {}".format(
                                 len(signal_kwargs), n))

    # Construct the signals
    to_sigs = zip(fs, arrays, sig_names, recs, signal_kwargs)
    signals = [
        RasterizedSignal(fs, a, name, rec, **kw)
        for fs, a, name, rec, kw in to_sigs
    ]
    signals = {s.name: s for s in signals}
    # Combine into recording and return
    return Recording(signals)
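
A short sketch (not from the source) of calling load_recording_from_arrays with the docstring's example signal_kwargs; the arrays and signal names are placeholders:

x1 = np.random.rand(2, 100)  # 2 chans x 100 time bins
x2 = np.random.rand(2, 100)
x3 = np.random.rand(1, 100)
rec = load_recording_from_arrays(
    [x1, x2, x3], 'demo_rec', fs=50,
    sig_names=['stim', 'resp', 'state'],
    signal_kwargs=[{'chans': ['1 kHz', '3 kHz']},
                   {'chans': ['one', 'two']},
                   {}])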
Example #9
fs = 50
cellid = 'MS_u0004_f0025'
recname = 'MS_u0004'
stimfile = signals_dir / 'MS_u0004_f0025_stim.csv.gz'
respfile = signals_dir / 'MS_u0004_f0025_resp.csv.gz'
epochsfile = signals_dir / 'MS_u0004_f0025_epochs.csv'

X = np.loadtxt(gzip.open(stimfile, mode='rb'), delimiter=",", skiprows=0)
Y = np.loadtxt(gzip.open(respfile, mode='rb'), delimiter=",", skiprows=0)
# get list of stimuli with start and stop times (in sec)
epochs = pd.read_csv(epochsfile)

# create NEMS-format recording objects from the raw data
resp = RasterizedSignal(fs,
                        Y,
                        'resp',
                        recname,
                        chans=[cellid],
                        epochs=epochs.loc[:])
stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs.loc[:])

# create the recording object from the signals
signals = {'resp': resp, 'stim': stim}
rec = recording.Recording(signals)

# generate est/val sets
#nfolds=10
#est = rec.jackknife_masks_by_time(njacks=nfolds, invert=False)  # VATSUN - doesn't work
#val = rec.jackknife_masks_by_time(njacks=nfolds, invert=True)

# VATSUN: fraction=0.1 specifies the fraction of data held out for the validation set
est, val = rec.split_at_time(fraction=0.1)
Example #10
signals_dir = nems.NEMS_PATH + '/recordings'
modelspecs_dir = nems.NEMS_PATH + '/modelspecs'
recording.get_demo_recordings(signals_dir)

datafile = signals_dir + "/TAR010c-18-1.pkl"

# ----------------------------------------------------------------------------
# LOAD AND FORMAT RECORDING DATA

with open(datafile, 'rb') as f:
    cellid, recname, fs, X, Y, epochs = pickle.load(f)

stimchans = [str(x) for x in range(X.shape[0])]
# borrowed from recording.load_recording_from_arrays

resp = RasterizedSignal(fs, Y, 'resp', recname, epochs=epochs, chans=[cellid])
stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
rec = recording.Recording(signals)

epoch_name = "REFERENCE"
nfolds = 5
est = rec.jackknife_masks_by_epoch(nfolds, epoch_name, tiled=True)
val = rec.jackknife_masks_by_epoch(nfolds, epoch_name, tiled=True, invert=True)

#est, val = rec.split_at_time(0.2)
#est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
#est = preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_")
#val = preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_")

# ----------------------------------------------------------------------------
Example #11
def test_concatenate_channels(signal):
    sig1 = signal
    sig2 = sig1.jackknife_by_time(20, 2)
    sig3 = RasterizedSignal.concatenate_channels([sig1, sig2])
    assert sig1.as_continuous().shape == (3, 200)
    assert sig3.as_continuous().shape == (6, 200)
Example #12
recording.get_demo_recordings(signals_dir)

pkl_file = signals_dir + "/TAR010c-18-1.pkl"

# ----------------------------------------------------------------------------
# LOAD AND FORMAT RECORDING DATA

with open(pkl_file, 'rb') as f:  # Python 3: open(..., 'rb')
    cellid, recname, fs, X_est, Y_est, X_val, Y_val = pickle.load(f)

epochs = None
stimchans = [str(x) for x in range(X_est.shape[0])]
# borrowed from recording.load_recording_from_arrays

# est recording - for model fitting
resp = RasterizedSignal(fs, Y_est, 'resp', recname, chans=[cellid])
stim = RasterizedSignal(fs, X_est, 'stim', recname, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
est = recording.Recording(signals)

# val recording - for testing predictions
resp = RasterizedSignal(fs, Y_val, 'resp', recname, chans=[cellid])
stim = RasterizedSignal(fs, X_val, 'stim', recname, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
val = recording.Recording(signals)

# ----------------------------------------------------------------------------
# INITIALIZE MODELSPEC
#
# GOAL: Define the model that you wish to test
Example #13
def from_nwb_pupil(nwb_file, nwb_format, fs=20, with_pupil=False,
                   running_speed=False, as_dict=True):
#def from_nwb(cls, nwb_file, nwb_format,with_pupil=False,fs=20):
    """
    The NWB (Neurodata Without Borders) format is a unified data format developed with the
    Allen Institute for Brain Science. Data is stored as an HDF5 file, with the internal
    layout varying depending on how the data was saved.
    
    References:
      - https://nwb.org
      - https://pynwb.readthedocs.io/en/latest/index.html
    :param nwb_file: path to the nwb file
    :param nwb_format: specifier for how the data is saved in the container
    :param int fs: sampling rate applied to all signals
    :param bool with_pupil: whether to include a pupil signal in the recording
    :param bool running_speed: whether to include a running-speed signal in the recording
    :param bool as_dict: if True, return a dictionary of recording objects, one per
                         unit/neuron; otherwise a single recording object with each unit
                         as a channel of the point-process signal
    :return: a recording object (or a dict of them)
    """
    #log.info(f'Loading NWB file with format "{nwb_format}" from "{nwb_file}".')

    # add in supported nwb formats here
    assert nwb_format in ['neuropixel'], f'"{nwb_format}" not a supported NWB file format.'

    nwb_filepath = Path(nwb_file)
    if not nwb_filepath.exists():
        raise FileNotFoundError(f'"{nwb_file}" could not be found.')

    if nwb_format == 'neuropixel':
        """
        In neuropixel ecephys nwb files, data is stored in several attributes of the container: 
          - units: individual cell metadata, a dataframe
          - epochs: timing of the stimuli, series of arrays
          - lab_meta_data: metadata about the experiment, such as specimen details
          
        Spike times are saved as arrays in the 'spike_times' column of the units dataframe.
        The sampling frequency defaults to match the pupil signal; if no pupil data is
        retrieved, it is set to the chosen value (previous default 1250).
          
        Refs:
          - https://allensdk.readthedocs.io/en/latest/visual_coding_neuropixels.html
          - https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_quickstart.html
          - https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_data_access.html
        """
        try:
            from pynwb import NWBHDF5IO
            from allensdk.brain_observatory.ecephys import nwb  # needed for ecephys format compat
        except ImportError:
            m = 'The "allensdk" library is required to work with neuropixel nwb formats, available on PyPI.'
            #log.error(m)
            raise ImportError(m)

        session_name = nwb_filepath.stem
        with NWBHDF5IO(str(nwb_filepath), 'r') as nwb_io:
            nwbfile = nwb_io.read()

            units = nwbfile.units
            epochs = nwbfile.epochs
           
            spike_times = dict(zip(units.id[:].astype(str), units['spike_times'][:]))

            # extract the metadata and convert to dict
            metadata = nwbfile.lab_meta_data['metadata'].to_dict()
            metadata['uri'] = str(nwb_filepath)  # add in uri
            # add invalid-times data to meta as a dataframe, if present (includes times and probe id)
            if nwbfile.invalid_times is not None:
                invalid_times = nwbfile.invalid_times
                invalid_times = np.array([invalid_times[col][:] for col in invalid_times.colnames])
                metadata['invalid_times'] = pd.DataFrame(invalid_times.transpose(),
                                                         columns=['start_time', 'stop_time', 'tags'])
                
            # build the units metadata
            units_data = {
                col.name: col.data for col in units.columns
                if col.name not in ['spike_times', 'spike_times_index', 'spike_amplitudes',
                                    'spike_amplitudes_index', 'waveform_mean', 'waveform_mean_index']
            }

            units_meta = pd.DataFrame(units_data, index=units.id[:])
            # add electrode info to the units meta, then convert to a dict as
            # required downstream
            electrodes = nwbfile.electrodes
            e_data = {col.name: col.data for col in electrodes.columns}
            e_meta = pd.DataFrame(e_data, index=electrodes.id[:])
            units_meta = pd.merge(units_meta, e_meta, left_on=units_meta.peak_channel_id,
                                  right_index=True, suffixes=('_unit', '_channel'))
            units_meta = units_meta.drop(['key_0', 'group'], axis=1).to_dict('index')

            # build the epoch dataframe
            epoch_data = {
                col.name: col.data for col in epochs.columns
                if col.name not in ['tags', 'timeseries', 'tags_index', 'timeseries_index']
            }

            epoch_df = pd.DataFrame(epoch_data, index=epochs.id[:]).rename({
                'start_time': 'start',
                'stop_time': 'end',
                'stimulus_name': 'name'
            }, axis='columns')

 
            # rename epochs to correspond to different natural scene/movie frames
            has_frame = epoch_df['frame'].notna()
            epoch_df.loc[has_frame, 'name'] = (epoch_df.loc[has_frame, 'name'] + '_' +
                                               epoch_df.loc[has_frame, 'frame']
                                               .astype(int).astype(str))

            # save the full stim info to meta, then drop the extra columns
            metadata['epochs'] = epoch_df
            epoch_df = epoch_df.drop([col for col in epoch_df.columns
                                      if col not in ['start', 'end', 'name']], axis=1)

            # rename natural scene epochs to work with the demo
            df_copy = epoch_df[epoch_df.name.str.contains('natural_scene')].copy()
            df_copy.loc[:, 'name'] = 'REFERENCE'

            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            epoch_df = pd.concat([epoch_df, df_copy], ignore_index=True)
            # expand epoch bounds so that epochs overlap, to test evoked potentials
#            to_adjust = epoch_df.loc[:, ['start', 'end']].to_numpy()
#            epoch_df.loc[:, ['start', 'end']] = nems.epoch.adjust_epoch_bounds(to_adjust, -0.1, 0.1)
            
            
            # save the spike times as a point-process signal; frequency set to match other signals
            pp = PointProcess(fs, spike_times, name='resp', recording=session_name,
                              epochs=epoch_df, chans=[str(c) for c in nwbfile.units.id[:]],
                              meta=units_meta)
            #dict to pass to recording
            #signal_dict = {pp.name: pp}

            #  log.info('Successfully loaded nwb file.')
            from scipy.interpolate import interp1d

            # save pupil data as a rasterized signal
            if with_pupil:
                try:
                    pupil = nwbfile.modules['eye_tracking'].data_interfaces['pupil_ellipse_fits']
                    t = pupil['timestamps'][:]
                    pupil = pupil['width'][:].reshape(1, -1)  # only 1 dimension - or get 'height'

                    # interpolate to the set sampling rate; pupil data starts at
                    # timepoint 0.0 (nan filler)
                    f = interp1d(t, pupil, bounds_error=False, fill_value=np.nan)
                    new_t = np.arange(0.0, (t.max() + 1/fs), 1/fs)
                    pupil = f(new_t)

                    pupil_signal = RasterizedSignal(fs=fs, data=pupil, recording=session_name,
                                                    name='pupil', epochs=epoch_df,
                                                    chans=['pupil'])  # for all data: list(pupil_data.colnames[0:5])

                # if there is no pupil data for this session, still return the spike data
                except KeyError:
                    print(session_name + ' has no pupil data.')

            
            if running_speed:
                running = nwbfile.modules['running'].data_interfaces['running_speed']
                # data has start and end timestamps; only the end timestamps are used here
                t = running.timestamps[:][1]
                running = running.data[:].reshape(1, -1)

                f = interp1d(t, running)
                #new_t = np.arange(np.min(t), np.max(t), 1/fs)
                new_t = np.arange(epoch_df.start.min(), epoch_df.end.max(), 1/fs)
                running = f(new_t)
                running = RasterizedSignal(fs=fs, data=running, name='running',
                                           recording=session_name, epochs=epoch_df)

            if as_dict:
                # each unit gets a separate recording in the dict
                rec_dict = {}
                for c in pp.chans:
                    unit_signal = pp.extract_channels([c])
                    rec = Recording({'resp': unit_signal}, meta=metadata)
                    if with_pupil:
                        rec.add_signal(pupil_signal)
                    if running_speed:
                        rec.add_signal(running)
                    rec_dict[c] = rec
                return rec_dict
            
            else:
                rec = Recording({'resp': pp}, meta=metadata)
                if with_pupil:
                    rec.add_signal(pupil_signal)
                if running_speed:
                    rec.add_signal(running)
                return rec
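
Hypothetical call of the loader above; the session file name is made up, but any Allen SDK ecephys session NWB file should fit the 'neuropixel' format:

recs = from_nwb_pupil('session_715093703.nwb', 'neuropixel',
                      fs=20, with_pupil=True, as_dict=True)
unit_id, unit_rec = next(iter(recs.items()))  # one Recording per unit id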
Example #14
    rec['resp'] = rec['resp'].extract_channels([cellid])
    est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
    est = preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_")
    val = preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_")

elif load_method == 1:
    # method 1: load from a pkl datafile that contains full stim+response data
    # along with metadata (fs, stimulus epoch list)
    datafile = signals_dir / 'TAR010c-18-1.pkl'

    with open(datafile, 'rb') as f:
        #cellid, recname, fs, X, Y, X_val, Y_val = pickle.load(f)
        cellid, recname, fs, X, Y, epochs = pickle.load(f)

    # create NEMS-format recording objects from the raw data
    resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid], epochs=epochs)
    stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs)

    # create the recording object from the signals
    signals = {'resp': resp, 'stim': stim}
    rec = recording.Recording(signals)
    est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
    est = preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_")
    val = preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_")

elif load_method == 2:
    # method 2: load from CSV files - one each for response, stimulus, and epochs
    # X is a frequency X time spectrogram, sampled at 100 Hz
    # Y is a neuron X time PSTH, aligned with X (i.e., the same number of time bins)
    # epochs is a list of STIM events with the start and stop time of each event
    # in seconds
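
The snippet is truncated here; a sketch of what the method-2 body might look like, mirroring the CSV loader in Example #9 (the file names, and the reuse of cellid/recname from above, are assumptions):

    fs = 100
    X = np.loadtxt(gzip.open(signals_dir / 'stim.csv.gz', mode='rb'), delimiter=",")
    Y = np.loadtxt(gzip.open(signals_dir / 'resp.csv.gz', mode='rb'), delimiter=",")
    epochs = pd.read_csv(signals_dir / 'epochs.csv')

    resp = RasterizedSignal(fs, Y, 'resp', recname, chans=[cellid], epochs=epochs)
    stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs)
    rec = recording.Recording({'resp': resp, 'stim': stim})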