Example #1
def test_cell_type_subsamples(session='1', animal='MD0ST5', lab='dipoppa', expt='full_trial',
                              sample='sst85_sample_0', cell_id=3):
    data_dir = get_user_dir('data')
    path = os.path.join(data_dir, lab, expt, animal, session, 'data.hdf5')
    if not os.path.exists(path):
        print(path, 'does not exist.')
        return

    # Get cell types (Need to change this for different mice)
    info_cells = loadmat('neural_dir/info_cells_MD0ST5_2018-04-04.mat')['info_cells']
    cell_type_idxs = info_cells[0][0][0][0]    # 0 = excitatory, 3 = inhibitory
    good_cell_idxs = info_cells[0][0][1][0]

    # Label all "bad" cells as -1
    cell_type_idxs = [
        cell_type_idxs[i] if good_cell_idxs[i] == 1 else -1
        for i in range(len(good_cell_idxs))
    ]

    def select_idxs(arr, key):
        return [i for i in range(len(arr)) if arr[i] == key]

    idxs = select_idxs(cell_type_idxs, cell_id)

    data = h5py.File(path, 'r', libver='latest', swmr=True)
    cell_types = data['samples']['cell_types']
    sample_idxs = cell_types[sample]

    # Correct number of indexes
    assert len(sample_idxs[:]) == len(idxs)
    # No repeats
    assert len(set(sample_idxs[:])) == len(idxs)

    data.close()

    print('Cell Type Subsample: Success')
def get_all_params(search_type='grid_search', args=None):

    # Raise error if user has other command line arguments specified (as these could
    # override configs in confusing ways)
    arg_list = args if args is not None else sys.argv[1:]
    if len(arg_list) != 8:
        raise ValueError(
            'No command line arguments allowed other than config file names')

    # Create parser
    parser = HyperOptArgumentParser(strategy=search_type)
    parser.add_argument('--data_config', type=str)
    parser.add_argument('--model_config', type=str)
    parser.add_argument('--training_config', type=str)
    parser.add_argument('--compute_config', type=str)

    namespace, extra = parser.parse_known_args(args)

    # Add arguments from all configs
    configs = [
        namespace.data_config, namespace.model_config,
        namespace.training_config, namespace.compute_config
    ]
    for config in configs:
        config_json = commentjson.load(open(config, 'r'))
        for (key, value) in config_json.items():
            add_to_parser(parser, key, value)

    # Add save/user dirs
    parser.add_argument('--save_dir', default=get_user_dir('save'), type=str)
    parser.add_argument('--data_dir', default=get_user_dir('data'), type=str)

    # Add parameters dependent on previous inputs
    namespace, extra = parser.parse_known_args(args)
    add_dependent_params(parser, namespace)

    return parser.parse_args(args)
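# Hedged usage sketch for get_all_params (not part of the original source): the parser
# expects exactly eight command-line tokens, i.e. the four --*_config flags plus their
# four file paths. The config paths below are illustrative placeholders.
#
# hparams = get_all_params(search_type='grid_search', args=[
#     '--data_config', 'configs/data.json',
#     '--model_config', 'configs/model.json',
#     '--training_config', 'configs/training.json',
#     '--compute_config', 'configs/compute.json',
# ])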
def main():

    # Get hdf5 files
    fs = []
    data_dir = get_user_dir('data')
    for i in [1, 3, 4, 5, 6]:
        path = data_dir + '/dipoppa/MSP_pupil/SB028/{}/data.hdf5'.format(i)
        fs.append(h5py.File(path, 'a', libver='latest', swmr=True))

    print(fs)
    # Get cell types
    info_cells = loadmat(
        'neural_dir/SB028/2019-11-08/info_cells_SB028_2019-11-08.mat'
    )['info_cells']
    cell_type_idxs = info_cells[0][0][0][0]  # 0 = excitatory, 3 = inhibitory
    good_cell_idxs = info_cells[0][0][1][0]

    # Label all "bad" cells as -1
    cell_type_idxs = [
        cell_type_idxs[i] if good_cell_idxs[i] == 1 else -1
        for i in range(len(good_cell_idxs))
    ]

    def select_idxs(arr, key):
        return [i for i in range(len(arr)) if arr[i] == key]

    inh_id = [i for i in cell_type_idxs if i != 0 and i != -1][0]
    print('Inhibitory Cell Type: ' + str(inh_id))

    inh_idxs = select_idxs(cell_type_idxs, inh_id)
    exc_idxs = select_idxs(cell_type_idxs, 0)

    num_cells = len(cell_type_idxs)
    print(num_cells, 'NUM CELLS')
    print('inh: ', len(inh_idxs))
    print('exc: ', len(exc_idxs))
    '''
    for f in fs:
        if f.get('samples'):
            del f['samples']
    '''
    print('Adding subsamples')
    add_subsamples(fs, num_cells)
    # Add inhibitory/excitatory cell type samples
    add_cell_type(fs, inh_idxs, exc_idxs)

    print('Adding powerlaw samples')
    cell_types_powerlaw(fs, inh_idxs, exc_idxs)
    print('...done')
    close(fs)
def sample_n_neurons(session_params, sample_sizes, n_samples, group_class='samples', group_name='subsamples', prefix=''):
    '''
    Create N samples of randomly selected neurons in behavenet's HDF5 files. Supports multiple sessions
    for a given animal, but not multiple animals/experiments/labs.

    Params:
        session_params:dict: dictionary specifying the following fields, which define a dataset:
                - lab (str)
                - experiment_name (str)
                - animal (str)
                - sessions (list)
        sample_sizes:list: list specifying the number of neurons in each sample
        n_samples:int: how many repeats to take of each sample size (i.e., n=10 would add 10 samples
            for each sample size)
        group_class:str: HDF5 upper level group name
        group_name:str: name of the second level group to add these samples under
        prefix:str: identifying prefix for datasets

    '''
    lab = session_params.get('lab')
    experiment_name = session_params.get('experiment_name')
    animal = session_params.get('animal')
    sessions = session_params.get('sessions')

    for session in sessions:
        data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name, animal, str(session), 'data.hdf5')
        if not os.path.isfile(data_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), data_path)

        logging.info('HDF5 File: {}'.format(data_path))
        data = h5py.File(data_path, 'a', libver='latest', swmr=True)
        ex_trial = list(data['neural'].keys())[0]
        n_neural = data['neural'][ex_trial][:].shape[1]

        # Delete old data
        if group_name in data[group_class]:
            logging.info('Deleting old group: {}'.format(group_name))
            del data[group_class][group_name]

        group = data[group_class].require_group(group_name)
        for size in sample_sizes:
            for i in range(n_samples):
                dset = np.random.choice(n_neural, size, replace=False)
                dset_name = prefix + 'n{}_t{}'.format(size, i)
                logging.info('Adding dataset: {}'.format(dset_name))
                group.create_dataset(dset_name, data=dset)

        data.close()

        logging.info('Added {} total datasets'.format(n_samples * len(sample_sizes)))
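# Hedged usage sketch for sample_n_neurons (not from the original source); the
# lab/experiment/animal/session values below are placeholders and must point at an
# existing behavenet data directory:
#
# example_params = {
#     'lab': 'dipoppa',
#     'experiment_name': 'full_trial',
#     'animal': 'MD0ST5',
#     'sessions': ['1', '2'],
# }
# sample_n_neurons(example_params, sample_sizes=[50, 100, 200], n_samples=10)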
def add_cell_type_samples(session_params, inh_idxs, exc_idxs, n_samples, group_class='samples', group_name='cell_types', prefix=''):
    '''
    Adds n_samples of inhibitory and excitatory neurons. Uses the size of the smaller cell-type
    group as the number of neurons per sample.
    params:
        session_params:dict: Dictionary containing information to locate the data.hdf5 file
        inh_idxs:list: list of inhibitory cell indexes
        exc_idxs:list: list of excitatory cell indexes
        n_samples:int: how many random samples to take of each
        group_class:str: higher level group to add data to
        group_name:str: lower level group to add data to
        prefix:str: unique naming prefix (otherwise datasets are labelled inh_all, exc_t{trial_number})
    '''
    lab = session_params.get('lab')
    experiment_name = session_params.get('experiment_name')
    animal = session_params.get('animal')
    sessions = session_params.get('sessions')

    for session_number in sessions:
        data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name, animal, str(session_number), 'data.hdf5')
        if not os.path.isfile(data_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), data_path)

        logging.info('HDF5 File: {}'.format(data_path))
        data = h5py.File(data_path, 'a', libver='latest', swmr=True)
        data.require_group(group_class)

        # Delete old data
        if group_name in data[group_class]:
            logging.info('Deleting old group: {}'.format(group_name))
            del data[group_class][group_name]

        group = data[group_class].require_group(group_name)

        n_neurons = min(len(inh_idxs), len(exc_idxs))

        # Assumes that there are fewer inhibitory neurons than excitatory (so we only need one inhibitory sample)
        # TODO: Probably a fair assumption, but correct this
        inh_dataset = np.random.choice(inh_idxs, n_neurons, replace=False)
        group.create_dataset(prefix + 'inh_all', data=inh_dataset)
        logging.info('Added Inhibitory dataset: {}'.format(inh_dataset.shape))
        for i in range(n_samples):
            exc_dataset = np.random.choice(exc_idxs, n_neurons, replace=False)
            group.create_dataset(prefix + 'exc_t{}'.format(i), data=exc_dataset)
            logging.info('Added excitatory dataset {}: {}'.format(i, exc_dataset.shape))

        data.close()
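# Hedged usage sketch for add_cell_type_samples (not from the original source);
# inh_idxs/exc_idxs are lists of neuron indices, e.g. from get_cell_idxs further
# down this listing, and session_params must describe an existing dataset; the
# prefix is an illustrative placeholder:
#
# add_cell_type_samples(session_params, inh_idxs, exc_idxs, n_samples=50,
#                       prefix='sst85_')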
Example #6
def test_subsamples(session='1', animal='MD0ST5', lab='dipoppa', expt='full_trial',
                    sample='sample_100_t0', cell_id=3):
    data_dir = get_user_dir('data')
    path = os.path.join(data_dir, lab, expt, animal, session, 'data.hdf5')
    if not os.path.exists(path):
        print(path, ' does not exist.')
        return

    data = h5py.File(path, 'r', libver='latest', swmr=True)
    subsamples = data['samples']['subsamples']
    sample_idxs = subsamples[sample]

    # No duplicates
    assert len(set(sample_idxs[:])) == len(sample_idxs[:])
    print('Subsample Test: Success')
    data.close()
def get_cell_idxs(session_params, inh_key=3):
    '''
    Return indexes of all the inhibitory, excitatory, and overall good cells.
    Requires an info_cells file in the animal-level directory of behavenet's data directory
    (i.e., if the data.hdf5 files live in "lab/experiment/animal/session_1/data.hdf5", the info_cells
    file should sit alongside the session folders, in the "animal" directory)
    Params:
        session_params:dict: Dictionary specifying relevant details for the session
        inh_key:int: key indicating the inhibitory cell type (2: PV, 3: SST, 5: GAD)
    Returns:
        inhibitory indexes, excitatory indexes, all good cell indexes
    '''
    lab = session_params.get('lab')
    experiment_name = session_params.get('experiment_name')
    animal = session_params.get('animal')
    date = session_params.get('date')

    # Load file and separate contents
    key = 'info_cells_' + animal + '_' + date + '.mat'
    cell_data_path = os.path.join(behavenet.get_user_dir('data'), lab, experiment_name, animal, key)
    if not os.path.exists(cell_data_path):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cell_data_path)

    info_cells = loadmat(cell_data_path)['info_cells']

    good_cells = info_cells[0][0][1][0]
    cell_types = info_cells[0][0][0][0]

    # Remove bad cells from neural activity and labelled cell data
    bad_cells = [i for i in range(len(good_cells)) if good_cells[i] == 0]
    cell_types = np.delete(cell_types, bad_cells, axis=0)

    # Collect indexes of inhibitory and excitatory neurons
    inh_idxs = [idx for idx, cell_type in enumerate(cell_types) if cell_type == inh_key]
    exc_idxs = [idx for idx, cell_type in enumerate(cell_types) if cell_type == 0]

    return inh_idxs, exc_idxs, list(range(len(cell_types)))
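# Hedged usage sketch for get_cell_idxs (not from the original source); the field
# values are placeholders and the matching info_cells_<animal>_<date>.mat file must
# exist under the behavenet data directory:
#
# session_params = {
#     'lab': 'dipoppa',
#     'experiment_name': 'full_trial',
#     'animal': 'MD0ST5',
#     'date': '2018-04-04',
# }
# inh_idxs, exc_idxs, good_idxs = get_cell_idxs(session_params, inh_key=3)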
def process(sess_id,
            lab='dipoppa',
            expt='SSSVAE',
            animal='MD0ST5',
            date='2018-04-04',
            neural_data_root='/home/yoni/behavenet/neural_dir'):

    vid_id = '{}_{}_{}'.format(date, sess_id, animal)
    neural_dir = os.path.join(neural_data_root, animal, date)

    # data will be stored in data_dir/lab/expt/animal/session/data.hdf5
    session = sess_id

    mp4_file = os.path.join(
        neural_dir, session,
        vid_id + '_eye.mj2')  # (can be any format loadable by OpenCV)

    # video frames will be resized to these dimensions
    # downsampling is preferable as it speeds up autoencoder fitting time
    # we typically use images with the largest dimension <= 256
    xpix = 256  # choose a number (e.g. 256)
    ypix = 128  # choose a number (e.g. 128)

    # processed data in behavenet format will be stored here
    data_dir = get_user_dir('data')
    proc_data_filepath = os.path.join(data_dir, lab, expt, animal, session)
    print('Writing to: ', proc_data_filepath, 'Video from: ', mp4_file)

    ###########
    # Load data
    ###########

    # set up hdf5 file
    hdf5_file = os.path.join(proc_data_filepath, 'data.hdf5')
    if os.path.exists(hdf5_file):
        raise IOError('data.hdf5 file already exists; skipping')
    else:
        hdf5_dir = os.path.dirname(hdf5_file)
        if not os.path.exists(hdf5_dir):
            os.makedirs(hdf5_dir)

    # read video file and check
    cap = cv2.VideoCapture(mp4_file)
    if not cap.isOpened():
        raise IOError('error opening video file at %s' % mp4_file)

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print('Loading and processing data...')
    # Load trial info to sync video/neural data, and neural activity
    vvi = loadmat(
        os.path.join(neural_dir, sess_id,
                     'batch_video_id' + vid_id + '.mat'))['batch_video_id']
    nni = loadmat(
        os.path.join(neural_dir, sess_id,
                     'batch_neural_id' + vid_id + '.mat'))['batch_neural_id']
    nna = loadmat(
        os.path.join(neural_dir, sess_id,
                     'neural_activity' + vid_id + '.mat'))['neural_activity']

    # Load cell labels
    cell_data_path = os.path.join(neural_dir,
                                  'info_cells_{}_{}.mat'.format(animal, date))
    info_cells = loadmat(cell_data_path)['info_cells']

    good_cells = info_cells[0][0][1][0]
    cell_types = info_cells[0][0][0][0]

    # Remove bad cells from neural activity and labelled cell data
    bad_cells = [i for i in range(len(good_cells)) if good_cells[i] == 0]
    cell_types = np.delete(cell_types, bad_cells, axis=0)
    nna = np.delete(nna, bad_cells, axis=1)
    nna = ZScore(nna)

    # Collect indexes of inhibitory and excitatory neurons
    inh_idxs = [i for i in range(len(cell_types)) if cell_types[i] == 3]
    exc_idxs = [i for i in range(len(cell_types)) if cell_types[i] == 0]

    # Load facemap data and create labels
    facemap_data = np.load(os.path.join(neural_dir, sess_id, 'facemap',
                                        vid_id + '_eye_proc.npy'),
                           allow_pickle=True).item()

    pupil_area = facemap_data['pupil'][0]['area_smooth'].reshape(-1, 1)
    pupil_com = facemap_data['pupil'][0]['com_smooth']
    print('Neural data: ', nna.shape, '# Inhibitory neurons: ', len(inh_idxs),
          '# Excitatory neurons: ', len(exc_idxs))

    # Labels_sc are the un-normalized versions of the labels
    labels_sc = np.concatenate([pupil_area, pupil_com], axis=1)

    # Labels are a collection of ZScored pupil area and center of mass
    p_area_norm = ZScore(pupil_area, axis=0)
    p_com_norm = ZScore(pupil_com, axis=0)
    labels = np.concatenate([p_area_norm, p_com_norm], axis=1)
    print('Label shape (scaled+unscaled): ', labels.shape, labels_sc.shape)

    n_trials = vvi.shape[1]
    print(hdf5_file)
    print('Creating hdf5 file')
    t_beg = time.time()
    with h5py.File(hdf5_file, 'a', libver='latest', swmr=True) as f:

        # single write multi-read
        f.swmr_mode = True

        # create image group
        group_i = f.require_group('images')

        # create neural group
        group_n = f.require_group('neural')

        # Create group for behavioral variables
        group_labels = f.require_group('labels')
        group_labels_sc = f.require_group('labels_sc')

        # create a dataset for each trial within groups
        t = 0
        for trial in range(n_trials):

            if trial % 10 == 0:
                print('processing trial %03i' % trial)

            # find video indices during this trial
            trial_beg = vvi[0, trial]
            trial_end = vvi[1, trial]
            ts_idxs = np.arange(trial_beg - 1, trial_end)

            # load and process corresponding frames
            frames = get_frames_from_idxs(cap, ts_idxs)
            sh = frames.shape
            frames_proc = np.zeros((sh[0], sh[1], ypix, xpix), dtype='uint8')
            for i in range(sh[0]):
                frames_proc[i, 0, :, :] = cv2.resize(frames[i, 0],
                                                     (xpix, ypix))

            # save image data
            group_i.create_dataset('trial_%04i' % t,
                                   data=frames_proc,
                                   dtype='uint8')

            label_frames = labels[ts_idxs]
            label_sc_frames = labels_sc[ts_idxs]
            #print('Image: ', frames_proc.shape, 'Labels: ', label_frames.shape, label_sc_frames.shape)

            group_labels.create_dataset('trial_%04i' % t,
                                        data=label_frames,
                                        dtype='float32')
            group_labels_sc.create_dataset('trial_%04i' % t,
                                           data=label_sc_frames,
                                           dtype='float32')

            # find neural indices during this trial
            trial_beg = nni[0, trial]
            trial_end = nni[1, trial]

            # pick out corresponding neural activity
            neural = nna[trial_beg - 1:trial_end, :]

            # save neural data
            group_n.create_dataset('trial_%04i' % t,
                                   data=neural,
                                   dtype='float32')
            t += 1

        # Add variable size subsamples
        samples = f.create_group('samples')
        subsamples = samples.create_group('subsamples')
        subsample_sizes = [
            20, 40, 60, 80, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000,
            1100, 1200, 1300
        ]
        cell_idxs = np.arange(nna.shape[1])
        for size in subsample_sizes:
            for trial in range(10):
                # Choose random sample w/out replacement from all neural indexes
                subsample = np.random.choice(cell_idxs, size, replace=False)
                subsamples.create_dataset('n{}_t{}'.format(size, trial),
                                          data=subsample)

        # Add Inh/Exc subsamples (store indices as int64; uint8 would overflow
        # once a session has more than 255 cells)
        cell_types = samples.create_group('cell_types')
        cell_types.create_dataset('inh_t0', data=inh_idxs, dtype='int64')

        for i in range(50):
            exc_sample = np.random.choice(exc_idxs,
                                          len(inh_idxs),
                                          replace=False)
            cell_types.create_dataset('exc_t{}'.format(i),
                                      data=exc_sample,
                                      dtype='int64')

    # print out timing info
    t_end = time.time()
    t_tot = t_end - t_beg
    print('Processed {} trials in total'.format(t))
    print('total processing time: %f sec' % t_tot)
    print('time per trial: %f sec' % (t_tot / n_trials))
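# ZScore is called in process() above but its definition is not shown in this listing;
# a minimal sketch (an assumption, not the original implementation), assuming numpy is
# imported as np and that ZScore simply standardizes the array to zero mean and unit
# variance along the given axis:
def ZScore(arr, axis=0):
    arr = np.asarray(arr, dtype=float)
    mean = arr.mean(axis=axis, keepdims=True)
    std = arr.std(axis=axis, keepdims=True)
    std[std == 0] = 1.0  # avoid division by zero for constant columns
    return (arr - mean) / std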
import os
import h5py
from tqdm import tqdm
import behavenet
import numpy as np

mouse = 'MD0ST5'
expt = 'full_trial'
sessions = ['1', '2', '3', '4']

for session in sessions:
    data_path = os.path.join(behavenet.get_user_dir('data'), 'dipoppa', expt, mouse, session, 'data.hdf5')
    f = h5py.File(data_path, 'a', swmr=True, libver='latest')

    # Concatenate all trials together

    data = np.array([])

    neural = f['neural']

    print('Loading data for session: ', session)
    for trial_name in tqdm(neural):
        trial = neural[trial_name][:]

        if data.size == 0:
            data = trial
        else:
            data = np.concatenate([data, trial], axis=0)

    print('Computing mean, std...')
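# The listing is truncated here; a hedged sketch of the step the print above announces
# (per-neuron mean and std over all concatenated trials) might look like:
#
# mean = data.mean(axis=0)
# std = data.std(axis=0)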