def run():
    '''Compute and cache the spike locations of every good cell.

    For animals 66 and 70, every session listed under the 'Good trials'
    cache entry is loaded (mux info, virmenLog, cluster files of tetrodes
    1-16). For each cell named in good_clusters, spike_loc is evaluated and
    its result is stored in the cache.
    '''
    logging.basicConfig(level=logging.INFO)

    good_trials = try_cache("Good trials")
    animal_sess_combs = [(animal, session) for animal in [66, 70] for session in good_trials[animal]]

    _, good_clusters = get_good_clusters(0)

    for animal, session in animal_sess_combs:
        fn, trigger_tm = load_mux(animal, session)
        vl = load_vl(animal, fn)
        cls = {tetrode: load_cl(animal, fn, tetrode) for tetrode in range(1, 17)}

        for tetrode, cl in cls.items():
            if tetrode not in good_clusters:
                # A debugger breakpoint here would stall unattended batch
                # runs, so the tetrode is reported and skipped instead.
                logging.warning("Tetrode %i has no entry in good_clusters; skipping", tetrode)
                continue
            for cell in good_clusters[tetrode]:
                logging.info(
                    "Finding spike locations for animal %i, session %i, tetrode %i, cell %i",
                    animal,
                    session,
                    tetrode,
                    cell,
                )
                cache_key = (cl["Label"][::10], vl["xs"][::10], trigger_tm, cell)
                spk_i = spike_loc(cl, vl, trigger_tm, cell, key=None)
                # spike_loc hands back a scalar NaN instead of an index
                # array when it has nothing to report. Test the value
                # rather than object identity with a particular NaN alias.
                if np.ndim(spk_i) == 0 and np.isnan(spk_i):
                    break
                store_in_cache(cache_key, spk_i)
def load_cl(animal, fn, tetrode):
    ''' Load the cluster file of one tetrode, going through the cache first.

    A cluster file has an array of two elements.
        1) Time. The rate is 31250/sec
        2) The cluster that this spike was manually labeled as.

        ie [[1],[2],[3],...], [[1],[4],[3.4],...]
        the lengths are the same

    Returns a dict with the keys
        'Datetime': datetime parsed from the tail of the .mat header, or
                    None when that text cannot be parsed
        'Time':     ndarray built from the first element of the file
        'Label':    ndarray built from the second element of the file
     '''
    cache = try_cache(animal,fn,tetrode, 'clusters')
    if cache is not None:
        return cache

    clpath = join(dat_base,'Data Files','Clusters','clusters%s'%(animal,),fn)
    tmp = loadmat(clpath+'.cmb.%i.mat'%(tetrode,))
    try:
        dt = datetime.strptime(tmp['__header__'][50:],'%a %b %d %H:%M:%S %Y')
    except Exception:
        # Best effort: an unparsable header must not prevent loading the
        # spike data. Catching Exception (not everything) keeps Ctrl-C and
        # SystemExit working.
        dt = None

    out = {'Datetime': dt,
            'Time': np.array(tmp['clust'][0,0][0]),
            'Label': np.array(tmp['clust'][0,0][1])}

    store_in_cache(animal, fn, tetrode, 'clusters', out)

    return out
Beispiel #3
0
def run():
    ''' Compute and cache the spike locations of every good cell.

    Walks every 'Good trials' session of animals 66 and 70, loads the mux
    info, the virmenLog and the cluster files of tetrodes 1-16, and stores
    the spike_loc result of each cell listed in good_clusters.
    '''
    logging.basicConfig(level=logging.INFO)

    good_trials = try_cache('Good trials')
    animal_sess_combs = [(animal,session) for animal in [66,70]
                         for session in good_trials[animal]]

    _, good_clusters = get_good_clusters(0)

    for animal, session in animal_sess_combs:
        fn, trigger_tm = load_mux(animal, session)
        vl = load_vl(animal,fn)
        cls = {tetrode:load_cl(animal,fn,tetrode) for tetrode in range(1,17)}

        for tetrode,cl in cls.items():
            if tetrode not in good_clusters:
                # Report and move on; stopping in a debugger would hang
                # any non-interactive run.
                logging.warning('Tetrode %i has no entry in good_clusters; skipping',tetrode)
                continue
            for cell in good_clusters[tetrode]:
                logging.info('Finding spike locations for animal %i, session %i, tetrode %i, cell %i',animal, session, tetrode,cell)
                cache_key = (cl['Label'][::10],vl['xs'][::10],trigger_tm,cell)
                spk_i = spike_loc(cl, vl, trigger_tm, cell, key=None)
                # A scalar NaN from spike_loc means "no result"; check the
                # value itself instead of identity with one NaN alias.
                if np.ndim(spk_i) == 0 and np.isnan(spk_i): break
                store_in_cache(cache_key,spk_i)
Beispiel #4
0
def load_cl(animal, fn, tetrode):
    ''' Return the cluster data of one tetrode as a dict (cached).

    A cluster file has an array of two elements.
        1) Time. The rate is 31250/sec
        2) The cluster that this spike was manually labeled as.

        ie [[1],[2],[3],...], [[1],[4],[3.4],...]
        the lengths are the same

    The returned dict has three entries:
        'Datetime' - datetime read from the end of the .mat header, or None
                     if that text does not parse
        'Time'     - ndarray made from the file's first element
        'Label'    - ndarray made from the file's second element
     '''
    cache = try_cache(animal, fn, tetrode, 'clusters')
    if cache is not None:
        return cache

    clpath = join(dat_base, 'Data Files', 'Clusters',
                  'clusters%s' % (animal, ), fn)
    tmp = loadmat(clpath + '.cmb.%i.mat' % (tetrode, ))
    try:
        dt = datetime.strptime(tmp['__header__'][50:], '%a %b %d %H:%M:%S %Y')
    except Exception:
        # The timestamp is optional. Only ordinary errors are absorbed so
        # that KeyboardInterrupt / SystemExit still propagate.
        dt = None

    out = {
        'Datetime': dt,
        'Time': np.array(tmp['clust'][0, 0][0]),
        'Label': np.array(tmp['clust'][0, 0][1])
    }

    store_in_cache(animal, fn, tetrode, 'clusters', out)

    return out
def find_ambiguous_data():
    ''' Tally, per session, how often the task label disagrees with the
        orientation computed by get_orientation.

    Final data structure is a dictionary:
        amb[animal][session] = (radial, discrepency, tot)
    or None for a session whose virmenLog could not be loaded, where
        radial      - number of samples whose orientation equals 0
        discrepency - number of samples where vl['Task'] != orientation
        tot         - len(vl['xs'])

    Sessions already present in the cached dictionary are skipped. The
    updated dictionary is written back to the cache and returned.
    '''
    # First load cache.
    # cache_key is not defined in this function; it is expected to exist
    # as a module-level name.
    cache = try_cache(cache_key)
    amb = cache if cache is not None else {}

    # Fix center
    cntrx = cntry = 0

    # Animal range
    animals = range(65,74)

    for animal in animals:

        # Add to dictionary
        sessions_done = amb.setdefault(animal, {})

        for session in range(1,100):
            if session in sessions_done:
                logging.info('Found (Animal %i, Session %i) in cache',animal,session)
                continue
            try:
                fn, _ = load_mux(animal, session)
            except Exception:
                # A failing load_mux is treated as "past the last session".
                # Exception (rather than a bare except) keeps Ctrl-C usable.
                logging.info('Animal %i has no sessions greater than %i',animal,session-1)
                break
            try:
                vl = load_vl(animal,fn)
            except Exception:
                traceback.print_exc()
                logging.info('No data found for (Animal %i, Session %i)',animal,session)
                sessions_done[session] = None
                continue

            logging.info('Checking ambiguous data for (Animal %i, Session %i)',animal,session)

            orientation = get_orientation(vl,cntrx,cntry)

            # Assume that orientation and task labels are matched correctly
            radial = np.sum(0 == orientation)
            discrepency = np.sum(vl['Task'] != orientation)
            tot = len(vl['xs'])

            sessions_done[session] = (radial, discrepency,  tot)

    # Store to cache
    store_in_cache(cache_key, amb)

    return amb
Beispiel #6
0
def spike_loc(cl, vl, trigger_tm, target_cl,key=None):
    ''' Return the indices of vl['xs'] and vl['ys']
        that correspond to spikes with cluster label
        target_cl.

        Note: vl['xs'] and vl['ys'] have repeated values.
            We must return exactly one index for each unique
            location.

        cl         - cluster dict with 'Time' and 'Label' arrays
        vl         - virmenLog dict; 'vxs' and 'vys' are read here
        trigger_tm - passed through to match_cl_to_vl
        target_cl  - cluster label to look up
        key        - optional cache key; when given, the result is read
                     from / written to the cache under it

        Returns a sorted array of unique integer indices, or a scalar NaN
        when no spike carries target_cl or the user declines the
        cluster-1 prompt.
    '''
    cached_results = (try_cache(key) if key is not None else None)
    if cached_results is not None:
        logging.info('Got spike cluster %i from Cache.cache.',target_cl)
        return cached_results

    if target_cl == 1:
        logging.warning('Are you SURE you want to find cluster 1?')
        # raw_input only exists on Python 2; fall back to input on Python 3
        try:
            ask = raw_input
        except NameError:
            ask = input
        if ask() in ['n', 'N']:
            return np.nan

    # Get the times when a cluster in label 'target_cl' occurs
    st = cl['Time'][cl['Label']==target_cl]
    if st.size == 0:
        logging.info('No clusters with label %i. Quitting...', target_cl)
        return np.nan

    # Get the vl indices corresponding to times in st
    spk_i = np.array(list(match_cl_to_vl(st, vl, trigger_tm)))

    # Delete NaN values
    spk_i = spk_i[~np.isnan(spk_i)].astype(int)

    # Determine speed
    # Matlab rounds speed to 6 or 7 decimals, so do the same for consistency
    speed = matround(np.sqrt(vl['vxs']**2+vl['vys']**2),decimals=6)[spk_i]

    # Only keep spikes where the speed is strictly between 2 and 40, then
    # collapse duplicates. This is done BEFORE caching so that a cache hit
    # returns exactly what a fresh computation returns.
    spk_i = np.unique(spk_i[(speed > 2) & (speed < 40)])

    if key is not None: store_in_cache(key, spk_i)

    return spk_i
def load_wv(animal, fn, tetrode):
    ''' Fetch the waveforms of one tetrode, from the cache when available.

        A waveform file is a list of 4 data points corresponding to
        the 4 electrode recordings of the tetrode at a given time.

        returns an Xx4 ndarray
        '''
    waveforms = try_cache(animal,fn,tetrode,'waveform')
    if waveforms is None:
        # Cache miss: read the 'waveforms' variable from the .mat file and
        # remember it for the next call.
        stem = join(dat_base,'Data Files','Waveforms','waveforms%s'%(animal,),fn)
        mat = loadmat(stem+'.cmb.%i.mat'%(tetrode,))
        waveforms = mat['waveforms']
        store_in_cache(animal,fn,tetrode,'waveform',waveforms)

    return waveforms
Beispiel #8
0
def load_wv(animal, fn, tetrode):
    ''' Return the waveform array of a tetrode (cached after first load).

        A waveform file is a list of 4 data points corresponding to
        the 4 electrode recordings of the tetrode at a given time.

        returns an Xx4 ndarray
        '''
    hit = try_cache(animal, fn, tetrode, 'waveform')
    if hit is not None:
        return hit

    # <dat_base>/Data Files/Waveforms/waveforms<animal>/<fn>.cmb.<tetrode>.mat
    base = join(dat_base, 'Data Files', 'Waveforms',
                'waveforms%s' % (animal, ), fn)
    mat_file = base + '.cmb.%i.mat' % (tetrode, )
    result = loadmat(mat_file)['waveforms']

    store_in_cache(animal, fn, tetrode, 'waveform', result)
    return result
def cache_inside_mask(bin_size, room_shape):
    ''' Build, cache and return the mask of bins that were ever visited.

    bin_size   - side length of a (square) bin
    room_shape - [[xmin, xmax], [ymin, ymax]]; both extents must be
                 multiples of bin_size

    inside[xbin, ybin] is 1 when any 'Good trials' session of animal 66
    or 70 has a position sample inside that bin, else 0. The mask is
    asserted to be mirror-symmetric in x and in y, then stored under the
    key (bin_size, room_shape, 'Inside mask').

    Raises AssertionError when an extent is not a multiple of bin_size or
    the mask is not symmetric.
    '''
    xlen = room_shape[0][1]-room_shape[0][0]
    ylen = room_shape[1][1]-room_shape[1][0]
    assert xlen%bin_size==0
    assert ylen%bin_size==0

    # Floor division + int() keeps the bin counts integral on Python 2 and
    # Python 3 alike ('/' yields a float on Python 3, which np.zeros and
    # range reject). x and y get their own count so that a non-square room
    # is binned correctly; for a square room both are equal.
    n_x = int(xlen//bin_size)
    n_y = int(ylen//bin_size)
    xleft = room_shape[0][0]
    ydown = room_shape[1][0]

    inside = np.zeros([n_x,n_y])
    good_trials = try_cache('Good trials')

    for animal in [66,70]:
        for session in good_trials[animal]:
            logging.info('Checking animal %i, session %i',animal,session)
            fn, _ = load_mux(animal, session)
            vl = load_vl(animal,fn)
            xs = vl['xs']
            ys = vl['ys']

            for xbin in range(n_x):
                xmin = xbin*bin_size+xleft
                xmax = (xbin+1)*bin_size+xleft
                in_x = (xs>=xmin)&(xs<xmax)

                for ybin in range(n_y):
                    ymin = ybin*bin_size+ydown
                    ymax = (ybin+1)*bin_size+ydown

                    if np.any(in_x&(ys>=ymin)&(ys<ymax)):
                        inside[xbin,ybin]=1

    # 4 way Symmetric check: compare each bin of the lower-left quadrant
    # with its three mirror images.
    for xbin in range(n_x//2):
        for ybin in range(n_y//2):
            assert inside[xbin,ybin] == inside[n_x-xbin-1,ybin]
            assert inside[xbin,ybin] == inside[xbin,n_y-ybin-1]
            assert inside[xbin,ybin] == inside[n_x-xbin-1,n_y-ybin-1]

    cache_key = (bin_size, room_shape, 'Inside mask')
    store_in_cache(cache_key, inside)

    return inside

    
def count():
    ''' Print a table of cell counts and session lengths.

    For every 'Good trials' session of animals 66 and 70 whose 'Task'
    array holds exactly two distinct values, the number of cells is the
    sum over tetrodes 1-16 of (distinct cluster labels - 1). Sessions
    with zero cells are dropped. The rows (animal, session, cells,
    length) are stored in the cache, printed sorted by cell count, and
    followed by the mean length.
    '''
    logging.basicConfig(level=logging.INFO)

    animals = [66,70]
    sessions = range(1,100)
    tetrodes = range(1,17)

    good_trials = try_cache('Good trials')

    key = (animals,sessions,tetrodes,'count clusters')

    # The table is rebuilt on every call and the cached copy overwritten:
    # the original code looked the key up but discarded the result.
    cls = []
    for animal in animals:
        print('Animal  %s' % (animal,))
        for session in good_trials[animal]:
            print('Session %s' % (session,))
            fn, _ = load_mux(animal, session)
            vl = load_vl(animal,fn)
            if len(np.unique(vl['Task'])) != 2:continue

            cells = 0
            for tetrode in tetrodes:
                cl = load_cl(animal, fn, tetrode)
                cells += len(np.unique(cl['Label']))-1
            if cells == 0: continue
            # vl is a dict, so len(vl) would only count its keys; the
            # session length is the number of position samples.
            cls.append((animal, session, cells,len(vl['xs'])))
    store_in_cache(key,cls)

    cls.sort(key=lambda x:x[2])
    txt = '%i    %i    %i    %i'
    print('Animal    Session    Cells    Length')
    for animal,session,cells, length in cls:
        print(txt%(animal,session,cells, length))

    print('Mean length: %s' % (np.mean([cl[3] for cl in cls]),))
    
        
                    
Beispiel #11
0
def cache_inside_mask(bin_size, room_shape):
    ''' Compute the visited-bin mask of the room, cache it and return it.

    bin_size   - side length of one bin
    room_shape - [[xmin, xmax], [ymin, ymax]]; each extent must be a
                 multiple of bin_size

    The result is an array of 0/1 flags indexed [xbin, ybin]. A flag is 1
    when at least one position sample from the 'Good trials' sessions of
    animals 66 and 70 lies inside the bin. The mask must be symmetric
    under mirroring in x and in y (checked with asserts) and is stored
    under the key (bin_size, room_shape, 'Inside mask').

    Raises AssertionError on a non-divisible extent or an asymmetric mask.
    '''
    xlen = room_shape[0][1] - room_shape[0][0]
    ylen = room_shape[1][1] - room_shape[1][0]
    assert xlen % bin_size == 0
    assert ylen % bin_size == 0

    # '//' with int() gives integer bin counts on both Python 2 and 3;
    # plain '/' produces a float on Python 3. The y axis gets its own
    # count so a non-square room is covered completely.
    bins_x = int(xlen // bin_size)
    bins_y = int(ylen // bin_size)
    xleft = room_shape[0][0]
    ydown = room_shape[1][0]

    inside = np.zeros([bins_x, bins_y])
    good_trials = try_cache('Good trials')

    for animal in [66, 70]:
        for session in good_trials[animal]:
            logging.info('Checking animal %i, session %i', animal, session)
            fn, _ = load_mux(animal, session)
            vl = load_vl(animal, fn)
            xs, ys = vl['xs'], vl['ys']

            for xbin in range(bins_x):
                xmin = xbin * bin_size + xleft
                xmax = (xbin + 1) * bin_size + xleft
                in_x = (xs >= xmin) & (xs < xmax)

                for ybin in range(bins_y):
                    ymin = ybin * bin_size + ydown
                    ymax = (ybin + 1) * bin_size + ydown

                    if np.any(in_x & (ys >= ymin) & (ys < ymax)):
                        inside[xbin, ybin] = 1

    # 4 way Symmetric check: every bin of one quadrant must match its
    # mirror image in x, in y, and in both.
    for xbin in range(bins_x // 2):
        for ybin in range(bins_y // 2):
            assert inside[xbin, ybin] == inside[bins_x - xbin - 1, ybin]
            assert inside[xbin, ybin] == inside[xbin, bins_y - ybin - 1]
            assert inside[xbin, ybin] == inside[bins_x - xbin - 1,
                                                bins_y - ybin - 1]

    cache_key = (bin_size, room_shape, 'Inside mask')
    store_in_cache(cache_key, inside)

    return inside
Beispiel #12
0
def run():
    ''' Extend the 'Good trials' cache entry with newly found sessions.

    good_trials maps animal -> list of session numbers. For animals 66
    and 73, each session in range(100) not already listed is added when
    load_mux, load_vl and count_cells all succeed for it. The first
    session for which load_mux fails ends the scan of that animal.
    '''
    logging.basicConfig(level=logging.INFO)
    cache_key = 'Good trials'
    animals = [66, 73]
    # NOTE(review): this starts at 0 although load_mux documents sessions
    # as 1-based, and the log messages below print session + 1 - confirm
    # which numbering is intended.
    sessions = range(100)
    _, good_clusters = goodClusters.get_good_clusters(0)

    good_trials = try_cache(cache_key)

    if good_trials is None: good_trials = {}

    for animal in animals:
        if animal not in good_trials: good_trials[animal] = []
        for session in sessions:
            if session in good_trials[animal]: continue

            # The handlers below catch Exception rather than everything,
            # so Ctrl-C / SystemExit still stop this long batch loop.
            try:
                fn, trigger_tm = load_mux(animal, session)
            except Exception:
                logging.info('Animal %i has no sessions greater than %i',
                             animal, session + 1)
                break

            try:
                vl = load_vl(animal, fn)
            except Exception:
                logging.info('Animal %i session %i is not a task trial',
                             animal, session + 1)
                continue

            cls = {
                tetrode: load_cl(animal, fn, tetrode)
                for tetrode in range(1, 17)
            }

            try:
                count_cells(vl, cls, trigger_tm, good_clusters)
            except Exception:
                # No cells found
                continue

            # Already-listed sessions were skipped at the top of the loop,
            # so no second membership test is needed here.
            good_trials[animal].append(session)
    store_in_cache(cache_key, good_trials)
def count():
    ''' Tabulate cells per session and print the table with its mean length.

    Only 'Good trials' sessions of animals 66 and 70 whose 'Task' array
    has exactly two distinct values are considered. A session's cell
    count is the sum, over tetrodes 1-16, of the number of distinct
    cluster labels minus one; sessions with a count of zero are left out.
    Rows are (animal, session, cells, length). They are written to the
    cache, printed in ascending order of cells, and the mean length is
    printed last.
    '''
    logging.basicConfig(level=logging.INFO)

    animals = [66, 70]
    sessions = range(1, 100)
    tetrodes = range(1, 17)

    good_trials = try_cache('Good trials')

    key = (animals, sessions, tetrodes, 'count clusters')

    # Always recomputed: the original fetched the cached table and then
    # threw it away, so the lookup is dropped and the entry is overwritten.
    cls = []
    for animal in animals:
        print('Animal  %s' % (animal, ))
        for session in good_trials[animal]:
            print('Session %s' % (session, ))
            fn, _ = load_mux(animal, session)
            vl = load_vl(animal, fn)
            if len(np.unique(vl['Task'])) != 2: continue

            cells = 0
            for tetrode in tetrodes:
                cl = load_cl(animal, fn, tetrode)
                cells += len(np.unique(cl['Label'])) - 1
            if cells == 0: continue
            # len(vl) would be the number of dict keys; use the number of
            # position samples as the session length.
            cls.append((animal, session, cells, len(vl['xs'])))
    store_in_cache(key, cls)

    cls.sort(key=lambda x: x[2])
    txt = '%i    %i    %i    %i'
    print('Animal    Session    Cells    Length')
    for animal, session, cells, length in cls:
        print(txt % (animal, session, cells, length))

    print('Mean length: %s' % (np.mean([cl[3] for cl in cls]), ))
def run():
    ''' Scan for usable sessions and record them under 'Good trials'.

    The cache entry maps animal -> list of sessions. For animals 66 and
    73, a session from range(100) is appended when it is not listed yet
    and load_mux, load_vl and count_cells all complete without raising.
    A load_mux failure stops the scan for the current animal.
    '''
    logging.basicConfig(level=logging.INFO)
    cache_key = 'Good trials'
    animals = [66,73]
    # NOTE(review): range(100) begins at session 0 and the messages below
    # report session+1; verify against the numbering load_mux expects.
    sessions = range(100)
    _, good_clusters = goodClusters.get_good_clusters(0)

    good_trials = try_cache(cache_key)

    if good_trials is None: good_trials = {}

    for animal in animals:
        if animal not in good_trials: good_trials[animal] = []
        for session in sessions:
            if session in good_trials[animal]: continue

            # Only Exception is caught below, so KeyboardInterrupt and
            # SystemExit are no longer mistaken for data problems.
            try:
                fn, trigger_tm = load_mux(animal, session)
            except Exception:
                logging.info('Animal %i has no sessions greater than %i',animal,session+1)
                break

            try:
                vl = load_vl(animal,fn)
            except Exception:
                logging.info('Animal %i session %i is not a task trial',animal,session+1)
                continue

            cls = {tetrode:load_cl(animal,fn,tetrode) for tetrode in range(1,17)}

            try:
                count_cells(vl,cls,trigger_tm, good_clusters)
            except Exception:
                # No cells found
                continue

            # The session cannot be in the list here (see the skip above).
            good_trials[animal].append(session)
    store_in_cache(cache_key,good_trials)
Beispiel #15
0
def find_ambiguous_data():
    ''' Count, session by session, the samples whose task label differs
        from the orientation returned by get_orientation.

    Final data structure is a dictionary:
        amb[animal][session] = (radial, discrepency, tot)
    with
        radial      - how many samples have orientation == 0
        discrepency - how many samples have vl['Task'] != orientation
        tot         - len(vl['xs'])
    A session whose virmenLog cannot be loaded is stored as None.

    Entries already found in the cached dictionary are not recomputed.
    The dictionary is saved back to the cache and returned.
    '''
    # First load cache.
    # cache_key is read from outside this function (expected to be a
    # module-level name).
    cache = try_cache(cache_key)
    amb = {} if cache is None else cache

    # Fix center
    cntrx = cntry = 0

    # Animal range
    animals = range(65, 74)

    for animal in animals:

        # Add to dictionary
        per_animal = amb.setdefault(animal, {})

        for session in range(1, 100):
            if session in per_animal:
                logging.info('Found (Animal %i, Session %i) in cache', animal,
                             session)
                continue
            try:
                fn, _ = load_mux(animal, session)
            except Exception:
                # Any ordinary failure of load_mux ends this animal's scan;
                # interrupts are deliberately not caught.
                logging.info('Animal %i has no sessions greater than %i',
                             animal, session - 1)
                break
            try:
                vl = load_vl(animal, fn)
            except Exception:
                traceback.print_exc()
                logging.info('No data found for (Animal %i, Session %i)',
                             animal, session)
                per_animal[session] = None
                continue

            logging.info('Checking ambiguous data for (Animal %i, Session %i)',
                         animal, session)

            orientation = get_orientation(vl, cntrx, cntry)

            # Assume that orientation and task labels are matched correctly
            radial = np.sum(0 == orientation)
            discrepency = np.sum(vl['Task'] != orientation)
            tot = len(vl['xs'])

            per_animal[session] = (radial, discrepency, tot)

    # Store to cache
    store_in_cache(cache_key, amb)

    return amb
Beispiel #16
0
def _mux_datetime(fields):
    ''' Turn a 1x6 row [year, month, day, hour, minute, seconds] (seconds
        may carry a fraction) into a datetime. '''
    seconds = fields[0, 5]
    whole_seconds = int(seconds)
    return datetime(year=int(fields[0, 0]),
                    month=int(fields[0, 1]),
                    day=int(fields[0, 2]),
                    hour=int(fields[0, 3]),
                    minute=int(fields[0, 4]),
                    second=whole_seconds,
                    microsecond=int(10**6 * (seconds - whole_seconds)))


def load_mux(animal, session):
    '''
    Return (fn, bt) for a 1-based session number of an animal, where fn
    is the session's file name with the '.cmb' ending removed and bt is
    the session's BackupInitialTime. Results are cached under
    (animal, session, 'mux').

    mux: np.void, len(mux) = 5
        [name of tetrode, 
        1x76 ndarray, 
        76x16 ndarray,
        76x16 ndarray,
        empty]
        
    mux[1][0,session]: np.void, len = 2
    mux[1][0,session][0]: NAME OF CORRESPONDING FILE
            Ex. 20130818T191517.cmb
    mux[1][0,session][1]: 1x1 ndarray
    mux[1][0,session][1][0,0]:  np.void, len = 5
    This is the good stuff
    mux[1][0,session][1][0,0][0]: 1x1 ndarray
    mux[1][0,session][1][0,0][0][0,0]: np.void, len = 36
    Now we're even closer to good data
    mux[1][0,session][1][0,0][0][0,0][6]: 1x5 ndarray
    -> to get to 'Start'
    mux[1][0,session][1][0,0][0][0,0][6][0,0]: np.void, len = 2
    mux[1][0,session][1][0,0][0][0,0][6][0,0][0] = 'Start'
    mux[1][0,session][1][0,0][0][0,0][6][0,0][1][0,0]: np.void, len 2
    mux[1][0,session][1][0,0][0][0,0][6][0,0][1][0,0][0]: 1x6 ndarray with date info
            ex array([[ 2013.,8.,18.,19.,
                       15.,17.76898909]])
    -> to get to 'Trigger'
    mux[1][0,session][1][0,0][0][0,0][6][0,1][0] = 'Trigger'
    mux[1][0,session][1][0,0][0][0,0][6][0,1][1][0,0][0]: np.void, len = 2
    '''

    cache = try_cache(animal, session, 'mux')
    if cache is not None:
        return cache

    # Session starts at 1, but array indices start at 0. A separate name
    # is used so that the cache is written under the same (1-based) key it
    # was looked up with; reusing `session` stored entries one slot too low.
    idx = session - 1
    muxpath = join(dat_base, 'Data Files', str(animal))
    mux = loadmat(muxpath + '.mat')['mux'][0][0]

    # Strip the ending '.cmb'
    fn = mux[1][0][idx][0][0].split('.')[0]

    events = mux[1][0, idx][1][0, 0][0][0, 0][6]

    # Start and trigger times.
    # NOTE(review): neither value is returned. They are still built
    # because doing so raises on records that lack these fields, which
    # callers may rely on - confirm before removing.
    start_dt = _mux_datetime(events[0, 0][1][0, 0][0])
    trigger_dt = _mux_datetime(events[0, 1][1][0, 0][0])

    # Get the BackupInitialTime - this is the thing that corresponds
    #  to Matlab's mux.sessions(wanted_sess).info.ObjInfo.InitialTriggerTime
    bt = mux[1][0, idx][1][0, 0][3][0, 0][0][0, 0]

    out = (fn, bt)

    store_in_cache(animal, session, 'mux', out)

    return out
def run(Folds):
    ''' Score every classifier in CLs over a sweep of segment lengths and
        cache the collected results.

    Folds - number of cross-validation folds. When it is > 0 a shuffled
            KFold split over the samples is used; otherwise a single
            split is used that trains and tests on all samples.

    For each (animal, session), cluster profile, bin size and segment
    length K, a population vector (X, Y) is generated with gpv and each
    classifier is evaluated with check_classifier on every train/test
    split. The values it returns are gathered per classifier, added to
    the nested structure adat through add(), and adat is finally stored
    under the key 'One big data structure for %i folds'.
    '''
    # Toggle-able parameters
    #CLs = [CL2,CL6,CL5]
    #CLs = [CL6, CL7]
    CLs = [CL10]
    Ks = np.arange(10, 200,
                   20)  # Segment length used to calculate firing rates

    # Sort of toggle-able parameters
    #animal_sess_combs = [(66,60),(70,8),(70,10),(66,61)]
    animal_sess_combs = [(66, 60)]
    #good_trials = try_cache('Good trials')
    #animal_sess_combs = [(animal,session) for animal in range(65,74)
    #                     for session in good_trials[animal]]
    bin_sizes = [5]
    label = 'Task'
    exceptions = []
    cl_profs = [0]

    # Not really toggle-able parameters
    room = [[-55, 55], [-55, 55]]

    # Start from the previously cached structure, if there is one
    cache = try_cache('One big data structure for %i folds' % (Folds, ))
    adat = ({} if cache is None else cache)

    for animal, session in animal_sess_combs:
        fn, trigger_tm = load_mux(animal, session)
        vl = load_vl(animal, fn)
        cls = {
            tetrode: load_cl(animal, fn, tetrode)
            for tetrode in range(1, 17)
        }

        if label == 'Task': label_l = vl['Task']
        else: raise Exception('Not implemented yet.')

        for clust_prof in cl_profs:
            cl_prof_name, good_clusters = get_good_clusters(clust_prof)
            t_cells = count_cells(vl, cls, trigger_tm, good_clusters)

            for bin_size, K in product(bin_sizes, Ks):
                # cached[i] flags whether CLs[i] already has a result for
                # this parameter combination in adat
                cached = np.zeros(len(CLs))
                for CL in CLs:
                    i = CLs.index(CL)
                    try:
                        # NOTE(review): this unconditional raise makes the
                        # lookup below unreachable, so every flag ends up
                        # False and everything is recomputed - looks like
                        # a deliberate cache bypass; confirm.
                        raise Exception
                        adat[CL.name][animal][session][cl_prof_name][bin_size][
                            label][K]
                        cached[i] = True
                    except:
                        cached[i] = False

                if np.sum(cached) == len(CLs):
                    print 'Everything already cached'
                    continue  # Everything is already cached!

                logging.info('About to generate population vector.')
                X, Y = gpv(vl, t_cells, label_l, K, bin_size, room)

                # The main data structure
                # NOTE(review): `cached` holds 0/1 flags rather than
                # classifiers, so this membership test presumably never
                # excludes a classifier - verify.
                dps = {CL: [] for CL in CLs if CL not in cached}

                if Folds > 0:
                    kf = cross_validation.KFold(len(Y),
                                                n_folds=Folds,
                                                shuffle=True)
                else:
                    # Single "fold": train and test on every sample
                    kf = [(range(len(Y)), range(len(Y)))]
                for train_index, test_index in kf:
                    logging.warning('Training/testing: %i/%i',
                                    len(train_index), len(test_index))
                    for CL in CLs:
                        if cached[CLs.index(CL)]: continue
                        logging.warning('%s, %i seg, (%i, %i)', CL.name, K,
                                        animal, session)
                        if (CL, clust_prof) in exceptions: continue
                        # The segment length is set on the classifier itself
                        CL.delt_t = K
                        correct_dp = check_classifier(train_index, test_index,
                                                      X, Y, CL, room, bin_size)

                        dps[CL].extend(correct_dp.tolist())
                for CL in CLs:
                    if cached[CLs.index(CL)]: continue
                    # Flatten the values gathered over all folds
                    to_add = np.array(dps[CL]).reshape([-1])
                    add(adat, CL.name, animal, session, cl_prof_name, bin_size,
                        label, K, to_add)

    store_in_cache('One big data structure for %i folds' % (Folds, ), adat)
Beispiel #18
0
def load_vl(animal, fn):
    ''' Load the virmenLog of one session as a dict, using the cache when
        it already holds the result.

    A virmenlog file is a numpy.void.
        It has either no entry or a variable number
        of entries, corresponding to...?
            now: [1x32993 double]
        position: [2x32993 double]
        velocity: [2x32993 double]
          targetPos: [2x32993 double]
    clockwiseness: [1x32993 double]
           isITI: [1x32993 double]
        isReward: [1x32993 double]
           exper: [1x1 virmenExperiment]
        filename: [1x56 char]
         comment: ''
      iterations: np.void
          tmp['virmenLog'][0,0][-1][0,0][1]: 1x54561 ndarray

    Returns a dict with the keys
        'Datetime', 'Time', 'xs', 'ys', 'vxs', 'vys',
        'Iter time', 'Iter num', 'txs', 'tys', 'Task'

    Raises Exception('Not a task trial.') when the fourth entry of the
    log does not have exactly two rows.
    '''

    hit = try_cache(animal, fn, 'virmenLog')
    if hit is not None:
        return hit

    vlpath = join(dat_base, 'Data Files', 'VirmenLog',
                  'virmenLog%s' % (animal, ), fn)

    mat = loadmat(vlpath + '.cmb.mat')
    stamp = datetime.strptime(mat['__header__'][50:], '%a %b %d %H:%M:%S %Y')

    # Every field below lives in the same record
    log = mat['virmenLog'][0, 0]
    nows = log[0][0]
    xs, ys = log[1][0], log[1][1]
    vxs, vys = log[2][0], log[2][1]

    if len(log[3]) != 2:
        raise Exception('Not a task trial.')
    txs, tys = log[3][0], log[3][1]

    task_clockwiseness = log[4][0]

    iterations = log[-1][0, 0]
    iteration_time = np.ravel(iterations[0])
    raw_num = np.ravel(iterations[1])

    # Clean up the virmenLog iterations numbers by removing the
    #  nan ones and linearly interpolating between them
    valid = np.nonzero(~np.isnan(raw_num))[0]
    filled = np.interp(range(len(raw_num)), valid, raw_num[valid])
    iteration_num = matround(filled, 0).astype(int)

    out = {
        'Datetime': stamp,
        'Time': nows,
        'xs': xs,
        'ys': ys,
        'vxs': vxs,
        'vys': vys,
        'Iter time': iteration_time,
        'Iter num': iteration_num,
        'txs': txs,
        'tys': tys,
        'Task': task_clockwiseness
    }

    store_in_cache(animal, fn, 'virmenLog', out)

    return out
def load_vl(animal, fn):
    ''' Return the contents of a session's virmenLog file as a dict
        (cached after the first load).

    A virmenlog file is a numpy.void.
        It has either no entry or a variable number
        of entries, corresponding to...?
            now: [1x32993 double]
        position: [2x32993 double]
        velocity: [2x32993 double]
          targetPos: [2x32993 double]
    clockwiseness: [1x32993 double]
           isITI: [1x32993 double]
        isReward: [1x32993 double]
           exper: [1x1 virmenExperiment]
        filename: [1x56 char]
         comment: ''
      iterations: np.void
          tmp['virmenLog'][0,0][-1][0,0][1]: 1x54561 ndarray

    Keys of the returned dict:
        'Datetime', 'Time', 'xs', 'ys', 'vxs', 'vys',
        'Iter time', 'Iter num', 'txs', 'tys', 'Task'

    An Exception('Not a task trial.') is raised unless the log's fourth
    entry has exactly two rows.
    '''

    cached_vl=try_cache(animal,fn,'virmenLog')
    if cached_vl is not None:
        return cached_vl

    vlpath = join(dat_base,'Data Files','VirmenLog','virmenLog%s'%(animal,),fn)

    contents = loadmat(vlpath+'.cmb.mat')
    out = {'Datetime': datetime.strptime(contents['__header__'][50:],'%a %b %d %H:%M:%S %Y')}

    record = contents['virmenLog'][0,0]
    out['Time'] = record[0][0]

    position = record[1]
    out['xs'] = position[0]
    out['ys'] = position[1]

    velocity = record[2]
    out['vxs'] = velocity[0]
    out['vys'] = velocity[1]

    target = record[3]
    if len(target) != 2:
        raise Exception('Not a task trial.')
    out['txs'] = target[0]
    out['tys'] = target[1]

    out['Task'] = record[4][0]

    iter_record = record[-1][0,0]
    out['Iter time'] = np.ravel(iter_record[0])
    numbers = np.ravel(iter_record[1])

    # Clean up the virmenLog iterations numbers by removing the
    #  nan ones and linearly interpolating between them
    known = np.nonzero(~np.isnan(numbers))[0]
    numbers = matround(np.interp(range(len(numbers)), known, numbers[known]),0)
    out['Iter num'] = numbers.astype(int)

    store_in_cache(animal,fn,'virmenLog',out)

    return out
def run(Folds, use_cache=False):
    '''Train and test every classifier in CLs over a sweep of segment
    lengths and store the collected results in the cache.

    For each (animal, session) pair, cluster profile, bin size and
    segment length K, a population vector is generated with gpv() and
    each classifier is evaluated with check_classifier() on every
    train/test split.  The per-split results are concatenated and
    inserted into one nested data structure with add(); that structure
    is written back with store_in_cache() once all loops are finished.

    Parameters:
        Folds: number of cross-validation folds.  If it is not greater
            than zero, a single split is used in which the training and
            testing indices are both the full data set.
        use_cache: if True, a (classifier, animal, session, cluster
            profile, bin size, label, K) combination that is already
            present in the cached data structure is not recomputed.
            The default (False) recomputes everything, which is what
            this function has always done.

    Returns:
        None.  Results are only stored in the cache.

    Raises:
        Exception: if `label` is set to something other than 'Task'.
    '''
    # Toggle-able parameters
    #CLs = [CL2,CL6,CL5]
    #CLs = [CL6, CL7]
    CLs = [CL10]
    Ks = np.arange(10,200,20) # Segment length used to calculate firing rates

    # Sort of toggle-able parameters
    #animal_sess_combs = [(66,60),(70,8),(70,10),(66,61)]
    animal_sess_combs = [(66,60)]
    #good_trials = try_cache('Good trials')
    #animal_sess_combs = [(animal,session) for animal in range(65,74)
    #                     for session in good_trials[animal]]
    bin_sizes = [5]
    label = 'Task'
    exceptions = []
    cl_profs = [0]

    # Not really toggle-able parameters
    room = [[-55,55],[-55,55]]

    cache_name = 'One big data structure for %i folds'%(Folds,)
    cache = try_cache(cache_name)
    adat = ({} if cache is None else cache)

    for animal, session in animal_sess_combs:
        fn, trigger_tm = load_mux(animal, session)
        vl = load_vl(animal,fn)
        cls = {tetrode:load_cl(animal,fn,tetrode) for tetrode in range(1,17)}

        if label == 'Task': label_l = vl['Task']
        else: raise Exception('Not implemented yet.')

        for clust_prof in cl_profs:
            cl_prof_name, good_clusters = get_good_clusters(clust_prof)
            t_cells = count_cells(vl,cls,trigger_tm,good_clusters)

            for bin_size, K in product(bin_sizes,Ks):
                # cached[i] is True when the result for CLs[i] is
                #  already in adat and does not need to be recomputed
                cached = np.zeros(len(CLs), dtype=bool)
                if use_cache:
                    for i, CL in enumerate(CLs):
                        try:
                            adat[CL.name][animal][session][cl_prof_name][bin_size][label][K]
                        except (KeyError, IndexError, TypeError):
                            continue
                        cached[i] = True

                if cached.all():
                    print('Everything already cached')
                    continue # Everything is already cached!

                logging.info('About to generate population vector.')
                X, Y = gpv(vl, t_cells, label_l, K, bin_size, room)

                # The main data structure: one result list for every
                #  classifier that still has to be evaluated
                pending = [CL for i, CL in enumerate(CLs) if not cached[i]]
                dps = {CL:[] for CL in pending}

                if Folds >0: kf = cross_validation.KFold(len(Y),n_folds=Folds,shuffle=True)
                else: kf = [(range(len(Y)),range(len(Y)))]
                for train_index, test_index in kf:
                    logging.warning('Training/testing: %i/%i',len(train_index),len(test_index))
                    for CL in pending:
                        logging.warning('%s, %i seg, (%i, %i)',CL.name, K, animal, session)
                        if (CL,clust_prof) in exceptions: continue
                        CL.delt_t = K
                        correct_dp = check_classifier(train_index,test_index,X,Y,CL, room, bin_size)

                        dps[CL].extend(correct_dp.tolist())
                for CL in pending:
                    to_add = np.array(dps[CL]).reshape([-1])
                    add(adat, CL.name, animal, session, cl_prof_name, bin_size, label, K, to_add)

    store_in_cache(cache_name,adat)
def load_mux(animal, session):
    '''
    Look up the data file name and the initial trigger time of one
    session in an animal's mux file.

    The result is cached under (animal, session, 'mux'); if a cached
    value exists it is returned without reading the .mat file.

    Parameters:
        animal: animal identifier; the file that is read is
            <dat_base>/Data Files/<animal>.mat
        session: session number, counted from 1

    Returns:
        (fn, bt)
        fn: the session's file name with everything from the first '.'
            on removed (Ex. '20130818T191517.cmb' -> '20130818T191517')
        bt: the BackupInitialTime - this is the thing that corresponds
            to Matlab's mux.sessions(wanted_sess).info.ObjInfo.InitialTriggerTime

    Layout of the loaded data (session is the 0-based array index here):

    mux: np.void, len(mux) = 5
        [name of tetrode, 
        1x76 ndarray, 
        76x16 ndarray,
        76x16 ndarray,
        empty]
        
    mux[1][0,session]: np.void, len = 2
    mux[1][0,session][0]: NAME OF CORRESPONDING FILE
            Ex. 20130818T191517.cmb
    mux[1][0,session][1]: 1x1 ndarray
    mux[1][0,session][1][0,0]:  np.void, len = 5
    This is the good stuff
    mux[1][0,session][1][0,0][0]: 1x1 ndarray
    mux[1][0,session][1][0,0][0][0,0]: np.void, len = 36
    Now we're even closer to good data
    mux[1][0,session][1][0,0][0][0,0][6]: 1x5 ndarray
    -> to get to 'Start'
    mux[1][0,session][1][0,0][0][0,0][6][0,0]: np.void, len = 2
    mux[1][0,session][1][0,0][0][0,0][6][0,0][0] = 'Start'
    mux[1][0,session][1][0,0][0][0,0][6][0,0][1][0,0]: np.void, len 2
    mux[1][0,session][1][0,0][0][0,0][6][0,0][1][0,0][0]: 1x6 ndarray with date info
            ex array([[ 2013.,8.,18.,19.,
                       15.,17.76898909]])
    -> to get to 'Trigger'
    mux[1][0,session][1][0,0][0][0,0][6][0,1][0] = 'Trigger'
    mux[1][0,session][1][0,0][0][0,0][6][0,1][1][0,0][0]: np.void, len = 2
    '''
    
    cache=try_cache(animal,session,'mux')
    if cache is not None:
        return cache
    
    # Session starts at 1, but array indices start at 0.  The index is
    #  kept in its own variable so that `session` is still the caller's
    #  value when the result is stored: the cache must be written under
    #  the same key it is looked up with above.
    sess_i = session - 1
    muxpath = join(dat_base,'Data Files', str(animal))
    mux = loadmat(muxpath+'.mat')['mux'][0][0]
    sess_dat = mux[1][0,sess_i]
    
    # Strip the ending '.cmb'
    fn = sess_dat[0][0].split('.')[0]
    
    # The 'Start' and 'Trigger' date vectors described in the docstring
    #  are not read here; nothing in this function uses them.
    
    # Get the BackupInitialTime
    bt = sess_dat[1][0,0][3][0,0][0][0,0]
    
    out = (fn,bt)
    
    store_in_cache(animal,session,'mux',out)
    
    return out