print('Encoding marks non-negative filter: ', encode_trial_marks_all_non_negative.shape)
print('Encoding marks non-negative filter: ', encode_trial_marks_all_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Original decode length: ', decode_trial_marks_all.shape)
print('Original decode length: ', decode_trial_marks_all.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Decoding marks non-negative filter: ', decode_trial_marks_all_non_negative.shape)
print('Decoding marks non-negative filter: ', decode_trial_marks_all_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))

# filter for mark amplitude
encode_trial_marks_all_sparse = trodes2SS.threshold_marks(
    encode_trial_marks_all_non_negative, maxthresh=2000, minthresh=100)
decode_trial_marks_all_sparse = trodes2SS.threshold_marks(
    decode_trial_marks_all_non_negative, maxthresh=2000, minthresh=100)
print('Original encode length: ', encode_trial_marks_all_non_negative.shape)
print('Original encode length: ', encode_trial_marks_all_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Encoding marks amplitude filter: ', encode_trial_marks_all_sparse.shape)
print('Encoding marks amplitude filter: ', encode_trial_marks_all_sparse.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Original decode length: ', decode_trial_marks_all_non_negative.shape)
print('Original decode length: ', decode_trial_marks_all_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
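# Presumably, trodes2SS.threshold_marks_negative drops spikes with large negative mark
# amplitudes (artifact rejection) and trodes2SS.threshold_marks keeps only spikes whose
# peak mark amplitude falls between minthresh and maxthresh (100-2000 here). This is an
# inference from the surrounding comments and print labels, not from the trodes2SS source.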
random_trial_marks_all = random_trial_marks_all.append(random_trial_marks)

# filter for large negative marks and spike amplitude
marks_random_trial_non_negative = trodes2SS.threshold_marks_negative(
    random_trial_marks_all, negthresh=-999)
print('Original encode length: ', random_trial_marks_all.shape)
print('Original encode length: ', random_trial_marks_all.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Encoding marks non-negative filter: ', marks_random_trial_non_negative.shape)
print('Encoding marks non-negative filter: ', marks_random_trial_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))

random_trial_spk_subset_sparse = trodes2SS.threshold_marks(
    marks_random_trial_non_negative, maxthresh=2000, minthresh=100)
print('original length: ' + str(marks_random_trial_non_negative.shape[0]))
print('after filtering: ' + str(random_trial_spk_subset_sparse.shape[0]))
print('original length: ' + str(marks_random_trial_non_negative.shape[0]),
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('after filtering: ' + str(random_trial_spk_subset_sparse.shape[0]),
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))

# velocity filter to define encoding and decoding times
velocity_filter = 4
print('Velocity filter: ', velocity_filter)
print('Velocity filter: ', velocity_filter,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
# NOTE: to try marks shift on whole trials we need to do the shift first, then the
# velocity filter for encoding and decoding marks
pos_subset = pos.loc[(pos.index.get_level_values('time') <= chunkend)
                     & (pos.index.get_level_values('time') >= chunkstart)]
posY_subset = posY.loc[(posY.index.get_level_values('time') <= chunkend)
                       & (posY.index.get_level_values('time') >= chunkstart)]
pos_start = pos_subset.index.get_level_values('time')[0]
pos_end = pos_subset.index.get_level_values('time')[-1]
#spk_subset = marks.loc[(marks.index.get_level_values('time') < pos_end) & (marks.index.get_level_values('time') > pos_start)]
#rip_subset = rips.loc[(rips['starttime'].values > pos_start) & (rips['endtime'].values < pos_end)]
#rip_subset = rips_vel_filtered.loc[(rips_vel_filtered['starttime'].values > pos_start) & (rips_vel_filtered['endtime'].values < pos_end)]

# whole epoch
spk_subset = marks
rip_subset = rips_vel_filtered

spk_subset_sparse = trodes2SS.threshold_marks(spk_subset, maxthresh=2000, minthresh=100)
print('original length: ' + str(spk_subset.shape[0]))
print('after filtering: ' + str(spk_subset_sparse.shape[0]))
print('original length: ' + str(spk_subset.shape[0]),
      file=open("/p/lustre1/coulter5/remy/1d_decoder_log.txt", "a"))
print('after filtering: ' + str(spk_subset_sparse.shape[0]),
      file=open("/p/lustre1/coulter5/remy/1d_decoder_log.txt", "a"))
spk_subset_sparse.groupby('elec_grp_id')

# Filter encoding marks for times when rat velocity > 4 cm/s
# The purpose of this is to remove most of the stationary time from the encoding,
# to focus on times of movement
#linflat_obj = pos_subset.get_mapped_single_axis()
# whole epoch
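# Sketch of the velocity-filter pattern referenced above (the same calls appear in the
# randomized-trial code later in this file); the name `moving_spikes` is illustrative only:
#   linflat_obj = pos_subset.get_mapped_single_axis()               # linearized position with linvel_flat
#   linflat_spkindex = linflat_obj.get_irregular_resampled(spk_subset_sparse)
#   moving_spikes = spk_subset_sparse.loc[linflat_spkindex.query('linvel_flat > 4').index]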
def main(path_base_rawdata, rat_name, path_arm_nodes, path_base_analysis, shift_amt, path_out):
    # set log file name
    #log_file = '/p/lustre1/coulter5/remy/1d_decoder_log.txt'
    print(datetime.now())
    today = str(date.today())
    #print(datetime.now(), file=open(log_file,"a"))

    # set path to folders where spykshrk core scripts live
    #path_main = '/usr/workspace/wsb/coulter5/spykshrk_realtime'
    #os.chdir(path_main)

    #cell 2
    # Import data
    # Define path bases
    #path_base_rawdata = '/p/lustre1/coulter5/remy/'

    # Define parameters
    # for epochs we want 2 and 4 for each day
    #shifts = [0, .10, .15, .20]
    #shifts = [0]
    #for shift_amt in shifts:
    #rat_name = 'remy'
    print(rat_name)
    #print(rat_name, file=open(log_file,"a"))
    directory_temp = path_base_rawdata + rat_name + '/'
    day_dictionary = {'remy': [20], 'gus': [28], 'bernard': [23], 'fievel': [19]}
    epoch_dictionary = {'remy': [2], 'gus': [4], 'bernard': [4], 'fievel': [4]}
    tetrodes_dictionary = {
        'remy': [4, 6, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 23, 24, 25,
                 26, 28, 29, 30],
        'gus': [6, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20, 21, 24, 25, 26, 27, 30],
        # list(range(6,13)) + list(range(17,22)) + list(range(24,28)) + [30]
        'bernard': [1, 2, 3, 4, 5, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
                    22, 23, 24, 25, 26, 27, 28, 29],
        'fievel': [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20,
                   22, 23, 24, 25, 27, 28, 29]
    }
    #tetrodes_dictionary = {'remy': [4],     # 4,6,9,10,11,12,13,14,15,17,19,20,21,22,23,24,25,26,28,29,30
    #                       'gus': [6],      # list(range(6,13)) + list(range(17,22)) + list(range(24,28)) + [30]
    #                       'bernard': [1],
    #                       'fievel': [1]}

    # Maze information
    #os.chdir('/usr/workspace/wsb/coulter5/spykshrk_realtime/')
    #maze_coordinates = scipy.io.loadmat('set_arm_nodes.mat', variable_names='linearcoord_NEW')
    # new maze coordinates with only one segment for box
    maze_coordinates = scipy.io.loadmat(os.path.join(path_arm_nodes, 'set_arm_nodes.mat'),
                                        variable_names='linearcoord_one_box')

    print('Loading raw data! ' + str(rat_name) + ' Day ' + str(day_dictionary[rat_name]) +
          ' Epoch ' + str(epoch_dictionary[rat_name]))
    #print('Loading raw data! '+str(rat_name)+' Day '+str(day_dictionary[rat_name])+' Epoch '+str(epoch_dictionary[rat_name]), file=open("/data2/mcoulter/1d_decoder_log.txt","a"))
    datasrc = TrodesImport(directory_temp, rat_name, day_dictionary[rat_name],
                           epoch_dictionary[rat_name], tetrodes_dictionary[rat_name])

    # Import marks
    marks = datasrc.import_marks()
    # os.chdir('/data2/jguidera/data/')
    # np.load('marks.npy')

    # add print lines to show number of marks on each tetrode
    #print('Marks on tetrode 4: ', marks.xs(4,level='elec_grp_id').shape)
    #print('Marks on tetrode 4: ', marks.xs(4,level='elec_grp_id').shape, file=open("/data2/mcoulter/1d_decoder_log.txt","a"))
    #print('Marks on tetrode 28: ', marks.xs(28,level='elec_grp_id').shape)
    #print('Marks on tetrode 28: ', marks.xs(28,level='elec_grp_id').shape, file=open("/data2/mcoulter/1d_decoder_log.txt","a"))
    #print('Marks on tetrode 30: ', marks.xs(30,level='elec_grp_id').shape)
    #print('Marks on tetrode 30: ', marks.xs(30,level='elec_grp_id').shape, file=open("/data2/mcoulter/1d_decoder_log.txt","a"))

    # Import position
    #? concerned about use of sampling rate in the definition for position
    # Temporary small definition of encoding settings -- need 'arm_coordinates' to use datasrc.import_pos
    encode_settings = AttrDict({'arm_coordinates': [[0, 0]]})

    # Import position (#? concerned about use of sampling rate in the definition for position)
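    # As used throughout this script, `marks` and the position frames appear to be pandas
    # DataFrames whose MultiIndex includes 'day', 'epoch', 'elec_grp_id', 'timestamp' and
    # 'time' levels (see the .xs(..., level='elec_grp_id'), groupby('elec_grp_id') and
    # get_level_values('time') calls below); this is inferred from usage here, not from
    # the TrodesImport source.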
    pos = datasrc.import_pos(encode_settings, xy='x')
    posY = datasrc.import_pos(encode_settings, xy='y')

    # Import ripples
    rips = datasrc.import_rips(pos, velthresh=4)

    # Define path bases
    path_base_dayepoch = ('day' + str(day_dictionary[rat_name][0]) +
                          '_epoch' + str(epoch_dictionary[rat_name][0]))
    #path_base_analysis = '/p/lustre1/coulter5/remy/maze_info/'

    #cell 3
    # filter ripples for velocity < 4
    # re-shape ripples input table into format for get_irregular_resample
    rips['timestamp'] = rips['starttime']
    rips['time'] = rips['starttime']
    rips.timestamp = rips.timestamp * 30000
    rips['timestamp'] = rips['timestamp'].astype(int)
    rips.reset_index(level=['event'], inplace=True)
    rips.columns = ['event', 'starttime', 'endtime', 'maxthresh', 'timestamp', 'time']
    rips.set_index(['timestamp', 'time'], drop=True, append=True, inplace=True)

    # filter for velocity < 4 with get_irregular_resample
    linflat_obj = pos.get_mapped_single_axis()
    linflat_ripindex = linflat_obj.get_irregular_resampled(rips)
    linflat_ripindex_encode_velthresh = linflat_ripindex.query('linvel_flat < 4')

    # re-shape to RippleTimes format for plotting
    rips_vel_filt = rips.loc[linflat_ripindex_encode_velthresh.index]
    rips_vel_filt.reset_index(level=['timestamp', 'time'], inplace=True)
    rips_vel_filt.set_index(['event'], drop=True, append=True, inplace=True)
    rips_vel_filtered = RippleTimes.create_default(rips_vel_filt, 1)
    print('rips when animal velocity <= 4: ' + str(linflat_ripindex_encode_velthresh.shape[0]))
    #print('rips when animal velocity <= 4: '+str(linflat_ripindex_encode_velthresh.shape[0]), file=open(log_file,"a"))

    #cell 4
    # don't run the encoding or decoding subset cells for the cross-validation runs
    # the marks filtering happens right before running the encoder

    #cell 6
    # linearize the whole epoch - should only have to do this once.
    speed_threshold_save = 0

    # new position variables for whole epoch
    pos_all_linear = pos
    posY1 = posY
    #linear_start = pos.index.get_level_values('time')[encode_subset_start]
    #linear_end = pos.index.get_level_values('time')[encode_subset_end]

    # Define path base
    #path_base_timewindow = str(int(round(linear_start))) + 'to' + str(int(round(linear_end))) + 'sec'
    path_base_timewindow = 'whole_epoch_v3'
    path_base_foranalysisofonesessionepoch = (path_base_analysis + rat_name + '/' +
                                              path_base_dayepoch + '/' + path_base_timewindow)

    # Define folder for saved linearization result
    linearization_output_save_path = path_base_foranalysisofonesessionepoch + '/linearization_output/'
    linearization_output_save_path

    # Check if it exists, make if it doesn't
    directory_path = linearization_output_save_path
    if not os.path.exists(directory_path):
        os.mkdir(directory_path)
    #change_to_directory_make_if_nonexistent(directory_path)

    # Define name of linearization result
    linearization_output1_save_filename = os.path.join(
        directory_path, 'linearization_' + path_base_timewindow + '_speed' +
        str(speed_threshold_save) + '_linear_distance_arm_shift' + '.npy')
    linearization_output2_save_filename = os.path.join(
        directory_path, 'linearization_' + path_base_timewindow + '_speed' +
        str(speed_threshold_save) + '_track_segment_id_use' + '.npy')

    # Load linearization
    print('Linearization result exists. Loading it.')
    #print("Linearization result exists. Loading it.", file=open(log_file,"a"))
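    # The linearization itself is assumed to have been computed and saved by a separate run
    # (hence the unconditional "result exists" message): linear_distance_arm_shift holds the
    # 1D coordinate for every position sample and track_segment_id_use labels which maze
    # segment each sample belongs to; both are loaded below and 'linpos_flat' is overwritten
    # with the 1D coordinate.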
    linear_distance_arm_shift = np.load(linearization_output1_save_filename)
    track_segment_id_use = np.load(linearization_output2_save_filename)

    #pos_subset['linpos_flat'] = linear_distance_arm_shift[(encode_subset_start-encode_subset_start):(encode_subset_end-encode_subset_start+1)]
    # whole_epoch
    pos_all_linear['linpos_flat'] = linear_distance_arm_shift

    #cell 7
    # Define position bins
    #!!! HARD CODE: ASSUMES POSITION BIN OF WIDTH 1 !!!
    # need to use the indices of the encoding time subset in this cell

    # Initialize variables
    tracksegment_positionvalues_min_and_max = []
    tracksegment_positionvalues_for_bin_edges = []

    # Find min and max position for each track segment
    #tracksegments_temp = np.unique(track_segment_id_use[encode_subset_start:(encode_subset_end+1)])
    # whole epoch
    tracksegments_temp = np.unique(track_segment_id_use[0:len(linear_distance_arm_shift)])
    for t_loop in tracksegments_temp:  # for each track segment
        #indiceswewant_temp = track_segment_id_use[encode_subset_start:(encode_subset_end+1)] == t_loop
        # whole epoch
        indiceswewant_temp = track_segment_id_use[0:len(linear_distance_arm_shift)] == t_loop
        #tracksegment_positionvalues_temp = pos_subset.values[indiceswewant_temp,0]  # second dimension of pos_subset: zero for position, 1 for velocity
        # whole epoch
        tracksegment_positionvalues_temp = pos_all_linear.values[indiceswewant_temp, 0]
        tracksegment_positionvalues_min_and_max.append([
            tracksegment_positionvalues_temp.min(),
            tracksegment_positionvalues_temp.max()
        ])
        # To define edges, floor mins and ceil maxes
        tracksegment_positionvalues_for_bin_edges.append([
            np.floor(tracksegment_positionvalues_temp.min()),
            np.ceil(tracksegment_positionvalues_temp.max())
        ])

    # Floor to get bins
    #? Is this right? Does 0 mean the bin spanning [0, 1]?
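    # The loop above records, per track segment, the min/max linearized position (and the
    # floored/ceiled versions used as bin edges). Below, those per-segment ranges are floored
    # to integer 1 cm bins, combined across segments, and collapsed by turn_array_into_ranges
    # into (start, end) pairs per arm -- presumably that helper turns a sorted list of bin
    # edges such as [1, 2, 3, 13, 14, 15] into starts [1, 13] and ends [3, 15]; this example
    # is illustrative, not taken from its source.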
    tracksegment_positionvalues_min_and_max_floor = np.floor(tracksegment_positionvalues_min_and_max)

    # Find only bins in range of segments
    binswewant_temp = []
    for t_loop in tracksegment_positionvalues_min_and_max_floor:  # for each track segment
        binswewant_temp.append(np.ndarray.tolist(
            np.arange(t_loop[0], t_loop[1] + 1)))  # + 1 to account for np.arange not including last index

    # Do same for edges
    edgeswewant_temp = []
    for t_loop in tracksegment_positionvalues_for_bin_edges:  # for each track segment
        edgeswewant_temp.append(np.ndarray.tolist(
            np.arange(t_loop[0], t_loop[1] + 1)))  # + 1 to account for np.arange not including last index

    # Flatten (combine bins from segments)
    binswewant_temp_flat = [y for x in binswewant_temp for y in x]
    edgeswewant_temp_flat = [y for x in edgeswewant_temp for y in x]

    # Find unique elements
    arm_coords_wewant = np.unique(binswewant_temp_flat)
    edges_wewant = np.unique(edgeswewant_temp_flat)

    # Turn list of edges into ranges
    start_temp, end_temp = turn_array_into_ranges(edges_wewant)
    arm_coordinates_WEWANT = np.column_stack((start_temp, end_temp))
    print('Arm coordinates: ', arm_coordinates_WEWANT)
    #print('Arm coordinates: ',arm_coordinates_WEWANT, file=open(log_file,"a"))

    #cell 7.1
    # this cell speeds up encoding with larger position bins
    # try 5cm bins - do this by dividing position subset by 5 and arm coords by 5
    #pos_subset['linpos_flat'] = (pos_subset['linpos_flat'])/5
    # whole epoch
    pos_all_linear['linpos_flat'] = (pos_all_linear['linpos_flat']) / 5
    arm_coordinates_WEWANT = arm_coordinates_WEWANT / 5
    arm_coordinates_WEWANT = np.around(arm_coordinates_WEWANT)
    print('Arm coordinates: ', arm_coordinates_WEWANT)
    #print('Arm coordinates: ',arm_coordinates_WEWANT, file=open(log_file,"a"))

    #cell 8
    # define encoding settings
    #max_pos = int(round(linear_distance_arm_shift.max()) + 20)
    # if you are using 5cm position bins, use this max_pos instead
    max_pos = int(round(linear_distance_arm_shift.max() / 5) + 5)
    encode_settings = AttrDict({
        'sampling_rate': 3e4,
        'pos_bins': np.arange(0, max_pos, 1),  # arm_coords_wewant
        'pos_bin_edges': np.arange(0, max_pos + .1, 1),  # edges_wewant
        'pos_bin_delta': 1,
        # 'pos_kernel': sp.stats.norm.pdf(arm_coords_wewant, arm_coords_wewant[-1]/2, 1),
        'pos_kernel': sp.stats.norm.pdf(np.arange(0, max_pos, 1), max_pos / 2, 1),
        # note that the pos_kernel mean should be half of the range of positions (ie 180/90)
        # sp.stats.norm.pdf(np.arange(0,560,1), 280, 1),
        'pos_kernel_std': 1,
        'mark_kernel_std': int(20),
        'pos_num_bins': max_pos,  # len(arm_coords_wewant)
        'pos_col_names': [pos_col_format(ii, max_pos) for ii in range(max_pos)],  # or range(0,max_pos,10)
        'arm_coordinates': arm_coordinates_WEWANT,  # includes box, removes bins in the gaps
        # 'arm_coordinates': [[0,max_pos]]})
        'path_trans_mat': path_arm_nodes
    })
    print('Encode settings: ', encode_settings)
    #print('Encode settings: ',encode_settings, file=open(log_file,"a"))

    #cell 9
    # define decode settings
    decode_settings = AttrDict({
        'trans_smooth_std': 2,
        'trans_uniform_gain': 0.0001,
        'time_bin_size': 60
    })
    print('Decode settings: ', decode_settings)
    #print('Decode settings: ',decode_settings, file=open(log_file,"a"))
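    # With the 5 cm rescaling above, max_pos is the number of 5 cm position bins
    # (max linear distance / 5, plus a small pad), so pos_bins runs 0..max_pos-1 and
    # pos_col_names become 'x000', 'x001', ... For this epoch that works out to 147 bins
    # (see the hard-coded 'x000':'x146' slice in the observations cell below).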
    #cell 9.1 randomize trial order within epoch
    # read in trial times
    trialsname = directory_temp + rat_name + 'trials' + str(day_dictionary[rat_name][0]) + '.mat'
    trialsmat = scipy.io.loadmat(trialsname, squeeze_me=True, struct_as_record=False)
    starttimes = trialsmat['trials'][day_dictionary[rat_name][0] - 1][epoch_dictionary[rat_name][0] - 1].starttime
    starttimes = starttimes.astype(np.float64, copy=False)
    endtimes = trialsmat['trials'][day_dictionary[rat_name][0] - 1][epoch_dictionary[rat_name][0] - 1].endtime
    endtimes = endtimes.astype(np.float64, copy=False)
    trialsindex = np.arange(starttimes.shape[0])
    print('Number of trials: ', trialsindex.shape)
    #print('Number of trials: ',trialsindex.shape, file=open(log_file,"a"))

    # randomize trial order
    indices = np.arange(starttimes.shape[0])
    np.random.shuffle(indices)
    # fixed random order
    indices = [
        17, 92, 3, 98, 11, 78, 105, 100, 103, 37, 28, 62, 85, 59, 41, 93, 29, 102, 6, 76,
        13, 82, 18, 25, 64, 96, 20, 16, 65, 54, 12, 24, 56, 5, 74, 73, 79, 89, 97, 70,
        68, 46, 7, 40, 101, 48, 77, 63, 69, 108, 66, 15, 91, 33, 45, 21, 51, 19, 30, 23,
        72, 35, 42, 47, 95, 107, 104, 61, 43, 60, 67, 88, 71, 14, 38, 32, 87, 57, 27, 31,
        1, 2, 53, 86, 50, 49, 0, 52, 90, 10, 44, 84, 55, 81, 106, 39, 75, 58, 9, 34,
        4, 8, 26, 22, 94, 83, 36, 80, 99
    ]
    starttimes_shuffled = starttimes[indices]
    endtimes_shuffled = endtimes[indices]
    trialsindex_shuffled = trialsindex[indices]
    print('Randomized trial order: ', trialsindex_shuffled)
    #print('Randomized trial order: ',trialsindex_shuffled, file=open(log_file,"a"))

    # to make a new position, marks and trial file with new start and end times:
    # position
    random_trial_pos_all = pos_all_linear.head(0)
    for i in range(len(starttimes_shuffled)):
        random_trial_pos = pos_all_linear.loc[
            (pos_all_linear.index.get_level_values('time') <= endtimes_shuffled[i])
            & (pos_all_linear.index.get_level_values('time') >= starttimes_shuffled[i])]
        random_trial_pos_all = random_trial_pos_all.append(random_trial_pos)

    # marks
    random_trial_marks_all = marks.head(0)
    for i in range(len(starttimes_shuffled)):
        random_trial_marks = marks.loc[
            (marks.index.get_level_values('time') <= endtimes_shuffled[i])
            & (marks.index.get_level_values('time') >= starttimes_shuffled[i])]
        random_trial_marks_all = random_trial_marks_all.append(random_trial_marks)

    # filter for large negative marks and spike amplitude
    marks_random_trial_non_negative = trodes2SS.threshold_marks_negative(
        random_trial_marks_all, negthresh=-999)
    print('Original encode length: ', random_trial_marks_all.shape)
    #print('Original encode length: ',random_trial_marks_all.shape, file=open(log_file,"a"))
    print('Encoding marks non-negative filter: ', marks_random_trial_non_negative.shape)
    #print('Encoding marks non-negative filter: ',marks_random_trial_non_negative.shape, file=open(log_file,"a"))

    random_trial_spk_subset_sparse = trodes2SS.threshold_marks(
        marks_random_trial_non_negative, maxthresh=2000, minthresh=100)
    print('original length: ' + str(marks_random_trial_non_negative.shape[0]))
    print('after filtering: ' + str(random_trial_spk_subset_sparse.shape[0]))
    #print('original length: '+str(marks_random_trial_non_negative.shape[0]), file=open(log_file,"a"))
    #print('after filtering: '+str(random_trial_spk_subset_sparse.shape[0]), file=open(log_file,"a"))

    # velocity filter to define encoding and decoding times
    velocity_filter = 4
    print('Velocity filter: ', velocity_filter)
    #print('Velocity filter: ',velocity_filter, file=open(log_file,"a"))
    # NOTE: to try marks shift on whole trials we need to do shift first, then velocity filter
    # for encoding and decoding marks
    # nope - cant do this, need to do velocity filter first
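    # Next: the amplitude-filtered spikes are split by running speed. Spikes occurring while
    # linear velocity is above velocity_filter (4 cm/s) become the encoding set, spikes while
    # the animal is slower than that become the decoding set, and position itself is also
    # restricted to the fast periods.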
    # encoding spikes
    linflat_obj = random_trial_pos_all.get_mapped_single_axis()
    #linflat_obj = pos_all_linear.get_mapped_single_axis()
    linflat_spkindex = linflat_obj.get_irregular_resampled(random_trial_spk_subset_sparse)
    linflat_spkindex_encode_velthresh = linflat_spkindex.query('linvel_flat > @velocity_filter')
    encode_spikes_random_trial = random_trial_spk_subset_sparse.loc[
        linflat_spkindex_encode_velthresh.index]
    #encode_spikes_random_trial_random = encode_spikes_random_trial.head(0)
    #for i in range(len(starttimes_shuffled)):
    #    encode_random_spikes = encode_spikes_random_trial.loc[(encode_spikes_random_trial.index.get_level_values('time') <= endtimes_shuffled[i]) & (encode_spikes_random_trial.index.get_level_values('time') >= starttimes_shuffled[i])]
    #    encode_spikes_random_trial_random = encode_spikes_random_trial_random.append(encode_random_spikes)
    print('encoding spikes after velocity filter: ' + str(encode_spikes_random_trial.shape[0]))
    #print('encoding spikes after velocity filter: '+str(encode_spikes_random_trial.shape[0]), file=open(log_file,"a"))

    # decoding spikes
    linflat_obj = random_trial_pos_all.get_mapped_single_axis()
    #linflat_obj = pos.get_mapped_single_axis()
    linflat_spkindex = linflat_obj.get_irregular_resampled(random_trial_spk_subset_sparse)
    linflat_spkindex_decode_velthresh = linflat_spkindex.query('linvel_flat < @velocity_filter')
    decode_spikes_random_trial = random_trial_spk_subset_sparse.loc[
        linflat_spkindex_decode_velthresh.index]
    print('decoding spikes after velocity filter: ' + str(decode_spikes_random_trial.shape[0]))
    #print('decoding spikes after velocity filter: '+str(decode_spikes_random_trial.shape[0]), file=open(log_file,"a"))

    # filter position for velocity
    random_trial_pos_all_vel = random_trial_pos_all.loc[
        (random_trial_pos_all['linvel_flat'] > velocity_filter)]
    #random_trial_pos_all_vel = pos_all_linear.loc[(pos_all_linear['linvel_flat']>velocity_filter)]

    #cell 9.2 randomize position between arms
    # define dictionaries for arm swaps
    # dictionary to identify arm of the trial
    arm_id_dict = {
        13: 'arm1', 14: 'arm1', 15: 'arm1', 16: 'arm1', 17: 'arm1', 18: 'arm1', 19: 'arm1',
        20: 'arm1', 21: 'arm1', 22: 'arm1', 23: 'arm1', 24: 'arm1', 25: 'arm1', 26: 'arm1', 27: 'arm1',
        29: 'arm2', 30: 'arm2', 31: 'arm2', 32: 'arm2', 33: 'arm2', 34: 'arm2', 35: 'arm2',
        36: 'arm2', 37: 'arm2', 38: 'arm2', 39: 'arm2', 40: 'arm2', 41: 'arm2', 42: 'arm2', 43: 'arm2',
        46: 'arm3', 47: 'arm3', 48: 'arm3', 49: 'arm3', 50: 'arm3', 51: 'arm3', 52: 'arm3',
        53: 'arm3', 54: 'arm3', 55: 'arm3', 56: 'arm3', 57: 'arm3', 58: 'arm3', 59: 'arm3', 60: 'arm3',
        64: 'arm4', 65: 'arm4', 66: 'arm4', 67: 'arm4', 68: 'arm4', 69: 'arm4', 70: 'arm4',
        71: 'arm4', 72: 'arm4', 73: 'arm4', 74: 'arm4', 75: 'arm4', 76: 'arm4', 77: 'arm4',
        81: 'arm5', 82: 'arm5', 83: 'arm5', 84: 'arm5', 85: 'arm5', 86: 'arm5', 87: 'arm5',
        88: 'arm5', 89: 'arm5', 90: 'arm5', 91: 'arm5', 92: 'arm5', 93: 'arm5', 94: 'arm5',
        97: 'arm6', 98: 'arm6', 99: 'arm6', 100: 'arm6', 101: 'arm6', 102: 'arm6', 103: 'arm6',
        104: 'arm6', 105: 'arm6', 106: 'arm6', 107: 'arm6', 108: 'arm6', 109: 'arm6', 110: 'arm6',
        113: 'arm7', 114: 'arm7', 115: 'arm7', 116: 'arm7', 117: 'arm7', 118: 'arm7', 119: 'arm7',
        120: 'arm7', 121: 'arm7', 122: 'arm7', 123: 'arm7', 124: 'arm7', 125: 'arm7', 126: 'arm7', 127: 'arm7',
        130: 'arm8', 131: 'arm8', 132: 'arm8', 133: 'arm8', 134: 'arm8', 135: 'arm8', 136: 'arm8',
        137: 'arm8', 138: 'arm8', 139: 'arm8', 140: 'arm8', 141: 'arm8', 142: 'arm8', 143: 'arm8',
        1: 'arm0', 2: 'arm0', 3: 'arm0', 4: 'arm0', 5: 'arm0', 6: 'arm0', 7: 'arm0', 8: 'arm0', 9: 'arm0',
    }
    # dictionary for new (randomly chosen) arm
    new_arm_id_dict = {1: 'arm1', 2: 'arm2', 3: 'arm3', 4: 'arm4',
                       5: 'arm5', 6: 'arm6', 7: 'arm7', 8: 'arm8'}
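    # These lookup tables drive the arm-swap randomization below: arm_id_dict maps a 5 cm
    # linear position bin to the arm it belongs to, new_arm_id_dict picks an arm from a
    # random integer 1-8, and arm_start_dict / arm_length_dict (next) give each arm's first
    # bin and its length so a trial's outer-arm positions can be rescaled onto the randomly
    # chosen arm.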
    # dictionary for start of arm
    arm_start_dict = {'arm0': 1, 'arm1': 13, 'arm2': 29, 'arm3': 46, 'arm4': 62,
                      'arm5': 79, 'arm6': 96, 'arm7': 113, 'arm8': 130}
    # dictionary for length of arm
    arm_length_dict = {'arm0': 1, 'arm1': 26 - 13, 'arm2': 42 - 29, 'arm3': 58 - 46,
                       'arm4': 76 - 64, 'arm5': 93 - 81, 'arm6': 110 - 97,
                       'arm7': 126 - 114, 'arm8': 142 - 130}

    import random

    arm_swap_trial_pos_all = pos_all_linear.head(0)
    arm_swap_trial_pos_all_save = pos_all_linear.head(0)
    for i in range(len(starttimes_shuffled)):
        arm_swap_trial_pos = pos_all_linear.loc[
            (pos_all_linear.index.get_level_values('time') <= endtimes_shuffled[i])
            & (pos_all_linear.index.get_level_values('time') >= starttimes_shuffled[i])]
        #print(arm_swap_trial_pos[0:1])
        arm_id = arm_id_dict[int(arm_swap_trial_pos['linpos_flat'].max())]
        arm_start = arm_start_dict[arm_id]
        arm_length = arm_length_dict[arm_id]
        new_arm_id = new_arm_id_dict[random.randint(1, 8)]
        print('Trial: ', indices[i], ' Original arm: ', arm_id, ' New arm: ', new_arm_id)
        new_arm_start = arm_start_dict[new_arm_id]
        new_arm_length = arm_length_dict[new_arm_id]
        arm_swap_trial_pos.loc[arm_swap_trial_pos['linpos_flat'] > 13, ['linpos_flat']] = (
            ((arm_swap_trial_pos[arm_swap_trial_pos['linpos_flat'] > 13]['linpos_flat'].values
              - new_arm_start) * (arm_length / new_arm_length)) + arm_start)
        arm_swap_trial_pos_all = arm_swap_trial_pos_all.append(arm_swap_trial_pos)
        arm_swap_trial_pos_all_save = arm_swap_trial_pos_all_save.append(arm_swap_trial_pos)

    # save dataframe with the shifted position
    shifted_position_file_name = os.path.join(
        path_out, rat_name + '_' + str(day_dictionary[rat_name][0]) + '_' +
        str(epoch_dictionary[rat_name][0]) + '_vel4_convol_new_pos_arm_shift_position_2_' +
        today + '.nc')
    position_shift2 = arm_swap_trial_pos_all_save.reset_index()
    position_shift3 = position_shift2.to_xarray()
    position_shift3.to_netcdf(shifted_position_file_name)
    print('Saved shifted position to: ' + shifted_position_file_name)
    #print('Saved shifted position to: '+shifted_position_file_name, file=open(log_file,"a"))

    offset_30Hz_time_bins = 0
    print('Shifted position shape: ', arm_swap_trial_pos_all.shape)
    #print('Shifted marks shape: ',encode_spikes_random_trial.shape, file=open(log_file,"a"))

    #cell 10
    # Run encoder
    # these time-table lines are so that we can record the time it takes for the encoder to run
    # even if the notebook disconnects
    # look at the time stamps for the two files in /data2/mcoulter called time_stamp1 and time_stamp2
    print('Starting encoder')
    #print("Starting encoder", file=open(log_file,"a"))
    #time_table_data = {'age': [1, 2, 3, 4, 5]}
    #time_table = pd.DataFrame(time_table_data)
    #time_table.to_csv('/p/lustre1/coulter5/remy/time_stamp1.csv')
    time_started = datetime.now()

    # for whole epoch: linflat=pos_all_linear_vel
    # for subset: linflat=pos_subset
    encoder = OfflinePPEncoder(linflat=arm_swap_trial_pos_all,
                               dec_spk_amp=decode_spikes_random_trial,
                               encode_settings=encode_settings,
                               decode_settings=decode_settings,
                               enc_spk_amp=encode_spikes_random_trial,
                               dask_worker_memory=1e9,
                               dask_chunksize=None)

    # new output format to call results, prob_no_spike, and trans_mat for doing single tetrode encoding
    encoder_output = encoder.run_encoder()
    results = encoder_output['results']
    prob_no_spike = encoder_output['prob_no_spike']
    trans_mat = encoder_output['trans_mat']
    #results = encoder.run_encoder()

    #time_table.to_csv('/p/lustre1/coulter5/remy/time_stamp2.csv')
    time_finished = datetime.now()
    print('Encoder finished!')
    #print('Encoder started at: ',datetime.fromtimestamp(os.path.getmtime('/p/lustre1/coulter5/remy/time_stamp1.csv')).strftime('%Y-%m-%d %H:%M:%S'))
    print('Encoder started at: %s' % str(time_started))
    print('Encoder finished at: %s' % str(time_finished))
    #print("Encoder finished!", file=open(log_file,"a"))
    #print('Encoder started at: ',datetime.fromtimestamp(os.path.getmtime('/p/lustre1/coulter5/remy/time_stamp1.csv')).strftime('%Y-%m-%d %H:%M:%S'), file=open("/data2/mcoulter/1d_decoder_log.txt","a"))
    #print('Encoder finished at: ',datetime.fromtimestamp(os.path.getmtime('/p/lustre1/coulter5/remy/time_stamp2.csv')).strftime('%Y-%m-%d %H:%M:%S'), file=open("/data2/mcoulter/1d_decoder_log.txt","a"))

    #cell 11
    # make observations table from results
    # if the master script has the list of all tetrodes then this cell should be able to
    # combine the results table from each tetrode
    tet_ids = np.unique(decode_spikes_random_trial.index.get_level_values('elec_grp_id'))
    observ_tet_list = []
    grp = decode_spikes_random_trial.groupby('elec_grp_id')
    for tet_ii, (tet_id, grp_spk) in enumerate(grp):
        tet_result = results[tet_ii]
        tet_result.set_index(grp_spk.index, inplace=True)
        observ_tet_list.append(tet_result)
    observ = pd.concat(observ_tet_list)
    observ_obj = SpikeObservation.create_default(
        observ.sort_index(level=['day', 'epoch', 'timestamp', 'elec_grp_id']),
        encode_settings)
    observ_obj['elec_grp_id'] = observ_obj.index.get_level_values('elec_grp_id')
    observ_obj.index = observ_obj.index.droplevel('elec_grp_id')

    # add a small offset to observations table to prevent division by 0 when calculating likelihoods
    # this is currently hard-coded for 5cm position bins -> 147 total bins
    observ_obj.loc[:, 'x000':'x146'] = observ_obj.loc[:, 'x000':'x146'].values + np.spacing(1)

    #cell 11.1
    # make prob_no_spike dictionary from individual tetrodes
    # if the master script has the list of all tetrodes then this cell should be able to
    # combine the results table from each tetrode
    # this will take in prob_no_spike from several different encoder runs, each for a single
    # tetrode; the dictionaries should be named 'prob_no_spike_[tet number]'
    #tet_ids = [prob_no_spike_26,prob_no_spike_28,prob_no_spike_29,prob_no_spike_30]
    #prob_no_spike_all = tet_ids[0]
    #for tet_id in tet_ids[1:]:
    #    prob_no_spike_all.update(tet_id)

    #cell 13
    # save observations
    #observ_obj._to_hdf_store('/data2/mcoulter/fievel_19_2_observations_whole_epoch.h5','/analysis',
    #                         'decode/clusterless/offline/observ_obj', 'observ_obj')
    #print('Saved observations to /data2/mcoulter/fievel_19_2_observations_whole_epoch.h5')
    #print('Saved observations to /data2/mcoulter/fievel_19_2_observations_whole_epoch.h5', file=open("/data2/mcoulter/1d_decoder_log.txt","a"))

    #cell 14
    # load previously generated observations
    # hacky but reliable way to load a dataframe stored as hdf
    # Posteriors is imported from data_containers
    #observ_obj = Posteriors._from_hdf_store('/data2/mcoulter/remy_20_4_observ_obj_0_20000.h5','/analysis',
    #                                        'decode/clusterless/offline/observ_obj', 'observ_obj')
    # load prob_no_spike - this is a dictionary
    #probability_no_spike = np.load('/mnt/vortex/mcoulter/prob_no_spike.npy').item()
    # load transition matrix - this is an array
    #transition_matrix = np.load('/mnt/vortex/mcoulter/trans_mat.npy')

    #cell 15
    # Run PP decoding algorithm
    # NOTE 1-11-19 had to add spk_amp and vel to encode settings in order for decoding to run
    # what should these be set to? and why are they here now?
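    # Next cell: decode_settings/encode_settings are rebuilt (with the extra 'spk_amp' and
    # 'vel' fields noted above) and OfflinePPDecoder combines the per-spike observations,
    # the encoder's prob_no_spike and its 'flat_powered' transition matrix into a posterior
    # over position bins for each decoding time bin.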
    time_bin_size = 60
    decode_settings = AttrDict({
        'trans_smooth_std': 2,
        'trans_uniform_gain': 0.0001,
        'time_bin_size': 60
    })
    encode_settings = AttrDict({
        'sampling_rate': 3e4,
        'pos_bins': np.arange(0, max_pos, 1),  # arm_coords_wewant
        'pos_bin_edges': np.arange(0, max_pos + .1, 1),  # edges_wewant
        'pos_bin_delta': 1,
        # 'pos_kernel': sp.stats.norm.pdf(arm_coords_wewant, arm_coords_wewant[-1]/2, 1),
        'pos_kernel': sp.stats.norm.pdf(np.arange(0, max_pos, 1), max_pos / 2, 1),
        # note that the pos_kernel mean should be half of the range of positions (ie 180/90)
        # sp.stats.norm.pdf(np.arange(0,560,1), 280, 1),
        'pos_kernel_std': 1,
        'mark_kernel_std': int(20),
        'pos_num_bins': max_pos,  # len(arm_coords_wewant)
        'pos_col_names': [pos_col_format(ii, max_pos) for ii in range(max_pos)],
        # [pos_col_format(int(ii), len(arm_coords_wewant)) for ii in arm_coords_wewant],
        'arm_coordinates': arm_coordinates_WEWANT,
        # 'arm_coordinates': [[0,max_pos]]})
        'spk_amp': 60,
        'vel': 0
    })

    # when running the encoder and decoder at the same time:
    #   trans_mat=encoder.trans_mat['flat_powered'] AND prob_no_spike=encoder.prob_no_spike
    # when loading a previously generated observations table use:
    #   trans_mat=transition_matrix AND prob_no_spike=probability_no_spike
    print('Starting decoder')
    #print("Starting decoder", file=open(log_file,"a"))
    decoder = OfflinePPDecoder(observ_obj=observ_obj,
                               trans_mat=encoder.trans_mat['flat_powered'],
                               prob_no_spike=encoder.prob_no_spike,
                               encode_settings=encode_settings,
                               decode_settings=decode_settings,
                               time_bin_size=time_bin_size,
                               all_linear_position=pos_all_linear,
                               velocity_filter=4)
    posteriors = decoder.run_decoder()
    print('Decoder finished!')
    #print('Decoder finished!', file=open(log_file,"a"))
    print('Posteriors shape: ' + str(posteriors.shape))
    #print('Posteriors shape: '+ str(posteriors.shape), file=open(log_file,"a"))

    #cell 15.1
    # reorder posteriors and position to restore original trial order (undo the randomization)

    #cell 16
    # save posteriors with hdf
    #posteriors._to_hdf_store('/data2/mcoulter/posteriors/fievel_19_2_whole_epoch.h5','/analysis',
    #                         'decode/clusterless/offline/posterior', 'learned_trans_mat')
    #print('Saved posteriors to /vortex/mcoulter/posteriors/fievel_19_2_whole_epoch.h5')
    #print('Saved posteriors to /vortex/mcoulter/posteriors/fievel_19_2_whole_epoch.h5', file=open("/data2/mcoulter/1d_decoder_log.txt","a"))

    #cell 17
    # load previously generated posteriors from hdf
    #posteriors = Posteriors._from_hdf_store('/data2/mcoulter/posteriors/remy_20_4_linearized_alltime_decode.h5','/analysis',
    #                                        'decode/clusterless/offline/posterior', 'learned_trans_mat')

    #cell 18 saving posteriors as netcdf instead of hdf
    # to export posteriors to MatLab:
    # add ripple labels to posteriors, then convert posteriors to xarray, then save as netcdf
    # this requires folding the multiindex into the posteriors dataframe first
    # needed for netcdf saving:
    marks_index_shift = 0
    posterior_file_name = os.path.join(
        path_out, rat_name + '_' + str(day_dictionary[rat_name][0]) + '_' +
        str(epoch_dictionary[rat_name][0]) +
        '_vel4_mask_convol_new_pos_yes_random_arm_shift_posteriors_2_' + today + '.nc')
    post1 = posteriors.apply_time_event(rips_vel_filtered, event_mask_name='ripple_grp')
    post2 = post1.reset_index()
    #post3 = post2.to_xarray()
    post3 = convert_dan_posterior_to_xarray(
        post2, tetrodes_dictionary[rat_name], velocity_filter, encode_settings,
        decode_settings, trans_mat, offset_30Hz_time_bins, trialsindex_shuffled,
        marks_index_shift)
    #print(len(post3))
    post3.to_netcdf(posterior_file_name)
    print('Saved posteriors to ' + posterior_file_name)
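    # The same reset_index() -> to_xarray() -> to_netcdf() pattern is used below for the
    # linearized position: folding the pandas MultiIndex into ordinary columns first is what
    # makes the saved file readable from MATLAB.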
    #print('Saved posteriors to '+posterior_file_name, file=open(log_file,"a"))

    # to export linearized position to MatLab: again convert to xarray and then save as netcdf
    position_file_name = os.path.join(
        path_out, rat_name + '_' + str(day_dictionary[rat_name][0]) + '_' +
        str(epoch_dictionary[rat_name][0]) +
        '_vel4_mask_convol_new_pos_yes_random_arm_shift_linearposition_2_' + today + '.nc')
    linearized_pos1 = pos_all_linear.apply_time_event(rips_vel_filtered, event_mask_name='ripple_grp')
    linearized_pos2 = linearized_pos1.reset_index()
    linearized_pos3 = linearized_pos2.to_xarray()
    linearized_pos3.to_netcdf(position_file_name)
    print('Saved linearized position to ' + position_file_name)
    #print('Saved linearized position to '+position_file_name, file=open(log_file,"a"))

    # to calculate histogram of posterior max position in each time bin
    hist_bins = []
    post_hist1 = posteriors.drop(['num_spikes', 'dec_bin', 'ripple_grp'], axis=1)
    post_hist2 = post_hist1.dropna()
    post_hist3 = post_hist2.idxmax(axis=1)
    post_hist3 = post_hist3.str.replace('x', '')
    post_hist3 = post_hist3.astype(int)
    hist_bins = np.histogram(post_hist3,
                             bins=[0, 9, 13, 26, 29, 42, 46, 55, 62, 75, 79, 92, 96, 109,
                                   113, 122, 130, 142])
    print(hist_bins)

    print("End of script!")
rip_subset = rips_vel_filtered

# filter for large negative marks
marks_all_non_negative = trodes2SS.threshold_marks_negative(spk_subset, negthresh=-999)
print('Original encode length: ', spk_subset.shape)
print('Original encode length: ', spk_subset.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('Encoding marks non-negative filter: ', marks_all_non_negative.shape)
print('Encoding marks non-negative filter: ', marks_all_non_negative.shape,
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))

spk_subset_sparse = trodes2SS.threshold_marks(marks_all_non_negative, maxthresh=2000, minthresh=100)
print('original length: ' + str(marks_all_non_negative.shape[0]))
print('after filtering: ' + str(spk_subset_sparse.shape[0]))
print('original length: ' + str(marks_all_non_negative.shape[0]),
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
print('after filtering: ' + str(spk_subset_sparse.shape[0]),
      file=open("/data2/mcoulter/1d_decoder_log.txt", "a"))
spk_subset_sparse.groupby('elec_grp_id')

# Filter encoding marks for times when rat velocity > 4 cm/s
# The purpose of this is to remove most of the stationary time from the encoding,
# to focus on times of movement
# to re-create whole_testing, i think we want vel > 2
#linflat_obj = pos_subset.get_mapped_single_axis()