def grab_diff_v_from_folder(ew, folder):
    '''Get the voltage-difference statistics for a model if the file exists.

    inputs:
        ew: string
            matched against the end of a file name in the folder. E.g. if GLIF2
            is requested but there is no GLIF2 file in the folder, nans are
            returned regardless of whether there is an explained-variance value
            in the database (this happens when a model was excluded from
            analysis, for example because of an aberrant parameter).
        folder: string
            path to the structured folder used in the rest of analysis
    returns:
        tuple (RSS_of_voltage_diff, var_of_voltage_data,
        num_data_points_wo_spike_shape) for the noise 2 stimulus; each element
        is nan when the file is missing or malformed.
    '''
    try:
        file = get_file_path_endswith(folder, ew)
        contents = ju.read(file)
        RSS_of_voltage_diff = contents['noise2']['RSS_of_voltage_diff']
        var_of_voltage_data = contents['noise2']['var_of_voltage_data']
        num_data_points_wo_spike_shape = contents['noise2']['num_data_points_wo_spike_shape']
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate;
        # a missing/unreadable file still falls through to nans as before
        RSS_of_voltage_diff = np.nan
        var_of_voltage_data = np.nan
        num_data_points_wo_spike_shape = np.nan
    return RSS_of_voltage_diff, var_of_voltage_data, num_data_points_wo_spike_shape
def get_th_inf(folder, endswith):
    '''Return the optimized instantaneous threshold from the config file in
    *folder* whose name ends with *endswith* (th_inf scaled by its optimization
    coefficient), or nan when no such file is present.'''
    has_match = np.any([name.endswith(endswith) for name in os.listdir(folder)])
    if not has_match:
        return np.nan
    config = ju.read(get_file_path_endswith(folder, endswith))
    return config['th_inf'] * config['coeffs']['th_inf']
def count(path, ew):
    '''count the number of files in the structured data directory that end with
    the specified input

    Inputs:
        path: string
            path to the structured data directory
        ew: string
            string the file name ends with
    Returns:
        n: integer
            number of folders under *path* containing a file that ends with the
            specified input string
    '''
    # BUG FIX: the original built `folders` from the module-level global
    # `structured_data_directory` instead of the `path` argument, silently
    # ignoring the parameter.
    folders = [os.path.join(path, f) for f in os.listdir(path)]
    n = 0
    for folder in folders:
        try:
            get_file_path_endswith(folder, ew)
            n = n + 1
        except Exception:
            # folder has no matching file: simply don't count it
            pass
    return n
def check_ev_value(folder, ew):
    '''Checks to see if the explained variance of the training data (noise 1)
    is below the specified value.

    inputs:
        folder: string
            path to folder where files are located
        ew: string
            specifies the unique end of a file name of the file searching for
    returns:
        Nothing. Appends specimen IDs to be excluded to the enclosing-scope
        'exp_var_exclusion_below' list; the threshold `ev` is also read from
        the enclosing scope.
    '''
    specimen_ID = os.path.basename(folder)[:9]
    try:
        file = get_file_path_endswith(folder, ew)  # if file doesnt exist this will fail
        dictionary = ju.read(file)
        if dictionary['after_opt']['noise_1'] < ev:
            exp_var_exclusion_below.append(specimen_ID)
    except Exception:
        # narrowed from bare except; single-argument print() form produces the
        # same output under both Python 2 and Python 3
        print('cant find a file for %s this should not happen if the check_sweeps_and_rm_folders.py was run!' % specimen_ID)
def get_model(path, EW):
    '''Runs the model for a specified neuron and model
    inputs:
        path: string
            folder path with files for the neuron
        EW: string
            end of file searching for: options '_GLIF1_neuron_config.json',
            '_GLIF2_neuron_config.json' etc.
    returns:
        run_data: dictionary
            contains data from the model run, augmented with 'time',
            'El_reference' and 'stimulus' entries
    '''
    specimen_id = int(os.path.basename(path)[:9])
    file = get_file_path_endswith(path, EW)

    # load data from the local cache of the Cell Types Database
    dir_name = os.path.join(relative_path, 'mouse_nwb/specimen_' + str(specimen_id))
    all_sweeps = ctc.get_ephys_sweeps(specimen_id, os.path.join(dir_name, 'ephys_sweeps.json'))
    noise2_sweeps = get_sweep_num_by_name(all_sweeps, 'Noise 2')
    # all noise-2 sweeps share the stimulus, so the first sweep suffices
    noise2_data = ctc.get_ephys_data(specimen_id, os.path.join(dir_name, 'ephys.nwb')).get_sweep(noise2_sweeps[0])

    # run model with current
    stimulus2 = noise2_data['stimulus']
    neuron_config = ju.read(file)
    neuron_config['dt'] = 1. / noise2_data['sampling_rate']  # reset dt to the stimulus dt not the optimization dt
    neuron = GlifNeuron.from_dict(neuron_config)
    # NOTE: removed a stray no-op expression `1/noise2_data['sampling_rate']`
    # and an unused duplicate `sweeps` lookup that were in the original.
    run_data = neuron.run(stimulus2)
    run_data['time'] = np.arange(0, len(run_data['voltage'])) * neuron_config['dt']
    run_data['El_reference'] = neuron_config['El_reference']
    run_data['stimulus'] = noise2_data['stimulus']
    return run_data
def extract_values(folder, the_end_file_match):
    '''extract threshold values from a config file with a specified ending
    inputs:
        folder: string
            Path to second tier data folder with the data files of a specific neuron inside
        the_end_file_match: string
            Specifies the end of desired file name. Used for grabbing different model level config files.
    outputs:
        Dictionary of threshold values ('th_NOT_opt', 'th_opt', 'th_coeff',
        'El_reference'); entries stay nan when no matching file exists.
    raises:
        Exception when a GLIF1 config is requested but absent (every neuron is
        expected to have a GLIF1 model).
    '''
    dictionary = {}
    dictionary['th_NOT_opt'] = {'from_zero': np.nan, 'absolute': np.nan}
    dictionary['th_opt'] = {'from_zero': np.nan, 'absolute': np.nan}
    dictionary['th_coeff'] = np.nan
    dictionary['El_reference'] = np.nan
    if np.any([f.endswith(the_end_file_match) for f in os.listdir(folder)]):
        file = get_file_path_endswith(folder, the_end_file_match)
        config_dict = ju.read(file)
        # 'from_zero' is relative to the resting potential El; 'absolute' adds
        # El_reference back in to give the threshold in absolute voltage
        dictionary['th_NOT_opt']['absolute'] = config_dict['th_inf'] + config_dict['El_reference']
        dictionary['th_opt']['absolute'] = config_dict['th_inf'] * config_dict['coeffs']['th_inf'] + config_dict['El_reference']
        dictionary['th_NOT_opt']['from_zero'] = config_dict['th_inf']
        dictionary['th_opt']['from_zero'] = config_dict['th_inf'] * config_dict['coeffs']['th_inf']
        dictionary['th_coeff'] = config_dict['coeffs']['th_inf']
        dictionary['El_reference'] = config_dict['El_reference']
    else:
        if 'GLIF1' in the_end_file_match:
            # BUG FIX: the original printed an undefined name `specimen_ID`
            # (NameError); derive the id from the folder name as the rest of
            # the codebase does. Also removed the unreachable `pass` after raise.
            specimen_ID = os.path.basename(folder)[:9]
            print('THERE IS NO LIF MODEL FOR NEURON %s' % specimen_ID)
            raise Exception('there should be a GLIF1 in every level')
    return dictionary
def cycle(folder, end_with, model_string, data_dict):
    '''Calculates the squared error of the subthreshold voltage for the
    specified folder and model. This function is here for the repetitive
    nature of the code.
    inputs:
        folder: string
            path to folder containing model configurations.
        end_with: string
            end of file searching for: options '_GLIF1_neuron_config.json','_GLIF2_neuron_config.json' etc.
        model_string: string
            string searching for in model name: options '(LIF)', '(LIF-R)', '(LIF-ASC)', '(LIF-R_ASC)', '(LIF-R_ASC_A')
        data_dict: dictionary
            contains data returned by extract_data
    output:
        writes RSS and variance of the voltage difference between the model and
        the data, the variance of the data itself, and the number of
        subthreshold data points into a file ending with '*_subthr_v.json' in
        folder. (Removed the docstring's mention of a nonexistent
        `stimulus_type` parameter.)
    '''
    try:
        file = get_file_path_endswith(folder, end_with)
    except Exception:
        # this model level was not fit for this neuron: nothing to do
        return
    specimen_id = int(os.path.basename(folder)[:9])
    cre = os.path.basename(folder)[10:]

    # run forced spike protocol on both noise stimuli and merge the results
    out = running('noise1', file, data_dict)
    out_to_update = running('noise2', file, data_dict)
    out.update(out_to_update)

    output_file_name = os.path.join(
        folder, str(specimen_id) + '_' + cre + end_with[:7] + 'subthr_v.json')
    json_utilities.write(output_file_name, out)
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------ # sorting folders into an order (not necessary) folders=np.sort([os.path.join(struct_data_dir1, f) for f in os.listdir(struct_data_dir1)]) found1_flag=0 for specimen_id_directory in folders: specimen_id=os.path.basename(specimen_id_directory)[:9] for ends_with in ['_GLIF1'+end, '_GLIF2'+end, '_GLIF3'+end, '_GLIF4'+end, '_GLIF5'+end]: # see if specified model configuration file exist in the data folders try: json_file1=get_file_path_endswith(specimen_id_directory, ends_with) except: continue try: json_file2=get_file_path_endswith(os.path.join(struct_data_dir2,os.path.basename(specimen_id_directory)), ends_with) except: continue # if both files exist get their contents d1=ju.read(json_file1) d2=ju.read(json_file2) # look for differences in contents with a tolerance result=list(dictdiffer.diff(d1,d2, tolerance=.001)) pp=pprint.PrettyPrinter(indent=4) if result!=[]:
#------------------------------------------------------------ #--open a file from the preprocessor and plot single traces-- #------------------------------------------------------------ #set up figure plt.figure(figsize=(14, 6)) I1_plt = plt.subplot2grid((7, 1), (0, 0)) V2_plt = plt.subplot2grid((7, 1), (1, 0), rowspan=6) specimen_id = '474637203' #htr3 sub_folder = os.path.join( data_path, os.listdir(data_path)[np.where( [specimen_id in fname for fname in os.listdir(data_path)])[0][0]]) file = get_file_path_endswith(sub_folder, '_preprocessor_values.json') neuron_dict = ju.read(file) R_NO_asc = neuron_dict['resistance']['R_test_list']['mean'] R_asc = neuron_dict['resistance']['R_fit_ASC_and_R']['mean'] C = neuron_dict['capacitance']['C_test_list']['mean'] El = neuron_dict['El']['El_noise']['measured']['mean'] # get the sweeps dir_name = os.path.join(relative_path, 'mouse_nwb/specimen_' + specimen_id) the_sweeps = ctc.get_ephys_sweeps(int(specimen_id), os.path.join(dir_name, 'ephys_sweeps.json')) noise1_sweeps = get_sweep_num_by_name(the_sweeps, 'Noise 1') # put data in the format required for functions below n1_s1_data = ctc.get_ephys_data(int(specimen_id), os.path.join(dir_name, 'ephys.nwb')).get_sweep(
n2_sweeps=get_sweep_num_by_name(the_sweeps, 'Noise 2') # check to see if their are at least two noise 1 and noise 2 sweeps in the data nwb file if not check_more_than_two_sweeps(n1_sweeps, nwb): print specimen_id ,"has less than two noise_1 sweeps" logging.warning(str(specimen_id) +" has less than two noise_1 sweeps") sp_id_to_remove.append(specimen_id) continue if not check_more_than_two_sweeps(n2_sweeps, nwb): print specimen_id ,"has less than two noise_2 sweeps" logging.warning(str(specimen_id) +" has less than two noise_2 sweeps") sp_id_to_remove.append(specimen_id) continue # check if there is at least a level 1 GLIF model in the structured data directory if not get_file_path_endswith(specimen_id_directory, '_GLIF1_neuron_config.json'): print specimen_id, "has no model file!!!!!!!" logging.warning(str(specimen_id) +" has no model file") sp_id_to_remove.append(specimen_id) continue # check to see that the spike times in the model files are the same (if not, it is likely that the was more than one stimulus amplitude played to the neuron) glif_spike_times_n1=get_model_spike_times_from_nwb('_GLIF1_neuron_config.json', specimen_id_directory, '(LIF)', n1_sweeps, where_running) if check_spike_times_identical(glif_spike_times_n1): pass else: print specimen_id , "noise 1 has inconsistent model sweep times" logging.warning(str(specimen_id)+"noise 1 has inconsistent model sweep times") sp_id_to_remove.append(specimen_id) glif_spike_times_n2=get_model_spike_times_from_nwb('_GLIF1_neuron_config.json', specimen_id_directory, '(LIF)', n2_sweeps, where_running)
def cycle(specimen_id_directory, ends_with, model_string, data_dict):
    '''Calculates the explained variance ratio for the specified specimen_id_directory and model.
    This function is here for the repetitive nature of the code.
    inputs:
        specimen_id_directory: string
            path to structured data directory containing neuron_config, preprocessor, etc., files.
        ends_with: string
            end of file searching for: options "_GLIF1_neuron_config.json","_GLIF2_neuron_config.json" etc.
        model_string: string
            string searching for in model name: options '(LIF)', '(LIF-R)', '(LIF-ASC)', '(LIF-R_ASC)', '(LIF-R_ASC_A')
        data_dict: dictionary
            contains data returned by extract_data
    output:
        writes explained variance ratios into file ending with 'exp_var_ratio_10ms.json' in specimen_id_directory
    '''
    # see if specified model configuration file exist in the data folders;
    # if this model level was never fit there is nothing to compute
    try:
        file = get_file_path_endswith(specimen_id_directory, ends_with)
    except:
        return
    specimen_id = int(os.path.basename(specimen_id_directory)[:9])
    cre = os.path.basename(specimen_id_directory)[10:]
    # confirming the dt of noise 1 and noise 2 are the same in the file
    if data_dict['n1_dt'] != data_dict['n2_dt']:
        raise Exception('The dt in noise 1 and noise 2 is not the same.')
    # initializing data structures for explained variance calculations
    ev = {}
    ev['after_opt'] = {}
    ev['before_opt'] = {}
    ev['model_spike_times_same_across_sweeps'] = {}
    ev['run_model_spike_times_match_database'] = {}
    # NOTE(review): specimen 580895033 is special-cased with hard-coded,
    # previously computed explained-variance values; the function writes them
    # out and returns early without rerunning the model for that neuron.
    if specimen_id == 580895033:
        if ends_with == '_GLIF1_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9838105341820447
            ev["after_opt"]["noise_1"] = 0.9793973377573459
            ev["after_opt"]["noise_2"] = 0.983810807305087
            ev["before_opt"]["noise_1"] = 0.8454442315760935
            ev["before_opt"]["noise_2"] = 0.8493365092125525
        if ends_with == '_GLIF2_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9889582213030378
            ev["after_opt"]["noise_1"] = 0.9885054832008723
            ev["after_opt"]["noise_2"] = 0.988952726396574
            ev["before_opt"]["noise_1"] = 0.8852614534451949
            ev["before_opt"]["noise_2"] = 0.8882540368687765
        if ends_with == '_GLIF3_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.972059377795663
            ev["after_opt"]["noise_1"] = 0.964542013582842
            ev["after_opt"]["noise_2"] = 0.972065677218419
            ev["before_opt"]["noise_1"] = 0.9175506860780771
            ev["before_opt"]["noise_2"] = 0.9192162154035345
        if ends_with == '_GLIF4_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9838078849900366
            ev["after_opt"]["noise_1"] = 0.9774371918205483
            ev["after_opt"]["noise_2"] = 0.983816449506429
            ev["before_opt"]["noise_1"] = 0.9481063969607645
            ev["before_opt"]["noise_2"] = 0.952096857211585
        if ends_with == '_GLIF5_neuron_config.json':
            ev['model_spike_times_same_across_sweeps']['n1'] = 1
            ev['model_spike_times_same_across_sweeps']['n2'] = 1
            ev['run_model_spike_times_match_database']['n2'] = True
            ev["n2_after_opt_sanitycheck"] = 0.9836467816928267
            ev["after_opt"]["noise_1"] = 0.9784782997497251
            ev["after_opt"]["noise_2"] = 0.983643486774882
            ev["before_opt"]["noise_1"] = 0.8846618004335125
            ev["before_opt"]["noise_2"] = 0.8904106067655934
        json_utilities.write(
            os.path.join(
                specimen_id_directory,
                str(specimen_id) + '_' + cre + ends_with[:7] + 'exp_var_ratio_10ms.json'), ev)
        print '\twrote output to ', os.path.join(
            specimen_id_directory,
            str(specimen_id) + '_' + cre + ends_with[:7] + 'exp_var_ratio_10ms.json')
        return
    # get the spike indicies of the model from the .nwb file in the database
    model_n1_nwb_ind = get_model_spike_ind_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise1_sweeps'], data_dict['n1_dt'], where_running)[0]
    # get explained variances (10 ms resolution, i.e. the [.01] window)
    ev['after_opt']['noise_1'] = exVar(data_dict['noise1_spike_ind'],
                                       [model_n1_nwb_ind], [.01],
                                       data_dict['n1_dt'],
                                       len(data_dict['noise1_stim']))[0]
    ev['after_opt']['noise_2'] = get_ev_from_folder(
        ends_with, specimen_id_directory, model_string)  # get explained varience ratio from glif api
    #----------------------------------------------------------------------------------------------
    #---------grabbing data along with performing a series of sanity checks for later use-----------
    #----------------------------------------------------------------------------------------------
    neuron_config = json_utilities.read(file)
    neuron_config['dt'] = data_dict['n2_dt']
    neuron = GlifNeuron.from_dict(neuron_config)  #set up model for running
    print '\trunning', specimen_id, 'noise 2 after optimization as a sanity check to compare with what is in database'
    model_n2_after = neuron.run(data_dict['noise2_stim'])  #running model
    print '\tfinished', specimen_id, 'running model on noise 2 after optimization as a sanity check to compare with what is in database'
    # before calculating explained variance this is a sanity check to make sure
    # spike times and steps match with the dt within the same file
    # NOTE(review): if these are numpy arrays, `assert array == array` is an
    # elementwise comparison whose truth value is ambiguous -- confirm whether
    # this was meant to be np.all(...)/np.allclose(...).
    assert model_n2_after['grid_spike_times'] / data_dict[
        'n2_dt'] == model_n2_after['spike_time_steps']
    # calculate the explained variance from the the model run
    ev_GLIF1_n2_after = exVar(data_dict['noise2_spike_ind'],
                              [model_n2_after['spike_time_steps']], [.01],
                              data_dict['n2_dt'],
                              len(data_dict['noise2_stim']))
    ev['n2_after_opt_sanitycheck'] = ev_GLIF1_n2_after[
        0]  #this is from the rerun done here
    # Sanity check to make sure model spike times from the database are all the same..
    # Note that all of these should have been eliminated via the "check_sweeps_and_rm_folders.py" script
    glif_spike_times_n1 = get_model_spike_times_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise1_sweeps'], where_running)
    ev['model_spike_times_same_across_sweeps'][
        'n1'] = check_spike_times_identical(glif_spike_times_n1)
    glif_spike_times_n2 = get_model_spike_times_from_nwb(
        ends_with, specimen_id_directory, model_string,
        data_dict['noise2_sweeps'], where_running)
    ev['model_spike_times_same_across_sweeps'][
        'n2'] = check_spike_times_identical(glif_spike_times_n2)
    # sanity check to make sure calculated model spike times run here match what is in the Allen Institute Cell Types Database.
    # just checking against first sweep since the sweeps should all be identical
    ev['run_model_spike_times_match_database']['n2'] = np.allclose(
        model_n2_after['grid_spike_times'],
        glif_spike_times_n2[0],
        atol=.0001,
        rtol=0,
        equal_nan=True)
    #--------------------------------------------------------
    #--------------------------------------------------------
    #--------------------------------------------------------
    # running and calculating exp var for data before optimization:
    # resetting the threshold coefficient to 1.0 undoes the optimization
    neuron_config['dt'] = data_dict['n1_dt']
    neuron_config['coeffs']['th_inf'] = 1.0
    neuron = GlifNeuron.from_dict(neuron_config)
    print '\trunning noise 1', specimen_id, 'before optimization'
    model_n1_before = neuron.run(data_dict['noise1_stim'])
    ev_GLIF1_n1_before = exVar(data_dict['noise1_spike_ind'],
                               [model_n1_before['spike_time_steps']], [.01],
                               data_dict['n1_dt'],
                               len(data_dict['noise1_stim']))
    print '\tfinished noise 1', specimen_id, 'before optimization'
    print '\trunning noise 2', specimen_id, 'before optimization'
    model_n2_before = neuron.run(data_dict['noise2_stim'])
    ev_GLIF1_n2_before = exVar(data_dict['noise2_spike_ind'],
                               [model_n2_before['spike_time_steps']], [.01],
                               data_dict['n2_dt'],
                               len(data_dict['noise2_stim']))
    print '\tfinished noise 2', specimen_id, 'before optimization'
    ev['before_opt']['noise_1'] = ev_GLIF1_n1_before[0]
    ev['before_opt']['noise_2'] = ev_GLIF1_n2_before[0]
    # save the file to the local structured data directory
    json_utilities.write(
        os.path.join(
            specimen_id_directory,
            str(specimen_id) + '_' + cre + ends_with[:7] + 'exp_var_ratio_10ms.json'), ev)
    print '\twrote output to ', os.path.join(
        specimen_id_directory,
        str(specimen_id) + '_' + cre + ends_with[:7] + 'exp_var_ratio_10ms.json')
structured_data_directory=os.path.join(relative_path, 'create_data_dir', 'mouse_struc_data_dir') #structured_data_directory=os.path.join(relative_path, 'create_data_dir', 'human_struc_data_dir') #--------------------------------------------------------------------------- #--------------------------------------------------------------------------- #--------------------------------------------------------------------------- # sort the data so that specifying start and end integers works folders=np.sort([os.path.join(structured_data_directory, f) for f in os.listdir(structured_data_directory)]) # check for issues in the "*_GLIF*_exp_var_ratio_10ms.json" files for folder in folders: specimen_id=os.path.basename(folder)[:9] for s in ['GLIF1', 'GLIF2', 'GLIF3', 'GLIF4', 'GLIF5']: file =False try: file=get_file_path_endswith(folder, s+'_exp_var_ratio_10ms.json') except: pass if file: print 'checking', file dict=ju.read(file) database_value=dict['n2_after_opt_sanitycheck'] calculated_value=dict['after_opt']['noise_2'] rtol=0. atol=1e-3 # check if two values recorded in the "*_GLIF*_exp_var_ratio_10ms.json" are the same if not np.isclose(database_value, calculated_value, rtol=rtol, atol=atol): print specimen_id, s, ':the value difference,',database_value,'-', calculated_value, '=', np.absolute(database_value-calculated_value), 'is > the tolerance, ', atol + rtol * np.absolute(calculated_value) # check to make sure model spike times of noise 1 from the database are all the same. if dict['model_spike_times_same_across_sweeps']['n1']==False:
def general_exclusions(folder_path, n_in_cre=5, resist=True, th_inf_bad=True, spike_cut=True, ev=.2, accidental_exclusion=True):
    '''Returns a list of specimen ids that will be excluded from all levels and analysis.
    Note that some of these exclusions may be irrelevant for the curated data via the
    Allen Institute Cell Types Database. Nonetheless, I leave these exclusions here for
    my own use on internal data.
    Inputs:
        folder_path: string
            path to the structured data directory
        n_in_cre: integer
            specifies number of neurons that have to exist in a cre line to include them in the analysis
        resist: boolean
            if True, exclude neurons which have a calculated resistance over 1000 MOhms
        th_inf_bad: boolean
            if True, exclude neurons which have a calculated threshold less than -60 mV
        spike_cut: boolean
            if True, exclude neurons which have an intercept larger than 30 mV after fitting the spike cut length
        ev: float
            exclude neurons that have an GLIF1 explained variance on noise 1 less than the provided value.
            Note that noise 1 is used because exclusion criteria are only applied to training data.
            (Also doubles as an on/off flag: a falsy value skips this exclusion.)
        accidental_exclusion: boolean
            one neuron was either accidentally excluded from the analysis or removed for an
            unknown reason. If True, exclude this neuron.
    Returns:
        exclude_me_sp_ids: list of strings
            list of neuron by specimen ids to be eliminated from the structured data directory
    '''
    initial_sp_ids=[f[0:9] for f in os.listdir(folder_path)]
    print 'GENERAL EXCLUSIONS: there will be overlap in numbers below i.e. some models will be excluded for more than one reason'
    print '\tTotal number of preprocessed files:', len(initial_sp_ids)
    folders=[os.path.join(folder_path, f) for f in os.listdir(folder_path)]
    strange_pp_exclusion=[] #exclusion for preprocessor files that do not have the correct format
    # exclude via slope and intercept from spike cutting results
    spike_cutting_exclusions=[]
    if spike_cut:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['spike_cutting']['NOdeltaV']['intercept'] > .03:
                    spike_cutting_exclusions.append(specimen_ID)
            except:
                print folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE'
                strange_pp_exclusion.append(specimen_ID)
    print '\t', len(set(spike_cutting_exclusions)), 'neurons were excluded for having an intercept larger than .03'
    # exclude based on the measured experimental threshold
    # note that the experimental threshold is the same for all models of the same neuron so just look at GLIF1 file.
    th_inf_exclusion_list=[]
    if th_inf_bad:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['th_inf']['via_Vmeasure']['value']< -.06:
                    th_inf_exclusion_list.append(specimen_ID)
            except:
                print folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE'
                strange_pp_exclusion.append(specimen_ID)
    print '\t', len(set(th_inf_exclusion_list)), 'neurons have a th_inf less than -60 mV'
    # exclude based on resistance (fit WITHOUT after-spike currents)
    resistance_exclusion_list=[]
    if resist:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            pp_file=get_pp_path(folder)
            pp_dict=ju.read(pp_file)
            try:
                if pp_dict['resistance']['R_test_list']['mean']>1000.e6:
                    resistance_exclusion_list.append(specimen_ID)
            except:
                print folder, 'DOES NOT LOOK LIKE A NORMAL PREPROCESSOR FILE'
                strange_pp_exclusion.append(specimen_ID)
    print '\t', len(set(resistance_exclusion_list)), 'neurons have a resistance fit WITHOUT ASC larger than 1000 MOhms.'
    print '\t', len(set(strange_pp_exclusion)), 'neurons have a strange looking preprocessor file.'
    # exclude based on explained variance on training data
    exp_var_exclusion_no_file=[]
    if ev:
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            try:
                file=get_file_path_endswith(folder, 'GLIF1_exp_var_ratio_10ms.json')
            except:
                exp_var_exclusion_no_file.append(specimen_ID)
    print '\t', len(set(exp_var_exclusion_no_file)),'neurons have no explained variance file which means they probably had a empty array in a noise 1. See calc_all_explained_variance.py variable model_GLIF1_n1_after'
    exp_var_exclusion_below=[]
    # the following mouse neurons were either accidentally excluded from the analysis or removed for a reason that eludes me now.
    accidental_exclusions=[]
    if accidental_exclusion:
        if os.path.isdir(os.path.join(folder_path,'569739534'+'_Chrna2-Cre_OE25')): #if this directory exists get rid of it.
            accidental_exclusions=['569739534']
        else:
            pass #neuron already excluded or not in directory
    print '\t', len(set(accidental_exclusions)),'neurons were excluded from the analysis by accident. Set accidental_exclusion flag to False to use it if reprocessing all data.'
    # nested helper: appends to the enclosing exp_var_exclusion_below list and
    # reads the enclosing `ev` threshold
    def check_ev_value(folder,ew):
        '''Checks to see if the explained variance of the training data (noise 1)
        is below the specified value.
        inputs:
            folder: path to folder where files are located
            ew: string
                specifies the unique end of a file name of the file searching for
        returns:
            Nothing. Appends specimen IDs to be excluded to the 'exp_var_exclusion_below' list
        '''
        specimen_ID=os.path.basename(folder)[:9]
        try:
            file=get_file_path_endswith(folder, ew) #if file doesnt exist this will fail
            dictionary=ju.read(file)
            if dictionary['after_opt']['noise_1']<ev:
                exp_var_exclusion_below.append(specimen_ID)
        except:
            print 'cant find a file for', specimen_ID, 'this should not happen if the check_sweeps_and_rm_folders.py was run!'
            pass
    if ev:
        for folder in folders:
            check_ev_value(folder, 'GLIF1_exp_var_ratio_10ms.json')
    print '\t', len(set(exp_var_exclusion_below)), 'neurons have a GLIF explained variance on noise 1 training data of less than', ev
    # get the set of all neurons that are still included in analysis after the above exclusions
    init_excluded_id_list=list(set(spike_cutting_exclusions+
                                   resistance_exclusion_list+
                                   th_inf_exclusion_list+
                                   strange_pp_exclusion+
                                   exp_var_exclusion_no_file+
                                   exp_var_exclusion_below+
                                   accidental_exclusions))
    reduced_sp_ids=list(set(initial_sp_ids)-set(init_excluded_id_list)) # specimen ids remaining after above exclusions
    # remove data that does not have at least a specified number (n_in_cre) of neurons in a cre line
    # NOTE(review): if n_in_cre is False, small_cre_line_exclusion is never
    # defined and the statement below would raise NameError -- confirm callers
    # never pass n_in_cre=False.
    if n_in_cre is not False:
        small_cre_line_exclusion=np.array([])
        cre_list=[]
        for folder in folders:
            specimen_ID=os.path.basename(folder)[:9]
            if specimen_ID in reduced_sp_ids:
                cre_list.append({'sp':specimen_ID, 'cre': os.path.basename(folder)[10:]})
        df=pd.DataFrame(cre_list)
        for cre in df['cre'].unique():
            if len(df[df['cre']==cre])<n_in_cre:
                small_cre_line_exclusion=np.append(small_cre_line_exclusion, (df[df['cre']==cre]['sp'].values))
    # create list of specimen IDs whose folder should be completely eliminated
    exclude_me_sp_ids=list(set(small_cre_line_exclusion.tolist()+ init_excluded_id_list))
    print 'A total of',len(exclude_me_sp_ids), 'out of', len(folders), 'neurons are excluded via general exclusion criteria leaving',len(folders)-len(exclude_me_sp_ids), 'for this analysis'
    return exclude_me_sp_ids
# --- fragment: remove model files for excluded specimens, then tally what
# --- remains per cre line (`folder`/`folders` come from earlier context)
for f in os.listdir(folder):
    if ('GLIF2' in f ) or ('GLIF4' in f) or ('GLIF5' in f):
        os.remove(os.path.join(folder,f))
# find and remove bad spike component of threshold from the directory
bad_voltage_comp_of_th=exclude_via_v_comp_of_th(structured_data_directory)
for folder in folders:
    sp_id=os.path.basename(folder)[0:9]
    if sp_id in bad_voltage_comp_of_th:
        for f in os.listdir(folder):
            if ('GLIF5' in f):
                os.remove(os.path.join(folder,f))
# count up the files in the directory
# NOTE(review): elsewhere get_file_path_endswith appears to raise rather than
# return falsy when nothing matches -- confirm the 'nope' branch is reachable.
for folder in folders:
    if not get_file_path_endswith(folder,'GLIF1_neuron_config.json'):
        print 'nope'
# tabulate the number of neurons per cre line
cre_list=[]
folders=[os.path.join(structured_data_directory, f) for f in os.listdir(structured_data_directory)]
for folder in folders:
    specimen_ID=os.path.basename(folder)[:9]
    cre_list.append({'sp':specimen_ID, 'cre': os.path.basename(folder)[10:]})
df=pd.DataFrame(cre_list)
print df.groupby('cre').size()
# count up the files in the directory
print 'TOTALS'
print 'GLIF1 has', count(structured_data_directory,'_GLIF1_neuron_config.json')
print 'GLIF2 has', count(structured_data_directory,'_GLIF2_neuron_config.json')
import sys
# make the shared analysis libraries importable
relative_path = os.path.dirname(os.getcwd())
sys.path.append(os.path.join(relative_path, 'libraries'))
from data_library import check_and_organize_data, get_file_path_endswith, get_pp_path
from pub_plot_library import distribution_plot

data_path = os.path.join(relative_path, 'mouse_struc_data_dir')
folders = [os.path.join(data_path, f) for f in os.listdir(data_path)]
# collect threshold-adaptation parameters for every neuron that has a GLIF5 model
all_neurons = []
for folder in folders:
    specimen_ID = os.path.basename(folder)[:9]
    cre = os.path.basename(folder)[10:]
    try:
        get_file_path_endswith(
            folder, '_GLIF5_neuron_config.json'
        )  #checks if the file is there, if not the values should not be used
    except:
        continue
    pp_file = get_pp_path(folder)
    pp_dict = ju.read(pp_file)
    # only keep neurons where both a and b of the voltage component of the threshold were fit
    if pp_dict['threshold_adaptation'][
            'a_voltage_comp_of_thr_from_fitab'] is not None and pp_dict[
                'threshold_adaptation'][
                    'b_voltage_comp_of_thr_from_fitab'] is not None:
        all_neurons.append([
            specimen_ID, cre,
            pp_dict['threshold_adaptation']['a_voltage_comp_of_thr_from_fitab']
            / pp_dict['threshold_adaptation']
            ['b_voltage_comp_of_thr_from_fitab'],
            np.log10(pp_dict['threshold_adaptation']
            # NOTE(review): expression continues beyond this chunk
# NOTE(review): chunk begins mid-call -- these are trailing keyword args of a
# plotting helper whose opening lies before this chunk
MAKE_PLOT=True,
               SHOW_PLOT=True,
               BLOCK=True,
               PUBLICATION_PLOT=True)
# load data out of configuration files
data_path = os.path.join(relative_path, 'mouse_struc_data_dir')
folders = [os.path.join(data_path, f) for f in os.listdir(data_path)]
# collect spike-component-of-threshold parameters for every neuron with a
# GLIF2 model: amplitude a_spike (converted to mV) and decay time constant
# 1/b_spike (converted to ms)
all_neurons = []
for folder in folders:
    specimen_ID = os.path.basename(folder)[:9]
    cre = os.path.basename(folder)[10:]
    try:
        file = get_file_path_endswith(folder, '_GLIF2_neuron_config.json')
    except:
        continue
    neuron_dict = ju.read(file)
    all_neurons.append([
        specimen_ID, cre,
        neuron_dict['threshold_reset_method']['params']['a_spike'] * 1.e3,
        1. / neuron_dict['threshold_reset_method']['params']['b_spike'] * 1.e3
    ])
(cre_dict) = check_and_organize_data(all_neurons)
percentile_dict = distribution_plot(cre_dict, 2, 3,
                                    xlabel=r'$\delta \Theta_s (mV)$',
                                    # NOTE(review): call continues beyond this chunk