import datetime

# NOTE: bd (the project's database interface) and parse_search_string are
# assumed to be available in this module's namespace, as elsewhere in the repo


def fetch_preprocessing(search_query):
    """Take the search query, fetch the matching database entries and return
    them along with their analysis paths, the parsed query and (for
    preprocessing queries) the corresponding date and animal lists"""
    # get the queryset
    file_path = bd.query_database('analyzed_data', search_query)
    assert len(file_path) > 0, 'Query gave no results'
    # parse the search query
    parsed_query = parse_search_string(search_query)
    # if coming from vr or video, also get lists for the dates and animals
    if parsed_query['analysis_type'] == 'preprocessing':
        if parsed_query['rig'] == 'miniscope':
            m2m_field = 'video_analysis'
        else:
            m2m_field = 'vr_analysis'
        # get the dates of the entries
        date_list = [datetime.datetime.strptime(el['date'], '%Y-%m-%dT%H:%M:%SZ').date()
                     for el in file_path]
        # get the animal names from the linked analysis file names
        animal_list = [el[m2m_field][0][30:41] for el in file_path]
    else:
        date_list = []
        animal_list = []
    # get the paths
    paths_all = [el['analysis_path'] for el in file_path]
    return file_path, paths_all, parsed_query, date_list, animal_list
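# A minimal usage sketch for fetch_preprocessing (illustrative only: the query
# below mirrors the field syntax used elsewhere in this repo, but the exact
# values are assumptions)
if __name__ == '__main__':
    demo_query = 'rig:miniscope, analysis_type:preprocessing'
    entries, paths_all, parsed, dates, animals = fetch_preprocessing(demo_query)
    print('%d entries from %d animals' % (len(entries), len(set(animals))))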
import pandas as pd
import numpy as np
import sklearn.mixture as mix
import sklearn.decomposition as decomp
import functions_plotting as fp
import functions_data_handling as fd
import umap
# the database interface used below; the module name is an assumption based
# on the bd alias used throughout this repo
import functions_bondjango as bd

# get the data paths
try:
    data_path = snakemake.input[0]
except NameError:
    # define the search string
    search_string = 'result:succ, lighting:normal, rig:miniscope, =analysis_type:aggEnc'
    # query the database for data to plot
    data_all = bd.query_database('analyzed_data', search_string)
    data_path = data_all[0]['analysis_path']
    print(data_path)

# load the data
data = fd.aggregate_loader(data_path)

# assemble the array with the parameters of choice
target_data = data.loc[:, ['mouse_cricket_distance', 'encounter_id', 'trial_id']].groupby(
    ['trial_id', 'encounter_id']).agg(list).to_numpy()
target_data = np.array([el for sublist in target_data for el in sublist])

# PCA the data before clustering
pca = decomp.PCA()
transformed_data = pca.fit_transform(target_data)
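# The excerpt stops right after the PCA step, but sklearn.mixture is imported
# above, so a Gaussian mixture is presumably the intended clustering step.
# A minimal sketch under that assumption (the component count and the number
# of retained PCs are illustrative, not values from this repo)
gmm = mix.GaussianMixture(n_components=3, random_state=0)
cluster_labels = gmm.fit_predict(transformed_data[:, :10])
print('encounters per cluster:', np.bincount(cluster_labels))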
# get the save paths
try:
    save_path = snakemake.output[0]
    pic_path = snakemake.output[1]
except NameError:
    # USE FOR DEBUGGING ONLY (need to edit the search query and the object selection)
    # define the search string
    search_string = processing_parameters.search_string
    # define the target model
    if 'miniscope' in search_string:
        target_model = 'video_experiment'
    else:
        target_model = 'vr_experiment'
    # get the queryset
    files = bd.query_database(target_model, search_string)[0]
    raw_path = files['bonsai_path']
    calcium_path = files['bonsai_path'][:-4] + '_calcium.hdf5'
    # assemble the save paths
    save_path = os.path.join(paths.analysis_path,
                             os.path.basename(files['bonsai_path'][:-4])) + '_preproc.hdf5'
    pic_path = save_path[:-13] + '.png'

# get the file date
file_date = datetime.datetime.strptime(files['date'], '%Y-%m-%dT%H:%M:%SZ')

# decide the analysis path based on the file name and date
# if miniscope but no imaging, run bonsai only
if (files['rig'] == 'miniscope') and (files['imaging'] == 'no'):
    # run the first stage of preprocessing
    pass  # (the preprocessing call itself is not part of this excerpt)
from tensorflow.keras.optimizers import Adam
import pandas as pd
# project-internal modules; the bd import name is an assumption based on the
# alias used throughout this repo
import processing_parameters
import functions_bondjango as bd

# define the likelihood threshold for the DLC points
likelihood_threshold = 0.1
# define the search string
search_string = processing_parameters.search_string
# define the target model
if 'miniscope' in search_string:
    target_model = 'video_experiment'
else:
    target_model = 'vr_experiment'
# get the queryset (only the first 2 files)
file_set = bd.query_database(target_model, search_string)[:2]
# allocate memory to accumulate the trajectories
all_points = []
# run through the files
for files in file_set:
    raw_path = files['bonsai_path']
    calcium_path = files['bonsai_path'][:-4] + '_calcium.hdf5'
    file_path_dlc = files['bonsai_path'].replace('.csv', '_dlc.h5')
    # load the DLC tracking output
    raw_h5 = pd.read_hdf(file_path_dlc)
    # get the column names
    column_names = raw_h5.columns
    # take only the relevant columns
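    # The excerpt cuts off at the column selection; a minimal sketch of the
    # filtering that likelihood_threshold implies: keep the x/y columns of one
    # body part and blank out frames tracked below threshold. The body part
    # name 'mouse' is an assumption for illustration
    scorer = column_names[0][0]
    coords = raw_h5.loc[:, (scorer, 'mouse', ['x', 'y'])].copy()
    likelihood = raw_h5.loc[:, (scorer, 'mouse', 'likelihood')]
    coords[likelihood < likelihood_threshold] = float('nan')
    all_points.append(coords)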
# run through the assembled queries (hypothetical loop header: the real
# iterable is not part of this excerpt)
for search_query, target_path in zip(query_list, path_list):
    if 'rig' not in search_query:
        # if the rig argument wasn't given, add it
        search_query += ',rig:vr'
    # parse the search string
    parsed_search = fd.parse_search_string(search_query)
    # if the analysis type requires CA data, force doric imaging on the query
    if 'CA' in parsed_search['analysis_type']:
        parsed_search['imaging'] = 'doric'
        if 'imaging' not in search_query:
            search_query += ',imaging:doric'
    # also get the target database entries
    target_entries = bd.query_database(
        target_model, fd.remove_query_field(search_query, 'analysis_type'))
    # if there are no entries, skip the iteration
    if len(target_entries) == 0:
        print('No entries: ' + search_query)
        continue
    else:
        print(str(len(target_entries)) + ' entries: ' + search_query)
    # add the queries to the list
    full_queries.append(target_entries)
    full_paths.append(target_path)
    full_parsed.append(parsed_search)

# allocate lists for the mice
new_queries = []
new_paths = []
new_parsed = []
try:
    # get the output path and the data urls from the command line arguments
    out_path = sys.argv[2]
    data_all = json.loads(sys.argv[3])
    # get the parts for the file naming
    name_parts = out_path.split('_')
    day = name_parts[0]
    animal = name_parts[1]
    rig = name_parts[2]
except IndexError:
    # get the search string
    search_string = processing_parameters.search_string_cnmfe
    animal = processing_parameters.animal
    day = processing_parameters.day
    rig = processing_parameters.rig
    # query the database for data to plot
    data_all = bd.query_database('video_experiment', search_string)
    # get the paths to the tif files
    video_path = [el['tif_path'] for el in data_all]
    # overwrite data_all with just the urls, keyed by the base file name
    data_all = {os.path.basename(el['bonsai_path'])[:-4]: el['url'] for el in data_all}
    # assemble the output path
    out_path = os.path.join(paths.analysis_path,
                            '_'.join((day, animal, rig, 'calciumday.hdf5')))

# delete the temp folder contents
fi.delete_contents(paths.temp_path)
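# A sketch of the expected invocation for the try branch above (the script
# name and the unused first argument are placeholders, not from this repo):
#   python cnmfe_script.py <arg1> <out_path> '<json dict of file urls>'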
try:
    # get the analysis type
    analysis_type = dict_path['analysis_type']
except NameError:
    # define the analysis type
    analysis_type = 'cellMatch'
    # define the target mouse
    target_mouse = 'DG_200701_a'
    # define the search query
    search_query = 'slug:' + target_mouse
    # get the queryset
    path_info = bd.query_database('video_experiment', search_query)
    # assemble the raw output path
    raw_path = os.path.join(paths.analysis_path, 'cellMatch_' + search_query + '.hdf5')

# load the data for the matching
# for all the files
for files in path_info:
    current_file = cnmf.online_cnmf.load_OnlineCNMF(files['fluo_path'])
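    # The excerpt ends at the load. For reference, the loaded OnlineCNMF
    # object exposes the pieces that cross-session matching typically uses:
    # the spatial footprints in estimates.A (a pixels x components sparse
    # matrix) and the temporal traces in estimates.C. Illustrative only:
    print('components found: %d' % current_file.estimates.A.shape[-1])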