def fetch_preprocessing(search_query):
    """Take the search query and load the data and the input paths"""
    # get the queryset (a list of matching database entries)
    file_path = bd.query_database('analyzed_data', search_query)

    assert len(file_path) > 0, 'Query gave no results'

    # parse the search query
    parsed_query = parse_search_string(search_query)

    # if the data come from the vr or video rigs, also build lists of the dates and animals
    if parsed_query['analysis_type'] == 'preprocessing':
        if parsed_query['rig'] == 'miniscope':
            m2m_field = 'video_analysis'
        else:
            m2m_field = 'vr_analysis'

        # extract the date from each entry
        # date_list = [datetime.datetime.strptime(el[m2m_field][0][:10], '%m_%d_%Y') for el in file_path]
        date_list = [
            datetime.datetime.strptime(el['date'],
                                       '%Y-%m-%dT%H:%M:%SZ').date()
            for el in file_path
        ]

        # extract the animal ID from each entry's analysis path
        animal_list = [el[m2m_field][0][30:41] for el in file_path]
    else:
        date_list = []
        animal_list = []
    # # load the data
    # data_all = [pd.read_hdf(el['analysis_path'], sub_key) for el in file_path]
    # get the paths
    paths_all = [el['analysis_path'] for el in file_path]

    return file_path, paths_all, parsed_query, date_list, animal_list
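
# usage sketch (not part of the original source): the query string below is
# illustrative, following the 'field:value' format used throughout these examples
entries, paths_all, parsed, dates, animals = fetch_preprocessing(
    'rig:miniscope, =analysis_type:preprocessing')
print(str(len(entries)) + ' entries from ' + str(len(set(animals))) + ' animals')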
Example #2
import pandas as pd
import numpy as np
import sklearn.mixture as mix
import sklearn.decomposition as decomp
import functions_plotting as fp
import functions_data_handling as fd
import functions_bondjango as bd  # database interface; the module name is assumed from the project's naming pattern
import umap

# get the data paths
try:
    data_path = snakemake.input[0]
except NameError:
    # snakemake is not defined, so fall back to a manual database query
    # define the search string
    search_string = 'result:succ, lighting:normal, rig:miniscope, =analysis_type:aggEnc'
    # query the database for data to plot
    data_all = bd.query_database('analyzed_data', search_string)
    data_path = data_all[0]['analysis_path']
print(data_path)

# load the data
data = fd.aggregate_loader(data_path)

# assemble the array with the parameters of choice, grouped per trial and encounter
target_data = data.loc[:, ['mouse_cricket_distance', 'encounter_id', 'trial_id']].groupby(
    ['trial_id', 'encounter_id']).agg(list).to_numpy()
# flatten the grouped object array into one row per encounter
target_data = np.array([el for sublist in target_data for el in sublist])

# PCA the data before clustering
pca = decomp.PCA()
transformed_data = pca.fit_transform(target_data)
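
# the excerpt ends after the PCA step; a sketch of the clustering that the
# sklearn.mixture and umap imports above point to (the component counts are
# illustrative, not from the original source)
gmm = mix.GaussianMixture(n_components=3, random_state=0)
cluster_labels = gmm.fit_predict(transformed_data)
# embed in two dimensions for plotting
embedding = umap.UMAP(n_components=2).fit_transform(transformed_data)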
Example #3
try:
    # get the save paths from snakemake (the input-path lines are omitted in this excerpt)
    save_path = snakemake.output[0]
    pic_path = snakemake.output[1]
except NameError:
    # USE FOR DEBUGGING ONLY (need to edit the search query and the object selection)
    # define the search string
    search_string = processing_parameters.search_string

    # define the target model
    if 'miniscope' in search_string:
        target_model = 'video_experiment'
    else:
        target_model = 'vr_experiment'

    # get the queryset
    files = bd.query_database(target_model, search_string)[0]
    raw_path = files['bonsai_path']
    calcium_path = files['bonsai_path'][:-4] + '_calcium.hdf5'
    # assemble the save paths
    save_path = os.path.join(paths.analysis_path,
                             os.path.basename(
                                 files['bonsai_path'][:-4])) + '_preproc.hdf5'
    # swap the '_preproc.hdf5' suffix (13 characters) for '.png'
    pic_path = save_path[:-13] + '.png'

# get the file date
file_date = datetime.datetime.strptime(files['date'], '%Y-%m-%dT%H:%M:%SZ')

# decide the analysis path based on the file name and date
# if miniscope but no imaging, run bonsai only
if (files['rig'] == 'miniscope') and (files['imaging'] == 'no'):
    # run the first stage of preprocessing
Example #4
from tensorflow.keras.optimizers import Adam

# define the likelihood threshold for the DLC points
likelihood_threshold = 0.1

# define the search string
search_string = processing_parameters.search_string

# define the target model
if 'miniscope' in search_string:
    target_model = 'video_experiment'
else:
    target_model = 'vr_experiment'

# get the queryset (limit to the first two entries)
file_set = bd.query_database(target_model, search_string)[:2]

# allocate memory to accumulate the trajectories
all_points = []

# run through the files
for files in file_set:
    raw_path = files['bonsai_path']
    calcium_path = files['bonsai_path'][:-4] + '_calcium.hdf5'

    file_path_dlc = files['bonsai_path'].replace('.csv', '_dlc.h5')
    # load the DLC tracking output
    raw_h5 = pd.read_hdf(file_path_dlc)
    # get the column names
    column_names = raw_h5.columns
    # take only the relevant columns
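
    # the excerpt ends here; a minimal sketch of the likely filtering step, using
    # the likelihood_threshold defined above and assuming the standard single-animal
    # DeepLabCut column layout (scorer, bodypart, coordinate)
    likelihood_columns = [col for col in column_names if col[-1] == 'likelihood']
    for scorer, bodypart, _ in likelihood_columns:
        # blank out the x/y coordinates of low-confidence points
        low_confidence = raw_h5[(scorer, bodypart, 'likelihood')] < likelihood_threshold
        raw_h5.loc[low_confidence,
                   [(scorer, bodypart, 'x'), (scorer, bodypart, 'y')]] = float('nan')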
Example #5
        if 'rig' not in search_query:
            # if the rig argument wasn't given, add it
            search_query += ',rig:vr'

    # parse the search string
    parsed_search = fd.parse_search_string(search_query)
    # if the analysis type requires calcium (CA) data, force doric imaging
    if 'CA' in parsed_search['analysis_type']:

        parsed_search['imaging'] = 'doric'

        if 'imaging' not in search_query:
            search_query += ',imaging:doric'

    # also get the target database entries
    target_entries = bd.query_database(
        target_model, fd.remove_query_field(search_query, 'analysis_type'))
    # if there are no entries, skip the iteration
    if len(target_entries) == 0:
        print('No entries: ' + search_query)
        continue
    else:
        print(str(len(target_entries)) + ' entries: ' + search_query)
    # add the queries to the list
    full_queries.append(target_entries)
    full_paths.append(target_path)
    full_parsed.append(parsed_search)

# allocate lists for the per-mouse queries, paths and parsed searches
new_queries = []
new_paths = []
new_parsed = []
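
# a sketch of the per-mouse regrouping that the lists above set up (not from
# the original excerpt; the 'mouse' key is assumed from the parsed query format)
for mouse in sorted({parsed.get('mouse', '') for parsed in full_parsed}):
    # collect the indexes of the entries belonging to this mouse
    mouse_idx = [idx for idx, parsed in enumerate(full_parsed)
                 if parsed.get('mouse', '') == mouse]
    new_queries.append([full_queries[idx] for idx in mouse_idx])
    new_paths.append([full_paths[idx] for idx in mouse_idx])
    new_parsed.append([full_parsed[idx] for idx in mouse_idx])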
Example #6
    try:
        # get the output path and data from the command line
        # (the earlier argv reads are omitted in this excerpt)
        out_path = sys.argv[2]
        data_all = json.loads(sys.argv[3])
        # get the parts for the file naming
        name_parts = out_path.split('_')
        day = name_parts[0]
        animal = name_parts[1]
        rig = name_parts[2]

    except IndexError:
        # get the search string
        search_string = processing_parameters.search_string_cnmfe
        animal = processing_parameters.animal
        day = processing_parameters.day
        rig = processing_parameters.rig
        # query the database for data to plot
        data_all = bd.query_database('video_experiment', search_string)
        # video_data = data_all[0]
        # video_path = video_data['tif_path']
        video_path = [el['tif_path'] for el in data_all]
        # overwrite data_all with just the urls
        data_all = {
            os.path.basename(el['bonsai_path'])[:-4]: el['url']
            for el in data_all
        }
        # assemble the output path
        out_path = os.path.join(
            paths.analysis_path, '_'.join(
                (day, animal, rig, 'calciumday.hdf5')))

    # delete the folder contents
    fi.delete_contents(paths.temp_path)
Example #7
try:
    # get the analysis type from the parsed path dictionary
    # (the lines that build dict_path are omitted in this excerpt)
    analysis_type = dict_path['analysis_type']

except NameError:
    # define the analysis type
    analysis_type = 'cellMatch'
    # define the target mouse
    target_mouse = 'DG_200701_a'
    # define the search query
    search_query = 'slug:' + target_mouse
    # # define the origin model
    # ori_type = 'preprocessing'
    # # get a dictionary with the search terms
    # dict_path = fd.parse_search_string(search_query)

    path_info = bd.query_database('video_experiment', search_query)

    # # get the info and paths
    # path_info, paths_all, parsed_query, date_list, animal_list = \
    #     fd.fetch_preprocessing(search_query + ', =analysis_type:' + ori_type)
    # # get the raw output_path
    # dict_path['analysis_type'] = analysis_type
    raw_path = os.path.join(paths.analysis_path,
                            'cellMatch_' + search_query + '.hdf5')

    # paths_all = os.path.join(paths.analysis_path, '')

# load the data for the matching
# for all the files
for files in path_info:
    # cnmf is presumably caiman.source_extraction.cnmf (the import is not shown in this excerpt)
    current_file = cnmf.online_cnmf.load_OnlineCNMF(files['fluo_path'])
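
# a plausible continuation (not in the original excerpt): match the cells
# across sessions with CaImAn's multisession registration. register_multisession,
# estimates.A and estimates.dims are CaImAn APIs; their use here is an assumption
from caiman.base.rois import register_multisession

# gather the spatial footprints of every loaded model
footprints = [cnmf.online_cnmf.load_OnlineCNMF(el['fluo_path']).estimates.A
              for el in path_info]
# register the components across sessions (dims taken from the last model loaded)
spatial_union, assignments, matchings = register_multisession(
    footprints, dims=current_file.estimates.dims)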