Example #1
0
def TempdirBehaviorCache(mock_api, request):
    """Fixture: yield a VisualBehaviorOphysProjectCache backed by a temp dir.

    Parameters
    ----------
    mock_api : callable
        Zero-argument callable returning the fetch-api instance.
    request : pytest fixture request
        ``request.param`` supplies the ``cache`` flag for the project cache.

    Yields
    ------
    VisualBehaviorOphysProjectCache
        Cache whose manifest lives in a temporary directory that is removed
        when the fixture is torn down.
    """
    # Use the context manager instead of a manual cleanup() call after the
    # yield: the original explicit cleanup could be skipped if the generator
    # was closed abnormally, leaking the temporary directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        manifest = os.path.join(temp_dir, "manifest.json")
        yield VisualBehaviorOphysProjectCache(fetch_api=mock_api(),
                                              cache=request.param,
                                              manifest=manifest)
Example #2
0
def main():
    """CLI entry point: write project metadata tables to csv files.

    Parses command-line arguments, builds a VisualBehaviorOphysProjectCache
    from LIMS for the given release date(s), and delegates csv writing to
    BehaviorProjectMetadataWriter.
    """
    parser = argparse.ArgumentParser(
        description='Write project metadata to csvs')
    parser.add_argument('--out_dir', required=True,
                        help='directory to save csvs')
    parser.add_argument('--project_name', required=True,
                        help='project name')
    # nargs="+" allows one or more release dates
    parser.add_argument('--data_release_date', required=True, nargs="+",
                        help='Project release date. Ie 2021-03-25')
    parser.add_argument('--overwrite_ok', dest='overwrite_ok',
                        action='store_true',
                        help='Whether to allow overwriting existing '
                             'output files')
    args = parser.parse_args()

    project_cache = VisualBehaviorOphysProjectCache.from_lims(
        data_release_date=args.data_release_date)
    metadata_writer = BehaviorProjectMetadataWriter(
        behavior_project_cache=project_cache,
        out_dir=args.out_dir,
        project_name=args.project_name,
        data_release_date=args.data_release_date,
        overwrite_ok=args.overwrite_ok)
    metadata_writer.write_metadata()
Example #3
0
def test_metadata():
    """Write metadata csvs for the 2021-03-25 release and compare each table
    against its pickled expected version."""
    release_date = '2021-03-25'

    def _assert_table_matches(expected_dir, out_dir, stem, sort_col,
                              is_session=True):
        # Load the pickled expectation and the freshly written csv for one
        # table, normalize row order, then assert frame equality.
        expected = pd.read_pickle(os.path.join(expected_dir, stem + '.pkl'))
        expected = sort_df(df=expected, sort_col=sort_col)
        obtained = pd.read_csv(os.path.join(out_dir, stem + '.csv'),
                               dtype={'mouse_id': str},
                               parse_dates=['date_of_acquisition'])
        obtained = sort_df(df=obtained, sort_col=sort_col)
        if is_session:
            convert_strings_to_lists(df=obtained)
        else:
            convert_strings_to_lists(df=obtained, is_session=False)
        pd.testing.assert_frame_equal(expected, obtained)

    with tempfile.TemporaryDirectory() as tmp_dir:
        bpc = VisualBehaviorOphysProjectCache.from_lims(
            data_release_date=release_date)
        writer = BehaviorProjectMetadataWriter(
            behavior_project_cache=bpc,
            out_dir=tmp_dir,
            project_name='visual-behavior-ophys',
            data_release_date=release_date)
        writer.write_metadata()

        expected_path = os.path.join(get_resources_dir(),
                                     'project_metadata_writer',
                                     'expected')
        # test behavior
        _assert_table_matches(expected_path, tmp_dir,
                              'behavior_session_table',
                              'behavior_session_id')
        # test ophys session
        _assert_table_matches(expected_path, tmp_dir,
                              'ophys_session_table',
                              'ophys_session_id')
        # test ophys experiment
        _assert_table_matches(expected_path, tmp_dir,
                              'ophys_experiment_table',
                              'ophys_experiment_id',
                              is_session=False)
def get_multi_session_df(project_code,
                         session_number,
                         conditions,
                         data_type,
                         event_type,
                         time_window=None,
                         interpolate=True,
                         output_sampling_rate=30,
                         response_window_duration=0.5,
                         use_extended_stimulus_presentations=False,
                         overwrite=False):
    """Build a concatenated mean-response dataframe across all experiments
    for one (project_code, session_number) pair and save it to hdf5.

    :param project_code: project code used to select experiments
    :param session_number: session number used to select experiments
        (coerced to float before comparison with the experiment table)
    :param conditions: list of column names to group by when averaging
    :param data_type: passed to loading.get_stimulus_response_df
    :param event_type: passed through; 'omissions' are additionally filtered
        to those more than 3 s after the last change
    :param time_window: [start, stop] seconds around each event; defaults to
        [-3, 3.1]. The default is None and the list is created per call to
        avoid the shared mutable-default-argument pitfall.
    :param interpolate: whether traces are interpolated
    :param output_sampling_rate: requested sampling rate; replaced below by
        the frame rate found in each experiment's stimulus response df
    :param response_window_duration: seconds; replaced by the value stored in
        the stimulus response df when that column is present
    :param use_extended_stimulus_presentations: passed to get_ophys_dataset
    :param overwrite: when False, an existing output file short-circuits
        processing
    :return: the concatenated dataframe when processed, otherwise None
    """
    if time_window is None:
        time_window = [-3, 3.1]

    # cant get prefered stimulus if images are not in the set of conditions
    if ('image_name' in conditions) or ('change_image_name' in conditions):
        get_pref_stim = True
    else:
        get_pref_stim = False
    print('get_pref_stim', get_pref_stim)

    cache_dir = loading.get_platform_analysis_cache_dir()
    cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir=cache_dir)
    print(cache_dir)
    experiments_table = cache.get_ophys_experiment_table()
    # dont include Ai94 experiments because they makes things too slow
    experiments_table = experiments_table[(experiments_table.reporter_line !=
                                           'Ai94(TITL-GCaMP6s)')]

    session_number = float(session_number)
    experiments = experiments_table[
        (experiments_table.project_code == project_code)
        & (experiments_table.session_number == session_number)].copy()
    print('session_types:', experiments.session_type.unique(),
          ' - there should only be one session_type per session_number')
    session_type = experiments.session_type.unique()[0]

    filename = loading.get_file_name_for_multi_session_df(
        data_type, event_type, project_code, session_type, conditions)
    mega_mdf_write_dir = loading.get_multi_session_df_dir(
        interpolate=interpolate,
        output_sampling_rate=output_sampling_rate,
        event_type=event_type)
    filepath = os.path.join(mega_mdf_write_dir, filename)

    if not overwrite:  # if we dont want to overwrite
        if os.path.exists(filepath):  # and file exists, dont regenerate
            print('multi_session_df exists for', filepath)
            print('not regenerating')
            process_data = False
        else:  # if file doesnt exist, create it
            print('creating multi session mean df for', filename)
            process_data = True
    else:  # if we do want to overwrite
        process_data = True  # regenerate and save
        print('creating multi session mean df for', filename)

    if process_data:
        mega_mdf = pd.DataFrame()
        for experiment_id in experiments.index.unique():
            try:
                print(experiment_id)
                # get dataset
                dataset = loading.get_ophys_dataset(
                    experiment_id,
                    get_extended_stimulus_presentations=(
                        use_extended_stimulus_presentations))
                # get stimulus_response_df
                df = loading.get_stimulus_response_df(
                    dataset,
                    data_type=data_type,
                    event_type=event_type,
                    time_window=time_window,
                    interpolate=interpolate,
                    output_sampling_rate=output_sampling_rate,
                    load_from_file=True)
                # use response_window_duration from stim response df if it
                # exists. BUGFIX: the original tested
                # `response_window_duration in df.keys()` -- the float value
                # itself rather than the column name -- so this branch could
                # never trigger.
                if 'response_window_duration' in df.keys():
                    response_window_duration = \
                        df.response_window_duration.values[0]
                df['ophys_experiment_id'] = experiment_id
                # if using omissions, only include omissions where time from
                # last change is more than 3 seconds
                if event_type == 'omissions':
                    df = df[df.time_from_last_change > 3]
                # modify columns for specific conditions
                if 'passive' in dataset.metadata['session_type']:
                    df['lick_on_next_flash'] = False
                    df['engaged'] = False
                    df['engagement_state'] = 'disengaged'
                if 'running_state' in conditions:  # create 'running' Boolean column based on threshold on mean_running_speed
                    df['running'] = df.mean_running_speed.values > 2
                if 'pupil_state' in conditions:  # create 'large_pupil' Boolean column based on threshold on mean_pupil_area
                    if 'mean_pupil_area' in df.keys():
                        df = df[df.mean_pupil_area.notnull()]
                        if len(df) > 100:
                            median_pupil_area = df.mean_pupil_area.median()
                            df['large_pupil'] = (df.mean_pupil_area.values >
                                                 median_pupil_area)
                if 'pre_change' in conditions:
                    df = df[df.pre_change.notnull()]
                # get params for mean df creation from stimulus_response_df
                output_sampling_rate = df.frame_rate.unique()[0]

                mdf = ut.get_mean_df(
                    df,
                    conditions=conditions,
                    frame_rate=output_sampling_rate,
                    window_around_timepoint_seconds=time_window,
                    response_window_duration_seconds=response_window_duration,
                    get_pref_stim=get_pref_stim,
                    exclude_omitted_from_pref_stim=True)
                if 'correlation_values' in mdf.keys():
                    mdf = mdf.drop(columns=['correlation_values'])
                mdf['ophys_experiment_id'] = experiment_id
                print('mean df created for', experiment_id)
                mega_mdf = pd.concat([mega_mdf, mdf])
            except Exception as e:  # deliberate best-effort: skip experiments that fail and keep going
                print(e)
                print('problem for', experiment_id)

        if 'level_0' in mega_mdf.keys():
            mega_mdf = mega_mdf.drop(columns='level_0')
        if 'index' in mega_mdf.keys():
            mega_mdf = mega_mdf.drop(columns='index')

        # if file of the same name exists, delete & overwrite to prevent
        # files from getting huge
        if os.path.exists(filepath):
            os.remove(filepath)
        print('saving multi session mean df as ', filename)
        mega_mdf.to_hdf(filepath, key='df')
        print('saved to', mega_mdf_write_dir)

        return mega_mdf

    else:
        print('multi_session_df not created')
Example #5
0
            print('problem for cell_specimen_id:', cell_specimen_id, ', ophys_experiment_id:', ophys_experiment_id)
            print(e)
    if save_dir:
        utils.save_figure(fig, figsize, save_dir, folder,
                          str(cell_specimen_id) + '_' + metadata_string + '_' + suffix)
        plt.close()


# examples
# Example driver: load the platform analysis cache, fetch the omission
# response multi-session df, and restrict/annotate it with the platform
# paper experiment table.
# NOTE(review): `loading` is a project module assumed to be imported at
# module level; it is not visible in this chunk -- confirm against the
# full script.
if __name__ == '__main__':

    from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache

    # load cache
    cache_dir = loading.get_platform_analysis_cache_dir()
    cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir)
    experiments_table = loading.get_platform_paper_experiment_table()

    # load multi_session_df
    df_name = 'omission_response_df'
    conditions = ['cell_specimen_id']
    use_events = True
    filter_events = True

    multi_session_df = loading.get_multi_session_df(cache_dir, df_name, conditions, experiments_table,
                                                    use_events=use_events, filter_events=filter_events)

    # limit to platform paper dataset
    multi_session_df = multi_session_df[multi_session_df.ophys_experiment_id.isin(experiments_table.index.values)]
    # merge with metadata (experiments_table is indexed by ophys_experiment_id)
    multi_session_df = multi_session_df.merge(experiments_table, on='ophys_experiment_id')
# Cluster-submission setup: locate the worker script, build the python
# interpreter path for the target conda env, and load the experiment table
# used to enumerate jobs below.
# python file to execute on cluster
python_file = r"/home/marinag/visual_behavior_analysis/scripts/create_multi_session_df.py"

# conda environment to use
conda_environment = 'visual_behavior_sdk'

# build the python path
# this assumes that the environments are saved in the user's home directory in a folder called 'anaconda2'
python_path = os.path.join(os.path.expanduser("~"), 'anaconda2', 'envs',
                           conda_environment, 'bin', 'python')

# define the job record output folder
stdout_location = r"/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/multi_session_dfs"

# NOTE(review): `bpc` is presumably an alias for VisualBehaviorOphysProjectCache
# imported elsewhere in the full script -- confirm; it is not defined in this
# chunk.
cache_dir = loading.get_platform_analysis_cache_dir()
cache = bpc.from_s3_cache(cache_dir=cache_dir)
print(cache_dir)

# one job will be launched per (project_code, session_number) from this table
experiments_table = cache.get_ophys_experiment_table()

# call the `sbatch` command to run the jobs.
for project_code in experiments_table.project_code.unique():
    print(project_code)
    for session_number in experiments_table.session_number.unique():

        # instantiate a Slurm object
        slurm = Slurm(
            mem='120g',  # '24g'
            cpus_per_task=1,
            time='60:00:00',
            partition='braintv',