Example 1
import os

import numpy as np
from scipy import ndimage
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

# Helper functions used below (get_spike_counts_by_frame, sahani_quick,
# split_half_r, get_noise_ceiling, get_chunked_data) are assumed to be
# defined elsewhere in the original project.


def get_allen_natural_movie_data(session_id,
                                 brain_area,
                                 resize_width,
                                 save_path,
                                 cache_dir='../../ecephys_cache_dir/'):
    # Load cache file
    print('Loading cache dir, will download if it does not already exist')
    manifest_path = os.path.join(cache_dir, 'manifest.json')
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

    # Load up a particular recording session
    print('Loading session data')
    sessions_table = cache.get_session_table()
    session = cache.get_session_data(session_id)

    # Load stimulus tables (trial start/end times and stimulus conditions)
    print('Loading stimulus tables, may take a few minutes')
    stimulus_tables = [
        session.get_stimulus_table("natural_movie_one"),
        session.get_stimulus_table("natural_movie_three")
    ]

    # Load movie frames/data
    print('Loading movie frames, may take a few minutes')
    raw_movie_frames = [
        cache.get_natural_movie_template(1),
        cache.get_natural_movie_template(3)
    ]

    # Get the ids of units in given brain area
    unit_ids = session.units[session.units["ecephys_structure_acronym"] ==
                             brain_area].index.to_numpy()

    # Get raw spike times (not chunked into clips yet)
    print('Getting spike times for each movie')
    presentation_ids_1 = session.stimulus_presentations.loc[(
        session.stimulus_presentations['stimulus_name'] == 'natural_movie_one'
    )].index.values
    spike_times_1 = session.presentationwise_spike_times(
        stimulus_presentation_ids=presentation_ids_1, unit_ids=unit_ids)

    presentation_ids_3 = session.stimulus_presentations.loc[(
        session.stimulus_presentations['stimulus_name'] ==
        'natural_movie_three')].index.values
    spike_times_3 = session.presentationwise_spike_times(
        stimulus_presentation_ids=presentation_ids_3, unit_ids=unit_ids)

    neural_data = []
    power = []
    split_half = []
    spike_rate = []
    noise_ceiling = []

    # Loop through each unit to get processed data
    for idx, unit_id in enumerate(unit_ids):
        print('Starting unit', idx + 1, '/', len(unit_ids))

        # First get spike counts per frame for each movie (one or three) and block (0 or 1)
        _, spikes_1_0, spikes_r_1_0 = get_spike_counts_by_frame(
            session, spike_times_1, 'natural_movie_one', 0, brain_area,
            unit_id)
        _, spikes_1_1, spikes_r_1_1 = get_spike_counts_by_frame(
            session, spike_times_1, 'natural_movie_one', 1, brain_area,
            unit_id)
        _, spikes_3_0, spikes_r_3_0 = get_spike_counts_by_frame(
            session, spike_times_3, 'natural_movie_three', 0, brain_area,
            unit_id)
        _, spikes_3_1, spikes_r_3_1 = get_spike_counts_by_frame(
            session, spike_times_3, 'natural_movie_three', 1, brain_area,
            unit_id)

        # Join spikes together for each block
        spikes_r_1 = np.concatenate([spikes_r_1_0, spikes_r_1_1], axis=0)
        spikes_r_3 = np.concatenate([spikes_r_3_0, spikes_r_3_1], axis=0)

        # Get signal and noise power for each block
        sp_1, np_1, _ = sahani_quick(spikes_r_1)
        sp_3, np_3, _ = sahani_quick(spikes_r_3)
        sp_ = (sp_1 + sp_3) / 2
        np_ = (np_1 + np_3) / 2
        # Average noise/signal ratio for each block
        sp_np_ratio = ((np_1 / sp_1) + (np_3 / sp_3)) / 2

        # Get split-half correlation
        r_1 = split_half_r(spikes_r_1)
        r_3 = split_half_r(spikes_r_3)
        r = (r_1 + r_3) / 2

        # Get noise ceiling
        nc_1, ev_1, on_1 = get_noise_ceiling(spikes_r_1)
        nc_3, ev_3, on_3 = get_noise_ceiling(spikes_r_3)
        noise_ceiling_mean = ((nc_1 + nc_3) / 2, (ev_1 + ev_3) / 2,
                              (on_1 + on_3) / 2)

        # Average spike counts per frame across trials from both blocks
        spikes_1 = np.mean(spikes_r_1, axis=0)
        spikes_3 = np.mean(spikes_r_3, axis=0)

        # Smooth both with Gaussian kernel
        spikes_1_smoothed = ndimage.gaussian_filter1d(spikes_1, sigma=1 / 2)
        spikes_3_smoothed = ndimage.gaussian_filter1d(spikes_3, sigma=1 / 2)

        # Get spike rate (spikes/frame)
        spike_rate_1 = np.mean(spikes_1)
        spike_rate_3 = np.mean(spikes_3)
        spike_rate_mean = (spike_rate_1 + spike_rate_3) / 2

        # Now divide these neural responses into 50 frame chunks
        # along with corresponding 50 frame chunks of (resized) visual stimuli/movies
        stimuli, unit_responses = get_chunked_data(
            session, raw_movie_frames, [spikes_1_smoothed, spikes_3_smoothed],
            resize_width, True)

        stimuli_nonbp, unit_responses = get_chunked_data(
            session, raw_movie_frames, [spikes_1_smoothed, spikes_3_smoothed],
            resize_width, False)

        neural_data.append(unit_responses)
        power.append([sp_, np_, sp_np_ratio])
        split_half.append(r)
        spike_rate.append(spike_rate_mean)
        noise_ceiling.append(noise_ceiling_mean)

    neural_data = np.array(neural_data)
    power = np.array(power)
    split_half = np.array(split_half)
    spike_rate = np.array(spike_rate)
    noise_ceiling = np.array(noise_ceiling)

    np.save(save_path, {
        "stimuli": stimuli,
        "stimuli_nonbp": stimuli_nonbp,
        "neural_data": neural_data,
        "power": power,
        "split_half": split_half,
        "spike_rate": spike_rate,
        "noise_ceiling": noise_ceiling
    },
            allow_pickle=True)

    print('Saved data at', save_path)

    return stimuli, neural_data, power
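# A minimal usage sketch for the function above. The session id, brain area,
# resize width, and output path below are illustrative placeholders, not
# values taken from the original project.
stimuli, neural_data, power = get_allen_natural_movie_data(
    session_id=756029989,
    brain_area='VISp',
    resize_width=64,
    save_path='visp_natural_movie_data.npy',
    cache_dir='../../ecephys_cache_dir/')
print(neural_data.shape, power.shape)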
Example 2

import os
import numpy as np
import pandas as pd
from pandas import ExcelWriter
from allensdk.brain_observatory.ecephys.ecephys_session import EcephysSession
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
import tensorflow as tf
from keras.optimizers import Adam
import decoding_functions
from openpyxl import Workbook, load_workbook

data_dir = '/Users/bioel/PycharmProjects/untitled4/ecephys_cache_dir'
manifest_path = os.path.join(data_dir, 'manifest.json')
cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
session_table = cache.get_session_table()

all_sessions_all = session_table.loc[session_table.session_type ==
                                     'brain_observatory_1.1'].index
# Toggle here in case some sessions raise errors (the session at index 28 is skipped): #
all_sessions_pre = all_sessions_all[0:28]
all_sessions_post = all_sessions_all[29:]

#######################################################
all_sessions = np.append(all_sessions_pre, all_sessions_post)

wb = Workbook()
SVM_acc = np.empty(len(all_sessions))
SNN_acc = np.empty(len(all_sessions))
DNN_acc = np.empty(len(all_sessions))
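# The accuracy arrays above are presumably filled by a per-session loop.
# A sketch of what that loop might look like; decode_session is a
# hypothetical helper standing in for whatever decoding_functions actually
# provides in the original project.
ws = wb.active
ws.append(['session_id', 'SVM_acc', 'SNN_acc', 'DNN_acc'])

for i, session_id in enumerate(all_sessions):
    session = cache.get_session_data(session_id)
    SVM_acc[i], SNN_acc[i], DNN_acc[i] = decoding_functions.decode_session(session)
    ws.append([int(session_id),
               float(SVM_acc[i]), float(SNN_acc[i]), float(DNN_acc[i])])

wb.save('decoding_accuracies.xlsx')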
Example 3
import os

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache


def getSummaryData(dataDirectory):
    print('Getting cache...')
    manifest_path = os.path.join(dataDirectory, "manifest.json")
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
    sessions = cache.get_session_table()
    return cache, sessions
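# Usage sketch for getSummaryData, assuming the cache directory below exists
# (or can be populated by the SDK on first download).
cache, sessions = getSummaryData('./ecephys_cache_dir')
print(sessions.head())
print(sessions.session_type.value_counts())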
Example 4
import os

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache


def get_allen_sessions(
        data_directory='/mnt/Data/Datasets/allen/ecephys_cache_dir/'):
    manifest_path = os.path.join(data_directory, "manifest.json")
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
    sessions = cache.get_session_table()
    return sessions, cache
Example 5

import numpy as np
import pandas as pd
import pprint
pp = pprint.PrettyPrinter(depth=10).pprint
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
op = pprint.PrettyPrinter(depth=11).pprint
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

# Cut nan regions
cutNaNRegions = True

# Get session and cache
print('Getting session data...')
cache = EcephysProjectCache.from_warehouse(
    manifest="./example_ecephys_project_cache/manifest.json")
sessions = cache.get_session_table()
session = cache.get_session_data(session_id=791319847)

# Print probe properties
print('Showing possible probes...')
channelInfo = cache.get_channels()
probeIDs = session.probes.index.values
for probe_id in probeIDs:
    probeChannels = channelInfo.loc[channelInfo.ecephys_probe_id ==
                                    probe_id].index
    structure_acronyms, intervals = session.channel_structure_intervals(
        probeChannels)
    print('Probe: %s : %s' % (probe_id, structure_acronyms))

# Select probe and get LFP data
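# A sketch of how this snippet might continue: pick one of the probe ids
# printed above (taking the first is an arbitrary, illustrative choice) and
# load its LFP with session.get_lfp, which returns an xarray.DataArray with
# 'time' and 'channel' dimensions.
probe_id = probeIDs[0]
lfp = session.get_lfp(probe_id)
print(lfp.dims, lfp.shape)

# Plot a two-second stretch of LFP from the probe's first channel
channel_id = lfp.channel.values[0]
lfp.sel(channel=channel_id, time=slice(10, 12)).plot()
plt.show()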
Example 6
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import platform
import scipy.stats as stats
import seaborn as sns
import signal as signal

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from allensdk.brain_observatory.ecephys import ecephys_session

# manifest_path is assumed to be defined elsewhere in the original script
cache = EcephysProjectCache.fixed(manifest=manifest_path)
"""
Neuropixels dataset from the Allen Institute
"""

# Load data.
platstring = platform.platform()

if "Darwin" in platstring:
    # OS X
    data_root = "/Volumes/Brain2019/"
elif "Windows" in platstring:
    # Windows (replace with the drive letter of USB drive)
    data_root = "E:/"
elif ("amzn1" in platstring):
    # then on AWS
    data_root = "/data/"
else:
    # then your own linux platform
    # placeholder path: point data_root at wherever the dataset is mounted locally
    data_root = "/media/Brain2019/"

def get_all_timeseries_to_df(sessionIDs = [], regions = [], datatype = "both"):
    """
    Purpose: retrieve LFP and/or spike time data from across sessions restricted by region

    Inputs:
    sessionIDs: a list of session IDs from the sessions object.
    If left empty ([]), the function will go through all the sessions, check whether a given session
    contains any of the regions specified in the 'regions' argument, and add each matching session
    to the list of session IDs
    regions: a list of regions (as labeled in the manual_structure_acronym column:
    ['None', 'TH', 'DG', 'CA', 'VISmma', 'MB', 'VISpm', 'VISp', 'VISl',
       'VISrl', 'VISam', 'VIS', 'VISal', 'VISmmp'])

    datatype: "lfp", "spikes", or "both". Defines whether to retrieve just the LFP data for the given sessionIDs and
    regions, just the spike data, or both. Defaults to "both"

    Returns:
    One pandas DataFrame for each datatype; if datatype is "both", assign a variable name to EACH returned DataFrame:
    LFP dataset contains:
        one row for each channelID,
        the timeseries array as a list,
        the channel's vertical, horizontal, and structure label,
        the probe ID that the channel belongs to,
        the mouse genotype,
        and the session ID
    Spikes contains:
        one row for each unit ID,
        unit's timeseries array as a list,
        the channel on which the unit was recorded,
        all of that channel's QC data,
        the channel's vertical, horizontal, and structure label,
        the probe ID that the channel belongs to,
        the mouse genotype,
        and the session ID

    Example:

    v1_lfps, v1_spikes = get_all_timeseries_to_df(sessionIDs = sessions.index[0:3], regions = ['VISp'], datatype = "both")

    will return the LFP and spike data for every channel recorded in VISp in the first three sessions listed in the sessions dataframe
    """

    #Get cache and info about all the sessions
    cache = EcephysProjectCache.fixed(manifest=manifest_path)
    sessions = cache.get_sessions()
    allchannelsinfo = cache.get_channels()
    allunitsinfo = cache.get_units()

    #If no session ID is passed into the list, find all sessions that contain the regions
    #and append that session ID to the sessionIDs list
    if len(sessionIDs) == 0:
        sessionIDs = []
        for i in np.arange(len(sessions.structure_acronyms)):
            sessionid = sessions.structure_acronyms.index[i]
            if any(elem in sessions.structure_acronyms[sessionid] for elem in regions):
                sessionIDs.append(sessionid)
    #Double check that the regions specified actually appear in the sessionIDs specified
    for sessionID in sessionIDs:
        for elem in regions:
            if elem not in sessions.structure_acronyms[sessionID]:
                print("Session {} does not contain region {}.".format(sessionID, elem))
    all_lfps_df = pd.DataFrame()
    all_spikes_df = pd.DataFrame()
    #Grab channel and unit info for each region
    for sessionID in sessionIDs:
        session_info = cache.get_session_data(sessionID)
        session_channels = session_info.channels
        session_probes = session_info.probes
        session_units = session_info.units
        for region in regions:
            region_lfps = {}
            region_spikes = {}
            region_lfps_df = pd.DataFrame()
            region_spikes_df = pd.DataFrame()
            region_channelinfo = session_channels[session_channels.manual_structure_acronym == region]
            region_units = session_units[session_units.peak_channel_id.isin(region_channelinfo.index)]
            #Depending on what type of data, grab the lfps and/or spike times
            if datatype == "both":
                session_spike_times = session_info.spike_times
                for probeid in session_channels.probe_id[session_channels.manual_structure_acronym == region].unique():
                    print('retrieving probe {} from session {} cache'.format(probeid, sessionID))
                    probe_lfp = session_info.get_lfp(probeid)
                    region_channels_lfp = probe_lfp.loc[dict(channel = probe_lfp.channel.isin(region_channelinfo.index))]
                    for chan in region_channels_lfp["channel"].values:
                        print('appending channel {} from probe {} for area {}'.format(chan, probeid, region))
                        region_lfps[chan] = region_channels_lfp.sel(channel=chan).values
                for unit in region_units.index:
                    print('appending unit {} from session {} for area {}'.format(unit, sessionID, region))
                    region_spikes[unit] = session_spike_times[unit]
            elif datatype == "lfp":
                for probeid in session_channels.probe_id[session_channels.manual_structure_acronym == region].unique():
                    print('retrieving probe {} from session {} cache'.format(probeid, sessionID))
                    probe_lfp = session_info.get_lfp(probeid)
                    region_channels_lfp = probe_lfp.loc[dict(channel = probe_lfp.channel.isin(region_channelinfo.index))]
                    for chan in region_channels_lfp["channel"].values:
                        print('appending channel {} from probe {} for area {}'.format(chan, probeid, region))
                        region_lfps[chan] = region_channels_lfp.sel(channel=chan).values
            elif datatype == "spikes":
                session_spike_times = session_info.spike_times
                for unit in region_units.index:
                    print('appending unit {} from session {} for area {}'.format(unit, sessionID, region))
                    region_spikes[unit] = session_spike_times[unit]
            #Build up the dataset after each region is added
            #lfp dataset
            print('putting all the lfps from region {} and session {} to the larger dataset if lfps were requested'.format(region, sessionID))
            region_lfps_df['channel_id'] = region_lfps.keys()
            region_lfps_df['lfp_timeseries'] = region_lfps.values()
            region_lfps_df = pd.merge(region_lfps_df,
              region_channelinfo.loc[:, 'manual_structure_acronym':'probe_id'],
              left_on = 'channel_id',
              right_on = region_channelinfo.index)
            region_lfps_df['sessionID'] = sessionID
            region_lfps_df['genotype'] = sessions.genotype[sessions.index == sessionID].unique()[0]
            #spike dataset
            print('putting all the spikes from region {} and session {} to the larger dataset if spikes were requested'.format(region, sessionID))
            region_spikes_df['unit_id'] = region_spikes.keys()
            region_spikes_df['spike_timeseries'] = region_spikes.values()
            region_spikes_df = pd.merge(region_spikes_df,
              region_units,
              left_on = 'unit_id',
              right_on = region_units.index)
            region_spikes_df['sessionID'] = sessionID
            region_spikes_df['genotype'] = sessions.genotype[sessions.index == sessionID].unique()[0]
            #append to the overarching datasets that aren't tied to specific session id or region
            all_lfps_df = all_lfps_df.append(region_lfps_df)
            all_spikes_df = all_spikes_df.append(region_spikes_df)
    if datatype == "both":
        return all_lfps_df, all_spikes_df
    elif datatype == "lfp":
        return all_lfps_df
    elif datatype == "spikes":
        return all_spikes_df
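# A short usage sketch following the docstring's example, restricted to a
# single session to limit download time; the region choice is illustrative.
v1_lfps, v1_spikes = get_all_timeseries_to_df(
    sessionIDs=[sessions.index[0]], regions=['VISp'], datatype="both")
print(v1_lfps.shape, v1_spikes.shape)
print(v1_lfps.head())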