def total_time(strain, mouse, day):
    """Compute how long a given mouse-day was recorded.

    Parameters
    ----------
    strain : int
    mouse : int
    day : int

    Returns
    -------
    The recorded duration of the mouse-day in seconds, i.e. the end time
    minus the start time reported by ``data.load_start_time_end_time``.
    """
    begin, finish = data.load_start_time_end_time(strain, mouse, day)
    duration = finish - begin
    return duration
def aggregate_interval(strain, mouse, feature, bin_width):
    """
    Aggregate the interval data based on n-minute time intervals, return a
    time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    feature: {"AS", "F", "M_AS", "M_IS", "W"}
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
    bin_width: int
        number of minutes of time interval for data aggregation,
        from 1 to 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n,
        indexed from '01/01/2014' at a frequency of ``bin_width`` minutes.
        Each value is the time (seconds) covered by the feature's
        intervals in that bin; for "AS" it is divided by the bin length,
        and for "F"/"W" each day is rescaled so that its bins sum to the
        day's total Food/Water taken from ``data.load_all_features``.

    Raises
    ------
    ValueError
        If any argument fails the input checks below.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError('Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError('Mouse value must be a non-negative integer')
    if feature not in INTERVAL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W"}')
    # Zero is rejected as well: it would divide by zero when the number of
    # bins per day is computed. The message text is kept unchanged for
    # callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # load data
    intervals = data.load_intervals(feature)
    mouse_data = intervals.loc[(intervals['strain'] == strain) &
                               (intervals['mouse'] == mouse)]

    # build data frame
    days = sorted(np.unique(mouse_data['day']))
    bin_count = int(24 * 60 / bin_width)
    bin_length = bin_width * 60  # bin size in seconds
    time_behaviour = np.repeat(0.0, bin_count * len(days))

    # The output holds one block of bin_count bins per *observed* day, so
    # the block is addressed by the day's position in `days`, not by the
    # day label itself (labels need not be contiguous or start at zero).
    for day_pos, j in enumerate(days):
        offset = day_pos * bin_count
        df = mouse_data.loc[mouse_data['day'] == j]
        day_start = data.load_start_time_end_time(strain, mouse, j)[0]
        start = np.asarray(df['start']) - day_start
        end = np.asarray(df['stop']) - day_start
        for start_time, end_time in zip(start, end):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                # interval lies entirely inside one bin
                time_behaviour[start_index + offset] += end_time - start_time
                continue
            # remainder of the first bin
            time_behaviour[start_index + offset] += (
                bin_length * (start_index + 1) - start_time)
            # bins fully covered by the interval (empty slice if none)
            time_behaviour[
                start_index + offset + 1:end_index + offset] += bin_length
            # part of the last bin; a zero-length tail is skipped so that an
            # interval ending exactly on the final bin boundary does not
            # index one past the end of the array
            tail = end_time % bin_length
            if tail != 0:
                time_behaviour[end_index + offset] += tail

    if feature == 'F' or feature == 'W':
        all_feature = data.load_all_features()
        group = all_feature[[
            "strain", "mouse", "day", "hour", "Food", "Water"
        ]].groupby(["strain", "mouse", "day"]).sum()
        group = group.reset_index()
        mouse_data = group.loc[(group['strain'] == strain) &
                               (group['mouse'] == mouse)].copy()
        # days are renumbered by position, matching the block layout above
        mouse_data.loc[:, 'day'] = np.arange(len(mouse_data))
        column = 'Food' if feature == 'F' else 'Water'
        for i in mouse_data['day'].astype('int'):
            amount = float(
                mouse_data.loc[mouse_data['day'] == i, column].iloc[0])
            # basic slicing gives a view, so the in-place operations below
            # update time_behaviour itself
            day_bins = time_behaviour[(bin_count * i):(bin_count * (i + 1))]
            # NOTE(review): a day without any recorded interval has a zero
            # sum here and ends up as NaN, as before -- confirm intended.
            day_bins /= sum(day_bins)
            day_bins *= amount

    if feature == 'AS':
        # convert seconds of activity per bin into a fraction of the bin
        time_behaviour /= bin_length

    ts = pd.Series(time_behaviour,
                   index=pd.date_range('01/01/2014',
                                       periods=len(time_behaviour),
                                       freq=str(bin_width) + 'min'))
    return ts
def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data based on n-minute time intervals, return a
    time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes of time interval for data aggregation,
        from 1 to 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n,
        indexed from '01/01/2014' at a frequency of ``bin_width`` minutes.
        Each value is the summed distance between consecutive movement
        samples attributed to that bin.

    Raises
    ------
    ValueError
        If any argument fails the input checks below.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError('Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError('Mouse value must be a non-negative integer')
    # Zero is rejected as well: it would divide by zero when the number of
    # bins per day is computed. The message text is kept unchanged for
    # callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[(intervals['strain'] == strain) &
                               (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # build data frame
    bin_count = int(24 * 60 / bin_width)
    bin_length = bin_width * 60  # bin size in seconds
    time_movements = np.repeat(0.0, bin_count * len(days))

    # The output holds one block of bin_count bins per *observed* day, so
    # the block is addressed by the day's position in `days`, not by the
    # day label itself (labels need not be contiguous or start at zero).
    for day_pos, j in enumerate(days):
        offset = day_pos * bin_count
        M = data.load_movement(strain, mouse, day=int(j))
        times = M["t"].values
        xy = M[["x", "y"]].values
        # Euclidean distance between each pair of consecutive samples
        dist = np.linalg.norm(xy[1:] - xy[:-1], axis=1)
        day_start = data.load_start_time_end_time(strain, mouse, j)[0]
        start = times[:-1] - day_start
        end = times[1:] - day_start
        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
                continue
            # The step crosses a bin boundary: the end bin receives the
            # share of the distance proportional to the time spent in it,
            # the start bin receives the rest.
            # NOTE(review): when a step spans more than two bins the bins
            # in between receive nothing and the start bin keeps their
            # share -- confirm this is the intended attribution.
            tail_share = (end_time % bin_length /
                          (end_time - start_time) * step)
            # a zero share is skipped so that a step ending exactly on the
            # final bin boundary does not index one past the array end
            if tail_share != 0:
                time_movements[end_index + offset] += tail_share
            time_movements[start_index + offset] += step - tail_share

    ts = pd.Series(time_movements,
                   index=pd.date_range('01/01/2014',
                                       periods=len(time_movements),
                                       freq=str(bin_width) + 'min'))
    return ts
def test_start_time_end_time_loader():
    """Check the start/end time loader for strain 0, mouse 0, day 0."""
    loaded = data.load_start_time_end_time(0, 0, 0)
    # exactly two values are expected: start time and end time
    assert len(loaded) == 2
    begin = loaded[0]
    finish = loaded[1]
    # the recording must end after it starts
    assert finish - begin > 0
from mousestyles.data.utils import (pull_locom_tseries_subset,
                                    total_time_rectangle_bins)

# Make position density for the first mouse on its first day
# (strain 0, mouse 0, day 0).
# from mousestyles.data import load_movement
# NOTE(review): load_movement, load_start_time_end_time, np and plt are not
# imported in this snippet -- presumably they are already in scope from
# earlier code; confirm before running it standalone.
M = load_movement(0, 0, 0)
CT, CX, CY = M['t'], M['x'], M['y']

# mask for HB Move Events
# NOTE(review): the 'isHB' column is bound to CT_NHB and its negation to
# CT_HB, so the two names look swapped; neither is used further down.
CT_NHB = M['isHB']
CT_HB = ~CT_NHB

start_time, stop_time = load_start_time_end_time(0, 0, 0)

# Cage boundaries
XLower, XUpper = -16.25, 3.75
YLower, YUpper = 1.0, 43.0

# Grid resolution
xbins, ybins = 12, 24

# Stack time and coordinates row-wise (this rebinds M to the stacked array)
M = np.vstack([CT, CX, CY])
pos_subset = pull_locom_tseries_subset(M, start_time, stop_time)
bin_times = total_time_rectangle_bins(
    pos_subset,
    xlims=(XLower, XUpper),
    ylims=(YLower, YUpper),
    xbins=xbins,
    ybins=ybins)

# Divide by the grand total so the grid entries sum to one, then plot it
position_pdf = bin_times / bin_times.sum()
plt.matshow(position_pdf)
def aggregate_interval(strain, mouse, feature, bin_width):
    """
    Aggregate the interval data based on n-minute time intervals, return a
    time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    feature: {"AS", "F", "M_AS", "M_IS", "W"}
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
    bin_width: int
        number of minutes of time interval for data aggregation,
        from 1 to 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n,
        indexed from '01/01/2014' at a frequency of ``bin_width`` minutes.
        Each value is the time (seconds) covered by the feature's
        intervals in that bin; for "AS" it is divided by the bin length,
        and for "F"/"W" each day is rescaled so that its bins sum to the
        day's total Food/Water taken from ``data.load_all_features``.

    Raises
    ------
    ValueError
        If any argument fails the input checks below.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError(
            'Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError(
            'Mouse value must be a non-negative integer')
    if feature not in INTERVAL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W"}')
    # Zero is rejected as well: it would divide by zero when the number of
    # bins per day is computed. The message text is kept unchanged for
    # callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # load data
    intervals = data.load_intervals(feature)
    mouse_data = intervals.loc[
        (intervals['strain'] == strain) & (intervals['mouse'] == mouse)]

    # build data frame
    days = sorted(np.unique(mouse_data['day']))
    bin_count = int(24 * 60 / bin_width)
    bin_length = bin_width * 60  # bin size in seconds
    time_behaviour = np.repeat(0.0, bin_count * len(days))

    # The output holds one block of bin_count bins per *observed* day, so
    # the block is addressed by the day's position in `days`, not by the
    # day label itself (labels need not be contiguous or start at zero).
    for day_pos, j in enumerate(days):
        offset = day_pos * bin_count
        df = mouse_data.loc[mouse_data['day'] == j]
        day_start = data.load_start_time_end_time(strain, mouse, j)[0]
        start = np.asarray(df['start']) - day_start
        end = np.asarray(df['stop']) - day_start
        for start_time, end_time in zip(start, end):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                # interval lies entirely inside one bin
                time_behaviour[start_index + offset] += end_time - start_time
                continue
            # remainder of the first bin
            time_behaviour[start_index + offset] += (
                bin_length * (start_index + 1) - start_time)
            # bins fully covered by the interval (empty slice if none)
            time_behaviour[
                start_index + offset + 1:end_index + offset] += bin_length
            # part of the last bin; a zero-length tail is skipped so that an
            # interval ending exactly on the final bin boundary does not
            # index one past the end of the array
            tail = end_time % bin_length
            if tail != 0:
                time_behaviour[end_index + offset] += tail

    if feature == 'F' or feature == 'W':
        all_feature = data.load_all_features()
        group = all_feature[
            ["strain", "mouse", "day", "hour", "Food", "Water"]].groupby(
            ["strain", "mouse", "day"]).sum()
        group = group.reset_index()
        mouse_data = group.loc[(group['strain'] == strain) &
                               (group['mouse'] == mouse)].copy()
        # days are renumbered by position, matching the block layout above
        mouse_data.loc[:, 'day'] = np.arange(len(mouse_data))
        column = 'Food' if feature == 'F' else 'Water'
        for i in mouse_data['day'].astype('int'):
            amount = float(
                mouse_data.loc[mouse_data['day'] == i, column].iloc[0])
            # basic slicing gives a view, so the in-place operations below
            # update time_behaviour itself
            day_bins = time_behaviour[(bin_count * i):(bin_count * (i + 1))]
            # NOTE(review): a day without any recorded interval has a zero
            # sum here and ends up as NaN, as before -- confirm intended.
            day_bins /= sum(day_bins)
            day_bins *= amount

    if feature == 'AS':
        # convert seconds of activity per bin into a fraction of the bin
        time_behaviour /= bin_length

    ts = pd.Series(time_behaviour, index=pd.date_range(
        '01/01/2014', periods=len(time_behaviour),
        freq=str(bin_width) + 'min'))
    return ts
def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data based on n-minute time intervals, return a
    time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes of time interval for data aggregation,
        from 1 to 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n,
        indexed from '01/01/2014' at a frequency of ``bin_width`` minutes.
        Each value is the summed distance between consecutive movement
        samples attributed to that bin.

    Raises
    ------
    ValueError
        If any argument fails the input checks below.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError(
            'Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError(
            'Mouse value must be a non-negative integer')
    # Zero is rejected as well: it would divide by zero when the number of
    # bins per day is computed. The message text is kept unchanged for
    # callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[
        (intervals['strain'] == strain) & (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # build data frame
    bin_count = int(24 * 60 / bin_width)
    bin_length = bin_width * 60  # bin size in seconds
    time_movements = np.repeat(0.0, bin_count * len(days))

    # The output holds one block of bin_count bins per *observed* day, so
    # the block is addressed by the day's position in `days`, not by the
    # day label itself (labels need not be contiguous or start at zero).
    for day_pos, j in enumerate(days):
        offset = day_pos * bin_count
        M = data.load_movement(strain, mouse, day=int(j))
        times = M["t"].values
        xy = M[["x", "y"]].values
        # Euclidean distance between each pair of consecutive samples
        dist = np.linalg.norm(xy[1:] - xy[:-1], axis=1)
        day_start = data.load_start_time_end_time(strain, mouse, j)[0]
        start = times[:-1] - day_start
        end = times[1:] - day_start
        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
                continue
            # The step crosses a bin boundary: the end bin receives the
            # share of the distance proportional to the time spent in it,
            # the start bin receives the rest.
            # NOTE(review): when a step spans more than two bins the bins
            # in between receive nothing and the start bin keeps their
            # share -- confirm this is the intended attribution.
            tail_share = (end_time % bin_length /
                          (end_time - start_time) * step)
            # a zero share is skipped so that a step ending exactly on the
            # final bin boundary does not index one past the array end
            if tail_share != 0:
                time_movements[end_index + offset] += tail_share
            time_movements[start_index + offset] += step - tail_share

    ts = pd.Series(time_movements, index=pd.date_range(
        '01/01/2014', periods=len(time_movements),
        freq=str(bin_width) + 'min'))
    return ts