Python load_intervals 예제들, mousestyles.data.load_intervals Python 예제들

예제 #1

0

파일 보기

def test_feature_load_input():
    """Check that the data loaders reject bad arguments with the expected
    exception type and message."""
    # Each case: (exception type, loader, positional args, expected message)
    bad_calls = [
        (ValueError, data.load_intervals, ('A',),
         'Input value must be one of {"AS", "F", "IS", "M_AS", "M_IS", "W"}'),
        (ValueError, data.load_movement, (-1, 0, 0),
         "Input values need to be nonnegative"),
        (TypeError, data.load_movement, (0.0, 0, 0),
         "Input values need to be integer"),
    ]
    for error_type, loader, args, expected_message in bad_calls:
        with pytest.raises(error_type) as excinfo:
            loader(*args)
        assert excinfo.value.args[0] == expected_message

예제 #2

0

파일 보기

def test_create_collapse_intervals1():
    """Collapsed food intervals from the API must match a manual computation.

    Uses activity type 'F' for strain=0, mouse=0, day=0 and a gap
    threshold of 0.001 on both paths.
    """
    # Path under test: build the intervals, then collapse nearby ones
    api_intervals = metrics.create_intervals('F', strain=0, mouse=0, day=0)
    api_collapsed = behavior.create_collapsed_intervals(
        api_intervals, bout_threshold=0.001)
    api_total = sum(api_collapsed)

    # Reference path: filter the raw table by hand and connect the gaps
    mouse_day_filter = 'strain == 0 and mouse == 0 and \
                                         day == 0'
    raw_table = data.load_intervals('F')
    bounds = raw_table.query(mouse_day_filter)[['start', 'stop']]
    manual_collapsed = intervals.Intervals(bounds).copy().connect_gaps(
        eps=0.001)
    manual_total = sum(manual_collapsed)

    # Both totals must agree on every value
    assert (api_total == manual_total).all()

예제 #3

0

파일 보기

파일: test_behavior.py 프로젝트: YZhouEntheos/mousestyles

def test_create_collapse_intervals1():
    """Collapsed food intervals from the API must match a manual computation.

    Uses activity type 'F' for strain=0, mouse=0, day=0 and a gap
    threshold of 0.001 on both paths.
    """
    # Path under test: build the intervals, then collapse nearby ones
    api_intervals = metrics.create_intervals('F', strain=0, mouse=0, day=0)
    api_collapsed = behavior.create_collapsed_intervals(
        api_intervals, bout_threshold=0.001)
    api_total = sum(api_collapsed)

    # Reference path: filter the raw table by hand and connect the gaps
    mouse_day_filter = 'strain == 0 and mouse == 0 and \
                                         day == 0'
    raw_table = data.load_intervals('F')
    bounds = raw_table.query(mouse_day_filter)[['start', 'stop']]
    manual_collapsed = intervals.Intervals(bounds).copy().connect_gaps(
        eps=0.001)
    manual_total = sum(manual_collapsed)

    # Both totals must agree on every value
    assert (api_total == manual_total).all()

예제 #4

0

파일 보기

def test_create_intervals():
    """Intervals built by ``metrics.create_intervals`` must match a manual
    query of the raw table for activity 'F', strain=0, mouse=0, day=0."""

    # Checking intervals are being correctly loaded for a particular
    # activity type and mouse-day
    load_ints1 = metrics.create_intervals('F', strain=0, mouse=0, day=0)

    # Calculate the sum of the loaded intervals
    ints1_sum = sum(load_ints1)

    # We now do the above calculations using a manual query
    l_ints_manual = data.load_intervals('F')
    l_ints_manual = l_ints_manual.query('strain == 0 and mouse == 0 and \
                                         day == 0')[['start', 'stop']]
    ints_manual = intervals.Intervals(l_ints_manual)
    ints_manual_sum = sum(ints_manual)

    # Compare the sums value by value. Reducing each side with .all() before
    # comparing would only compare two booleans, so different sums could
    # still pass.
    assert (ints1_sum == ints_manual_sum).all()

예제 #5

0

파일 보기

파일: test_behavior.py 프로젝트: YZhouEntheos/mousestyles

def test_create_intervals():
    """Intervals built by ``metrics.create_intervals`` must match a manual
    query of the raw table for activity 'F', strain=0, mouse=0, day=0."""

    # Checking intervals are being correctly loaded for a particular
    # activity type and mouse-day
    load_ints1 = metrics.create_intervals(
        'F', strain=0, mouse=0, day=0)

    # Calculate the sum of the loaded intervals
    ints1_sum = sum(load_ints1)

    # We now do the above calculations using a manual query
    l_ints_manual = data.load_intervals('F')
    l_ints_manual = l_ints_manual.query('strain == 0 and mouse == 0 and \
                                         day == 0')[['start', 'stop']]
    ints_manual = intervals.Intervals(l_ints_manual)
    ints_manual_sum = sum(ints_manual)

    # Compare the sums value by value. Reducing each side with .all() before
    # comparing would only compare two booleans, so different sums could
    # still pass.
    assert (ints1_sum == ints_manual_sum).all()

예제 #6

0

파일 보기

def create_intervals(activity_type, strain, mouse, day):
    r"""Build the intervals object of one activity type for one mouse-day.

    Parameters
    ----------
    activity_type : str
        Activity type handed to ``data.load_intervals``, one of
        {"AS", "F", "IS", "M_AS", "M_IS", "W"}
    strain : int
        Integer representing the strain of the mouse
    mouse : int
        Integer representing the specific mouse
    day : int
        Integer representing the day to select

    Returns
    -------
    intervals.Intervals
        Intervals built from the ``start`` and ``stop`` columns of the rows
        selected for the given strain, mouse and day.

    Examples
    --------
    >>> ints = create_intervals('F', strain=0, mouse=0, day=0)
    >>> print(sum(ints))  # doctest: +SKIP
    """
    # Full table for the activity, narrowed down to the requested mouse-day
    activity_table = data.load_intervals(activity_type)
    mouse_day_rows = _select_strain_mouse_day_in_data_frame(
        activity_table, strain, mouse, day)

    # Only the (start, stop) bounds are needed to build the intervals object
    return intervals.Intervals(mouse_day_rows[['start', 'stop']])

예제 #7

0

파일 보기

파일: metrics.py 프로젝트: FengshiNiu/mousestyles

def create_intervals(activity_type, strain, mouse, day):
    r"""Build the intervals object of one activity type for one mouse-day.

    Parameters
    ----------
    activity_type : str
        Activity type handed to ``data.load_intervals``, one of
        {"AS", "F", "IS", "M_AS", "M_IS", "W"}
    strain : int
        Integer representing the strain of the mouse
    mouse : int
        Integer representing the specific mouse
    day : int
        Integer representing the day to select

    Returns
    -------
    intervals.Intervals
        Intervals built from the ``start`` and ``stop`` columns of the rows
        selected for the given strain, mouse and day.

    Examples
    --------
    >>> ints = create_intervals('F', strain=0, mouse=0, day=0)
    >>> print(sum(ints))  # doctest: +SKIP
    """
    # Full table for the activity, narrowed down to the requested mouse-day
    activity_table = data.load_intervals(activity_type)
    mouse_day_rows = _select_strain_mouse_day_in_data_frame(
        activity_table, strain, mouse, day)

    # Only the (start, stop) bounds are needed to build the intervals object
    return intervals.Intervals(mouse_day_rows[['start', 'stop']])

예제 #8

0

파일 보기

def test_create_intervals2():
    """Water intervals from the API must match a manual query.

    Uses activity type 'W' for strain=0, mouse=1, day=1.
    """
    # Path under test
    api_total = sum(metrics.create_intervals('W', strain=0, mouse=1, day=1))

    # Reference path: filter the raw table by hand
    mouse_day_filter = 'strain == 0 and mouse == 1 and \
                                         day == 1'
    raw_table = data.load_intervals('W')
    bounds = raw_table.query(mouse_day_filter)[['start', 'stop']]
    manual_total = sum(intervals.Intervals(bounds))

    # Both totals must agree on every value
    assert (api_total == manual_total).all()

예제 #9

0

파일 보기

파일: test_behavior.py 프로젝트: YZhouEntheos/mousestyles

def test_create_intervals2():
    """Water intervals from the API must match a manual query.

    Uses activity type 'W' for strain=0, mouse=1, day=1.
    """
    # Path under test
    api_total = sum(metrics.create_intervals('W', strain=0, mouse=1, day=1))

    # Reference path: filter the raw table by hand
    mouse_day_filter = 'strain == 0 and mouse == 1 and \
                                         day == 1'
    raw_table = data.load_intervals('W')
    bounds = raw_table.query(mouse_day_filter)[['start', 'stop']]
    manual_total = sum(intervals.Intervals(bounds))

    # Both totals must agree on every value
    assert (api_total == manual_total).all()

예제 #10

0

파일 보기

def aggregate_interval(strain, mouse, feature, bin_width):
    """
    Aggregate the interval data of one mouse into n-minute time
    bins, return a time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    feature: str
        must be a member of the module-level ``INTERVAL_FEATURES``; the
        documented values are
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
    bin_width: int
        number of minutes per bin, between 1 and 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a time series of length (#days) * int(24 * 60 / bin_width), indexed
        from 01/01/2014 in steps of ``bin_width`` minutes. For "F" and "W"
        each day's bins are rescaled to sum to that day's total from
        ``data.load_all_features``; for "AS" each bin is divided by the bin
        length; otherwise bins hold the time covered by the intervals.

    Raises
    ------
    ValueError
        if any argument fails the checks above.
    """
    # Input Check

    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError('Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError('Mouse value must be a non-negative integer')
    if feature not in INTERVAL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W"}')
    # A zero width is rejected as well: it would divide by zero when the
    # number of bins per day is computed below.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # load data
    intervals = data.load_intervals(feature)
    mouse_data = intervals.loc[(intervals['strain'] == strain)
                               & (intervals['mouse'] == mouse)]

    # build the flat (day-major) array of bins
    days = sorted(np.unique(mouse_data['day']))
    bin_count = int(24 * 60 / bin_width)
    # bin width in the time unit of the interval data
    # (presumably seconds -- TODO confirm)
    bin_length = bin_width * 60
    time_behaviour = np.repeat(0.0, bin_count * len(days))

    for j in days:
        df = mouse_data.loc[mouse_data['day'] == j]
        start_end = data.load_start_time_end_time(strain, mouse, j)
        # times relative to the start of this day's recording
        start = np.asarray(df['start']) - start_end[0]
        end = np.asarray(df['stop']) - start_end[0]
        # NOTE(review): the day value itself is used as the block offset,
        # which assumes days are numbered 0..len(days)-1 -- confirm.
        offset = j * bin_count

        for start_time, end_time in zip(start, end):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                # interval lies inside a single bin
                time_behaviour[start_index + offset] += end_time - start_time
            else:
                # first bin: from the interval start to the end of that bin
                time_behaviour[start_index + offset] += (
                    bin_length * (start_index + 1) - start_time)
                # last bin: from the start of that bin to the interval end
                time_behaviour[end_index + offset] += end_time % bin_length
                # bins strictly in between are fully covered (the slice is
                # empty when the interval touches only two adjacent bins)
                time_behaviour[start_index + offset + 1:
                               end_index + offset] += bin_length

    if feature == 'F' or feature == 'W':
        all_feature = data.load_all_features()
        group = all_feature[[
            "strain", "mouse", "day", "hour", "Food", "Water"
        ]].groupby(["strain", "mouse", "day"]).sum()
        group = group.reset_index()
        mouse_data = group.loc[(group['strain'] == strain)
                               & (group['mouse'] == mouse)]
        amount_column = 'Food' if feature == 'F' else 'Water'
        # Daily totals are matched to day blocks by position.
        for i, amount in enumerate(np.asarray(mouse_data[amount_column])):
            day_bins = slice(bin_count * i, bin_count * (i + 1))
            # turn the time per bin into a share of the day, then scale the
            # shares by the amount consumed on that day
            time_behaviour[day_bins] /= sum(time_behaviour[day_bins])
            time_behaviour[day_bins] *= float(amount)
    if feature == 'AS':
        # fraction of each bin covered by the intervals
        time_behaviour /= bin_length

    ts = pd.Series(time_behaviour,
                   index=pd.date_range('01/01/2014',
                                       periods=len(time_behaviour),
                                       freq=str(bin_width) + 'min'))

    return ts

예제 #11

0

파일 보기

def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data of one mouse into n-minute
    time bins, return a time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes per bin, between 1 and 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a time series of length (#days) * int(24 * 60 / bin_width), indexed
        from 01/01/2014 in steps of ``bin_width`` minutes, holding the
        distance travelled in each bin

    Raises
    ------
    ValueError
        if any argument fails the checks above.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError('Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError('Mouse value must be a non-negative integer')
    # A zero width is rejected as well: it would divide by zero when the
    # number of bins per day is computed below.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[(intervals['strain'] == strain)
                               & (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # build the flat (day-major) array of bins
    bin_count = int(24 * 60 / bin_width)
    # bin width in the time unit of the movement data
    # (presumably seconds -- TODO confirm)
    bin_length = bin_width * 60
    time_movements = np.repeat(0.0, bin_count * len(days))

    for j in days:
        M = data.load_movement(strain, mouse, day=int(j))
        timestamps = M["t"].values
        positions = M[["x", "y"]].values
        # Euclidean length of every step between consecutive samples
        dist = np.linalg.norm(positions[1:] - positions[0:-1], axis=1)

        start_end = data.load_start_time_end_time(strain, mouse, j)
        # step start/end times relative to the start of this day's recording
        start = timestamps[0:-1] - start_end[0]
        end = timestamps[1:] - start_end[0]
        # NOTE(review): the day value itself is used as the block offset,
        # which assumes days are numbered 0..len(days)-1 -- confirm.
        offset = j * bin_count

        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
            else:
                # Split the step between its first and last bin in proportion
                # to the time spent in the last bin.
                # NOTE(review): assumes a step spans at most two bins.
                tail = end_time % bin_length / (end_time - start_time) * step
                time_movements[end_index + offset] += tail
                time_movements[start_index + offset] += step - tail

    ts = pd.Series(time_movements,
                   index=pd.date_range('01/01/2014',
                                       periods=len(time_movements),
                                       freq=str(bin_width) + 'min'))

    return ts

예제 #12

0

파일 보기

파일: __init__.py 프로젝트: aksam-ahmad/mousestyles

def _is_int_or_float(value):
    """Return True for int or float values; bool is not accepted."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _mouse_day_bounds(frame, strain, mouse, day):
    """Return columns 3 and 4 of the rows of `frame` for one mouse day."""
    keep = ((frame['strain'] == strain) & (frame['mouse'] == mouse) &
            (frame['day'] == day))
    return np.array(frame[keep].iloc[:, 3:5])


def _merge_close_intervals(bounds, combined_gap):
    """Merge consecutive rows of `bounds` whose gap is at most combined_gap.

    Returns the (starts, stops) arrays of the merged intervals; both are
    empty when `bounds` has no rows.
    """
    n = bounds.shape[0]
    if n == 0:
        empty = np.array([], dtype=float)
        return empty, empty
    # positions after which a new merged interval begins
    breaks = np.where(bounds[1:, 0] - bounds[0:n - 1, 1] > combined_gap)[0]
    starts = bounds[np.append(0, breaks + 1), 0]
    stops = bounds[np.append(breaks, n - 1), 1]
    return starts, stops


def _strictly_inside_any(points, starts, stops):
    """Boolean mask of the `points` lying strictly inside any interval."""
    mask = np.zeros(len(points), dtype=bool)
    for low, high in zip(starts, stops):
        mask |= (points > low) & (points < high)
    return mask


def create_time_matrix(combined_gap=4, time_gap=1, days_index=137):
    r"""
    Return a time matrix for estimating the MLE probability.
    The rows are the mouse days found in the 'AS' intervals. The columns
    are time points in a day. The data are the mouse activity at that time.
    0 represents IS, 1 represents eating, 2 represents
    drinking, 3 represents other activity in AS.

    Parameters
    ----------
    combined_gap: nonnegative float or int
        The threshold for combining small intervals. If the next start time
        minus the last stop time is not larger than combined_gap, the two
        intervals are combined.
    time_gap: positive float or int
        The step between the time points used as columns
    days_index: nonnegative int
        Index of the last mouse day to process; mouse days 0 to days_index
        (inclusive) are filled in, the remaining rows stay 0.

    Returns
    -------
    time: Pandas.DataFrame
        a matrix representing the activity for a certain
        mouse day and a certain time, preceded by the columns
        'strain', 'mouse' and 'day'.

    Raises
    ------
    ValueError
        if any argument has the wrong type or is out of range.

    Examples
    --------
    >>> time = create_time_matrix(combined_gap=4, time_gap=1).iloc[0, 0:10]
    >>> time  # doctest: +SKIP
    strain    0
    mouse     0
    day       0
    48007     0
    48008     0
    48009     0
    48010     0
    48011     0
    48012     0
    48013     0
    Name: 0, dtype: float64
    """
    # check all the inputs
    condition_combined_gap = (_is_int_or_float(combined_gap) and
                              combined_gap >= 0)
    condition_time_gap = _is_int_or_float(time_gap) and time_gap > 0
    condition_days_index = (isinstance(days_index, int) and
                            not isinstance(days_index, bool) and
                            days_index >= 0)
    if not condition_time_gap:
        raise ValueError("time_gap should be nonnegative int or float")
    if not condition_combined_gap:
        raise ValueError("combined_gap should be nonnegative int or float")
    if not condition_days_index:
        raise ValueError("days_index should be nonnegative int")

    intervals_AS = data.load_intervals('AS')
    intervals_F = data.load_intervals('F')
    intervals_W = data.load_intervals('W')
    intervals_IS = data.load_intervals('IS')
    # one row per distinct (strain, mouse, day)
    days = np.array(intervals_AS.iloc[:, 0:3].drop_duplicates().
                    reset_index(drop=True))
    # set time range for columns
    initial = int(min(intervals_IS['stop']))
    end = int(max(intervals_IS['stop'])) + 1
    columns = np.arange(initial, end + 1, time_gap)
    # result matrix
    matrix = np.zeros((days.shape[0], len(columns)))
    # we set 0 as IS, 1 as F, 2 as W, 3 as Others
    for i in range(min(days.shape[0], days_index + 1)):
        strain, mouse, day = days[i, 0], days[i, 1], days[i, 2]
        start_W, stop_W = _merge_close_intervals(
            _mouse_day_bounds(intervals_W, strain, mouse, day), combined_gap)
        start_F, stop_F = _merge_close_intervals(
            _mouse_day_bounds(intervals_F, strain, mouse, day), combined_gap)
        start_AS, stop_AS = _merge_close_intervals(
            _mouse_day_bounds(intervals_AS, strain, mouse, day), combined_gap)

        in_AS = _strictly_inside_any(columns, start_AS, stop_AS)
        in_F = _strictly_inside_any(columns, start_F, stop_F)
        in_W = _strictly_inside_any(columns, start_W, stop_W)
        # Within an active state food takes precedence over water, so it is
        # written last.
        row = matrix[i]
        row[in_AS] = 3  # others
        row[in_AS & in_W] = 2  # water
        row[in_AS & in_F] = 1  # food
        # report the fraction of mouse days processed so far
        print(i / days.shape[0], 'has been processed')
    # format data frame
    matrix = pd.DataFrame(matrix, columns=columns)
    title = pd.DataFrame(days, columns=['strain', 'mouse', 'day'])
    time_matrix = pd.concat([title, matrix], axis=1)
    return time_matrix

예제 #13

0

파일 보기

def test_intervals_loader():
    """The 'AS' interval table must have 1343 rows and 5 columns."""
    expected_shape = (1343, 5)
    active_state_table = data.load_intervals('AS')
    assert active_state_table.shape == expected_shape

예제 #14

0

파일 보기

def _is_int_or_float(value):
    """Return True for int or float values; bool is not accepted."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _mouse_day_bounds(frame, strain, mouse, day):
    """Return columns 3 and 4 of the rows of `frame` for one mouse day."""
    keep = ((frame['strain'] == strain) & (frame['mouse'] == mouse) &
            (frame['day'] == day))
    return np.array(frame[keep].iloc[:, 3:5])


def _merge_close_intervals(bounds, combined_gap):
    """Merge consecutive rows of `bounds` whose gap is at most combined_gap.

    Returns the (starts, stops) arrays of the merged intervals; both are
    empty when `bounds` has no rows.
    """
    n = bounds.shape[0]
    if n == 0:
        empty = np.array([], dtype=float)
        return empty, empty
    # positions after which a new merged interval begins
    breaks = np.where(bounds[1:, 0] - bounds[0:n - 1, 1] > combined_gap)[0]
    starts = bounds[np.append(0, breaks + 1), 0]
    stops = bounds[np.append(breaks, n - 1), 1]
    return starts, stops


def _strictly_inside_any(points, starts, stops):
    """Boolean mask of the `points` lying strictly inside any interval."""
    mask = np.zeros(len(points), dtype=bool)
    for low, high in zip(starts, stops):
        mask |= (points > low) & (points < high)
    return mask


def create_time_matrix(combined_gap=4, time_gap=1, days_index=137):
    r"""
    Return a time matrix for estimating the MLE probability.
    The rows are the mouse days found in the 'AS' intervals. The columns
    are time points in a day. The data are the mouse activity at that time.
    0 represents IS, 1 represents eating, 2 represents
    drinking, 3 represents other activity in AS.

    Parameters
    ----------
    combined_gap: nonnegative float or int
        The threshold for combining small intervals. If the next start time
        minus the last stop time is not larger than combined_gap, the two
        intervals are combined.
    time_gap: positive float or int
        The step between the time points used as columns
    days_index: nonnegative int
        Index of the last mouse day to process; mouse days 0 to days_index
        (inclusive) are filled in, the remaining rows stay 0.

    Returns
    -------
    time: Pandas.DataFrame
        a matrix representing the activity for a certain
        mouse day and a certain time, preceded by the columns
        'strain', 'mouse' and 'day'.

    Raises
    ------
    ValueError
        if any argument has the wrong type or is out of range.

    Examples
    --------
    >>> time = create_time_matrix(combined_gap=4, time_gap=1).iloc[0, 0:10]
    >>> time  # doctest: +SKIP
    strain    0
    mouse     0
    day       0
    48007     0
    48008     0
    48009     0
    48010     0
    48011     0
    48012     0
    48013     0
    Name: 0, dtype: float64
    """
    # check all the inputs
    condition_combined_gap = (_is_int_or_float(combined_gap) and
                              combined_gap >= 0)
    condition_time_gap = _is_int_or_float(time_gap) and time_gap > 0
    condition_days_index = (isinstance(days_index, int) and
                            not isinstance(days_index, bool) and
                            days_index >= 0)
    if not condition_time_gap:
        raise ValueError("time_gap should be nonnegative int or float")
    if not condition_combined_gap:
        raise ValueError("combined_gap should be nonnegative int or float")
    if not condition_days_index:
        raise ValueError("days_index should be nonnegative int")

    intervals_AS = data.load_intervals('AS')
    intervals_F = data.load_intervals('F')
    intervals_W = data.load_intervals('W')
    intervals_IS = data.load_intervals('IS')
    # one row per distinct (strain, mouse, day)
    days = np.array(
        intervals_AS.iloc[:, 0:3].drop_duplicates().reset_index(drop=True))
    # set time range for columns
    initial = int(min(intervals_IS['stop']))
    end = int(max(intervals_IS['stop'])) + 1
    columns = np.arange(initial, end + 1, time_gap)
    # result matrix
    matrix = np.zeros((days.shape[0], len(columns)))
    # we set 0 as IS, 1 as F, 2 as W, 3 as Others
    for i in range(min(days.shape[0], days_index + 1)):
        strain, mouse, day = days[i, 0], days[i, 1], days[i, 2]
        start_W, stop_W = _merge_close_intervals(
            _mouse_day_bounds(intervals_W, strain, mouse, day), combined_gap)
        start_F, stop_F = _merge_close_intervals(
            _mouse_day_bounds(intervals_F, strain, mouse, day), combined_gap)
        start_AS, stop_AS = _merge_close_intervals(
            _mouse_day_bounds(intervals_AS, strain, mouse, day), combined_gap)

        in_AS = _strictly_inside_any(columns, start_AS, stop_AS)
        in_F = _strictly_inside_any(columns, start_F, stop_F)
        in_W = _strictly_inside_any(columns, start_W, stop_W)
        # Within an active state food takes precedence over water, so it is
        # written last.
        row = matrix[i]
        row[in_AS] = 3  # others
        row[in_AS & in_W] = 2  # water
        row[in_AS & in_F] = 1  # food
        # report the fraction of mouse days processed so far
        print(i / days.shape[0], 'has been processed')
    # format data frame
    matrix = pd.DataFrame(matrix, columns=columns)
    title = pd.DataFrame(days, columns=['strain', 'mouse', 'day'])
    time_matrix = pd.concat([title, matrix], axis=1)
    return time_matrix

예제 #15

0

파일 보기

파일: __init__.py 프로젝트: YZhouEntheos/mousestyles

def aggregate_interval(strain, mouse, feature, bin_width):
    """
    Aggregate the interval data of one mouse into n-minute time
    bins, return a time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    feature: str
        must be a member of the module-level ``INTERVAL_FEATURES``; the
        documented values are
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
    bin_width: int
        number of minutes per bin, between 1 and 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a time series of length (#days) * int(24 * 60 / bin_width), indexed
        from 01/01/2014 in steps of ``bin_width`` minutes. For "F" and "W"
        each day's bins are rescaled to sum to that day's total from
        ``data.load_all_features``; for "AS" each bin is divided by the bin
        length; otherwise bins hold the time covered by the intervals.

    Raises
    ------
    ValueError
        if any argument fails the checks above.
    """
    # Input Check

    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError(
            'Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError(
            'Mouse value must be a non-negative integer')
    if feature not in INTERVAL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W"}')
    # A zero width is rejected as well: it would divide by zero when the
    # number of bins per day is computed below.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # load data
    intervals = data.load_intervals(feature)
    mouse_data = intervals.loc[
        (intervals['strain'] == strain) & (intervals['mouse'] == mouse)]

    # build the flat (day-major) array of bins
    days = sorted(np.unique(mouse_data['day']))
    bin_count = int(24 * 60 / bin_width)
    # bin width in the time unit of the interval data
    # (presumably seconds -- TODO confirm)
    bin_length = bin_width * 60
    time_behaviour = np.repeat(0.0, bin_count * len(days))

    for j in days:
        df = mouse_data.loc[mouse_data['day'] == j]
        start_end = data.load_start_time_end_time(strain, mouse, j)
        # times relative to the start of this day's recording
        start = np.asarray(df['start']) - start_end[0]
        end = np.asarray(df['stop']) - start_end[0]
        # NOTE(review): the day value itself is used as the block offset,
        # which assumes days are numbered 0..len(days)-1 -- confirm.
        offset = j * bin_count

        for start_time, end_time in zip(start, end):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                # interval lies inside a single bin
                time_behaviour[start_index + offset] += end_time - start_time
            else:
                # first bin: from the interval start to the end of that bin
                time_behaviour[start_index + offset] += (
                    bin_length * (start_index + 1) - start_time)
                # last bin: from the start of that bin to the interval end
                time_behaviour[end_index + offset] += end_time % bin_length
                # bins strictly in between are fully covered (the slice is
                # empty when the interval touches only two adjacent bins)
                time_behaviour[start_index + offset + 1:
                               end_index + offset] += bin_length

    if feature == 'F' or feature == 'W':
        all_feature = data.load_all_features()
        group = all_feature[
            ["strain", "mouse", "day", "hour", "Food", "Water"]].groupby(
            ["strain", "mouse", "day"]).sum()
        group = group.reset_index()
        mouse_data = group.loc[(group['strain'] == strain) &
                               (group['mouse'] == mouse)]
        amount_column = 'Food' if feature == 'F' else 'Water'
        # Daily totals are matched to day blocks by position.
        for i, amount in enumerate(np.asarray(mouse_data[amount_column])):
            day_bins = slice(bin_count * i, bin_count * (i + 1))
            # turn the time per bin into a share of the day, then scale the
            # shares by the amount consumed on that day
            time_behaviour[day_bins] /= sum(time_behaviour[day_bins])
            time_behaviour[day_bins] *= float(amount)
    if feature == 'AS':
        # fraction of each bin covered by the intervals
        time_behaviour /= bin_length

    ts = pd.Series(time_behaviour, index=pd.date_range(
        '01/01/2014', periods=len(time_behaviour),
        freq=str(bin_width) + 'min'))

    return ts

예제 #16

0

파일 보기

파일: __init__.py 프로젝트: YZhouEntheos/mousestyles

def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data of one mouse into n-minute
    time bins, return a time series.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes per bin, between 1 and 1440 inclusive

    Returns
    -------
    ts: pandas.Series
        a time series of length (#days) * int(24 * 60 / bin_width), indexed
        from 01/01/2014 in steps of ``bin_width`` minutes, holding the
        distance travelled in each bin

    Raises
    ------
    ValueError
        if any argument fails the checks above.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError(
            'Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError(
            'Mouse value must be a non-negative integer')
    # A zero width is rejected as well: it would divide by zero when the
    # number of bins per day is computed below.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[
        (intervals['strain'] == strain) & (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # build the flat (day-major) array of bins
    bin_count = int(24 * 60 / bin_width)
    # bin width in the time unit of the movement data
    # (presumably seconds -- TODO confirm)
    bin_length = bin_width * 60
    time_movements = np.repeat(0.0, bin_count * len(days))

    for j in days:
        M = data.load_movement(strain, mouse, day=int(j))
        timestamps = M["t"].values
        positions = M[["x", "y"]].values
        # Euclidean length of every step between consecutive samples
        dist = np.linalg.norm(positions[1:] - positions[0:-1], axis=1)

        start_end = data.load_start_time_end_time(strain, mouse, j)
        # step start/end times relative to the start of this day's recording
        start = timestamps[0:-1] - start_end[0]
        end = timestamps[1:] - start_end[0]
        # NOTE(review): the day value itself is used as the block offset,
        # which assumes days are numbered 0..len(days)-1 -- confirm.
        offset = j * bin_count

        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
            else:
                # Split the step between its first and last bin in proportion
                # to the time spent in the last bin.
                # NOTE(review): assumes a step spans at most two bins.
                tail = end_time % bin_length / (end_time - start_time) * step
                time_movements[end_index + offset] += tail
                time_movements[start_index + offset] += step - tail

    ts = pd.Series(time_movements, index=pd.date_range(
        '01/01/2014', periods=len(time_movements),
        freq=str(bin_width) + 'min'))

    return ts