예제 #1
0
def test_feature_load_input():
    """Verify that the data loaders reject invalid arguments.

    ``data.load_intervals`` must raise ``ValueError`` for an unknown
    feature name, and ``data.load_movement`` must raise ``ValueError``
    for a negative index and ``TypeError`` for a float index. The first
    exception argument is compared against the exact expected message.
    """
    # Unknown feature name.
    want = 'Input value must be one of {"AS", "F", "IS", "M_AS", "M_IS", "W"}'
    with pytest.raises(ValueError) as err:
        data.load_intervals('A')
    assert err.value.args[0] == want

    # (expected exception, load_movement arguments, expected message)
    bad_calls = [
        (ValueError, (-1, 0, 0), "Input values need to be nonnegative"),
        (TypeError, (0.0, 0, 0), "Input values need to be integer"),
    ]
    for exc_type, call_args, message in bad_calls:
        with pytest.raises(exc_type) as err:
            data.load_movement(*call_args)
        assert err.value.args[0] == message
예제 #2
0
def getdistance(strain, mouse, day):
    """
    Return the consecutive-point distances that are bigger than 1 (cm).

    The movement record is loaded with ``data.load_movement``; the
    Euclidean distance between every pair of consecutive ``(x, y)`` rows
    is computed and only distances strictly greater than 1 are returned.

    Parameters
    ----------
    strain : int
        the strain number of the mouse
    mouse : int
        the mouse number in its strain
    day : int
        the day number

    Returns
    -------
    cut_dist : numpy.ndarray
        The vector of truncated distances.

    Examples
    --------
    >>> getdistance(0, 0, 0)
    array([ 1.00648944,  1.02094319,  1.0178885 , ...,  1.00099351,
            1.01191156,  1.00423354])
    """
    movement = data.load_movement(strain, mouse, day)
    # Coordinate increments between successive rows.
    dx = np.diff(movement["x"])
    dy = np.diff(movement["y"])
    steps = np.sqrt(dx**2 + dy**2)
    # Keep only the steps strictly above the truncation threshold.
    return steps[steps > 1]
예제 #3
0
def get_travel_distances(strain=0, mouse=0, day=0):
    """Get the per-step distances travelled by one mouse on one day.

    A step is the Euclidean distance between consecutive rows of the
    movement record returned by ``load_movement`` (the sampling interval
    is not visible here; the original documentation states 20ms).

    Parameters
    ----------
    strain: int {0, 1, 2}
        The strain of mouse to test
    mouse: int {0, 1, 2, 3}
        The mouse twin id within the strain
    day: int {0, 1, ..., 11}
        The day to calculate the distance

    Returns
    -------
    x: np.ndarray, 1-D
        The step distances for this mouse on this day, truncated at 1cm
        (i.e. only steps of length greater than or equal to 1 are kept).

    Examples:
    >>> get_travel_distances(0, 0, 0)[:3]
    array([ 1.00648944,  1.02094319,  1.0178885 ])
    """
    df = load_movement(strain=strain, mouse=mouse, day=day)
    dx = df.x.diff()
    dy = df.y.diff()
    # The first diff entry is NaN (no predecessor), so it is dropped.
    steps = np.array(np.sqrt(dx**2 + dy**2))[1:]
    return steps[steps >= 1]
예제 #4
0
def get_travel_distances(strain=0, mouse=0, day=0):
    """Get the per-step distances travelled by one mouse on one day.

    A step is the Euclidean distance between consecutive rows of the
    movement record returned by ``load_movement`` (the sampling interval
    is not visible here; the original documentation states 20ms).

    Parameters
    ----------
    strain: int {0, 1, 2}
        The strain of mouse to test
    mouse: int {0, 1, 2, 3}
        The mouse twin id within the strain
    day: int {0, 1, ..., 11}
        The day to calculate the distance

    Returns
    -------
    x: np.ndarray, 1-D
        The step distances for this mouse on this day, truncated at 1cm
        (i.e. only steps of length greater than or equal to 1 are kept).

    Examples:
    >>> get_travel_distances(0, 0, 0)[:3]
    array([ 1.00648944,  1.02094319,  1.0178885 ])
    """
    df = load_movement(strain=strain, mouse=mouse, day=day)
    dx = df.x.diff()
    dy = df.y.diff()
    # The first diff entry is NaN (no predecessor), so it is dropped.
    steps = np.array(np.sqrt(dx**2 + dy**2))[1:]
    return steps[steps >= 1]
예제 #5
0
def test_load_movement_and_intervals():
    """Check ``load_movement_and_intervals`` against plain ``load_movement``.

    With no features requested the result must equal the movement data;
    requesting ``'AS'`` must add exactly one column, keep the row count,
    and behave the same whether passed as a list or as a bare string.
    """
    base = data.load_movement(1, 1, 1)

    # An empty feature list adds nothing.
    no_features = data.load_movement_and_intervals(1, 1, 1, [])
    assert np.all(base == no_features)

    # The same single feature, given as a list and as a string.
    from_list = data.load_movement_and_intervals(1, 1, 1, ['AS'])
    from_str = data.load_movement_and_intervals(1, 1, 1, 'AS')
    assert from_list.shape[1] == base.shape[1] + 1  # one extra column
    assert from_list.shape[0] == base.shape[0]  # row count unchanged
    assert np.all(from_list == from_str)
예제 #6
0
def test_load_movement_and_intervals():
    """Check ``load_movement_and_intervals`` against plain ``load_movement``.

    With no features requested the result must equal the movement data;
    requesting ``'AS'`` must add exactly one column, keep the row count,
    and behave the same whether passed as a list or as a bare string.
    """
    base = data.load_movement(1, 1, 1)

    # An empty feature list adds nothing.
    no_features = data.load_movement_and_intervals(1, 1, 1, [])
    assert np.all(base == no_features)

    # The same single feature, given as a list and as a string.
    from_list = data.load_movement_and_intervals(1, 1, 1, ['AS'])
    from_str = data.load_movement_and_intervals(1, 1, 1, 'AS')
    assert from_list.shape[1] == base.shape[1] + 1  # one extra column
    assert from_list.shape[0] == base.shape[0]  # row count unchanged
    assert np.all(from_list == from_str)
예제 #7
0
def test_filter_path_input():
    """Check that ``filter_path`` rejects non-positive third arguments.

    Both a negative value and zero must raise ``ValueError`` whose first
    argument is the exact message asserted below.
    """
    movement = data.load_movement(0, 0, 0)
    paths = path_diversity.path_index(movement, 1, 1)
    # First the negative input, then zero.
    for bad_value in (-1, 0):
        with pytest.raises(ValueError) as err:
            path_diversity.filter_path(movement, paths, bad_value)
        assert err.value.args[0] == "Input values need to be positive"
예제 #8
0
def test_path_input():
    """Check that ``path_index`` raises the expected errors on bad input.

    Negative and zero arguments must raise ``ValueError``; a floating
    point ``min_path_length`` must raise ``TypeError``. Messages are
    compared exactly.
    """
    movement = data.load_movement(0, 0, 0)
    # (expected exception, trailing path_index arguments, expected message)
    cases = [
        (ValueError, (-1, -1), "Input values need to be positive"),
        (ValueError, (0, 0), "Input values need to be positive"),
        (TypeError, (1, 1.5), "min_path_length needs to be integer"),
    ]
    for exc_type, extra_args, message in cases:
        with pytest.raises(exc_type) as err:
            path_index(movement, *extra_args)
        assert err.value.args[0] == message
예제 #9
0
def test_dist_speed_input():
    movement = data.load_movement(0, 0, 0)
    # Check if function raises the correct type of errors.
    # Input negative numbers
    with pytest.raises(ValueError) as excinfo:
        path_diversity.get_dist_speed(movement, -1, -1)
    assert excinfo.value.args[0] == "Start and end indices must be positive"
    # Input non-integers
    with pytest.raises(TypeError) as excinfo:
        path_diversity.get_dist_speed(movement, 0.1, 0.1)
    assert excinfo.value.args[0] == "Start and end indices must be integers"
    # Input start index greater than end index
    with pytest.raises(ValueError) as excinfo:
        path_diversity.get_dist_speed(movement, 500, 2)
    assert excinfo.value.args[
        0] == "Start index must be smaller than end index"
    # Input indices that encompass data outside of true data length
    with pytest.raises(ValueError) as excinfo:
        path_diversity.get_dist_speed(movement, 0, len(movement))
    assert excinfo.value.args[0] == "Number of observations must be less than \
예제 #10
0
def test_dist_speed_input():
    movement = data.load_movement(0, 0, 0)
    # Check if function raises the correct type of errors.
    # Input negative numbers
    with pytest.raises(ValueError) as excinfo:
        get_dist_speed.get_dist_speed(movement, -1, -1)
    assert excinfo.value.args[0] == "Start and end indices must be positive"
    # Input non-integers
    with pytest.raises(TypeError) as excinfo:
        get_dist_speed.get_dist_speed(movement, 0.1, 0.1)
    assert excinfo.value.args[0] == "Start and end indices must be integers"
    # Input start index greater than end index
    with pytest.raises(ValueError) as excinfo:
        get_dist_speed.get_dist_speed(movement, 500, 2)
    assert excinfo.value.args[
        0] == "Start index must be smaller than end index"
    # Input indices that encompass data outside of true data length
    with pytest.raises(ValueError) as excinfo:
        get_dist_speed.get_dist_speed(movement, 0, len(movement))
    assert excinfo.value.args[0] == "Number of observations must be less than \
예제 #11
0
def test_detect_noise_input():
    """Check that ``detect_noise`` rejects non-positive thresholds.

    A negative or zero ``angle_threshold`` and a negative or zero
    ``delta_t`` must each raise ``ValueError`` with the exact message
    asserted below.
    """
    movement = data.load_movement(0, 0, 0)
    paths = path_diversity.path_index(movement, 1, 1)
    # (angle_threshold, delta_t) pairs, in the order:
    # negative angle, negative delta, zero angle, zero delta.
    bad_pairs = [(-1, 1), (1, -1), (0, 1), (1, 0)]
    for angle_threshold, delta_t in bad_pairs:
        with pytest.raises(ValueError) as err:
            path_diversity.detect_noise(
                movement, paths, angle_threshold, delta_t)
        assert err.value.args[0] == "Input values need to be positive"
예제 #12
0
def test_path():
    """Check the first five paths found by ``path_index`` for (0, 0, 0)."""
    expected_head = [[22, 53], [55, 59], [67, 89], [91, 95], [96, 114]]
    movement = data.load_movement(0, 0, 0)
    found = path_index(movement, 1, 1)
    assert found[:5] == expected_head
예제 #13
0
import numpy as np
import matplotlib.pyplot as plt

from mousestyles.data import load_movement
from mousestyles.path_diversity import path_index

# Load one movement record and the index ranges of its paths.
movement = load_movement(0, 0, 0)
paths = path_index(movement, 1, 1)

# Fixed axis limits for the plot.
xlim = [-16.25, 3.75]
ylim = [1.0, 43.0]

# Draw every path as a faint blue line; each entry of `paths` holds the
# first and last row index of one path (the end index is inclusive).
for sep in paths:
    path = movement[sep[0]:sep[1] + 1]
    plt.plot(path['x'], path['y'], 'b',
             linewidth=1, alpha=.1)

# Axis decoration does not depend on the individual path, so it is applied
# once after the loop instead of being repeated on every iteration.
plt.xlabel('x-coordinate')
plt.xlim(xlim[0], xlim[1])
plt.ylabel('y-coordinate')
plt.ylim(ylim[0], ylim[1])
plt.title("Example of path plot")

plt.show()
예제 #14
0
def hypo_powerLaw_null(strain, mouse, day, law_est=0, seed=-1):
    """
    Return the outcome from GLRT with null hypothesis law distribution.

    Description
    -----------
    This function uses the Generalized Likelihood Ratio Test to test the
    goodness of fit: in other words, which distribution is more likely.

    The power law distribution is the null and the exponential
    distribution is the alternative. With both distributions truncated
    at 1 and the MLEs plugged in, the statistic (up to an additive
    constant) is::

        n * (log(sum(x) - n) - log(sum(log(x)))) - law_est * sum(log(x))

    The same formula is applied to the observed distances and to every
    simulated sample, so the two are directly comparable.

    The p-value is found by simulation: 1000 samples are drawn with
    ``random_powerlaw`` and the statistic of each is compared with the
    observed one. The result therefore varies between calls unless
    ``seed`` is given.

    If the p-value is too small we reject the null, i.e. the power law
    is not a better fit than the exponential distribution.

    Parameters
    ----------
    strain : int
        the strain number of the mouse
    mouse : int
        the mouse number in its strain
    day : int
        the day number
    law_est : double (optional)
        the estimated parameter in law distribution; the default 0 means
        "estimate it from the data" as ``1 + n / sum(log(x / min(x)))``
    seed : int (optional)
        value passed to ``np.random.seed``; the default -1 leaves the
        random state untouched

    Returns
    -------
    p_value : float
        the fraction of simulated statistics strictly greater than the
        observed statistic.

    Examples
    --------
    >>> hypo_powerLaw_null(0, 0, 0, seed=0)  # doctest: +SKIP
    """
    if seed != -1:
        np.random.seed(seed)
    df = data.load_movement(strain, mouse, day)
    distance_vector = np.sqrt(np.diff(df["x"])**2 + np.diff(df["y"])**2)
    # Only steps strictly longer than 1 enter the test.
    cut_dist = distance_vector[distance_vector > 1]
    n = len(cut_dist)
    if law_est == 0:
        law_est = 1 + n / np.sum(np.log(cut_dist / np.min(cut_dist)))

    def glrt_stat(values):
        # The first log takes the sum of the raw values (minus n), not the
        # sum of their logs; with steps just above 1 the latter is negative
        # and the statistic would be NaN.
        sum_log = np.sum(np.log(values))
        return (n * (np.log(np.sum(values) - n) - np.log(sum_log)) -
                law_est * sum_log)

    test_stat = glrt_stat(cut_dist)
    sample_stat = np.array(
        [glrt_stat(random_powerlaw(n, law_est)) for _ in range(1000)])
    # critical_value = ss.mstats.mquantiles(sample_stat, prob = 0.05)[0]
    p_value = np.sum(sample_stat > test_stat) / len(sample_stat)
    return p_value
예제 #15
0
def test_filter_path():
    """Check the paths kept by ``filter_paths`` with a threshold of 20."""
    movement = data.load_movement(0, 0, 0)
    candidates = path_index(movement, 1, 1)
    # Only these three index ranges are expected to pass the filter.
    kept = filter_path.filter_paths(movement, candidates, 20)
    assert kept == [[3082, 3181], [30835, 30970], [31346, 31557]]
예제 #16
0
def hypo_powerLaw_null(strain, mouse, day, law_est=0, seed=-1):
    """
    Return the outcome from GLRT with null hypothesis law distribution.

    This function uses the Generalized Likelihood Ratio Test to test the
    goodness of fit: in other words, which distribution is more likely.

    The power law distribution is the null and the exponential
    distribution is the alternative. With both distributions truncated
    at 1 and the MLEs plugged in, the statistic (up to an additive
    constant) is::

        n * (log(sum(x) - n) - log(sum(log(x)))) - law_est * sum(log(x))

    The same formula is applied to the observed distances and to every
    simulated sample, so the two are directly comparable.

    The p-value is found by simulation: 1000 samples are drawn with
    ``random_powerlaw`` and the statistic of each is compared with the
    observed one. The result therefore varies between calls unless
    ``seed`` is given.

    If the p-value is too small we reject the null, i.e. the power law
    is not a better fit than the exponential distribution.

    Parameters
    ----------
    strain : int
        the strain number of the mouse
    mouse : int
        the mouse number in its strain
    day : int
        the day number
    law_est : double (optional)
        the estimated parameter in law distribution; the default 0 means
        "estimate it from the data" as ``1 + n / sum(log(x / min(x)))``
    seed : int (optional)
        value passed to ``np.random.seed``; the default -1 leaves the
        random state untouched

    Returns
    -------
    p_value : float
        the fraction of simulated statistics strictly greater than the
        observed statistic.

    Examples
    --------
    >>> hypo_powerLaw_null(0, 0, 0, seed=0)  # doctest: +SKIP
    """
    if seed != -1:
        np.random.seed(seed)
    df = data.load_movement(strain, mouse, day)
    distance_vector = np.sqrt(np.diff(df["x"])**2 + np.diff(df["y"])**2)
    # Only steps strictly longer than 1 enter the test.
    cut_dist = distance_vector[distance_vector > 1]
    n = len(cut_dist)
    if law_est == 0:
        law_est = 1 + n / np.sum(np.log(cut_dist / np.min(cut_dist)))

    def glrt_stat(values):
        # The first log takes the sum of the raw values (minus n), not the
        # sum of their logs; with steps just above 1 the latter is negative
        # and the statistic would be NaN.
        sum_log = np.sum(np.log(values))
        return (n * (np.log(np.sum(values) - n) - np.log(sum_log)) -
                law_est * sum_log)

    test_stat = glrt_stat(cut_dist)
    sample_stat = np.array(
        [glrt_stat(random_powerlaw(n, law_est)) for _ in range(1000)])
    # critical_value = ss.mstats.mquantiles(sample_stat, prob = 0.05)[0]
    p_value = np.sum(sample_stat > test_stat) / len(sample_stat)
    return p_value
예제 #17
0
def test_movement_loader():
    """Check that ``load_movement(0, 0, 0)`` has the expected dimensions."""
    expected_shape = (39181, 4)
    loaded = data.load_movement(0, 0, 0)
    assert loaded.shape == expected_shape
예제 #18
0
def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data based on n-minute
    time intervals, return a time series.

    For every day listed for this strain/mouse in the 'IS' intervals
    table, the distance of each step between consecutive movement rows is
    added to the bin containing it. A step whose start and end fall in
    different bins is split between the two in proportion to the time
    spent in the end bin.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes of time interval for data aggregation;
        must satisfy 1 <= bin_width <= 1440

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n

    Raises
    ------
    ValueError
        if ``strain`` or ``mouse`` is not a non-negative integer, or if
        ``bin_width`` is not an integer in the range 1..1440.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError('Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError('Mouse value must be a non-negative integer')
    # A zero width is rejected here as well: it used to pass validation and
    # then fail with ZeroDivisionError when the bin count was computed.
    # The message text is kept unchanged for callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[(intervals['strain'] == strain)
                               & (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # one slot per bin per day
    bin_count = int(24 * 60 / bin_width)
    time_movements = np.repeat(0.0, bin_count * len(days))
    # bin width in timestamp units (minutes * 60)
    bin_length = bin_width * 60

    for j in days:
        M = data.load_movement(strain, mouse, day=int(j))
        times = M["t"].values
        xy = M[["x", "y"]].values
        # Euclidean length of each step between consecutive rows.
        dist = np.linalg.norm(xy[1:] - xy[:-1], axis=1)
        start_end = data.load_start_time_end_time(strain, mouse, j)
        # Step start/end times relative to the day's start time.
        start = times[:-1] - start_end[0]
        end = times[1:] - start_end[0]
        # NOTE(review): the day number itself is used as the offset, which
        # assumes day numbers run 0..len(days)-1 -- confirm against the data.
        offset = j * bin_count
        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
            else:
                # Share of the step attributed to the bin it ends in.
                end_share = (end_time % bin_length /
                             (end_time - start_time) * step)
                time_movements[end_index + offset] += end_share
                time_movements[start_index + offset] += step - end_share

    ts = pd.Series(time_movements,
                   index=pd.date_range('01/01/2014',
                                       periods=len(time_movements),
                                       freq=str(bin_width) + 'min'))

    return ts
예제 #19
0
def aggregate_movement(strain, mouse, bin_width):
    """
    Aggregate the movement data based on n-minute
    time intervals, return a time series.

    For every day listed for this strain/mouse in the 'IS' intervals
    table, the distance of each step between consecutive movement rows is
    added to the bin containing it. A step whose start and end fall in
    different bins is split between the two in proportion to the time
    spent in the end bin.

    Parameters
    ----------
    strain: int
        nonnegative integer indicating the strain number
    mouse: int
        nonnegative integer indicating the mouse number
    bin_width: int
        number of minutes of time interval for data aggregation;
        must satisfy 1 <= bin_width <= 1440

    Returns
    -------
    ts: pandas.Series
        a pandas time series of length (#day)*24(hour)*60(minute)/n

    Raises
    ------
    ValueError
        if ``strain`` or ``mouse`` is not a non-negative integer, or if
        ``bin_width`` is not an integer in the range 1..1440.
    """
    # Input Check
    if (not isinstance(strain, int)) or (strain < 0):
        raise ValueError(
            'Strain must be a non-negative integer')
    if (not isinstance(mouse, int)) or (mouse < 0):
        raise ValueError(
            'Mouse value must be a non-negative integer')
    # A zero width is rejected here as well: it used to pass validation and
    # then fail with ZeroDivisionError when the bin count was computed.
    # The message text is kept unchanged for callers that match on it.
    if (not isinstance(bin_width, int)) or bin_width <= 0 or bin_width > 1440:
        raise ValueError(
            'Bin width (minutes) must be a non-negative integer below 1440')

    # determine number of days
    intervals = data.load_intervals('IS')
    mouse_data = intervals.loc[
        (intervals['strain'] == strain) & (intervals['mouse'] == mouse)]
    days = sorted(np.unique(mouse_data['day']))

    # one slot per bin per day
    bin_count = int(24 * 60 / bin_width)
    time_movements = np.repeat(0.0, bin_count * len(days))
    # bin width in timestamp units (minutes * 60)
    bin_length = bin_width * 60

    for j in days:
        M = data.load_movement(strain, mouse, day=int(j))
        times = M["t"].values
        xy = M[["x", "y"]].values
        # Euclidean length of each step between consecutive rows.
        dist = np.linalg.norm(xy[1:] - xy[:-1], axis=1)
        start_end = data.load_start_time_end_time(strain, mouse, j)
        # Step start/end times relative to the day's start time.
        start = times[:-1] - start_end[0]
        end = times[1:] - start_end[0]
        # NOTE(review): the day number itself is used as the offset, which
        # assumes day numbers run 0..len(days)-1 -- confirm against the data.
        offset = j * bin_count
        for start_time, end_time, step in zip(start, end, dist):
            start_index = int(start_time / bin_length)
            end_index = int(end_time / bin_length)
            if start_index == end_index:
                time_movements[start_index + offset] += step
            else:
                # Share of the step attributed to the bin it ends in.
                end_share = (end_time % bin_length /
                             (end_time - start_time) * step)
                time_movements[end_index + offset] += end_share
                time_movements[start_index + offset] += step - end_share

    ts = pd.Series(time_movements, index=pd.date_range(
        '01/01/2014', periods=len(time_movements),
        freq=str(bin_width) + 'min'))

    return ts