def test_find_best_interval_input():
    """Check that find_best_interval rejects malformed arguments.

    Every invalid argument is expected to raise ``ValueError`` carrying the
    message dedicated to that argument.
    """
    # df is not a pandas DataFrame
    with pytest.raises(ValueError) as excinfo:
        find_best_interval(df=5, strain_num=2)
    assert excinfo.value.args[0] == "df should be pandas DataFrame"

    # Example frame: three identical rows of 43 values whose first column
    # is renamed to 'strain'.
    state_row = np.hstack(
        (np.zeros(13), np.ones(10), np.ones(10) * 2, np.ones(10) * 3))
    time_df_eg = pd.DataFrame(np.vstack((state_row, state_row, state_row)))
    time_df_eg.rename(columns={0: 'strain'}, inplace=True)

    # strain_num outside of 0, 1, 2
    with pytest.raises(ValueError) as excinfo:
        find_best_interval(df=time_df_eg, strain_num=3)
    assert excinfo.value.args[0] == "strain_num can only be 0, 1, 2"

    # interval_length_initial must be a numpy array of positive integers:
    # a bare int, an array with a negative entry and an array with a
    # non-integer entry must all be refused with the same message.
    invalid_intervals = (
        3,
        np.array([1, 2, -1]),
        np.array([1, 2, 3.1]),
    )
    for invalid in invalid_intervals:
        with pytest.raises(ValueError) as excinfo:
            find_best_interval(df=time_df_eg, strain_num=0,
                               interval_length_initial=invalid)
        assert (excinfo.value.args[0] ==
                "interval_length_initial positive np.array")
def test_find_best_interval_input():
    """Check that find_best_interval rejects malformed arguments.

    Every invalid argument is expected to raise ``ValueError`` carrying the
    message dedicated to that argument.
    """
    # df is not a pandas DataFrame
    with pytest.raises(ValueError) as excinfo:
        find_best_interval(df=5, strain_num=2)
    assert excinfo.value.args[0] == "df should be pandas DataFrame"

    # Example frame: three identical rows of 43 values whose first column
    # is renamed to 'strain'.
    state_row = np.hstack(
        (np.zeros(13), np.ones(10), np.ones(10) * 2, np.ones(10) * 3))
    time_df_eg = pd.DataFrame(np.vstack((state_row, state_row, state_row)))
    time_df_eg.rename(columns={0: 'strain'}, inplace=True)

    # strain_num outside of 0, 1, 2
    with pytest.raises(ValueError) as excinfo:
        find_best_interval(df=time_df_eg, strain_num=3)
    assert excinfo.value.args[0] == "strain_num can only be 0, 1, 2"

    # interval_length_initial must be a numpy array of positive integers:
    # a bare int, an array with a negative entry and an array with a
    # non-integer entry must all be refused with the same message.
    invalid_intervals = (
        3,
        np.array([1, 2, -1]),
        np.array([1, 2, 3.1]),
    )
    for invalid in invalid_intervals:
        with pytest.raises(ValueError) as excinfo:
            find_best_interval(df=time_df_eg, strain_num=0,
                               interval_length_initial=invalid)
        assert (excinfo.value.args[0] ==
                "interval_length_initial positive np.array")
def test_find_best_interval():
    """Run find_best_interval on an all-zero frame and check its outputs.

    The frame has three identical rows of 40 zeros with the first column
    renamed to 'strain'. With candidate interval lengths 10, 20 and 30 the
    test expects the reported interval to be 10, the returned trajectory
    to be 40 zeros, and the score to lie within 0.05 of 1.
    """
    zero_row = np.zeros(40)
    time_df_eg = pd.DataFrame(np.vstack((zero_row, zero_row, zero_row)))
    time_df_eg.rename(columns={0: 'strain'}, inplace=True)

    candidate_lengths = np.arange(10, 40, 10)
    best_time, fake_mouse, best_score = find_best_interval(
        time_df_eg, 0, candidate_lengths)

    assert best_time == 10
    assert np.array_equal(fake_mouse, np.zeros(40))
    assert 1 - best_score < 0.05
def test_find_best_interval():
    """Run find_best_interval on an all-zero frame and check its outputs.

    The frame has three identical rows of 40 zeros with the first column
    renamed to 'strain'. With candidate interval lengths 10, 20 and 30 the
    test expects the reported interval to be 10, the returned trajectory
    to be 40 zeros, and the score to lie within 0.05 of 1.
    """
    zero_row = np.zeros(40)
    time_df_eg = pd.DataFrame(np.vstack((zero_row, zero_row, zero_row)))
    time_df_eg.rename(columns={0: 'strain'}, inplace=True)

    candidate_lengths = np.arange(10, 40, 10)
    best_time, fake_mouse, best_score = find_best_interval(
        time_df_eg, 0, candidate_lengths)

    assert best_time == 10
    assert np.array_equal(fake_mouse, np.zeros(40))
    assert 1 - best_score < 0.05
def plot_dynamics(df, strain_num,
                  interval_length_initial=np.arange(600, 7800, 600),
                  plot_time_range=np.arange(36000, 36100, 1)):
    r"""
    Display a strip plot of the simulated behavior dynamics of one strain.

    The inputs are validated, ``find_best_interval`` is asked for the best
    simulation of the requested strain, and the simulated states at the
    time points in ``plot_time_range`` are drawn as one row of colored
    squares. The data used as input is the pandas DataFrame generated by
    function create_time_matrix.

    States are coded as 0 for IS, 1 for eating, 2 for drinking and 3 for
    other activities in AS. Each state value is mapped to a color by
    matplotlib's active default colormap, so the exact colors depend on
    the matplotlib configuration.

    Parameters
    ----------
    df: pandas.DataFrame
        a huge data frame containing info on strain, mouse no., mouse day,
        and different states at chosen time points.
    strain_num: int
        an integer specifying the desired mouse strain. strain_num is
        0, 1, or 2.
    interval_length_initial: numpy.ndarray
        a one-dimensional integer array of strictly positive candidate
        interval lengths; it is forwarded to ``find_best_interval`` as the
        range of time intervals that it optimizes on.
    plot_time_range: numpy.ndarray
        a one-dimensional integer array of non-negative time indices to
        show in the plot. The indices are used to index the simulated
        state sequence, so they must lie within its length.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
        The x-axis carries the values of ``plot_time_range`` and the
        y-axis is hidden.

    Raises
    ------
    ValueError
        If ``df`` is not a pandas DataFrame, ``strain_num`` is not 0, 1
        or 2, or either array argument is not an integer numpy array in
        the allowed range.

    Examples
    --------
    >>> row_i = np.zeros(40)
    >>> time_df_eg = np.vstack((row_i, row_i, row_i))
    >>> time_df_eg = pd.DataFrame(time_df_eg)
    >>> time_df_eg.rename(columns={0:'strain'}, inplace=True)
    >>> plot_dynamics(time_df_eg, 0,
    ...               np.arange(10, 40, 10),
    ...               np.arange(0, 40, 1))  # doctest: +SKIP
    """
    def _is_int_array(values, minimum):
        # True for a 1-D integer ndarray whose entries are all >= minimum.
        # The dtype check accepts every integer width, not only np.int64.
        return (isinstance(values, np.ndarray)
                and values.ndim == 1
                and np.issubdtype(values.dtype, np.integer)
                and bool(np.all(values >= minimum)))

    # check all the inputs
    if not isinstance(df, pd.DataFrame):
        raise ValueError("df should be pandas DataFrame")
    if strain_num not in (0, 1, 2):
        raise ValueError("strain_num can only be 0, 1, 2")
    if not _is_int_array(interval_length_initial, 1):
        raise ValueError("interval_length_initial positive np.array")
    # plot_time_range holds array indices, so index 0 is legitimate.
    if not _is_int_array(plot_time_range, 0):
        raise ValueError("plot_time_range positive np.array")

    # Forward the caller's candidate intervals; element [1] of the result
    # is the simulated state sequence that gets indexed by time.
    best_simulation = find_best_interval(
        df, strain_num, interval_length_initial=interval_length_initial)
    value_list = list(best_simulation[1][plot_time_range])
    time_list = list(plot_time_range)

    fig, dynamics_plot = plt.subplots(figsize=(6, 1))
    # All squares share the same height; only the color carries the state.
    dynamics_plot.scatter(time_list, [1] * len(time_list),
                          c=value_list, marker='s', s=100)
    dynamics_plot.yaxis.set_visible(False)
    dynamics_plot.xaxis.set_ticks_position('bottom')
    dynamics_plot.get_yaxis().set_ticklabels([])
    plt.show()
def plot_dynamics(df, strain_num,
                  interval_length_initial=np.arange(600, 7800, 600),
                  plot_time_range=np.arange(36000, 36100, 1)):
    r"""
    Display a strip plot of the simulated behavior dynamics of one strain.

    The inputs are validated, ``find_best_interval`` is asked for the best
    simulation of the requested strain, and the simulated states at the
    time points in ``plot_time_range`` are drawn as one row of colored
    squares. The data used as input is the pandas DataFrame generated by
    function create_time_matrix.

    States are coded as 0 for IS, 1 for eating, 2 for drinking and 3 for
    other activities in AS. Each state value is mapped to a color by
    matplotlib's active default colormap, so the exact colors depend on
    the matplotlib configuration.

    Parameters
    ----------
    df: pandas.DataFrame
        a huge data frame containing info on strain, mouse no., mouse day,
        and different states at chosen time points.
    strain_num: int
        an integer specifying the desired mouse strain. strain_num is
        0, 1, or 2.
    interval_length_initial: numpy.ndarray
        a one-dimensional integer array of strictly positive candidate
        interval lengths; it is forwarded to ``find_best_interval`` as the
        range of time intervals that it optimizes on.
    plot_time_range: numpy.ndarray
        a one-dimensional integer array of non-negative time indices to
        show in the plot. The indices are used to index the simulated
        state sequence, so they must lie within its length.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
        The x-axis carries the values of ``plot_time_range`` and the
        y-axis is hidden.

    Raises
    ------
    ValueError
        If ``df`` is not a pandas DataFrame, ``strain_num`` is not 0, 1
        or 2, or either array argument is not an integer numpy array in
        the allowed range.

    Examples
    --------
    >>> row_i = np.zeros(40)
    >>> time_df_eg = np.vstack((row_i, row_i, row_i))
    >>> time_df_eg = pd.DataFrame(time_df_eg)
    >>> time_df_eg.rename(columns={0:'strain'}, inplace=True)
    >>> plot_dynamics(time_df_eg, 0,
    ...               np.arange(10, 40, 10),
    ...               np.arange(0, 40, 1))  # doctest: +SKIP
    """
    def _is_int_array(values, minimum):
        # True for a 1-D integer ndarray whose entries are all >= minimum.
        # The dtype check accepts every integer width, not only np.int64.
        return (isinstance(values, np.ndarray)
                and values.ndim == 1
                and np.issubdtype(values.dtype, np.integer)
                and bool(np.all(values >= minimum)))

    # check all the inputs
    if not isinstance(df, pd.DataFrame):
        raise ValueError("df should be pandas DataFrame")
    if strain_num not in (0, 1, 2):
        raise ValueError("strain_num can only be 0, 1, 2")
    if not _is_int_array(interval_length_initial, 1):
        raise ValueError("interval_length_initial positive np.array")
    # plot_time_range holds array indices, so index 0 is legitimate.
    if not _is_int_array(plot_time_range, 0):
        raise ValueError("plot_time_range positive np.array")

    # Forward the caller's candidate intervals; element [1] of the result
    # is the simulated state sequence that gets indexed by time.
    best_simulation = find_best_interval(
        df, strain_num, interval_length_initial=interval_length_initial)
    value_list = list(best_simulation[1][plot_time_range])
    time_list = list(plot_time_range)

    fig, dynamics_plot = plt.subplots(figsize=(6, 1))
    # All squares share the same height; only the color carries the state.
    dynamics_plot.scatter(time_list, [1] * len(time_list),
                          c=value_list, marker='s', s=100)
    dynamics_plot.yaxis.set_visible(False)
    dynamics_plot.xaxis.set_ticks_position('bottom')
    dynamics_plot.get_yaxis().set_ticklabels([])
    plt.show()