def test_solver_uncoupled(self):
    """Check shape and one sample entry of the solver_uncoupled output.

    The solution for the simulated uncoupled 6x6 data set must have the
    same shape as the array returned by ``CellDen.pd2np()`` and agree with
    it at index ``[0, 1, 2, 3]`` to 3 decimals.
    """
    rate_params = [0.03, 0.005, 0.005]
    pickle_path = io.get_data_file_path('simulated_uncoupled_6x6.pkl')
    density = celldensity.CellDen(pd.read_pickle(pickle_path))

    reference = density.pd2np()
    solved = model.solver_uncoupled(rate_params, density)

    assert np.shape(solved) == np.shape(reference)
    # a single spot check rather than a full-array comparison
    sample = (0, 1, 2, 3)
    np.testing.assert_almost_equal(solved[sample], reference[sample],
                                   decimal=3)
def test_log_likelihood_uncoupled(self):
    """Regression check of log_likelihood_uncoupled.

    Evaluates the function on the simulated uncoupled 6x6 data set and
    compares the result with a stored reference value to 4 decimals.
    """
    pickle_path = io.get_data_file_path('simulated_uncoupled_6x6.pkl')
    density = celldensity.CellDen(pd.read_pickle(pickle_path))
    rate_params = [0.03966004, 0.00523172, 0.00523965]
    mu, sigma = -0.15, 0.1

    log_like = model.log_likelihood_uncoupled(rate_params, density, mu,
                                              sigma)
    np.testing.assert_almost_equal(log_like, -10410.36855, decimal=4)
def test_log_likelihood_coupled(self):
    """Regression check of log_likelihood_coupled.

    Evaluates the function on the simulated coupled 6x6 data set and
    compares the result with a stored reference value to 4 decimals.
    """
    pickle_path = io.get_data_file_path('simulated_coupled_6x6.pkl')
    density = celldensity.CellDen(pd.read_pickle(pickle_path))
    rate_params = [0.03178564, 0.00310762, 0.00017541, 0.00022762]
    mu, sigma = -0.15, 0.1

    log_like = model.log_likelihood_coupled(rate_params, density, mu, sigma)
    np.testing.assert_almost_equal(log_like, -10289.069087654105, decimal=4)
def test_negative_log_posterior_uncoupled(self):
    """Tests that negative_log_posterior_uncoupled returns correct value.

    For the parameters used here the negative log posterior is expected to
    be exactly the negated value of log_likelihood_uncoupled evaluated on
    the same data and noise settings.
    """
    test_params = [0.03966004, 0.00523172, 0.00523965]
    path = io.get_data_file_path('simulated_uncoupled_6x6.pkl')
    testdata = celldensity.CellDen(pd.read_pickle(path))
    mu_n = -0.15
    sigma_n = 0.1
    ll = model.log_likelihood_uncoupled(test_params, testdata, mu_n, sigma_n)
    expected = -1 * ll
    actual = model.negative_log_posterior_uncoupled(test_params, testdata,
                                                    mu_n, sigma_n)
    # assertEqual keeps the exact comparison but, unlike
    # assertTrue(a == b), reports both values when the test fails.
    self.assertEqual(expected, actual)
def test_log_posterior_coupled(self):
    """Tests that log_posterior_coupled is log prior plus log likelihood.

    The value returned by log_posterior_coupled must equal the sum of
    log_likelihood_coupled and log_prior_coupled for the same parameters,
    data and noise settings.
    """
    test_params = [0.03178564, 0.00310762, 0.00017541, 0.00022762]
    path = io.get_data_file_path('simulated_coupled_6x6.pkl')
    testdata = celldensity.CellDen(pd.read_pickle(path))
    mu_n = -0.15
    sigma_n = 0.1
    lp = model.log_prior_coupled(test_params)
    ll = model.log_likelihood_coupled(test_params, testdata, mu_n, sigma_n)
    expected = ll + lp
    actual = model.log_posterior_coupled(test_params, testdata, mu_n,
                                         sigma_n)
    # assertEqual keeps the exact comparison but, unlike
    # assertTrue(a == b), reports both values when the test fails.
    self.assertEqual(expected, actual)
def test_inference(self):
    """Tests that inference on simulated data returns accurate params.

    Minimises negative_log_posterior_uncoupled starting from an offset
    initial guess and checks that the optimum agrees, to 2 decimals, with
    the parameters the simulated data set was generated with.
    """
    # import data
    path = io.get_data_file_path('simulated_data.pkl')
    test_data = pd.read_pickle(path)
    data = celldensity.CellDen(test_data)

    # parameters the test data was generated with
    k_true = [0.018, 0.001, 0.002]
    # initial guess handed to the optimiser
    k0 = [0.02, 0.005, 0.001]
    res = op.fmin(model.negative_log_posterior_uncoupled,
                  k0,
                  args=(data, -0.15, 0.1))

    # Compare against the generating parameters, not the initial guess:
    # checking against k0 would also pass if the optimiser never moved.
    np.testing.assert_almost_equal(res, k_true, 2)
def test_model_regress(self):
    """Tests that model is running correctly.

    The log likelihood of the simulated uncoupled 6x6 data set must be
    lower for the parameter set [0.1, 0.1, 0.1] than for
    [0.04, 0.005, 0.005].
    """
    # Parameters to calculate likelihood function
    params1 = [0.04, 0.005, 0.005]
    params2 = [0.1, 0.1, 0.1]

    path = io.get_data_file_path('simulated_uncoupled_6x6.pkl')
    testdata = celldensity.CellDen(pd.read_pickle(path))

    # Calculate log_likelihood function with mu_n = -0.15, sigma_n = 0.1
    val_1 = model.log_likelihood_uncoupled(params1, testdata, -0.15, 0.1)
    val_2 = model.log_likelihood_uncoupled(params2, testdata, -0.15, 0.1)

    # assertLess reports both values on failure, unlike assertTrue(a < b)
    self.assertLess(val_2, val_1)
def test_residual(self):
    """Regression check of the residual function in both modes.

    The sum of squared residuals on the simulated coupled 6x6 data set is
    compared with stored reference values (5 decimals), once with the last
    argument False and three parameters, once with it True and four
    parameters.
    """
    pickle_path = io.get_data_file_path('simulated_coupled_6x6.pkl')
    density = celldensity.CellDen(pd.read_pickle(pickle_path))
    mu, sigma = -0.15, 0.1
    three_params = [0.1, 0.1, 0.1]
    four_params = [0.1, 0.1, 0.1, 0.1]

    res_uncoupled = model.residual(three_params, density, mu, sigma, False)
    chi2_uncoupled = np.sum(res_uncoupled**2)
    res_coupled = model.residual(four_params, density, mu, sigma, True)
    chi2_coupled = np.sum(res_coupled**2)

    np.testing.assert_almost_equal(1604132.5558146131, chi2_uncoupled,
                                   decimal=5)
    np.testing.assert_almost_equal(2277944.6229283987, chi2_coupled,
                                   decimal=5)
def cell_density(data_file, CellA, CellB, BinDiv, ImgWidth):
    """Count the cells of each class in every spatial bin at every time step.

    Parameters
    ----------
    data_file :
        The .csv file containing the data (resolved through
        ``io.get_data_file_path``) with the following columns

        Identity labeling:
            Column 0: 'ImageNumber': denoting the time step image
            Column 1: 'ObjectNumber': denoting the arbitrary identity of
            the cell
        Intensity classifications:
            'Classify_Intensity_UpperQuartileIntensity_Sox2_high_Intensity_UpperQuartileIntensity_Oct4_high'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_high_Intensity_UpperQuartileIntensity_Oct4_low'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_low_Intensity_UpperQuartileIntensity_Oct4_high'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_low_Intensity_UpperQuartileIntensity_Oct4_low'
        Locations:
            'Location_Center_X'
            'Location_Center_Y'
    CellA :
        Name of the first cell type, e.g. 'Sox2'
    CellB :
        Name of the second cell type, e.g. 'Oct4'
    BinDiv :
        An integer telling the function to divide the original cell image
        into BinDiv x BinDiv bins
    ImgWidth :
        The linear size of the image in pixels (e.g. for an image of
        1024x1024, just enter 1024)

    Returns
    -------
    pandas.DataFrame
        A dataframe of cell density, whose
        1) main columns are CellA, CellB and 'Both',
        2) sub columns are the bin indices 0 .. BinDiv*BinDiv - 1,
        3) rows are the time steps: row t holds ImageNumber t + 1.

    Notes
    -----
    * The bin area is treated as 1, so every "density" is a raw cell count.
    * Bin ``i * BinDiv + j`` is the bin in row i and column j. Rows are
      flipped along Y: row 0 covers the highest Y range.
    * The bin width is ``ImgWidth // BinDiv`` and bins are half-open
      ``[low, high)``; a cell whose coordinate is at or beyond
      ``BinDiv * (ImgWidth // BinDiv)`` is not counted in any bin.
    * 'Both' counts the rows flagged by either of the two columns that
      ``io.load_data`` returns as ``both_high`` and ``both_low``.
    """
    data_path = io.get_data_file_path(data_file)
    data_loc, both_high, both_low, high_CellA, high_CellB = io.load_data(
        data_path, CellA, CellB)

    # Class membership does not depend on the time step: build each mask
    # once, in the order of the output's main columns (CellA, CellB, Both).
    class_masks = (
        data_loc[high_CellA] == 1,
        data_loc[high_CellB] == 1,
        (data_loc[both_high] == 1) | (data_loc[both_low] == 1),
    )

    bin_width = np.floor_divide(ImgWidth, BinDiv)
    n_bins = BinDiv * BinDiv
    # treat as unit area, otherwise can use
    # ((length_scale/ImgWidth)*BinWidth)**2
    bin_area = 1
    max_t = data_loc['ImageNumber'].max()

    # densities[k, t, b]: class k, time step t (ImageNumber t + 1), bin b
    densities = np.zeros((3, max_t, n_bins))
    for step in range(max_t):
        at_step = data_loc['ImageNumber'] == step + 1
        for k, class_mask in enumerate(class_masks):
            selected = at_step & class_mask
            xs = data_loc.loc[selected, 'Location_Center_X'].values
            ys = data_loc.loc[selected, 'Location_Center_Y'].values
            for row in range(BinDiv):
                # bins are arranged in the ith row and jth column, with the
                # row index flipped along Y
                y_bin = (BinDiv - 1) - row
                in_row = ((ys >= bin_width * y_bin) &
                          (ys < bin_width * (y_bin + 1)))
                for col in range(BinDiv):
                    in_col = ((xs >= bin_width * col) &
                              (xs < bin_width * (col + 1)))
                    densities[k, step, row * BinDiv + col] = (
                        np.count_nonzero(in_row & in_col) / bin_area)

    cols = pd.MultiIndex.from_tuples([(name, bin_index)
                                      for name in [CellA, CellB, 'Both']
                                      for bin_index in np.arange(n_bins)])
    return pd.DataFrame(
        np.hstack((densities[0], densities[1], densities[2])), columns=cols)
def draw_cell_loc(data_file,
                  CellA,
                  CellB,
                  time,
                  BinDiv=1,
                  bin_i=0,
                  bin_j=0,
                  ImgWidth=1024,
                  colorBoth=(255 / 255, 174 / 255, 66 / 255),
                  colorA='g',
                  colorB='r'):
    """Scatter-plot the cell locations of one time step, one panel per class.

    A new 12x3 pyplot figure with three side-by-side panels is created:
    CellA, CellB and 'Both-Cell' (rows flagged by either of the two
    columns that ``io.load_data`` returns as ``both_high`` and
    ``both_low``). Only cells inside the selected bin are drawn. The
    figure is neither shown nor returned.

    Parameters
    ----------
    data_file :
        The .csv file containing the data (resolved through
        ``io.get_data_file_path``) with the following columns

        Identity labeling:
            Column 0: 'ImageNumber': denoting the time step image
            Column 1: 'ObjectNumber': denoting the arbitrary identity of
            the cell
        Intensity classifications:
            'Classify_Intensity_UpperQuartileIntensity_Sox2_high_Intensity_UpperQuartileIntensity_Oct4_high'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_high_Intensity_UpperQuartileIntensity_Oct4_low'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_low_Intensity_UpperQuartileIntensity_Oct4_high'
            'Classify_Intensity_UpperQuartileIntensity_Sox2_low_Intensity_UpperQuartileIntensity_Oct4_low'
        Locations:
            'Location_Center_X'
            'Location_Center_Y'
    CellA :
        Name of the first cell type, e.g. 'Sox2'
    CellB :
        Name of the second cell type, e.g. 'Oct4'
    time :
        From 0 to (max time step - 1); rows with ImageNumber == time + 1
        are plotted
    BinDiv :
        An integer telling the function to divide the original cell image
        into BinDiv x BinDiv bins
    bin_i, bin_j :
        From 0 to (maximum bin num - 1), extracting the bin_i th row and
        the bin_j th column. Rows are flipped along Y: row 0 covers the
        highest Y range.
    ImgWidth :
        The width dimension of the image in pixels (e.g. for an image of
        1024x1024, just enter 1024)
    colorBoth, colorA, colorB :
        Colors passed to ``plt.scatter`` for the 'Both-Cell', CellA and
        CellB panels respectively

    Returns
    -------
    None
    """
    t = time + 1
    data_path = io.get_data_file_path(data_file)
    data_loc, both_high, both_low, high_CellA, high_CellB = io.load_data(
        data_path, CellA, CellB)

    # One entry per panel, left to right: (title label, color, row mask).
    at_t = data_loc['ImageNumber'] == t
    panels = (
        (CellA, colorA, at_t & (data_loc[high_CellA] == 1)),
        (CellB, colorB, at_t & (data_loc[high_CellB] == 1)),
        ('Both-Cell', colorBoth,
         at_t & ((data_loc[both_high] == 1) | (data_loc[both_low] == 1))),
    )

    # Find the bounds of the specified bin (half-open on both axes)
    BinWidth = np.floor_divide(ImgWidth, BinDiv)
    i = (BinDiv - 1) - bin_i
    j = bin_j
    BinY_Low = BinWidth * i
    BinY_High = BinWidth * (i + 1)
    BinX_Low = BinWidth * j
    BinX_High = BinWidth * (j + 1)

    # y limits are given high-to-low, so the y axis is drawn inverted
    if BinDiv == 1:
        title_end = ' at time ' + str(time)
        x_limits = (0, ImgWidth)
        y_limits = (ImgWidth, 0)
    else:
        title_end = ' in Bin ' + str(bin_i * BinDiv +
                                     bin_j) + ' at time ' + str(time)
        x_limits = (BinX_Low, BinX_High)
        y_limits = (BinY_High, BinY_Low + 1)

    # plot out the cell distribution
    plt.figure(figsize=(12, 3))
    for position, (label, color, mask) in enumerate(panels, start=1):
        xs = data_loc.loc[mask, 'Location_Center_X'].values
        ys = data_loc.loc[mask, 'Location_Center_Y'].values
        in_bin = ((xs >= BinX_Low) & (xs < BinX_High) &
                  (ys >= BinY_Low) & (ys < BinY_High))

        plt.subplot(1, 3, position, aspect='equal')
        plt.scatter(xs[in_bin], ys[in_bin], color=color, s=1.5)
        plt.xlim(*x_limits)
        plt.ylim(*y_limits)
        plt.title('Distribution of ' + label + title_end)

    plt.tight_layout()