def intra_vs_inter_gene_dist_hist(df):
    """Compare intra- vs inter-gene distance scores while dropping feature groups.

    For every pattern listed below, all columns of ``df`` whose lower-cased
    name contains the pattern are removed, the intra/inter gene distance
    scores are recomputed on the remaining columns, and the two samples are
    compared with a two-sample t-test and a two-sample Kolmogorov-Smirnov
    test.

    Patterns (feature groups that are dropped):

    'original': without dropping any features (this relies on no column
        name containing the substring 'original').
    'cells-with': the proportion of cells with each nucleus count (several
        nucleus-count groups can be formed because a number of cells have
        the same number of nuclei).
    'frac': fraction of nuclei that express a particular transcription
        factor.
    'd-neighbor': distances between the n nearest neighbors in an image of
        labeled nuclei.
    'theta': angles between nearest neighbors in an image of labeled nuclei.
    'cos': cosine of angles between nearest neighbors in an image of
        labeled nuclei.
    'sin': sine of angles between nearest neighbors in an image of labeled
        nuclei.
    'adaptive': adaptive (local) threshold, used when there is large
        variation in the background intensity.
    'solidity': presumably the ratio of an object's area to the area of its
        convex hull -- TODO confirm against the feature extraction code.
    'intensity': objects segmented with an intensity threshold and the
        properties computed from them.
    'extent': the spatial extent of the well-defined rigid objects.
    'euler_number': total number of objects in the image minus the total
        number of holes in those objects.
    'eccentricity': how elongated an object is.
    'area': area of an object.
    'otsu-threshold': Otsu's method, used to automatically perform
        clustering-based image thresholding (reduction of a gray-level
        image to a binary image).

    Args:
        df: DataFrame of features; only its column names are inspected here,
            the data itself is handed to ``gene_distance_score``.

    Returns:
        DataFrame indexed by pattern name with the columns
        ``t_stat_p_comp``, ``t_stat_comp``, ``ks_stat_p_comp`` and
        ``ks_stat_comp``.

    Side effects:
        Opens a MongoDB connection with the default ``MongoClient()``
        settings, reads the ``myofusion_test.wells_test`` collection and
        closes the connection before returning.
    """
    names = [
        'original', 'cells-with', 'frac', 'd-neighbor', 'theta', 'cos',
        'sin', 'adaptive', 'solidity', 'intensity', 'extent',
        'euler_number', 'eccentricity', 'area', 'otsu-threshold',
    ]
    t_stat_p_comp = []
    t_stat_comp = []
    ks_stat_p_comp = []
    ks_stat_comp = []

    client = MongoClient()
    try:
        coll = client['myofusion_test']['wells_test']
        for pattern in names:
            # Keep only the columns that do NOT belong to the current group.
            reduced_column = [c for c in df.columns if pattern not in c.lower()]
            reduced_df = df[reduced_column]
            # NOTE(review): the module is referenced as `metric` here while
            # the tests in this file use `metrics` -- confirm the import alias.
            intra, inter = metric.gene_distance_score(reduced_df, coll)

            # Two-sample t-test: statistic and p-value.
            t_stat, t_stat_p = scipy.stats.ttest_ind(intra, inter)
            t_stat_p_comp.append(t_stat_p)
            t_stat_comp.append(t_stat)

            # Two-sample Kolmogorov-Smirnov test: statistic and p-value.
            ks_stat, ks_stat_p = scipy.stats.ks_2samp(intra, inter)
            ks_stat_p_comp.append(ks_stat_p)
            ks_stat_comp.append(ks_stat)
    finally:
        # Release the connection even if scoring or a test raises.
        client.close()

    final_dataframe = DataFrame({
        't_stat_p_comp': t_stat_p_comp,
        't_stat_comp': t_stat_comp,
        'ks_stat_p_comp': ks_stat_p_comp,
        'ks_stat_comp': ks_stat_comp
    })
    final_dataframe.index = names
    return final_dataframe
def test_gene_distance_score2():
    """Check that the mean intra score is below the mean inter score."""
    within_gene, between_genes = metrics.gene_distance_score(
        test_data, collection)
    mean_within = np.mean(within_gene)
    mean_between = np.mean(between_genes)
    assert mean_within < mean_between
def test_gene_distance_score():
    """Check the intra scores against entries of the reference distance matrix.

    The expected values are ``test_distance[2*i][2*i+1]`` for ``i`` in 0..3,
    i.e. the distance between each pair of consecutive rows (0,1), (2,3),
    (4,5), (6,7); they must match the returned intra scores to 4 decimals.
    The inter scores are not checked here.
    """
    # This test was previously defined twice under the same name, so the
    # second definition silently replaced the first; one definition is kept.
    expected_intra = [test_distance[2 * i][2 * i + 1] for i in range(4)]
    intra, _ = metrics.gene_distance_score(test_data, collection)
    np.testing.assert_array_almost_equal(expected_intra, intra, decimal=4)