def intra_vs_inter_gene_dist_hist(df):
    """Compare intra vs inter gene distance scores while dropping features.

    For each pattern listed below, every column of ``df`` whose lower-cased
    name contains the pattern is dropped, ``metric.gene_distance_score`` is
    evaluated on the remaining columns, and the returned ``intra`` and
    ``inter`` samples are compared with a two-sample t-test and a two-sample
    Kolmogorov-Smirnov test.

    Patterns:
    'original': without dropping any features (this relies on no column
     name containing the substring 'original')
    'cells-with': the proportion of cells with each nucleus count (several
     nucleus count groups can be formed because a number of cells have the
     same number of nuclei)
    'frac': fraction of nuclei that express a particular transcription factor
    'd-neighbor': distances between the n nearest neighbors in an image of
     labeled nuclei
    'theta': angles between nearest neighbors in an image of labeled nuclei
    'cos': cosine of angles between nearest neighbors in an image of labeled
     nuclei
    'sin': sine of angles between nearest neighbors in an image of labeled
     nuclei
    'adaptive': if there is large variation in the background intensity,
     adaptive or local threshold is used
    'solidity': not described in the original documentation; presumably the
     solidity shape property of a segmented object (TODO confirm)
    'intensity': segment objects based on intensity threshold and compute
     properties
    'extent': the spatial extent of the well defined rigid objects is the
     complete object itself
    'euler_number': a scalar whose value is the total number of objects in
     the image minus the total number of holes in those objects
    'eccentricity': how elongated it is
    'area': area of an object
    'otsu-threshold': otsu's method is used to automatically perform
     clustering-based image thresholding or the reduction of a graylevel
     image to a binary image

    Args:
        df: DataFrame of features; its column names are matched against the
            patterns above as case-insensitive substrings.

    Returns:
        A DataFrame indexed by pattern name with the columns
        't_stat_p_comp', 't_stat_comp', 'ks_stat_p_comp' and 'ks_stat_comp'
        holding the p-value and statistic of each test.
    """

    names = [
        'original', 'cells-with', 'frac', 'd-neighbor', 'theta', 'cos', 'sin',
        'adaptive', 'solidity', 'intensity', 'extent', 'euler_number',
        'eccentricity', 'area', 'otsu-threshold'
    ]
    t_stat_p_comp = []
    t_stat_comp = []
    ks_stat_p_comp = []
    ks_stat_comp = []

    # Keep a handle on the client so its connections can be released once
    # all scores are computed, even if one of the computations raises.
    client = MongoClient()
    try:
        coll = client['myofusion_test']['wells_test']

        for pattern in names:
            # Substring match on the lower-cased column name decides which
            # features are removed for this run.
            kept_columns = [c for c in df.columns if pattern not in c.lower()]
            reduced_df = df[kept_columns]
            intra, inter = metric.gene_distance_score(reduced_df, coll)

            # Two-sample t-test statistic and p-value.
            t_stat, t_stat_p = scipy.stats.ttest_ind(intra, inter)
            t_stat_p_comp.append(t_stat_p)
            t_stat_comp.append(t_stat)

            # Kolmogorov-Smirnov statistic and p-value on the two samples.
            ks_stat, ks_stat_p = scipy.stats.ks_2samp(intra, inter)
            ks_stat_p_comp.append(ks_stat_p)
            ks_stat_comp.append(ks_stat)
    finally:
        client.close()

    final_dataframe = DataFrame({
        't_stat_p_comp': t_stat_p_comp,
        't_stat_comp': t_stat_comp,
        'ks_stat_p_comp': ks_stat_p_comp,
        'ks_stat_comp': ks_stat_comp
    })

    final_dataframe.index = names
    return final_dataframe
Example #2
0
def test_gene_distance_score2():
    """Check that the mean of the ``intra`` scores is below the ``inter`` mean."""
    scores = metrics.gene_distance_score(test_data, collection)
    intra_scores, inter_scores = scores
    mean_intra = np.mean(intra_scores)
    mean_inter = np.mean(inter_scores)
    assert mean_intra < mean_inter
Example #3
0
def test_gene_distance_score2():
    """The average ``intra`` score must be strictly smaller than the average ``inter`` score."""
    intra_values, inter_values = metrics.gene_distance_score(
        test_data, collection)
    intra_mean = np.mean(intra_values)
    assert intra_mean < np.mean(inter_values)
Example #4
0
def test_gene_distance_score():
    """Check the ``intra`` scores against entries taken from ``test_distance``.

    The expected values are ``test_distance[2*i][2*i+1]`` for ``i`` in 0..3,
    compared to 4 decimal places.
    """
    # Entries (0, 1), (2, 3), (4, 5) and (6, 7) of the reference matrix.
    expected_intra = [test_distance[2 * k][2 * k + 1] for k in range(4)]
    intra_scores, _ = metrics.gene_distance_score(test_data, collection)
    np.testing.assert_array_almost_equal(
        expected_intra, intra_scores, decimal=4)
Example #5
0
def test_gene_distance_score():
    """Verify that the returned ``intra`` scores match ``test_distance``.

    Expected values are read from ``test_distance`` at positions
    (0, 1), (2, 3), (4, 5) and (6, 7); agreement is required to 4 decimals.
    """
    expected_intra = [
        test_distance[row][row + 1] for row in range(0, 8, 2)
    ]
    result = metrics.gene_distance_score(test_data, collection)
    intra_scores, _inter_scores = result
    np.testing.assert_array_almost_equal(
        expected_intra, intra_scores, decimal=4)