Example #1
0
def __run_iteration__rows(title, desc, data, tree, samples, otus, rows_filter, cols_filter, table):
    """Run a single clustering iteration over the rows (OTUs).

    Computes the OTU distance matrix under the given filters, plots it, picks
    indices with ``ctwc__cluster_rank.filter_rows_by_top_rank``, plots the
    matrix again with rows and columns sorted by that selection and, when
    ``table`` is given, logs the taxonomies of the picked OTUs.

    Args:
        title: Prefix used in the header of both plots.
        desc: Not used by this function.
        data, tree, samples, otus: Forwarded to
            ``ctwc__distance_matrix.get_distance_matrices``.
        rows_filter: OTU filter used for the distance computation and as the
            base filter passed to ``__prepare_otu_filters_from_indices``.
        cols_filter: Sample filter used for the distance computation; its
            length is subtracted from ``len(samples)`` for the sample count.
        table: When not ``None``, used to look up the picked OTUs so their
            taxonomies can be logged.

    Returns:
        ``((num_otus, num_samples), selected_rows_filter, compliment_rows_filter)``
        where ``num_otus`` is the number of picked indices and ``num_samples``
        is ``len(samples) - len(cols_filter)``.
    """
    header_fmt = "{0}: {1}"

    # Column (sample) distances are skipped; only the first result is used.
    otu_distances, _ = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        sample_filter=cols_filter,
        otu_filter=rows_filter,
        skip_cols=True)
    ctwc__plot.plot_mat(otu_distances,
                        header=header_fmt.format(title, "OTUs Distance Matrix"))

    # Only the picked indices are needed out of the six returned values.
    rank_result = ctwc__cluster_rank.filter_rows_by_top_rank(data, otu_distances, otus)
    chosen, _, _, _, _, _ = rank_result

    filters = __prepare_otu_filters_from_indices(chosen, otus, rows_filter)
    selected_rows_filter, compliment_rows_filter = filters

    # Apply the selection ordering to the rows first, then to the columns.
    reordered = __sort_matrix_cols_by_selection(
        __sort_matrix_rows_by_selection(otu_distances, chosen),
        chosen)
    ctwc__plot.plot_mat(reordered,
                        header=header_fmt.format(title, "OTUs Distance Matrix - sorted"))

    if table is not None:
        chosen_otus = ctwc__data_handler.get_otus_by_indices(chosen, table)
        otu_taxonomies = ctwc__data_handler.get_taxonomies_for_otus(chosen_otus)
        INFO("Picked OTUs:")
        for entry in otu_taxonomies:
            INFO(entry)

    dimensions = (len(chosen), len(samples) - len(cols_filter))
    return dimensions, selected_rows_filter, compliment_rows_filter
Example #2
0
def test_otu_distance_matrix(samples, otus, tree, data, table):
    """Check OTU filtering in the row distance matrix.

    Overwrites every fourth row of ``data`` with 50 (in place), builds an OTU
    filter from indices 0..95 in two chained steps (0..35, then 36..95 on top
    of the first filter), computes the row distance matrix with that filter
    and asserts the number of ``INF_VALUE`` cells per row.

    ``table`` is accepted but not used.
    """
    INFO("Test OTU distance matrix and filtering...")
    data[::4, :] = 50

    # Build the filter incrementally: the second call extends the first one.
    first_filter, _ = ctwc.__prepare_otu_filters_from_indices(range(36), otus, [])
    otu_filter, _ = ctwc.__prepare_otu_filters_from_indices(range(36, 96),
                                                         otus,
                                                         first_filter)

    rows_dist, _ = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=otu_filter,
        sample_filter=[],
        skip_cols=True)

    INFO("Test filtering...")
    inf_value = ctwc__distance_matrix.INF_VALUE
    for row_ind, row in enumerate(rows_dist):
        inf_cells = sum(1 for cell in row if cell == inf_value)
        # Rows below index 96 (presumably the filtered OTUs) must be INF in
        # all cells but one; every other row must have exactly 96 INF cells.
        expected = data.shape[0] - 1 if row_ind < 96 else 96
        ASSERT(inf_cells == expected)

    INFO("Passed OTU distance matrix and filtering tests")
Example #3
0
def test_sample_distance_matrix(samples, otus, tree, data, table):
    """Check sample filtering in the column distance matrix.

    Overwrites every fifteenth column of ``data`` with 50 (in place), builds a
    sample filter from indices 0..95 in two chained steps (0..35, then 36..95
    on top of the first filter), computes the column distance matrix with that
    filter and asserts the number of ``INF_VALUE`` cells per row of it.

    ``table`` is accepted but not used.
    """
    INFO("Test sample distance matrix and filtering...")
    data[:, ::15] = 50

    # Build the filter incrementally: the second call extends the first one.
    first_filter, _ = ctwc.__prepare_sample_filters_from_indices(range(0, 36),
                                                              samples,
                                                              [])
    sample_filter, _ = ctwc.__prepare_sample_filters_from_indices(range(36, 96),
                                                               samples,
                                                               first_filter)

    _, cols_dist = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=[],
        sample_filter=sample_filter,
        skip_rows=True)

    INFO("Test filtering...")
    inf_value = ctwc__distance_matrix.INF_VALUE
    for row_ind, row in enumerate(cols_dist):
        inf_cells = sum(1 for cell in row if cell == inf_value)
        # Rows below index 96 (presumably the filtered samples) must be INF in
        # all cells but one; every other row must have exactly 96 INF cells.
        expected = data.shape[1] - 1 if row_ind < 96 else 96
        ASSERT(inf_cells == expected)
Example #4
0
def test():
    """Smoke-test column ranking followed by 1D clustering on the sample table.

    Loads the sample BIOM table and the tree from ``ctwc__data_handler``,
    computes the column distance matrix, runs ``filter_cols_by_top_rank`` on
    it, logs how many indices were picked and finally calls
    ``ctwc__cluster_1d.cluster_rows`` on the transposed filtered data. The
    clustering results are not inspected.
    """
    data, otus, samples = ctwc__data_handler.get_sample_biom_table()
    tree = ctwc__data_handler.get_gg_97_otu_tree()

    # Row distances are skipped; only the column matrix is used below.
    _, cols_dist = ctwc__distance_matrix.get_distance_matrices(
        data, tree, samples, otus, skip_rows=True)

    # NOTE(review): `otus` is passed as the third argument here; confirm this
    # is the intended argument for a column-wise ranking.
    ranked = filter_cols_by_top_rank(data, cols_dist, otus, True)
    picked, _, filtered_data, _, _, _ = ranked

    INFO("Picked {0} indices".format(len(picked)))

    # Three values are returned; none of them is used afterwards.
    _clust, _labels, _ag = ctwc__cluster_1d.cluster_rows(filtered_data.transpose(),
                                                         cols_dist)
Example #5
0
def __run_iteration__cols(title, desc, data, tree, samples, otus, rows_filter, cols_filter, table):
    """Run a single clustering iteration over the columns (samples).

    Computes the sample distance matrix under the given filters, plots it,
    picks indices with ``ctwc__cluster_rank.filter_cols_by_top_rank``, plots
    the matrix again with rows and columns sorted by that selection, logs the
    selection and, when ``table`` is given, logs the collection dates of the
    picked samples.

    Args:
        title: Prefix used in the header of both plots.
        desc: Not used by this function.
        data, tree, samples, otus: Forwarded to
            ``ctwc__distance_matrix.get_distance_matrices``.
        rows_filter: OTU filter used for the distance computation; its length
            is subtracted from ``len(otus)`` for the OTU count.
        cols_filter: Sample filter used for the distance computation and as
            the base filter passed to ``__prepare_sample_filters_from_indices``.
        table: When not ``None``, used to look up the picked samples so their
            collection dates can be logged.

    Returns:
        ``((num_otus, num_samples), selected_cols_filter, compliment_cols_filter)``
        where ``num_otus`` is ``len(otus) - len(rows_filter)`` and
        ``num_samples`` is the number of picked indices.
    """
    header_fmt = "{0}: {1}"

    # Row (OTU) distances are skipped; only the second result is used.
    _, sample_distances = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=rows_filter,
        sample_filter=cols_filter,
        skip_rows=True)
    ctwc__plot.plot_mat(sample_distances,
                        header=header_fmt.format(title, "Samples Distance Matrix"))

    # Only the picked indices are needed out of the six returned values.
    rank_result = ctwc__cluster_rank.filter_cols_by_top_rank(data, sample_distances, samples)
    chosen, _, _, _, _, _ = rank_result

    filters = __prepare_sample_filters_from_indices(chosen, samples, cols_filter)
    selected_cols_filter, compliment_cols_filter = filters

    # Apply the selection ordering to the rows first, then to the columns.
    reordered = __sort_matrix_cols_by_selection(
        __sort_matrix_rows_by_selection(sample_distances, chosen),
        chosen)
    ctwc__plot.plot_mat(reordered,
                        header=header_fmt.format(title, "Samples Distance Matrix - sorted"))

    INFO("Selected {0} samples:".format(len(chosen)))
    DEBUG(chosen)
    if table is not None:
        chosen_samples = ctwc__data_handler.get_samples_by_indices(chosen, table)
        DEBUG(chosen_samples)
        collection_dates = ctwc__data_handler.get_collection_dates_for_samples(chosen_samples)
        INFO("Collection dates for selected samples:")
        for entry in collection_dates:
            INFO(entry)

    dimensions = (len(otus) - len(rows_filter), len(chosen))
    return dimensions, selected_cols_filter, compliment_cols_filter