def __run_iteration__rows(title, desc, data, tree, samples, otus, rows_filter, cols_filter, table):
    """Run a single iteration along the rows (OTUs) of ``data``.

    Steps, in order:
      1. Ask ``ctwc__distance_matrix.get_distance_matrices`` for the rows
         distance matrix under the current filters (``skip_cols=True``)
         and plot it.
      2. Let ``ctwc__cluster_rank.filter_rows_by_top_rank`` pick row indices.
      3. Convert the picked indices into two OTU filters via
         ``__prepare_otu_filters_from_indices``.
      4. Pass the distance matrix through the row- and column-sorting
         helpers (keyed on the picked indices) and plot the result.
      5. When ``table`` is not ``None``, log the taxonomy of each picked OTU.

    Args:
        title: Prefix used in the header of both plots.
        desc: Not used by this function.
        data: Forwarded to the distance-matrix and ranking helpers.
        tree: Forwarded to the distance-matrix helper.
        samples: Forwarded to the distance-matrix helper; its length is
            used for the returned sample count.
        otus: Forwarded to the distance-matrix, ranking and filter helpers.
        rows_filter: Current OTU filter; passed as ``otu_filter``.
        cols_filter: Current sample filter; passed as ``sample_filter``.
        table: Optional lookup table for OTU taxonomies; ``None`` skips
            the taxonomy logging.

    Returns:
        A 3-tuple ``((num_otus, num_samples), selected, complement)`` where
        ``num_otus`` is the number of picked indices, ``num_samples`` is
        ``len(samples) - len(cols_filter)``, and ``selected`` / ``complement``
        are the two values returned by ``__prepare_otu_filters_from_indices``.
    """
    otu_dist, _ = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        sample_filter=cols_filter,
        otu_filter=rows_filter,
        skip_cols=True,
    )
    unsorted_header = "{0}: {1}".format(title, "OTUs Distance Matrix")
    ctwc__plot.plot_mat(otu_dist, header=unsorted_header)

    # Only the first of the six returned values is needed here.
    chosen, _, _, _, _, _ = ctwc__cluster_rank.filter_rows_by_top_rank(data, otu_dist, otus)
    selected_filter, complement_filter = __prepare_otu_filters_from_indices(
        chosen, otus, rows_filter
    )

    # Apply the same selection to rows first, then to columns.
    reordered = __sort_matrix_cols_by_selection(
        __sort_matrix_rows_by_selection(otu_dist, chosen),
        chosen,
    )
    sorted_header = "{0}: {1}".format(title, "OTUs Distance Matrix - sorted")
    ctwc__plot.plot_mat(reordered, header=sorted_header)

    if table is not None:
        chosen_otus = ctwc__data_handler.get_otus_by_indices(chosen, table)
        otu_taxonomies = ctwc__data_handler.get_taxonomies_for_otus(chosen_otus)
        INFO("Picked OTUs:")
        for entry in otu_taxonomies:
            INFO(entry)

    shape = (len(chosen), len(samples) - len(cols_filter))
    return shape, selected_filter, complement_filter
def test_otu_distance_matrix(samples, otus, tree, data, table):
    """Check the OTU (rows) distance matrix against an incrementally built filter.

    Every fourth row of ``data`` is overwritten with 50 (in place), then an
    OTU filter is built in two steps: indices 0..35 first, then 36..95 on
    top of the first filter. The rows distance matrix is computed with that
    filter and, for each of its rows, the cells equal to
    ``ctwc__distance_matrix.INF_VALUE`` are counted:

      * rows with index below 96 must have ``data.shape[0] - 1`` such cells;
      * all other rows must have exactly 96.

    ``table`` is accepted but not used.
    """
    INFO("Test OTU distance matrix and filtering...")
    data[::4, :] = 50

    # Build the filter in two stages so the second call extends the first.
    first_stage, _ = ctwc.__prepare_otu_filters_from_indices(range(36), otus, [])
    otu_filter, _ = ctwc.__prepare_otu_filters_from_indices(range(36, 96), otus, first_stage)

    dist_rows, _ = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=otu_filter,
        sample_filter=[],
        skip_cols=True,
    )

    INFO("Test filtering...")
    for idx, dist_row in enumerate(dist_rows):
        inf_cells = sum(1 for value in dist_row if value == ctwc__distance_matrix.INF_VALUE)
        expected = data.shape[0] - 1 if idx < 96 else 96
        ASSERT(inf_cells == expected)

    INFO("Passed OTU distance matrix and filtering tests")
def test_sample_distance_matrix(samples, otus, tree, data, table):
    """Check the sample (columns) distance matrix against an incrementally built filter.

    Every fifteenth column of ``data`` is overwritten with 50 (in place),
    then a sample filter is built in two steps: indices 0..35 first, then
    36..95 on top of the first filter. The columns distance matrix is
    computed with that filter and, for each of its rows, the cells equal to
    ``ctwc__distance_matrix.INF_VALUE`` are counted:

      * rows with index below 96 must have ``data.shape[1] - 1`` such cells;
      * all other rows must have exactly 96.

    ``table`` is accepted but not used.
    """
    INFO("Test sample distance matrix and filtering...")
    data[:, ::15] = 50

    # Build the filter in two stages so the second call extends the first.
    first_stage, _ = ctwc.__prepare_sample_filters_from_indices(range(0, 36), samples, [])
    sample_filter, _ = ctwc.__prepare_sample_filters_from_indices(
        range(36, 96), samples, first_stage
    )

    _, dist_cols = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=[],
        sample_filter=sample_filter,
        skip_rows=True,
    )

    INFO("Test filtering...")
    for idx, dist_row in enumerate(dist_cols):
        inf_cells = sum(1 for value in dist_row if value == ctwc__distance_matrix.INF_VALUE)
        expected = data.shape[1] - 1 if idx < 96 else 96
        ASSERT(inf_cells == expected)
def test():
    """Smoke-test column ranking followed by 1-D clustering on the sample table.

    Loads the sample BIOM table and the tree from ``ctwc__data_handler``,
    computes the columns distance matrix (``skip_rows=True``), runs
    ``filter_cols_by_top_rank`` on it, logs how many indices were picked and
    finally calls ``ctwc__cluster_1d.cluster_rows`` on the transposed
    filtered data. Nothing is returned or asserted.
    """
    data, otus, samples = ctwc__data_handler.get_sample_biom_table()
    tree = ctwc__data_handler.get_gg_97_otu_tree()

    _, cols_dist = ctwc__distance_matrix.get_distance_matrices(
        data, tree, samples, otus, skip_rows=True
    )

    # NOTE(review): `otus` is passed as the third argument here, whereas
    # __run_iteration__cols passes `samples` to the same-named function on
    # ctwc__cluster_rank - confirm which is intended.
    ranking = filter_cols_by_top_rank(data, cols_dist, otus, True)
    chosen, _top_rank, kept_data, _kept_dist, _, _ = ranking
    INFO("Picked {0} indices".format(len(chosen)))

    # NOTE(review): the filtered distance matrix (_kept_dist) is unused and
    # the unfiltered cols_dist is handed to cluster_rows - confirm intended.
    _clusters, _labels, _model = ctwc__cluster_1d.cluster_rows(kept_data.transpose(), cols_dist)
def __run_iteration__cols(title, desc, data, tree, samples, otus, rows_filter, cols_filter, table):
    """Run a single iteration along the columns (samples) of ``data``.

    Steps, in order:
      1. Ask ``ctwc__distance_matrix.get_distance_matrices`` for the columns
         distance matrix under the current filters (``skip_rows=True``)
         and plot it.
      2. Let ``ctwc__cluster_rank.filter_cols_by_top_rank`` pick indices.
      3. Convert the picked indices into two sample filters via
         ``__prepare_sample_filters_from_indices``.
      4. Pass the distance matrix through the row- and column-sorting
         helpers (keyed on the picked indices) and plot the result.
      5. Log how many samples were picked; when ``table`` is not ``None``,
         also log the collection dates of the picked samples.

    Args:
        title: Prefix used in the header of both plots.
        desc: Not used by this function.
        data: Forwarded to the distance-matrix and ranking helpers.
        tree: Forwarded to the distance-matrix helper.
        samples: Forwarded to the distance-matrix, ranking and filter helpers.
        otus: Forwarded to the distance-matrix helper; its length is used
            for the returned OTU count.
        rows_filter: Current OTU filter; passed as ``otu_filter``.
        cols_filter: Current sample filter; passed as ``sample_filter``.
        table: Optional lookup table for sample metadata; ``None`` skips
            the collection-date logging.

    Returns:
        A 3-tuple ``((num_otus, num_samples), selected, complement)`` where
        ``num_otus`` is ``len(otus) - len(rows_filter)``, ``num_samples`` is
        the number of picked indices, and ``selected`` / ``complement`` are
        the two values returned by ``__prepare_sample_filters_from_indices``.
    """
    sample_dist = ctwc__distance_matrix.get_distance_matrices(
        data,
        tree,
        samples,
        otus,
        otu_filter=rows_filter,
        sample_filter=cols_filter,
        skip_rows=True,
    )[1]
    unsorted_header = "{0}: {1}".format(title, "Samples Distance Matrix")
    ctwc__plot.plot_mat(sample_dist, header=unsorted_header)

    # Only the first of the six returned values is needed here.
    chosen, _, _, _, _, _ = ctwc__cluster_rank.filter_cols_by_top_rank(data, sample_dist, samples)
    selected_filter, complement_filter = __prepare_sample_filters_from_indices(
        chosen, samples, cols_filter
    )

    # Apply the same selection to rows first, then to columns.
    reordered = __sort_matrix_cols_by_selection(
        __sort_matrix_rows_by_selection(sample_dist, chosen),
        chosen,
    )
    sorted_header = "{0}: {1}".format(title, "Samples Distance Matrix - sorted")
    ctwc__plot.plot_mat(reordered, header=sorted_header)

    INFO("Selected {0} samples:".format(len(chosen)))
    DEBUG(chosen)

    if table is not None:
        chosen_samples = ctwc__data_handler.get_samples_by_indices(chosen, table)
        DEBUG(chosen_samples)
        collection_dates = ctwc__data_handler.get_collection_dates_for_samples(chosen_samples)
        INFO("Collection dates for selected samples:")
        for entry in collection_dates:
            INFO(entry)

    shape = (len(otus) - len(rows_filter), len(chosen))
    return shape, selected_filter, complement_filter