def run_cc_net_nmf_clusters_worker(network_mat, spreadsheet_mat, lap_dag, lap_val, run_parameters, sample):
    """Run one network-NMF clustering pass on a random sub-sample of the data.

    The spreadsheet is sub-sampled, smoothed over the network with RWR,
    quantile-normalized and factorized with network-regularized NMF; the
    resulting clustering is written out via save_a_clustering_to_tmp.

    Args:
        network_mat: genes x genes symmetric matrix.
        spreadsheet_mat: genes x samples matrix.
        lap_dag: laplacian matrix component, L = lap_dag - lap_val.
        lap_val: laplacian matrix component, L = lap_dag - lap_val.
        run_parameters: dictionary of run-time parameters; the keys
            "rows_sampling_fraction" and "cols_sampling_fraction" are read here.
        sample: index of the current iteration; also used as the numpy
            random seed for this pass.

    Returns:
        None
    """
    # Seeding with the iteration index makes each pass reproducible on its own.
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]

    mat, sample_permutation = kn.sample_a_matrix(spreadsheet_mat, row_fraction, col_fraction)

    # The second return value of the smoothing step is not needed here.
    mat, _ = kn.smooth_matrix_with_rwr(mat, network_mat, run_parameters)
    mat = kn.get_quantile_norm_matrix(mat)

    h_mat = kn.perform_net_nmf(mat, lap_val, lap_dag, run_parameters)

    save_a_clustering_to_tmp(h_mat, sample_permutation, run_parameters, sample)
def run_cc_kmeans_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one k-means clustering pass on a random sub-sample of the data.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; the keys
            "rows_sampling_fraction", "cols_sampling_fraction" and
            "number_of_clusters" are read here.
        sample: index of the current iteration; also used as the numpy
            random seed for this pass.

    Returns:
        None
    """
    # Imported at function scope, as in the original worker.
    import knpackage.toolbox as kn
    import numpy as np

    # Seeding with the iteration index makes each pass reproducible on its own.
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    n_clusters = run_parameters["number_of_clusters"]

    sampled_mat, sample_permutation = kn.sample_a_matrix(spreadsheet_mat, row_fraction, col_fraction)

    # k-means is run on the transpose of the sampled spreadsheet.
    labels = kn.perform_kmeans(sampled_mat.T, n_clusters)
    h_mat = labels_to_hmat(labels, n_clusters)

    kn.save_a_clustering_to_tmp(h_mat, sample_permutation, run_parameters, sample)
def run_cc_nmf_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one NMF clustering pass on a random sub-sample of the data.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; the keys
            "rows_sampling_fraction" and "cols_sampling_fraction" are read here.
        sample: index of the current iteration; also used as the numpy
            random seed for this pass.

    Returns:
        None
    """
    # Seeding with the iteration index makes each pass reproducible on its own.
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]

    sampled_mat, sample_permutation = kn.sample_a_matrix(spreadsheet_mat, row_fraction, col_fraction)

    h_mat = kn.perform_nmf(sampled_mat, run_parameters)

    save_a_clustering_to_tmp(h_mat, sample_permutation, run_parameters, sample)
def run_cc_hclust_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one hierarchical clustering pass on a random sub-sample of the data.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; the keys
            "rows_sampling_fraction", "cols_sampling_fraction",
            "number_of_clusters", 'affinity_metric' and 'linkage_criterion'
            are read here.
        sample: index of the current iteration; also used as the numpy
            random seed for this pass.

    Returns:
        None
    """
    # Imported at function scope, as in the original worker.
    import knpackage.toolbox as kn
    import numpy as np

    # Seeding with the iteration index makes each pass reproducible on its own.
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    n_clusters = run_parameters["number_of_clusters"]
    metric = run_parameters['affinity_metric']
    linkage = run_parameters['linkage_criterion']

    sampled_mat, sample_permutation = kn.sample_a_matrix(spreadsheet_mat, row_fraction, col_fraction)

    # perform_hclust returns a pair; only the labels are used here.
    hclust_result = perform_hclust(sampled_mat, n_clusters, metric, linkage)
    labels, _ = hclust_result
    h_mat = labels_to_hmat(labels, n_clusters)

    kn.save_a_clustering_to_tmp(h_mat, sample_permutation, run_parameters, sample)
def test_sample_a_matrix(self):
    """Check the shape, permutation and zeroed rows produced by kn.sample_a_matrix.

    Asserts that the random sample has the proper number of columns, that the
    returned permutation points to the correct source columns, and that the
    number of entries set to zero in every sampled column is correct.
    """
    n_test_rows = 11
    n_test_cols = 5
    pct_smpl = 0.6
    n_zero_rows = int(np.round(n_test_rows * (1 - pct_smpl)))
    n_smpl_cols = int(np.round(n_test_cols * pct_smpl))
    epsilon_sum = max(n_test_rows, n_test_cols) * 1e-15

    # The offset keeps every entry of A strictly positive, so an exact zero
    # in B cannot come from the input data itself.
    A = np.random.rand(n_test_rows, n_test_cols) + epsilon_sum
    B, P = kn.sample_a_matrix(A, pct_smpl, pct_smpl)

    self.assertEqual(B.shape[1], P.size, msg='permutation size not equal columns')
    self.assertEqual(P.size, n_smpl_cols, msg='number of sample columns exception')

    perm_err_sum = 0
    n_zero_err_sum = 0
    for B_col, A_col in enumerate(P):
        zero_mask = B[:, B_col] == 0
        if np.int_(zero_mask).sum() != n_zero_rows:
            n_zero_err_sum += 1

        # Compare the sampled column with its claimed source column, ignoring
        # the entries that were zeroed out by the sampling.
        C = A[:, A_col] - B[:, B_col]
        C[zero_mask] = 0

        # Use the absolute difference: a signed sum lets negative or mutually
        # cancelling mismatches slip under the threshold undetected.
        if np.abs(C).sum() > epsilon_sum:
            perm_err_sum += 1

    self.assertEqual(n_zero_err_sum, 0, msg='number of zero columns exception')
    self.assertEqual(perm_err_sum, 0, msg='permutation index exception')