def test_params(my_N, my_ind, my_v):
    """Check that each optional argument is accepted and yields an mHGResult.

    The test is run once per option (``X``, ``L``, ``pval_thresh`` and a
    caller-supplied ``table``), and the type of the returned object is
    verified each time.
    """
    # Table with (my_N + 1) rows and columns, passed via the `table` option.
    dp_table = np.empty((my_N + 1, my_N + 1), np.longdouble)

    option_sets = (
        {'X': 1},
        {'L': my_N},
        {'pval_thresh': 0.05},
        {'table': dp_table},
    )
    for options in option_sets:
        outcome = get_xlmhg_test_result(my_N, my_ind, **options)
        assert isinstance(outcome, mHGResult)
def test_figure(tmpdir):
    """Render the result figure with several option sets and write each
    one to an HTML file, checking that the file exists afterwards."""
    v = np.uint8([1, 0, 1, 1, 0, 1] + [0] * 12 + [1, 0])
    X = 3
    L = 10
    N = v.size
    indices = np.uint16(np.nonzero(v)[0])

    result = xlmhg.get_xlmhg_test_result(N, indices, X=X, L=L)

    # (output file name, keyword arguments for get_result_figure)
    variants = (
        ('plot1.html', {}),
        ('plot2.html', {'width': 500, 'height': 350}),
        ('plot3.html', {'show_title': True, 'show_inset': False}),
    )
    for file_name, fig_options in variants:
        fig = xlmhg.get_result_figure(result, **fig_options)
        output_file = text(tmpdir.join(file_name))
        plot(fig, filename=output_file, auto_open=False)
        assert os.path.isfile(output_file)
def test_significant2(my_N, my_ind):
    """Test that the exact p-value is returned for a significant test when
    requested, even if it is not needed to establish significance."""
    expected_pval = 0.0244453044375645
    res = get_xlmhg_test_result(
        my_N, my_ind,
        pval_thresh=0.045,
        exact_pval='if_significant',
    )
    assert res.pval == expected_pval
def test_limit_pval(my_incredible_pval_v):
    """Test differential accuracy of the two p-value algorithms."""
    n_total = my_incredible_pval_v.size
    positions = np.uint16(np.nonzero(my_incredible_pval_v)[0])
    expected_stat = 1.5112233509292993e-216
    expected_cutoff = 200

    # Default algorithm: expected to handle this input without problems,
    # so the p-value equals the test statistic.
    res = get_xlmhg_test_result(n_total, positions)
    assert res.stat == expected_stat
    assert res.cutoff == expected_cutoff
    assert res.pval == res.stat

    # Algorithm selected by use_alg1=True: expected to produce an invalid
    # p-value here (either <= 0 or unrealistically large; in this case < 0),
    # which the front-end replaces with the O(1)-bound. The reported p-value
    # is therefore only bounded, not equal to the statistic.
    # NOTE(review): the previous comments labelled the default run "PVAL1"
    # and this run "PVAL2", which looks swapped given the flag name -- confirm.
    res = get_xlmhg_test_result(n_total, positions, use_alg1=True)
    assert res.stat == expected_stat
    assert res.cutoff == expected_cutoff
    assert res.stat < res.pval < 1e-200
def my_rank_based_result(my_v, my_ranked_genes, my_gene_set):
    """Build a `RankBasedGSEResult` from an XL-mHG test on ``my_v``.

    The test is run with X=1 and L equal to the length of ``my_v``. The
    genes found at the non-zero positions of ``my_v`` must be exactly the
    genes of ``my_gene_set``.
    """
    n_genes = my_v.size
    min_hits = 1
    lowest_cutoff = n_genes

    hit_positions = np.uint16(np.nonzero(my_v)[0])
    hit_genes = [my_ranked_genes[pos] for pos in hit_positions]
    assert set(hit_genes) == my_gene_set.genes

    test_res = get_xlmhg_test_result(
        n_genes, hit_positions, min_hits, lowest_cutoff)

    return RankBasedGSEResult(
        my_gene_set, n_genes, hit_positions, hit_genes,
        min_hits, lowest_cutoff,
        test_res.stat, test_res.cutoff, test_res.pval)
def my_rank_based_result(my_v, my_ranked_genes, my_gene_set):
    """Return a `RankBasedGSEResult` for the gene set marked in ``my_v``.

    Runs the XL-mHG test with X=1 and L=N (N being the size of ``my_v``)
    on the indices of the non-zero entries of ``my_v``, after checking that
    the genes at those indices match ``my_gene_set.genes``.
    """
    N = my_v.size
    X, L = 1, N

    indices = np.uint16(np.nonzero(my_v)[0])
    ind_genes = []
    for idx in indices:
        ind_genes.append(my_ranked_genes[idx])
    assert set(ind_genes) == my_gene_set.genes

    xlmhg_res = get_xlmhg_test_result(N, indices, X, L)
    stat, cutoff, pval = xlmhg_res.stat, xlmhg_res.cutoff, xlmhg_res.pval

    gse_result = RankBasedGSEResult(
        my_gene_set, N, indices, ind_genes, X, L, stat, cutoff, pval)
    return gse_result
def my_rank_based_result(my_matrix, my_v):
    """Create a `RankBasedGSEResult` for a gene set derived from ``my_v``.

    The gene set consists of the genes of ``my_matrix`` located at the
    non-zero positions of ``my_v``. The XL-mHG test is run with X=1 and
    L equal to the size of ``my_v``.
    """
    n_genes = my_v.size
    min_hits = 1
    lowest_cutoff = n_genes

    hit_positions = np.uint16(np.nonzero(my_v)[0])
    hit_genes = [my_matrix.genes[pos] for pos in hit_positions]

    # The gene set receives its own copy of the gene list.
    gene_set = GeneSet(genes=list(hit_genes), id='Random1',
                       name='Random gene Set 1')

    test_res = get_xlmhg_test_result(
        n_genes, hit_positions, min_hits, lowest_cutoff)

    return RankBasedGSEResult(
        gene_set, n_genes, hit_positions, hit_genes,
        min_hits, lowest_cutoff,
        test_res.stat, test_res.cutoff, test_res.pval)
def test_figure_double_axis(tmpdir):
    """Render the result figure with ``plot_fold_enrichment=True`` and
    check that the HTML output file is written."""
    occurrences = np.uint8([1, 0, 1, 1, 0, 1] + [0] * 12 + [1, 0])
    hit_positions = np.uint16(np.nonzero(occurrences)[0])

    result = xlmhg.get_xlmhg_test_result(
        occurrences.size, hit_positions, X=3, L=10)
    fig = xlmhg.get_result_figure(result, plot_fold_enrichment=True)

    output_file = text(tmpdir.join('plot4.html'))
    plot(fig, filename=output_file, auto_open=False)
    assert os.path.isfile(output_file)
def test_significant1(my_N, my_ind):
    """Test that the exact p-value is returned for a significant test when
    requested."""
    options = dict(pval_thresh=0.07, exact_pval='if_significant')
    res = get_xlmhg_test_result(my_N, my_ind, **options)
    assert res.pval == 0.0244453044375645
def test_ONbound(my_N, my_ind):
    """Test that the O(N)-bound is returned instead of the exact p-value
    if the bound is equal to or smaller than `pval_thresh`."""
    expected_bound = 0.04179566563467492
    res = get_xlmhg_test_result(
        my_N, my_ind,
        pval_thresh=0.045,
        exact_pval='if_necessary',
    )
    assert res.pval == expected_bound
def test_O1bound(my_N, my_ind):
    """Test that the O(1)-bound is returned if it is "<=" `pval_thresh`."""
    options = dict(pval_thresh=0.07, exact_pval='if_necessary')
    res = get_xlmhg_test_result(my_N, my_ind, **options)
    assert res.pval == 0.0696594427244582
def test_alg1(my_N, my_ind):
    """Test that PVAL1 can be used to calculate the p-value."""
    expected = (0.01393188854489164, 6, 0.0244453044375645)
    expected_stat, expected_cutoff, expected_pval = expected

    res = get_xlmhg_test_result(my_N, my_ind, use_alg1=True)

    assert res.stat == expected_stat
    assert res.cutoff == expected_cutoff
    assert res.pval == expected_pval
def test_result(my_ind, my_v):
    """Check that the test front-end returns an `mHGResult` object."""
    outcome = get_xlmhg_test_result(my_v.size, my_ind)
    assert isinstance(outcome, mHGResult)
def test_lowerbound(my_N, my_ind):
    """Test that the O(1)-bound is returned when stat > pval_thresh."""
    expected_bound = 0.0696594427244582
    res = get_xlmhg_test_result(
        my_N, my_ind,
        pval_thresh=0.01,
        exact_pval='if_necessary',
    )
    assert res.pval == expected_bound
def get_rank_based_enrichment(
        self, ranked_genes, pval_thresh=0.05,
        X_frac=0.25, X_min=5, L=None,
        adjust_pval_thresh=True, escore_pval_thresh=None,
        exact_pval='always', gene_set_ids=None, table=None):
    """Test for gene set enrichment at the top of a ranked list of genes.

    This function uses the XL-mHG test to identify enriched gene sets.

    This function also calculates XL-mHG E-scores for the enriched gene
    sets, using ``escore_pval_thresh`` as the p-value threshold "psi".

    Parameters
    ----------
    ranked_genes : list of str
        The ranked list of genes. Genes not found in the genome are
        ignored (and counted in a warning).
    pval_thresh : float, optional
        The p-value threshold used to determine significance.
        See also ``adjust_pval_thresh``. [0.05]
    X_frac : float, optional
        The min. fraction of genes from a gene set required for
        enrichment. [0.25]
    X_min : int, optional
        The min. no. of genes from a gene set required for enrichment. [5]
    L : int, optional
        The lowest cutoff to test for enrichment. If ``None``,
        int(0.25*(no. of genes)) will be used. [None]
    adjust_pval_thresh : bool, optional
        Whether to adjust the p-value threshold for multiple testing,
        using the Bonferroni method. [True]
    escore_pval_thresh : float or None, optional
        The "psi" p-value threshold used in calculating E-scores. If
        ``None``, or if smaller than the (adjusted) p-value threshold,
        the (adjusted) p-value threshold is used instead. [None]
    exact_pval : str
        Choices are: "always", "if_significant", "if_necessary". Parameter
        will be passed to `xlmhg.get_xlmhg_test_result`. ["always"]
    gene_set_ids : list of str or None, optional
        A list of gene set IDs to specify which gene sets should be tested
        for enrichment. If ``None``, all gene sets will be tested. [None]
    table : 2-dim numpy.ndarray of type numpy.longdouble or None, optional
        The dynamic programming table used by the algorithm for calculating
        XL-mHG p-values. Passing this avoids memory re-allocation when
        calling this function repetitively. [None]

    Returns
    -------
    list of `RankBasedGSEResult`
        A list of all significantly enriched gene sets.

    Raises
    ------
    ValueError
        If ``table`` is supplied but is too small to hold the dynamic
        programming table.
    """
    if isinstance(X_frac, (int, np.integer)):
        X_frac = float(X_frac)

    # type checks
    # (`np.floating` is used because the `np.float` alias no longer exists
    # in current NumPy releases; it also accepts NumPy float scalars.)
    assert isinstance(ranked_genes, Iterable)
    assert isinstance(pval_thresh, (float, np.floating))
    assert isinstance(X_frac, (float, np.floating))
    assert isinstance(X_min, (int, np.integer))
    if L is not None:
        assert isinstance(L, (int, np.integer))
    assert isinstance(adjust_pval_thresh, bool)
    assert isinstance(exact_pval, (str, _oldstr))
    if escore_pval_thresh is not None:
        assert isinstance(escore_pval_thresh, (float, np.floating))
    if gene_set_ids is not None:
        assert isinstance(gene_set_ids, Iterable)
    if table is not None:
        assert isinstance(table, np.ndarray) and \
            np.issubdtype(table.dtype, np.longdouble)

    if L is None:
        L = int(len(ranked_genes)/4.0)

    gene_set_coll = self._gene_set_coll
    gene_memberships = self._gene_memberships

    if escore_pval_thresh is None:
        # If no separate E-score p-value threshold is specified, the
        # p-value threshold is used (this results in conservative
        # E-scores). The actual assignment is postponed until the final
        # p-value threshold is known.
        logger.warning('No E-score p-value threshold supplied. '
                       'The E-score p-value threshold will be set to the '
                       'global significance threshold. This will result '
                       'in conservative E-scores.')

    # test only some terms?
    if gene_set_ids is not None:
        # Materialize once, since the IDs are iterated over twice.
        gene_set_ids = list(gene_set_ids)
        gs_indices = np.int64([self._gene_set_coll.index(id_)
                               for id_ in gene_set_ids])
        gene_sets = [gene_set_coll[id_] for id_ in gene_set_ids]
        gene_set_coll = GeneSetCollection(gene_sets)
        gene_memberships = gene_memberships[:, gs_indices]  # not a view!

    # reorder rows in annotation matrix to match the given gene ranking
    # also exclude genes not in the ranking
    unknown = 0
    L_adj = L
    sel = []
    filtered_genes = []
    logger.debug('Looking up indices for %d genes...', len(ranked_genes))
    for i, g in enumerate(ranked_genes):
        assert isinstance(g, (str, _oldstr))
        try:
            idx = self._genome.index(g)
        except ValueError:
            unknown += 1
            # adjust L if the gene was above the original L cutoff
            if i < L:
                L_adj -= 1
        else:
            sel.append(idx)
            filtered_genes.append(g)
    sel = np.int64(sel)
    logger.debug('Adjusted L: %d', L_adj)

    # the following also copies the data (not a view)
    gene_memberships = gene_memberships[sel, :]
    N, m = gene_memberships.shape
    if unknown > 0:
        # Some genes in the ranked list were unknown (i.e., not present in
        # the specified genome).
        logger.warning('%d / %d unknown genes (%.1f %%), will be ignored.',
                       unknown, len(ranked_genes),
                       100 * (unknown / float(len(ranked_genes))))

    # Determine the number of gene set genes above the L'th cutoff,
    # for all gene sets. This quantity is useful, because we don't need
    # to perform any more work for gene sets that have less than X genes
    # above the cutoff.
    k_above_L = np.sum(gene_memberships[:L_adj, :], axis=0, dtype=np.int64)

    # Determine the number of genes below the L'th cutoff, for all gene
    # sets.
    k_below_L = np.sum(gene_memberships[L_adj:, :], axis=0, dtype=np.int64)

    # Determine the total number K of genes in each gene set that are
    # present in the ranked list (this is equal to k_above_L + k_below_L)
    K_vec = k_above_L + k_below_L

    # Determine the largest K across all gene sets (0 if there are no gene
    # sets at all, in which case np.amax would raise).
    K_max = int(np.amax(K_vec)) if m > 0 else 0

    # Determine X for all gene sets.
    X_vec = np.maximum(X_min, np.int64(np.ceil(X_frac * K_vec)))

    # Determine the number of tests (we do not conduct a test if the
    # total number of gene set genes in the ranked list is below X).
    num_tests = int(np.count_nonzero(K_vec >= X_vec))
    logger.info('Conducting %d tests.', num_tests)

    # determine Bonferroni-corrected p-value, if desired
    final_pval_thresh = pval_thresh
    if adjust_pval_thresh and num_tests > 0:
        final_pval_thresh /= float(num_tests)
        logger.info('Using Bonferroni-corrected p-value threshold: %.1e',
                    final_pval_thresh)

    if escore_pval_thresh is None:
        escore_pval_thresh = final_pval_thresh

    elif escore_pval_thresh < final_pval_thresh:
        logger.warning('The E-score p-value threshold is smaller than '
                       'the p-value threshold. Setting E-score p-value '
                       'threshold to the p-value threshold.')
        escore_pval_thresh = final_pval_thresh

    # Prepare the matrix that holds the dynamic programming table for
    # the calculation of the XL-mHG p-value.
    if table is None:
        table = np.empty((K_max+1, N+1), dtype=np.longdouble)
    elif table.shape[0] < K_max+1 or table.shape[1] < N+1:
        raise ValueError(
            'The supplied array is too small (%d x %d) to hold the '
            'entire dynamic programming table. The required size is '
            '%d x %d (rows x columns).'
            % (table.shape[0], table.shape[1], K_max+1, N+1))

    # find enriched GO terms
    logger.debug('(N=%d, X_frac=%.2f, X_min=%d, L=%d; K_max=%d)',
                 len(ranked_genes), X_frac, X_min, L, K_max)

    enriched = []
    num_tests = 0  # number of tests conducted
    for j in range(m):
        # gene set-specific value for X
        X = int(X_vec[j])

        # Determine significance of gene set enrichment using the XL-mHG
        # test (only if there are at least X gene set genes in the list).
        if K_vec[j] < X:
            continue
        num_tests += 1

        # We only need to perform the XL-mHG test if there are enough
        # gene set genes above the L'th cutoff (otherwise, pval = 1.0).
        if k_above_L[j] < X:
            continue

        # Determine the ranks of the gene set genes in the (filtered)
        # ranked list.
        # NOTE(review): uint16 cannot represent ranks above 65535.
        indices = np.uint16(np.nonzero(gene_memberships[:, j])[0])
        # NOTE(review): the pre-filter above uses the adjusted cutoff
        # `L_adj`, whereas the test receives the unadjusted `L` -- confirm
        # that this is intended.
        res = xlmhg.get_xlmhg_test_result(
            N, indices, X, L, pval_thresh=final_pval_thresh,
            escore_pval_thresh=escore_pval_thresh,
            exact_pval=exact_pval, table=table)

        # check if gene set is significantly enriched
        if res.pval <= final_pval_thresh:
            # `indices` refer to rows of the filtered membership matrix,
            # so gene names must be looked up in `filtered_genes` (not in
            # `ranked_genes`, which may still contain unknown genes).
            ind_genes = [filtered_genes[i] for i in indices]
            gse_result = RankBasedGSEResult(
                gene_set_coll[j], N, indices, ind_genes,
                X, L, res.stat, res.cutoff, res.pval,
                escore_pval_thresh=escore_pval_thresh
            )
            enriched.append(gse_result)

    # report results
    q = len(enriched)
    ignored = m - num_tests
    if ignored > 0:
        logger.debug('%d / %d gene sets (%.1f%%) had less than X genes '
                     'annotated with them and were ignored.',
                     ignored, m, 100 * (ignored / float(m)))

    logger.info('%d / %d gene sets were found to be significantly '
                'enriched (p-value <= %.1e).', q, m, final_pval_thresh)

    return enriched
def batch_xlmhg(marker_exp, c_list, coi, X=None, L=None):
    """Applies XL-mHG test to a gene expression matrix, gene by gene.

    Outputs a 4-column DataFrame representing statistical results of
    XL-mHG.

    :param marker_exp: A DataFrame whose rows are cell identifiers, columns
        are gene identifiers, and values are float values representing gene
        expression.
    :param c_list: A Series whose indices are cell identifiers, and whose
        values are the cluster which that cell is part of.
    :param coi: The cluster of interest.
    :param X: A fraction of the cluster size; ``int(X * cluster size)`` is
        passed as the X argument of the XL-mHG test. If None, a fraction of
        0.15 is used.
    :param L: An integer to be used as the L argument of the XL-mHG test.
        If None, twice the cluster size is used, capped at the number of
        rows of ``marker_exp``.

    :returns: A matrix with arbitrary row indices, whose columns are the
        gene name, stat, cutoff, and pval outputs of the XL-mHG test. Their
        names are 'gene_1', 'mHG_stat', 'mHG_cutoff', and 'mHG_pval'.

    :rtype: pandas.DataFrame
    """
    # * 1 converts the boolean membership mask to integers
    mem_list = (c_list == coi) * 1

    # Number of cells in the cluster of interest.
    count_n = np.sum(mem_list)

    # Set X and L params. The built-in `int` is used because the `np.int`
    # alias no longer exists in current NumPy releases.
    x_frac = .15 if X is None else X
    X = int(x_frac * count_n)
    if L is None:
        L = int(min(2 * count_n, marker_exp.shape[0]))

    print('X = ' + str(X))
    print('L = ' + str(L))
    print('Cluster size ' + str(count_n))

    def _test_gene(col):
        # Order the membership labels by decreasing expression of this gene
        # and run the test on the resulting vector.
        ranked_membership = mem_list.reindex(
            col.sort_values(ascending=False).index).values
        return hg.xlmhg_test(ranked_membership, X=X, L=L)

    test_results = marker_exp.apply(_test_gene)

    stat_columns = ['mHG_stat', 'mHG_cutoff', 'mHG_pval']
    output = pd.DataFrame()
    output['gene_1'] = test_results.index
    output[stat_columns] = pd.DataFrame(
        test_results.values.tolist(), columns=stat_columns)
    return output
def test_pval_necessary(my_N, my_ind):
    """Test that the p-value is returned when it is necessary."""
    options = dict(pval_thresh=0.04, exact_pval='if_necessary')
    res = get_xlmhg_test_result(my_N, my_ind, **options)
    assert res.pval == 0.0244453044375645
def test_non_contiguous(my_N, my_ind):
    """Test that a non-contiguous index array results in a ValueError."""
    # A reversed slice is a strided view of the original array.
    reversed_view = my_ind[::-1]
    with pytest.raises(ValueError):
        get_xlmhg_test_result(my_N, reversed_view)
def test_table_too_small(my_N, my_ind):
    """Test if ValueError is raised when the supplied array is too small."""
    K = my_ind.size
    # Allocate the undersized table outside of the `raises` block, so that
    # a ValueError from the allocation itself cannot make the test pass.
    table = np.empty(((my_N-K), (my_N-K)), np.longdouble)
    with pytest.raises(ValueError):
        get_xlmhg_test_result(my_N, my_ind, table=table)
def get_rank_based_enrichment(
        self, ranked_genes: List[str],
        pval_thresh: float = 0.05,
        X_frac: float = 0.25, X_min: int = 5,
        L: int = None,
        adjust_pval_thresh: bool = True,
        escore_pval_thresh: float = None,
        exact_pval: str = 'always',
        gene_set_ids: List[str] = None,
        table: np.ndarray = None) -> List[RankBasedGSEResult]:
    """Test for gene set enrichment at the top of a ranked list of genes.

    This function uses the XL-mHG test to identify enriched gene sets.

    This function also calculates XL-mHG E-scores for the enriched gene
    sets, using ``escore_pval_thresh`` as the p-value threshold "psi".

    Parameters
    ----------
    ranked_genes : list of str
        The ranked list of gene IDs. Genes without a known index are
        ignored (and counted in a warning).
    pval_thresh : float, optional
        The p-value threshold used to determine significance.
        See also ``adjust_pval_thresh``. [0.05]
    X_frac : float, optional
        The min. fraction of genes from a gene set required for
        enrichment. [0.25]
    X_min : int, optional
        The min. no. of genes from a gene set required for enrichment. [5]
    L : int, optional
        The lowest cutoff to test for enrichment. If ``None``,
        int(0.25*(no. of genes)) will be used. [None]
    adjust_pval_thresh : bool, optional
        Whether to adjust the p-value threshold for multiple testing,
        using the Bonferroni method. [True]
    escore_pval_thresh : float or None, optional
        The "psi" p-value threshold used in calculating E-scores. If
        ``None``, or if smaller than the (adjusted) p-value threshold,
        will be set to the (adjusted) p-value threshold. [None]
    exact_pval : str
        Choices are: "always", "if_significant", "if_necessary". Parameter
        will be passed to `xlmhg.get_xlmhg_test_result`. ["always"]
    gene_set_ids : list of str or None, optional
        A list of gene set IDs to specify which gene sets should be tested
        for enrichment. If ``None``, all gene sets will be tested. [None]
    table : 2-dim numpy.ndarray of type numpy.longdouble or None, optional
        The dynamic programming table used by the algorithm for calculating
        XL-mHG p-values. Passing this avoids memory re-allocation when
        calling this function repetitively. [None]

    Returns
    -------
    list of `RankBasedGSEResult`
        A list of all significantly enriched gene sets.

    Raises
    ------
    TypeError
        If ``table`` is supplied but is not of type "longdouble".
    ValueError
        If ``table`` is supplied but is too small to hold the dynamic
        programming table.
    """
    # make sure X_frac is a float (e.g., if specified as 0)
    X_frac = float(X_frac)

    if table is not None:
        if not np.issubdtype(table.dtype, np.longdouble):
            raise TypeError('The provided array for storing the dynamic '
                            'programming table must be of type '
                            '"longdouble"!')

    if L is None:
        L = int(len(ranked_genes) / 4.0)

    gene_set_coll = self._gene_set_coll
    gene_memberships = self._gene_memberships

    if escore_pval_thresh is None:
        # If no separate E-score p-value threshold is specified, the
        # p-value threshold is used (this results in conservative
        # E-scores). The actual assignment is postponed until the final
        # p-value threshold is known.
        logger.warning('No E-score p-value threshold supplied. '
                       'The E-score p-value threshold will be set to the '
                       'global significance threshold. This will result '
                       'in conservative E-scores.')

    # test only some terms?
    if gene_set_ids is not None:
        gs_indices = np.int64(
            [self._gene_set_coll.index(id_) for id_ in gene_set_ids])
        gene_sets = [gene_set_coll[id_] for id_ in gene_set_ids]
        gene_set_coll = GeneSetCollection(gene_sets)
        gene_memberships = gene_memberships[:, gs_indices]  # not a view!

    # reorder rows in annotation matrix to match the given gene ranking
    # also exclude genes not in the ranking
    unknown = 0
    L_adj = L
    sel = []
    filtered_genes = []
    logger.debug('Looking up indices for %d genes...', len(ranked_genes))
    for i, g in enumerate(ranked_genes):
        try:
            idx = self._gene_indices[g]
        except KeyError:
            unknown += 1
            # adjust L if the gene was above the original L cutoff
            if i < L:
                L_adj -= 1
        else:
            sel.append(idx)
            filtered_genes.append(g)
    sel = np.int64(sel)
    logger.debug('Adjusted L: %d', L_adj)

    # the following also copies the data (not a view)
    gene_memberships = gene_memberships[sel, :]
    N, m = gene_memberships.shape
    if unknown > 0:
        # Some genes in the ranked list were unknown (i.e., not present in
        # the specified genome).
        logger.warning('%d / %d unknown genes (%.1f %%), will be ignored.',
                       unknown, len(ranked_genes),
                       100 * (unknown / float(len(ranked_genes))))

    # Determine the number of gene set genes above the L'th cutoff,
    # for all gene sets. This quantity is useful, because we don't need
    # to perform any more work for gene sets that have less than X genes
    # above the cutoff.
    k_above_L = np.sum(gene_memberships[:L_adj, :], axis=0, dtype=np.int64)

    # Determine the number of genes below the L'th cutoff, for all gene
    # sets.
    k_below_L = np.sum(gene_memberships[L_adj:, :], axis=0, dtype=np.int64)

    # Determine the total number K of genes in each gene set that are
    # present in the ranked list (this is equal to k_above_L + k_below_L)
    K_vec = k_above_L + k_below_L

    # Determine the largest K across all gene sets (0 if there are no gene
    # sets at all, in which case np.amax would raise).
    K_max = int(np.amax(K_vec)) if m > 0 else 0

    # Determine X for all gene sets.
    X_vec = np.maximum(X_min, np.int64(np.ceil(X_frac * K_vec)))

    # Determine the number of tests (we do not conduct a test if the
    # total number of gene set genes in the ranked list is below X).
    num_tests = int(np.count_nonzero(K_vec >= X_vec))
    logger.info('Conducting %d tests.', num_tests)

    # determine Bonferroni-corrected p-value, if desired
    final_pval_thresh = pval_thresh
    if adjust_pval_thresh and num_tests > 0:
        final_pval_thresh /= float(num_tests)
        logger.info('Using Bonferroni-corrected p-value threshold: %.1e',
                    final_pval_thresh)

    if escore_pval_thresh is None:
        escore_pval_thresh = final_pval_thresh

    elif escore_pval_thresh < final_pval_thresh:
        logger.warning('The E-score p-value threshold is smaller than '
                       'the p-value threshold. Setting E-score p-value '
                       'threshold to the p-value threshold.')
        escore_pval_thresh = final_pval_thresh

    # Prepare the matrix that holds the dynamic programming table for
    # the calculation of the XL-mHG p-value.
    if table is None:
        table = np.empty((K_max + 1, N + 1), dtype=np.longdouble)
    elif table.shape[0] < K_max + 1 or table.shape[1] < N + 1:
        raise ValueError(
            'The supplied array is too small (%d x %d) to hold the '
            'entire dynamic programming table. The required size is '
            '%d x %d (rows x columns).'
            % (table.shape[0], table.shape[1], K_max + 1, N + 1))

    # find enriched GO terms
    logger.debug('(N=%d, X_frac=%.2f, X_min=%d, L=%d; K_max=%d)',
                 len(ranked_genes), X_frac, X_min, L, K_max)

    enriched = []
    num_tests = 0  # number of tests conducted
    for j in range(m):
        # gene set-specific value for X
        X = int(X_vec[j])

        # Determine significance of gene set enrichment using the XL-mHG
        # test (only if there are at least X gene set genes in the list).
        if K_vec[j] < X:
            continue
        num_tests += 1

        # We only need to perform the XL-mHG test if there are enough
        # gene set genes above the L'th cutoff (otherwise, pval = 1.0).
        if k_above_L[j] < X:
            continue

        # Determine the ranks of the gene set genes in the (filtered)
        # ranked list.
        # NOTE(review): uint16 cannot represent ranks above 65535.
        indices = np.uint16(np.nonzero(gene_memberships[:, j])[0])
        # NOTE(review): the pre-filter above uses the adjusted cutoff
        # `L_adj`, whereas the test receives the unadjusted `L` -- confirm
        # that this is intended.
        res = xlmhg.get_xlmhg_test_result(
            N, indices, X, L, pval_thresh=final_pval_thresh,
            escore_pval_thresh=escore_pval_thresh,
            exact_pval=exact_pval, table=table)

        # check if gene set is significantly enriched
        if res.pval <= final_pval_thresh:
            # `indices` refer to rows of the filtered membership matrix,
            # so gene names must be looked up in `filtered_genes` (not in
            # `ranked_genes`, which may still contain unknown genes).
            ind_genes = [filtered_genes[i] for i in indices]
            gse_result = RankBasedGSEResult(
                gene_set_coll[j], N, indices, ind_genes,
                X, L, res.stat, res.cutoff, res.pval,
                escore_pval_thresh=escore_pval_thresh)
            enriched.append(gse_result)

    # report results
    q = len(enriched)
    ignored = m - num_tests
    if ignored > 0:
        logger.debug(
            '%d / %d gene sets (%.1f%%) had less than X genes '
            'annotated with them and were ignored.',
            ignored, m, 100 * (ignored / float(m)))

    logger.info(
        '%d / %d gene sets were found to be significantly '
        'enriched (p-value <= %.1e).', q, m, final_pval_thresh)

    return enriched