Beispiel #1
0
 # Release the `commongenes` name; nothing below in this fragment uses it.
 del commongenes
 # discard bad columns
 # A gene_atb column is flagged when it has fewer than 3 nonzero entries,
 # fewer than 3 zero entries, or contains any NaN.
 tobediscarded = np.logical_or.reduce(
     ((gene_atb.matrix != 0).sum(axis=0) < 3,
      (gene_atb.matrix == 0).sum(axis=0) < 3,
      np.isnan(gene_atb.matrix).any(axis=0)))
 # NOTE(review): `discard` is defined elsewhere; its return value is ignored
 # here and the shape is re-checked below, so it presumably drops the flagged
 # columns in place -- confirm.
 gene_atb.discard(tobediscarded, axis=1)
 # gene_cst uses a looser rule: fewer than 3 nonzero entries or any NaN
 # (there is no minimum-number-of-zeros requirement).
 tobediscarded = np.logical_or((gene_cst.matrix != 0).sum(axis=0) < 3,
                               np.isnan(gene_cst.matrix).any(axis=0))
 gene_cst.discard(tobediscarded, axis=1)
 # Skip this iteration of the enclosing loop (header not visible here) when
 # either dataset has no rows or no columns left.
 if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0 or gene_cst.shape[
         0] == 0 or gene_cst.shape[1] == 0:
     continue
 # arbitrary prioritization to break redundancyindex ties later
 # Stores element [1] of the chi-square helper's result as per-column
 # metadata. The target Y is a boolean vector: first gene_cst column < 0.2.
 # NOTE(review): element [1] is presumably the p-values (per the key name);
 # verify against featureselection.univariate_chisquare.
 gene_atb.columnmeta[
     'arbitrary_pvalues'] = featureselection.univariate_chisquare(
         X=gene_atb.matrix, Y=gene_cst.matrix[:, 0] < 0.2)[1]
 # Columns whose stored value is NaN are flagged and discarded as well.
 tobediscarded = np.isnan(gene_atb.columnmeta['arbitrary_pvalues'])
 gene_atb.discard(tobediscarded, axis=1)
 if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0:
     continue
 # discard redundant features
 # Labels that receive a fractional bump below; position in this array sets
 # the size of the bump ('mean' -> 1/2, 'stdv' -> 1/3).
 rowstatpreferredorder = np.array(['mean', 'stdv'], dtype='object')
 # NOTE(review): `tolabels` is defined elsewhere; presumably it realigns the
 # rows and columns of atb_atb to the surviving gene_atb column labels --
 # confirm.
 atb_atb = atb_atb.tolabels(gene_atb.columnlabels, gene_atb.columnlabels)
 # Deep copies, so atb_atb's row/column metadata do not alias gene_atb's
 # column metadata (or each other).
 atb_atb.rowmeta = copy.deepcopy(gene_atb.columnmeta)
 atb_atb.columnmeta = copy.deepcopy(gene_atb.columnmeta)
 # Per row: how many entries have absolute value above similarity_threshold,
 # stored as float64 so the fractional bumps below can be added.
 redundancyindex = (np.abs(atb_atb.matrix) >
                    similarity_threshold).sum(1).astype('float64')
 # Each bump is below 1 and a row label can match only one entry, so the
 # bump cannot outweigh a difference of one in the integer counts.
 # NOTE(review): under Python 2 without `from __future__ import division`,
 # `1 / (2 + i)` is integer division and adds 0 -- confirm the interpreter.
 for i, rowstat in enumerate(rowstatpreferredorder):
     if rowstat in atb_atb.rowlabels:
         redundancyindex[atb_atb.rowlabels == rowstat] += 1 / (2 + i)
 table = list(
 # Flag gene_atb columns with fewer than 3 nonzero entries or any NaN.
 # NOTE(review): `discard` is defined elsewhere; its return value is ignored
 # and the shape is re-checked below, so it presumably drops the flagged
 # columns in place -- confirm.
 tobediscarded = np.logical_or((gene_atb.matrix != 0).sum(axis=0) < 3,
                               np.isnan(gene_atb.matrix).any(axis=0))
 gene_atb.discard(tobediscarded, axis=1)
 # Same rule applied to gene_cst.
 tobediscarded = np.logical_or((gene_cst.matrix != 0).sum(axis=0) < 3,
                               np.isnan(gene_cst.matrix).any(axis=0))
 gene_cst.discard(tobediscarded, axis=1)
 # Skip this iteration of the enclosing loop (header not visible here) when
 # either dataset has no rows or no columns left.
 if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0 or gene_cst.shape[
         0] == 0 or gene_cst.shape[1] == 0:
     continue
 # arbitrary prioritization to break redundancyindex ties later
 # Start from a zero-filled float64 vector with one slot per gene_atb column,
 # then fill it in two passes keyed on the boolean 'isrowstat' column flag.
 gene_atb.columnmeta['arbitrary_pvalues'] = np.zeros(gene_atb.shape[1],
                                                     dtype='float64')
 # Columns NOT flagged 'isrowstat': element [1] of the chi-square helper.
 # NOTE(review): `gene_cst.select([], '0')` is defined elsewhere; presumably
 # it returns the column labelled '0' for all rows -- confirm. Element [1]
 # is presumably the p-values (per the key name) -- confirm.
 gene_atb.columnmeta['arbitrary_pvalues'][
     ~gene_atb.
     columnmeta['isrowstat']] = featureselection.univariate_chisquare(
         X=gene_atb.matrix[:, ~gene_atb.columnmeta['isrowstat']],
         Y=gene_cst.select([], '0'))[1]
 # Columns flagged 'isrowstat': element [1] of the U-test helper instead.
 gene_atb.columnmeta['arbitrary_pvalues'][
     gene_atb.columnmeta['isrowstat']] = featureselection.univariate_utest(
         X=gene_atb.matrix[:, gene_atb.columnmeta['isrowstat']],
         Y=gene_cst.select([], '0'))[1]
 # Columns whose stored value is NaN are flagged and discarded as well.
 tobediscarded = np.isnan(gene_atb.columnmeta['arbitrary_pvalues'])
 gene_atb.discard(tobediscarded, axis=1)
 if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0:
     continue
 # discard redundant features
 # Not used within the visible part of this fragment.
 rowstatpreferredorder = np.array(['mean', 'stdv'], dtype='object')
 # NOTE(review): `tolabels` is defined elsewhere; presumably it realigns the
 # rows and columns of atb_atb to the surviving gene_atb column labels --
 # confirm.
 atb_atb = atb_atb.tolabels(gene_atb.columnlabels, gene_atb.columnlabels)
 # Deep copies, so atb_atb's row/column metadata do not alias gene_atb's
 # column metadata (or each other).
 atb_atb.rowmeta = copy.deepcopy(gene_atb.columnmeta)
 atb_atb.columnmeta = copy.deepcopy(gene_atb.columnmeta)
 redundancyindex = (np.abs(atb_atb.matrix) >