예제 #1
0
 def _get_par(self, datao):
     """Return per-attribute Gaussian estimates and normalization constants.

     For every attribute index of ``datao.domain.attributes`` this calls
     ``estimate_gaussian_per_class`` (with ``common_if_extreme=True``), then
     evaluates ``_llrlogratio`` on that attribute for every example and
     records the ``(mean, std)`` of those values.

     Returns a pair ``(gaussians, constants)``; both lists are ordered by
     attribute index.
     """
     attribute_count = len(datao.domain.attributes)

     # One estimate per attribute, in attribute order.
     gaussians = []
     for index in range(attribute_count):
         gaussians.append(
             estimate_gaussian_per_class(datao, index, common_if_extreme=True))

     # Mean/std of the log ratios over all examples, per attribute.
     constants = []
     for index, params in enumerate(gaussians):
         ratios = [_llrlogratio(ex[index].value, *params) for ex in datao]
         constants.append((mean(ratios), std(ratios)))

     return gaussians, constants
예제 #2
0
 def _get_par(self, datao):
     """Compute Gaussian estimates and log-ratio statistics per attribute.

     Each attribute index gets an ``estimate_gaussian_per_class`` result
     (``common_if_extreme=True``) and a ``(mean, std)`` pair summarizing
     ``_llrlogratio`` applied to that attribute across all examples of
     ``datao``.  Both lists are returned, in attribute order.
     """
     indices = range(len(datao.domain.attributes))

     def ratio_stats(index, params):
         # Log ratios of one attribute over every example.
         ratios = [_llrlogratio(ex[index].value, *params) for ex in datao]
         return mean(ratios), std(ratios)

     estimates = [
         estimate_gaussian_per_class(datao, index, common_if_extreme=True)
         for index in indices
     ]
     stats = [
         ratio_stats(index, params)
         for index, params in zip(indices, estimates)
     ]
     return estimates, stats
예제 #3
0
def estimate_gaussian_per_class(data,
                                i,
                                a=None,
                                b=None,
                                common_if_extreme=False):
    """Estimate mean and std of column ``i`` separately for two classes.

    Args:
        data: iterable of examples that also exposes ``domain.class_var``.
            Each example is indexed with ``i``; its last element
            (``ex[-1]``) is compared against the class values.
        i: index of the attribute whose values are summarized.
        a: first class value; defaults to ``class_var.values[0]``.
        b: second class value; defaults to ``class_var.values[1]``.
        common_if_extreme: when true and either class std equals 0, both
            stds are replaced by the std of the pooled values of the two
            classes.

    Returns:
        Tuple ``(mi1, st1, mi2, st2)``: mean and std for class ``a``
        followed by mean and std for class ``b``.  A statistic that
        ``statc`` fails to compute is left as ``None``.  Errors raised
        while computing the pooled std are not suppressed.
    """
    cv = data.domain.class_var

    # Identity comparison: ``== None`` would dispatch to the argument's own
    # ``__eq__``, which may not be a plain "is it missing" test.
    if a is None:
        a = cv.values[0]
    if b is None:
        b = cv.values[1]

    def values_of_class(value):
        # Known (non-special) values of column i among examples of one class.
        return [
            ex[i].value for ex in data
            if ex[-1].value == value and not ex[i].isSpecial()
        ]

    def mean_and_std(values):
        # Best effort: whatever could not be computed stays None.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        mi = st = None
        try:
            mi = statc.mean(values)
            st = statc.std(values)
        except Exception:
            pass
        return mi, st

    list1 = values_of_class(a)
    list2 = values_of_class(b)

    mi1, st1 = mean_and_std(list1)
    mi2, st2 = mean_and_std(list2)

    # A zero std in either class counts as "extreme"; fall back to one
    # std shared by both classes.
    if common_if_extreme and (st1 == 0 or st2 == 0):
        st1 = st2 = statc.std(list1 + list2)

    return mi1, st1, mi2, st2
예제 #4
0
    def build_feature(self, data, gs):
        """Build a continuous feature that scores gene set ``gs``.

        The feature is named ``str(gs)``.  Its ``get_value_from`` callback
        sums the ``_llrlogratio`` scores of the matched genes for one
        example; unknown values (``"?"``) contribute 0.0.  When
        ``self.normalize`` is set, the mean and std of each gene's scores
        over ``data`` are stored in ``self._normalizec`` (per (3) in the
        paper) and the callback standardizes scores with them.
        """
        feature = Orange.feature.Continuous(name=str(gs))

        _nm, name_ind, genes, _takegenes, to_geneset = self._match_data(
            data, list(gs.genes), odic=True)

        columns = [name_ind[gene] for gene in genes]
        gaussians = compute_llr(data, columns, self._gauss_cache)
        gs_names = [to_geneset[gene] for gene in genes]

        if self.normalize:  # per (3) in the paper
            # Log ratios over all samples, for each gene of this gene set.
            for column, gs_name, params in zip(columns, gs_names, gaussians):
                if gs_name in self._normalizec:
                    continue  # already computed
                ratios = [
                    _llrlogratio(ex[column].value, *params) for ex in data
                ]
                self._normalizec[gs_name] = (mean(ratios), std(ratios))

        def t(ex,
              w,
              genes_gs=gs_names,
              gausse=gaussians,
              normalizec=self._normalizec):
            _nm2, name_ind2, genes2 = self._match_instance(
                ex, genes_gs, None)
            raw_values = [vou(ex, gn, name_ind2) for gn in genes2]

            scores = [
                _llrlogratio(value, *params) if value != "?" else 0.0
                for value, params in zip(raw_values, gausse)
            ]

            # Without stored constants the raw scores are summed as they are.
            if not len(normalizec):
                return sum(scores)

            # Normalize according to (3).
            standardized = []
            for score, gs_name in zip(scores, genes_gs):
                center, spread = normalizec[gs_name]
                # Attributes without differences are disregarded.
                standardized.append(
                    0. if spread == 0 else (score - center) / spread)
            return sum(standardized)

        feature.get_value_from = t
        return feature
예제 #5
0
    def build_feature(self, data, gs):
        """Create the continuous feature for gene set ``gs``.

        The returned feature (named ``str(gs)``) computes, for one example,
        the sum of ``_llrlogratio`` scores over the genes matched in
        ``data``; values equal to ``"?"`` score 0.0.  If ``self.normalize``
        is set, per-gene ``(mean, std)`` of the scores over ``data`` are
        cached in ``self._normalizec`` (per (3) in the paper); the callback
        standardizes with that cache whenever it is non-empty.
        """
        result = Orange.feature.Continuous(name=str(gs))
        wanted = list(gs.genes)

        matched = self._match_data(data, wanted, odic=True)
        _nm, name_ind, genes, _takegenes, to_geneset = matched

        positions = [name_ind[name] for name in genes]
        estimates = compute_llr(data, positions, self._gauss_cache)
        keys = [to_geneset[name] for name in genes]

        if self.normalize:  # per (3) in the paper
            # Compute log ratios for all samples and genes of this gene set.
            for position, key, params in zip(positions, keys, estimates):
                if key not in self._normalizec:  # skip if computed already
                    ratios = []
                    for ex in data:
                        ratios.append(_llrlogratio(ex[position].value, *params))
                    self._normalizec[key] = (mean(ratios), std(ratios))

        def t(ex, w, genes_gs=keys, gausse=estimates, normalizec=self._normalizec):
            _nm2, name_ind2, genes2 = self._match_instance(ex, genes_gs, None)
            observed = [vou(ex, gn, name_ind2) for gn in genes2]

            vals = [
                _llrlogratio(value, *params) if value != "?" else 0.0
                for value, params in zip(observed, gausse)
            ]

            if len(normalizec):  # normalize according to (3)

                def standardize(score, key):
                    m, s = normalizec[key]
                    if s == 0:  # disregard attributes without differences
                        return 0.
                    return (score - m) / s

                vals = [
                    standardize(score, key)
                    for score, key in zip(vals, genes_gs)
                ]

            return sum(vals)

        result.get_value_from = t
        return result
예제 #6
0
 def stdev(l):
     """Return ``statc.std(l)``; a thin convenience wrapper.

     NOTE(review): whether ``statc.std`` is the sample or the population
     standard deviation is not visible here -- confirm against statc.
     """
     return statc.std(l)