def _get_par(self, datao):
    """Compute per-attribute Gaussian parameters and normalization constants.

    For every attribute index of ``datao.domain.attributes`` the parameters
    returned by ``estimate_gaussian_per_class`` (called with
    ``common_if_extreme=True``) are collected. For each attribute the
    ``_llrlogratio`` value of every example is then computed and summarised
    as a ``(mean, std)`` pair.

    Returns a tuple ``(gaussiane, normalizec)``: the list of per-attribute
    parameter tuples and the list of per-attribute ``(mean, std)`` pairs.
    """
    attribute_count = len(datao.domain.attributes)
    gaussiane = [
        estimate_gaussian_per_class(datao, index, common_if_extreme=True)
        for index in range(attribute_count)
    ]

    normalizec = []
    # gaussiane holds exactly one entry per attribute index, in order.
    for index, params in enumerate(gaussiane):
        ratios = [
            _llrlogratio(example[index].value, *params) for example in datao
        ]
        normalizec.append((mean(ratios), std(ratios)))

    return gaussiane, normalizec
def _get_par(self, datao):
    """Return ``(gaussiane, normalizec)`` for all attributes of ``datao``.

    ``gaussiane[k]`` is the result of ``estimate_gaussian_per_class`` for
    attribute index ``k`` (with ``common_if_extreme=True``).
    ``normalizec[k]`` is the ``(mean, std)`` of the ``_llrlogratio`` values
    obtained for attribute ``k`` over every example in ``datao``.
    """
    indices = range(len(datao.domain.attributes))

    gaussiane = []
    for position in indices:
        gaussiane.append(
            estimate_gaussian_per_class(
                datao, position, common_if_extreme=True))

    normalizec = []
    for position, gauss in zip(indices, gaussiane):
        log_ratios = []
        for example in datao:
            log_ratios.append(_llrlogratio(example[position].value, *gauss))
        summary = (mean(log_ratios), std(log_ratios))
        normalizec.append(summary)

    return gaussiane, normalizec
def estimate_gaussian_per_class(data, i, a=None, b=None, common_if_extreme=False):
    """Estimate mean and standard deviation of attribute ``i`` for two classes.

    Parameters:
        data: iterable of examples; ``data.domain.class_var`` supplies the
            default class values. Each example is indexed as ``ex[i]`` for
            the attribute and ``ex[-1]`` for the class.
        i: index of the attribute to summarise.
        a, b: class values to compare. When ``None`` they default to
            ``class_var.values[0]`` and ``class_var.values[1]``.
        common_if_extreme: when true and either standard deviation equals
            zero, both standard deviations are replaced by the standard
            deviation of the pooled values of the two classes.

    Returns:
        ``(mi1, st1, mi2, st2)`` -- mean and standard deviation for class
        ``a`` followed by class ``b``. An entry stays ``None`` when
        ``statc`` could not compute it for that class.
    """
    cv = data.domain.class_var

    # Identity test: ``== None`` would go through the value's own __eq__.
    if a is None:
        a = cv.values[0]
    if b is None:
        b = cv.values[1]

    def avWCVal(value):
        # Attribute values of examples in class ``value``, skipping
        # examples whose attribute value is special.
        return [
            ex[i].value
            for ex in data
            if ex[-1].value == value and not ex[i].isSpecial()
        ]

    list1 = avWCVal(a)
    list2 = avWCVal(b)

    mi1 = mi2 = st1 = st2 = None

    # Best effort: a class for which the statistics cannot be computed
    # keeps ``None``. Only ``Exception`` is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        mi1 = statc.mean(list1)
        st1 = statc.std(list1)
    except Exception:
        pass

    try:
        mi2 = statc.mean(list2)
        st2 = statc.std(list2)
    except Exception:
        pass

    def extreme():
        return st1 == 0 or st2 == 0

    if common_if_extreme and extreme():
        st1 = st2 = statc.std(list1 + list2)

    return mi1, st1, mi2, st2
def build_feature(self, data, gs):
    """Build a continuous feature whose value is a sum over gene set ``gs``.

    The genes of ``gs`` are matched against ``data`` with
    ``self._match_data``; per-gene parameters come from ``compute_llr``
    (using ``self._gauss_cache``). When ``self.normalize`` is set, a
    ``(mean, std)`` pair of the ``_llrlogratio`` values over ``data`` is
    stored in ``self._normalizec`` for every gene not yet present there.

    The returned ``Orange.feature.Continuous`` gets a ``get_value_from``
    callable that sums the per-gene ``_llrlogratio`` values of an example,
    standardised with ``self._normalizec`` whenever that mapping is
    non-empty.
    """
    at = Orange.feature.Continuous(name=str(gs))

    gene_names = list(gs.genes)
    _, name_ind, genes, _, to_geneset = self._match_data(
        data, gene_names, odic=True)

    gsi = [name_ind[gene] for gene in genes]
    gausse = compute_llr(data, gsi, self._gauss_cache)
    genes_gs = [to_geneset[gene] for gene in genes]

    if self.normalize:  # per (3) in the paper
        # Compute log ratios for all samples and genes from this gene set.
        for column, gene_key, params in zip(gsi, genes_gs, gausse):
            if gene_key not in self._normalizec:  # skip if computed already
                ratios = [
                    _llrlogratio(sample[column].value, *params)
                    for sample in data
                ]
                self._normalizec[gene_key] = (mean(ratios), std(ratios))

    # Defaults bind the current lists and the normalization mapping at
    # definition time; ``w`` is accepted but not used.
    def t(ex, w, genes_gs=genes_gs, gausse=gausse,
          normalizec=self._normalizec):
        _, name_ind2, genes2 = self._match_instance(ex, genes_gs, None)
        gsvalues = [vou(ex, gene_name, name_ind2) for gene_name in genes2]

        # A value equal to "?" contributes 0.0.
        vals = [
            0.0 if value == "?" else _llrlogratio(value, *params)
            for value, params in zip(gsvalues, gausse)
        ]

        if normalizec:  # normalize according to (3)
            scaled = []
            for raw, gene_key in zip(vals, genes_gs):
                center, spread = normalizec[gene_key]
                # Disregard attributes without differences.
                scaled.append(
                    (raw - center) / spread if spread != 0 else 0.0)
            vals = scaled

        return sum(vals)

    at.get_value_from = t
    return at
def build_feature(self, data, gs):
    """Create the continuous feature that scores examples for ``gs``.

    Steps performed:
      1. Match ``gs.genes`` to ``data`` through ``self._match_data``.
      2. Obtain per-gene parameters from ``compute_llr`` (with
         ``self._gauss_cache``).
      3. If ``self.normalize`` is set, fill ``self._normalizec`` with the
         ``(mean, std)`` of the ``_llrlogratio`` values over ``data`` for
         each gene that has no entry yet.
      4. Attach a ``get_value_from`` callable to the new feature that
         sums the (optionally standardised) ``_llrlogratio`` values of a
         single example.

    Returns the ``Orange.feature.Continuous`` feature named ``str(gs)``.
    """
    at = Orange.feature.Continuous(name=str(gs))
    geneset = list(gs.genes)

    matched = self._match_data(data, geneset, odic=True)
    nm, name_ind, genes, takegenes, to_geneset = matched

    gsi = []
    genes_gs = []
    for gene in genes:
        gsi.append(name_ind[gene])
    gausse = compute_llr(data, gsi, self._gauss_cache)
    for gene in genes:
        genes_gs.append(to_geneset[gene])

    if self.normalize:  # per (3) in the paper
        # Compute log ratios for all samples and genes from this gene set.
        for index, gene_id, gauss in zip(gsi, genes_gs, gausse):
            if gene_id in self._normalizec:
                continue  # skip if computed already
            log_ratios = [
                _llrlogratio(row[index].value, *gauss) for row in data
            ]
            self._normalizec[gene_id] = (mean(log_ratios), std(log_ratios))

    # The keyword defaults capture the lists and mapping as they are now;
    # the second positional argument ``w`` is ignored.
    def t(ex, w, genes_gs=genes_gs, gausse=gausse,
          normalizec=self._normalizec):
        nm2, name_ind2, genes2 = self._match_instance(ex, genes_gs, None)
        gsvalues = [vou(ex, name, name_ind2) for name in genes2]

        vals = []
        for observed, gauss in zip(gsvalues, gausse):
            if observed != "?":
                vals.append(_llrlogratio(observed, *gauss))
            else:
                # A value equal to "?" adds nothing to the sum.
                vals.append(0.0)

        if len(normalizec) == 0:
            return sum(vals)

        # Normalize according to (3).
        standardized = []
        for term, gene_id in zip(vals, genes_gs):
            m, s = normalizec[gene_id]
            if s == 0:
                # Disregard attributes without differences.
                standardized.append(0.0)
                continue
            standardized.append((term - m) / s)
        return sum(standardized)

    at.get_value_from = t
    return at
def stdev(l):
    """Return ``statc.std(l)``; a thin alias for ``statc.std``."""
    deviation = statc.std(l)
    return deviation