예제 #1
0
    def __init__(self, t, save_data=1, interactions_too=1, dependencies_too=0, prepare=1, pvalues=0, simple_too=0, iterative_scaling=0, weighting=None):
        """Compute attribute/class and attribute/attribute statistics for ``t``.

        For every attribute a 2-way contingency table against the class is
        built; optionally 2-way attribute/attribute tables and 3-way
        attribute/attribute/class tables are built for every pair ``j < i``.
        Results are stored on the instance, keyed by index tuples in which
        ``-1`` stands for the class variable.

        Parameters:
            t: data table exposing ``domain.attributes`` and
                ``domain.classVar``.
            save_data: if true, keep the (possibly prepared) table as
                ``self.discData``.
            interactions_too: if true, build the 3-way tables (``self.way3``)
                and record their interaction information in ``self.ig``,
                ``self.list`` and ``self.abslist``.
            dependencies_too: if true, build the attribute/attribute 2-way
                tables and fill ``self.way2``, ``self.ents``, ``self.corr``
                and ``self.chi2`` for each pair ``(j, i)``.
            prepare: if true, ``t`` is first passed through
                ``self._prepare``.
            pvalues: if true, compute p-values with
                ``orngContingency.getPvalue`` into ``self.plist`` and
                ``self.plut``.
            simple_too: if true, store the sum of the diagonal of each 2-way
                table's ``pm`` matrix in ``self.simple``.
            iterative_scaling: when p-values for 3-way tables are requested,
                use ``c.IPF()`` instead of ``c.KSA()[0]`` as the divergence.
            weighting: optional weight id; when not ``None`` it is forwarded
                as ``wid=`` to the contingency builders.

        NOTE: the class entropy is taken from the last attribute/class table,
        so the table must have at least one attribute; with none, the final
        lookup of ``atc`` fails.
        """
        if prepare:
            t = self._prepare(t)
        if save_data:
            self.discData = t   # save the (prepared) data

        attributes = t.domain.attributes
        class_var = t.domain.classVar
        NA = len(attributes)

        # Forward the weight id only when one was supplied, so the builders
        # are called exactly as before when weighting is None.
        weight_kw = {} if weighting is None else {'wid': weighting}

        self.names = []
        self.labelname = ""
        if class_var:
            self.labelname = class_var.name
        self.gains = []
        self.freqs = []
        self.way2 = {}
        self.way3 = {}
        self.ig = []
        self.list = []
        self.abslist = []
        self.plist = []
        self.plut = {}
        self.ents = {}
        self.corr = {}
        self.chi2 = {}
        self.simple = {}

        for i in range(NA):
            attr_i = attributes[i]

            # --- attribute i versus the class ---
            atc = orngContingency.get2Int(t, attr_i, class_var, **weight_kw)
            gai = atc.InteractionInformation()
            self.gains.append(gai)
            self.corr[(i, -1)] = gai
            self.ents[(i,)] = orngContingency.Entropy(atc.a)
            self.way2[(i, -1)] = atc
            self.ents[(i, -1)] = orngContingency.Entropy(atc.m)
            N = sum(atc.a)
            self.chi2[(i, i)] = statc.chisqprob(
                N * (numpy.sum(numpy.outer(atc.pa, atc.pa)) - 2 + len(atc.pa)),
                (len(atc.pa) - 1) ** 2)

            if simple_too:
                simp = 0.0
                for k in range(min(len(atc.a), len(atc.b))):
                    try:
                        simp += atc.pm[k, k]
                    except IndexError:
                        # best effort: a missing diagonal cell adds nothing
                        pass
                self.simple[(i, -1)] = simp

            # store the name as a plain string
            self.names.append('%s' % attr_i.name)

            if pvalues:
                pv = orngContingency.getPvalue(gai, atc)
                self.plist.append((pv, (gai, i, -1)))
                self.plut[(i, -1)] = pv

            # --- attribute i versus every earlier attribute j ---
            line = []
            for j in range(i):
                attr_j = attributes[j]

                if dependencies_too:
                    c = orngContingency.get2Int(t, attr_j, attr_i, **weight_kw)
                    self.way2[(j, i)] = c
                    dep = c.InteractionInformation()
                    self.ents[(j, i)] = orngContingency.Entropy(c.m)
                    self.corr[(j, i)] = dep
                    self.chi2[(j, i)] = c.ChiSquareP()
                    if simple_too:
                        simp = 0.0
                        for k in range(min(len(c.a), len(c.b))):
                            try:
                                simp += c.pm[k, k]
                            except IndexError:
                                # the original added 0 for a missing cell
                                pass
                        self.simple[(j, i)] = simp
                    if pvalues:
                        pv = orngContingency.getPvalue(dep, c)
                        self.plist.append((pv, (dep, j, i)))
                        self.plut[(j, i)] = pv

                if interactions_too:
                    c = orngContingency.get3Int(t, attr_j, attr_i, class_var, **weight_kw)
                    self.way3[(j, i, -1)] = c
                    igv = c.InteractionInformation()
                    line.append(igv)
                    self.list.append((igv, (igv, j, i)))
                    self.abslist.append((abs(igv), (igv, j, i)))
                    if pvalues:
                        if iterative_scaling:
                            div = c.IPF()
                        else:
                            div = c.KSA()[0]
                        pv = orngContingency.getPvalue(div, c)
                        self.plist.append((pv, (igv, j, i, -1)))
                        self.plut[(j, i, -1)] = pv
            self.ig.append(line)

        # Class entropy comes from the class marginal of the last
        # attribute/class table built in the loop above.
        self.entropy = orngContingency.Entropy(atc.b)
        self.ents[(-1,)] = self.entropy
        self.list.sort()
        self.abslist.sort()
        self.plist.sort()

        # (gain, attribute index) pairs in ascending order of gain
        self.attlist = [(self.gains[i], i) for i in range(NA)]
        self.attlist.sort()
        self.NA = NA
예제 #2
0
    def __init__(self,
                 t,
                 save_data=1,
                 interactions_too=1,
                 dependencies_too=0,
                 prepare=1,
                 pvalues=0,
                 simple_too=0,
                 iterative_scaling=0,
                 weighting=None):
        """Fill the instance with contingency-based statistics of table ``t``.

        A 2-way table is built for each attribute against the class.  For
        each attribute pair ``j < i`` the attribute/attribute 2-way table
        (``dependencies_too``) and the attribute/attribute/class 3-way table
        (``interactions_too``) are built on request.  Dictionary results use
        index tuples as keys, with ``-1`` denoting the class variable.

        Parameters:
            t: data table exposing ``domain.attributes`` and
                ``domain.classVar``.
            save_data: if true, store the table (after preparation, if any)
                as ``self.discData``.
            interactions_too: if true, fill ``self.way3``, ``self.ig``,
                ``self.list`` and ``self.abslist`` from the 3-way tables.
            dependencies_too: if true, fill the ``(j, i)`` entries of
                ``self.way2``, ``self.ents``, ``self.corr`` and ``self.chi2``.
            prepare: if true, replace ``t`` by ``self._prepare(t)`` first.
            pvalues: if true, record ``orngContingency.getPvalue`` results in
                ``self.plist`` (sorted at the end) and ``self.plut``.
            simple_too: if true, record the diagonal sum of each 2-way
                table's ``pm`` matrix in ``self.simple``.
            iterative_scaling: selects ``c.IPF()`` over ``c.KSA()[0]`` as the
                divergence used for 3-way p-values.
            weighting: optional weight id, passed as ``wid=`` to the
                contingency builders when it is not ``None``.

        NOTE: at least one attribute is required, because the class entropy
        is read from the last attribute/class table built in the main loop.
        """
        if prepare:
            t = self._prepare(t)
        if save_data:
            self.discData = t  # save the (prepared) data

        domain_attrs = t.domain.attributes
        label_var = t.domain.classVar
        NA = len(domain_attrs)

        # Only pass ``wid`` when a weight id was given; otherwise call the
        # builders without it, as the unweighted branch did.
        if weighting is not None:
            extra = {'wid': weighting}
        else:
            extra = {}

        self.names = []
        self.labelname = ""
        if label_var:
            self.labelname = label_var.name
        self.gains = []
        self.freqs = []
        self.way2 = {}
        self.way3 = {}
        self.ig = []
        self.list = []
        self.abslist = []
        self.plist = []
        self.plut = {}
        self.ents = {}
        self.corr = {}
        self.chi2 = {}
        self.simple = {}

        for i in range(NA):
            current = domain_attrs[i]

            # 2-way table: attribute i against the class
            atc = orngContingency.get2Int(t, current, label_var, **extra)
            gai = atc.InteractionInformation()
            self.gains.append(gai)
            self.corr[(i, -1)] = gai
            self.ents[(i, )] = orngContingency.Entropy(atc.a)
            self.way2[(i, -1)] = atc
            self.ents[(i, -1)] = orngContingency.Entropy(atc.m)
            N = sum(atc.a)
            self.chi2[(i, i)] = statc.chisqprob(
                N * (numpy.sum(numpy.outer(atc.pa, atc.pa)) - 2 + len(atc.pa)),
                (len(atc.pa) - 1)**2)

            if simple_too:
                simp = 0.0
                for k in range(min(len(atc.a), len(atc.b))):
                    try:
                        simp += atc.pm[k, k]
                    except IndexError:
                        # best effort: skip a diagonal cell that is absent
                        pass
                self.simple[(i, -1)] = simp

            # keep the attribute name as a plain string
            st = '%s' % current.name
            self.names.append(st)

            if pvalues:
                pv = orngContingency.getPvalue(gai, atc)
                self.plist.append((pv, (gai, i, -1)))
                self.plut[(i, -1)] = pv

            line = []
            for j in range(i):
                earlier = domain_attrs[j]

                if dependencies_too:
                    # 2-way table: attribute j against attribute i
                    c = orngContingency.get2Int(t, earlier, current, **extra)
                    self.way2[(j, i)] = c
                    pair_gain = c.InteractionInformation()
                    self.ents[(j, i)] = orngContingency.Entropy(c.m)
                    self.corr[(j, i)] = pair_gain
                    self.chi2[(j, i)] = c.ChiSquareP()
                    if simple_too:
                        simp = 0.0
                        for k in range(min(len(c.a), len(c.b))):
                            try:
                                qq = c.pm[k, k]
                            except IndexError:
                                qq = 0
                            simp += qq
                        self.simple[(j, i)] = simp
                    if pvalues:
                        pv = orngContingency.getPvalue(pair_gain, c)
                        self.plist.append((pv, (pair_gain, j, i)))
                        self.plut[(j, i)] = pv

                if interactions_too:
                    # 3-way table: attribute j, attribute i and the class
                    c = orngContingency.get3Int(t, earlier, current,
                                                label_var, **extra)
                    self.way3[(j, i, -1)] = c
                    igv = c.InteractionInformation()
                    line.append(igv)
                    self.list.append((igv, (igv, j, i)))
                    self.abslist.append((abs(igv), (igv, j, i)))
                    if pvalues:
                        if iterative_scaling:
                            div = c.IPF()
                        else:
                            div = c.KSA()[0]
                        pv = orngContingency.getPvalue(div, c)
                        self.plist.append((pv, (igv, j, i, -1)))
                        self.plut[(j, i, -1)] = pv
            self.ig.append(line)

        # The class marginal of the last attribute/class table gives the
        # class entropy.
        self.entropy = orngContingency.Entropy(atc.b)
        self.ents[(-1, )] = self.entropy
        self.list.sort()
        self.abslist.sort()
        self.plist.sort()

        # (gain, attribute index) pairs, ascending by gain
        self.attlist = []
        for i in range(NA):
            self.attlist.append((self.gains[i], i))
        self.attlist.sort()
        self.NA = NA