def Basic(trainSet, testSet, file, fout, name, vecin, clfName, isCount=False):
    auc = 0
    l = GLOB(clfName).getClassifier()
    l.buildClassifier(trainSet)
    vec = l.evaluateModel(testSet)
    actual = testSet[:, -1]
    if isCount:
        vals = DPLIB.getMeasuresCount(actual, vec)
        print(name + ":" + file + ": " + str(vals))
        fout.write("\n" + name + ":" + file + ": " + "Vals=" + str(vals))
    else:
        tvals = DPLIB.getConfMatrix(actual, vec)
        vals = DPLIB.getMeasures(tvals)
        auc = DPLIB.getAUC(actual, vec)
        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
        fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) + ";" + "Vals=" + str(vals))
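
# --- Illustrative sketch (not part of the original pipeline) ---
# Basic() delegates all metric computation to DPLIB.getConfMatrix / getMeasures /
# getAUC, whose internals live elsewhere. The standalone helpers below show, under
# the assumption that labels are 0/1 and predictions are scores in [0, 1], how a
# confusion matrix and the derived measures (recall, precision, F, G-mean) are
# typically obtained. `conf_matrix` and `measures` are hypothetical names used only
# for illustration; they are not the DPLIB implementations.
import numpy as np


def conf_matrix(actual, scores, cutoff=0.5):
    # Binarize the scores at `cutoff` and count the four confusion-matrix cells.
    pred = (np.asarray(scores) >= cutoff).astype(int)
    actual = np.asarray(actual).astype(int)
    return {
        "tp": int(np.sum((pred == 1) & (actual == 1))),
        "tn": int(np.sum((pred == 0) & (actual == 0))),
        "fp": int(np.sum((pred == 1) & (actual == 0))),
        "fn": int(np.sum((pred == 0) & (actual == 1))),
    }


def measures(cm):
    # Derive common defect-prediction measures; the guards avoid division by zero.
    recall = cm["tp"] / (cm["tp"] + cm["fn"]) if (cm["tp"] + cm["fn"]) else 0.0
    precision = cm["tp"] / (cm["tp"] + cm["fp"]) if (cm["tp"] + cm["fp"]) else 0.0
    specificity = cm["tn"] / (cm["tn"] + cm["fp"]) if (cm["tn"] + cm["fp"]) else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0
    gmean = (recall * specificity) ** 0.5
    return {"Recall": recall, "Precision": precision, "F": f1, "GMean1": gmean}
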
def NNFilterMulti(trainSeti, testSet, file, fout, name, vecin, count, clfName, tunelrn, vSets):
    startTime = Common.getCurrentTimeMil()
    trainSet = DPLIB.NNFilterMulti(trainSeti, testSet, count)
    l = GLOB(clfName, tunelrn).getClassifier()
    if tunelrn:
        l = l.getTunedCLF(trainSet, vSets, fout, name, file)
        print("#TUNE-LRN-PARAMS-" + name + ":" + file + ": " + str(l.selectedParams))
        fout.write("#TUNE-LRN-PARAMS-" + name + ":" + file + ": ")
        fout.write(str(l.selectedParams))
        fout.write("\n")
    sCheck = l.getCLFOptions()
    print("#SETSET-LRN-PARAMS-" + name + ":" + file + ": " + str(sCheck))
    fout.write("#SETSET-LRN-PARAMS-" + name + ":" + file + ": ")
    fout.write(str(sCheck))
    fout.write("\n")
    l.buildClassifier(trainSet)
    vec = l.evaluateModel(testSet)
    vecin = vec
    tvals = DPLIB.getConfMatrix(testSet[:, -1], vecin)
    print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))
    fout.write("#CONF-TEST-" + name + ":" + file + ": ")
    fout.write(str(tvals))
    fout.write("\n")
    auc = DPLIB.getAUC(testSet[:, -1], vec)
    vals = DPLIB.getMeasures(tvals)
    print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
    fout.write(name + ":" + file + ": ")
    fout.write(str(vals))
    fout.write(" AUC = ")
    fout.write(str(auc))
    fout.write("\n")
    time = Common.getCurrentTimeMil() - startTime
    print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
    fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")
    return vecin
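
# --- Illustrative sketch (not part of the original pipeline) ---
# NNFilterMulti() relies on DPLIB.NNFilterMulti to build its training set from the
# nearest neighbours of the test instances. A minimal sketch of that idea (the
# classic NN / "Burak" relevancy filter) is shown below, assuming the last column
# holds the label and distances are Euclidean over the feature columns.
# `nn_filter` is a hypothetical helper, not the DPLIB implementation.
import numpy as np


def nn_filter(train, test, k=10):
    # For every test row, keep the indices of its k nearest training rows
    # (by Euclidean distance over the features), then return their union.
    tr_x, te_x = train[:, :-1], test[:, :-1]
    keep = set()
    for row in te_x:
        d = np.linalg.norm(tr_x - row, axis=1)
        keep.update(np.argsort(d)[:k].tolist())
    return train[sorted(keep)]
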
def WMulti(files, file, testSet, fout, features, name, clfName, dp, convertToBinary=True):
    train = []
    for file2 in files:
        # Same project prefix (first three characters) and an earlier release.
        if file2[0:3] == file[0:3] and file2 < file:
            train.append(file2)
    if len(train):
        trainSet = DPLIB.LoadCSV(train, dp, features, convertToBinary)
        if name.lower().find("infogain") >= 0:
            # indi = DPLIB.fSelectInfoGain(trainSet)
            # if DPLIB.useIterativeInfoGainSubsetting:
            #     indi = DPLIB.iterativeInfoGainSubsetting(trainSet, indi, clfName)
            # else:
            #     indi = DPLIB.getTopX(indi)
            # trainSet = DPLIB.fSelectSet(trainSet, indi)
            # testSet = DPLIB.fSelectSet(testSet, indi)
            pass
        l = GLOB(clfName).getClassifier()
        l.buildClassifier(trainSet)
        vec = l.evaluateModel(testSet)
        tvals = DPLIB.getConfMatrix(testSet[:, -1], vec)
        auc = DPLIB.getAUC(testSet[:, -1], vec)
        vals = DPLIB.getMeasures(tvals)
        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
        fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) + ";" + "Vals=" + str(vals))
    else:
        print(name + ":" + file + ": " + "!!!" + " AUC = !!!")
        fout.write("\n" + name + ":" + file + ": !!!")
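
# --- Illustrative sketch (not part of the original pipeline) ---
# WMulti() trains on every dataset whose file name shares the first three
# characters with the test file and sorts strictly before it, i.e. earlier
# releases of the same project. A quick illustration on hypothetical file names:
#
#   files = ["ant-1.3.csv", "ant-1.5.csv", "ant-1.7.csv", "ivy-1.4.csv"]
#   file = "ant-1.7.csv"
#   train = [f for f in files if f[0:3] == file[0:3] and f < file]
#   # -> ["ant-1.3.csv", "ant-1.5.csv"]
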
def LOC50(testSeti, file, fout, name, locIndex):
    startTime = Common.getCurrentTimeMil()
    spentISTime = 0
    tempTime = Common.getCurrentTimeMil()
    allloc = testSeti[:, locIndex]
    med = np.median(allloc)
    predicted = [1 if t >= med else 0 for t in allloc]
    spentISTime += Common.getCurrentTimeMil() - tempTime
    actual = testSeti[:, -1]
    tvals = DPLIB.getConfMatrix(actual, predicted)
    print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))
    fout.write("#CONF-TEST-" + name + ":" + file + ": ")
    fout.write(str(tvals))
    fout.write("\n")
    vals = DPLIB.getMeasures(tvals)
    auc = DPLIB.getAUC(actual, predicted)
    print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
    fout.write(name + ":" + file + ": ")
    fout.write(str(vals))
    fout.write(" AUC = ")
    fout.write(str(auc))
    fout.write("\n")
    time = Common.getCurrentTimeMil() - startTime
    print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
    fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")
    print("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime))
    fout.write("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime) + "\n")
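
# --- Illustrative sketch (not part of the original pipeline) ---
# LOC50() is the median-LOC baseline: a module is predicted defective exactly when
# its size is at or above the median LOC of the test set. A self-contained numpy
# illustration of that rule on hypothetical data:
import numpy as np

loc = np.array([120, 35, 980, 60, 240], dtype=float)  # lines of code per module
median_loc = np.median(loc)                           # 120.0 for this example
predicted = (loc >= median_loc).astype(int)           # -> [1, 0, 1, 0, 1]
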
def CreateBuckets(self, trainSet, testSet, vSets, name, testCut, iternum, save,
                  superbit, stages, buckets, doprint, clfName, tunelrn):
    out = []
    if self.isCount:
        keySet = list(DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
    else:
        keySet = list(DPLIB.getExtMeasures({"tp": 1, "tn": 2, "fp": 3, "fn": 4}).keys())
    out.append("#STARTED FOR-" + name + ":" + self.file + ": ")
    startTime = Common.getCurrentTimeMil()
    spentIsTime = 0
    tempTime = 0
    out.append("#Using also Label For train in LSH")
    if vSets is None:
        vSets = []
        vSets.append(trainSet)
    if save:
        DPLIB.SaveToCsv(trainSet, "MAIN-TRAIN-FILE-" + "ITER=" + str(iternum) + "--" +
                        "METHOD=" + name + "--FILE=" + self.file + "--")
        DPLIB.SaveToCsv(testSet, "MAIN-TEST-FILE-" + "ITER=" + str(iternum) + "--" +
                        "METHOD=" + name + "--FILE=" + self.file + "--")
        for i in range(len(vSets)):
            DPLIB.SaveToCsv(trainSet, "VSET-FILE-" + "INDEX=" + str(i) + "ITER=" + str(iternum) +
                            "--" + "METHOD=" + name + "--FILE=" + self.file + "--")
    np.random.shuffle(trainSet)
    np.random.shuffle(testSet)
    tempTime = Common.getCurrentTimeMil()
    count = len(trainSet)
    bins = {}
    # R^n
    n = trainSet.shape[1] - 1
    binid = 0
    # lshmin = LSHMinHash(stages, buckets, n)
    try:
        lshsuper = LSHSuperBit(stages=stages, buckets=buckets, dimensions=n)
    except Exception as ex:
        print('##SuperBit with specified parameters failed:' + str(ex))
        return None
    sp = 0.75
    # Compute a SuperBit signature and an LSH hash for every training instance.
    for i in range(count):
        vector = trainSet[i, 1:].tolist()
        hash = None
        if superbit:
            hash = lshsuper.hash(vector)
        else:
            # MinHash support
            # hash = lshmin.hash(vecBool)
            pass
        binid = hash[0]
        if binid not in bins:
            bins[binid] = []
        bins[binid].append(trainSet[i])
    spentIsTime += Common.getCurrentTimeMil() - tempTime
    numBins = len(bins.keys())
    for binid in bins.keys():
        bins[binid] = np.array(bins[binid])
    out.append("#Number of BINS:" + name + ":" + self.file + ": " + str(numBins))
    pop = []
    for i in bins.keys():
        trSet = bins[i]
        l = GLOB(clfName, tunelrn).getClassifier()
        # if tunelrn:
        #     l = l.getTunedCLF(trSet, vSets, fout, name, file)
        l.buildClassifier(trSet)
        cf = 0
        j = 0
        allvecs = []
        confs = []
        allcfs = []
        allaucs = []
        valsA = None
        confsA = None
        aucA = 0.0
        for vSet in vSets:
            vec = l.evaluateModel(vSet)
            actuals = vSet[:, -1]
            vals = None
            auc = 0
            if self.isCount:
                vals = DPLIB.getMeasuresCount(actuals, vec)
            else:
                auc = DPLIB.getAUC(actuals, vec)
                aucA += auc
                allaucs.append(auc)
                if testCut:
                    vCF = 0.1
                    bestCF = 0
                    bestCFVal = -1
                    bestVals = None
                    while True:
                        tvals = DPLIB.getConfMatrix(actuals, vec, vCF)
                        measures = DPLIB.getMeasures(tvals)
                        fit = measures["F"] * measures["GMean1"]
                        if fit > bestCFVal or bestVals is None:
                            bestCFVal = fit
                            bestCF = vCF
                            bestVals = tvals
                        vCF += 0.1
                        if vCF >= 1:
                            break
                    if confsA is None:
                        confsA = {key: 0 for key in bestVals.keys()}
                    for j in confsA.keys():
                        confsA[j] += bestVals[j]
                    confs.append(bestVals)
                    vals = DPLIB.getMeasures(bestVals)
                    cf += bestCF
                    allcfs.append(bestCF)
                else:
                    tvals = DPLIB.getConfMatrix(actuals, vec)
                    if confsA is None:
                        confsA = {key: 0 for key in tvals.keys()}
                    for j in confsA.keys():
                        confsA[j] += tvals[j]
                    confs.append(tvals)
                    vals = DPLIB.getMeasures(tvals)
                    allcfs.append(DPLIB.DefaultCF)
            allvecs.append(vals)
            if valsA is None:
                valsA = {key: 0 for key in keySet}
            for j in keySet:
                valsA[j] += vals[j]
        for j in keySet:
            valsA[j] /= len(vSets)
        h = None
        if not self.isCount:
            for j in confsA.keys():
                confsA[j] /= len(vSets)
            if testCut:
                cf /= len(vSets)
            aucA /= len(vSets)
            h = CHRM_GIS(trSet, valsA, aucA)
            h.fitnesses = allvecs
            h.aucs = allaucs
            h.conf = confsA
            h.confs = confs
            h.allcfs = allcfs
            if testCut:
                h.bestCF = cf
            else:
                h.bestCF = DPLIB.DefaultCF
        else:
            h = CHRM_GIS_Count(trSet, valsA)
            h.fitnesses = allvecs
        pop.append(h)
        l = None
    tempTime = Common.getCurrentTimeMil()
    pop = DPLIB.MySort(pop)
    spentIsTime += Common.getCurrentTimeMil() - tempTime
    top = pop[0]
    out.append("#Instances in Top:" + str(len(top.ds)))
    out.append("#STAGES:" + name + ":" + self.file + ": " + str(stages))
    out.append("#BUCKETS:" + name + ":" + self.file + ": " + str(buckets))
    if not self.isCount:
        out.append("#BEST-CF-VALUE:" + name + ":" + self.file + ": " + str(top.bestCF))
    l = GLOB(clfName, tunelrn).getClassifier()
    if tunelrn:
        # NOTE: 'fout' is not a parameter of CreateBuckets; it is assumed to be
        # available from the surrounding scope when tuning is enabled.
        l = l.getTunedCLF(top.ds, vSets, fout, name, self.file)
        out.append("#TUNE-LRN-PARAMS-" + name + ":" + self.file + ": " + str(l.selectedParams))
    sCheck = l.getCLFOptions()
    out.append("#SETSET-LRN-PARAMS-" + name + ":" + self.file + ": " + str(sCheck))
    l.buildClassifier(top.ds)
    vec = l.evaluateModel(testSet)
    out.append("#LSH-FOR-TOP-ONLY")
    if self.isCount:
        vals = DPLIB.getMeasuresCount(testSet[:, -1], vec)
        out.append(name + ":" + self.file + ": " + str(vals))
    else:
        tvals = DPLIB.getConfMatrix(testSet[:, -1], vec, top.bestCF)
        out.append("#CONF-TEST-" + name + ":" + self.file + ": " + str(tvals))
        vals = DPLIB.getMeasures(tvals)
        auc = DPLIB.getAUC(testSet[:, -1], vec)
        vals['auc'] = auc
        out.append(name + ":" + self.file + ": " + str(vals))
    for i in range(len(pop)):
        pop[i] = None
    pop = None
    for i in bins.keys():
        bins[i] = None
    bins = None
    time = Common.getCurrentTimeMil() - startTime
    if name.find("LSHTune") < 0:
        out.append("#TIME-FOR:" + name + ":" + self.file + ": " + str(time))
        out.append("#TIME-FOR-IS:" + name + ":" + self.file + ": " + str(spentIsTime))
    self.output = out
    top.addToExtra("SPENT-TIME-IS", float(spentIsTime))
    return top, out
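
# --- Illustrative sketch (not part of the original pipeline) ---
# CreateBuckets() groups training instances into bins keyed by the first LSH hash
# value returned by LSHSuperBit. The helper below sketches the underlying idea with
# plain numpy using sign-random-projection hashing: instances whose feature vectors
# fall on the same side of a set of random hyperplanes share a signature and land in
# the same bucket. This is only an approximation of SuperBit for illustration;
# `lsh_buckets` is a hypothetical name, not part of the pipeline.
import numpy as np


def lsh_buckets(data, n_planes=8, seed=0):
    # data: 2-D array whose last column is the label; only features are hashed here.
    rng = np.random.default_rng(seed)
    x = data[:, :-1]
    planes = rng.standard_normal((x.shape[1], n_planes))
    # Signature = tuple of sign bits of the projections onto the random hyperplanes.
    signs = (x @ planes >= 0).astype(int)
    buckets = {}
    for row, sig in zip(data, map(tuple, signs)):
        buckets.setdefault(sig, []).append(row)
    return {k: np.array(v) for k, v in buckets.items()}
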
def run(self, trainSeti, testSeti, name, fout, vSets, vSetType, fixedTrainSize,
        log, ignoreOK, threshold, thresholds, rejectedFits, rejectedPerfs,
        rejectedTestPerfs, clfName):
    mad = 0.0
    if self.isCount:
        keySet = list(DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
        mad = DPLIB.SetBugCountForMut(trainSeti)
    else:
        keySet = list(DPLIB.getExtMeasures({"tp": 1, "tn": 2, "fp": 3, "fn": 4}).keys())
    startTime = Common.getCurrentTimeMil()
    tempTime = 0
    spentISTime = 0
    # For binary prediction (isCount = False)
    auc = 0
    preds = []
    pop = []
    trainSet = np.copy(trainSeti)
    testSet = np.copy(testSeti)
    pop.clear()
    tstSize = len(testSet)
    partSize = int(tstSize / self.numParts)
    preds.clear()
    diffs = []
    auc = 0.0
    # For isCount = True
    actuals = []
    prrs = []
    if log:
        self.prnt("#GIS-OPTIONS;;For=" + name + "@" + ":iters=" + str(self.iters) +
                  "-POPSIZE=" + str(self.popSize) + "-NumParts=" + str(self.numParts) +
                  "-NumGens=" + str(self.numGens) + "-sizeTop=" + str(self.sizeTopP) +
                  "-Learner=" + clfName + "\n")
    isOK = True
    np.random.shuffle(testSet)
    self.FinalLearners = []
    self.FinalDatasets = []
    for p in range(self.numParts):
        diffp = []
        self.prnt("\n" + str(p) + ": ")
        tempTime = Common.getCurrentTimeMil()
        pop.clear()
        start = p * partSize
        end = (p + 1) * partSize
        if end > tstSize:
            end = tstSize
        if p == self.numParts - 1:
            end = tstSize
        testPart = testSet[start:end, :]
        spentISTime += Common.getCurrentTimeMil() - tempTime
        uinds = set()
        if vSets is None or len(vSets) == 0:
            if vSets is None:
                vSets = []
            vSet = None
            retVal = ""
            if vSetType == 'Train Set':
                vSet = trainSeti
                if log:
                    retVal = DPLIB.getStats(vSet, True, True, True)
                    self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                    retVal = None
            elif vSetType == 'NN-Filter':
                tempTime = Common.getCurrentTimeMil()
                vSet = DPLIB.NNFilter(trainSet, testPart, 1)
                spentISTime += Common.getCurrentTimeMil() - tempTime
                if log:
                    retVal = DPLIB.getStats(vSet, True, True, True)
                    self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                    retVal = None
            # If random but not fed into the function, generate one randomly, with the size of testPart.
            elif vSetType == 'Multiple Random' or vSetType == 'Single Random':
                size = len(testPart)
                vSet = []
                j = 0
                while j < size:
                    index = np.random.randint(len(trainSet))
                    if index not in uinds:
                        uinds.add(index)
                    else:
                        continue
                    vSet.append(trainSet[index])
                    j += 1
                if log:
                    retVal = DPLIB.getStats(vSet, True, True, True)
                    self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                    retVal = None
                vSet = np.array(vSet)
            elif vSetType == '!!TEST!!':
                # Upper bound. Should not be used.
                self.prnt("Should not be used.")
                vSet = testSeti
                if log:
                    retVal = DPLIB.getStats(vSet, True, True, True)
                    self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                    retVal = None
            elif vSetType == 'KS2':
                vSet = None
            vSets.append(vSet)
        else:
            retVal = ""
            for vSet in vSets:
                if log:
                    retVal = DPLIB.getStats(vSet, True, True, True)
                    self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                    retVal = None
        for i in range(self.popSize):
            tempTime = Common.getCurrentTimeMil()
            uinds.clear()
            size = 0
            if fixedTrainSize:
                size = self.chrmSize
            else:
                size = np.random.randint(self.chrmSize) + 10
            while True:
                trSet = []
                j = 0
                while j < size:
                    index = np.random.randint(len(trainSet))
                    trSet.append(trainSet[index])
                    if index not in uinds:
                        uinds.add(index)
                    j += 1
                spentISTime += Common.getCurrentTimeMil() - tempTime
                trSet = np.array(trSet)
                if len(set(list(trSet[:, -1]))) >= 2:
                    break
            tempTime = Common.getCurrentTimeMil()
            pv, p_vals = DPLIB.checkSimilarity(trSet[:, :-1], testPart[:, :-1])
            if self.isCount:
                h = CHRM_GIS_Count(trSet, None, extraAsFitness='p-val')
                h.addToExtra('p-val', sum(p_vals))
                pop.append(h)
            else:
                h = CHRM_GIS(trSet, None, None, extraAsFitness='p-val')
                h.addToExtra('p-val', sum(p_vals))
                pop.append(h)
            spentISTime += Common.getCurrentTimeMil() - tempTime
        tempTime = Common.getCurrentTimeMil()
        pop = DPLIB.MySort(pop)
        spentISTime += Common.getCurrentTimeMil() - tempTime
        cnt = 0
        g = 0
        for g in range(self.numGens):
            self.prnt(str(g) + " ")
            if log:
                pass
                # retVal = ""
                # for i in range(len(pop)):
                #     chrm = pop[i]
                #     retVal = DPLIB.getStats(chrm.ds, False, False, False)
                #     self.prnt("#POPITNFO;;gn=" + str(g) + ";;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                #     self.prnt("#POPITVALS;;gn=" + str(g) + ";;prt=" + str(p) + ";;For=" + name + "@" + ":" +
                #               "rpaf=" + str(chrm.fitness).replace(", ", ",") +
                #               ";;conf=" + str(chrm.conf).replace(", ", ",") + ";;fit=" + str(chrm.getFitness()) +
                #               ";;TConf2=" + str(chrm.testConf).replace(", ", ",") +
                #               ";;TRpaf2=" + str(chrm.testFitness).replace(", ", ",") + "\n")
                #     retVal = None
            tempTime = Common.getCurrentTimeMil()
            newPop = []
            for i in range(self.sizeTopP):
                newPop.append(pop[i])
            i = 0
            for i in range(0, len(pop) - self.sizeTopP, 2):
                idx1 = 0
                idx2 = 0
                while idx1 == idx2:
                    if cnt >= 3:
                        idx1 = np.random.randint(len(pop))
                        idx2 = np.random.randint(len(pop))
                    else:
                        idx1 = GA.tornament(pop)
                        idx2 = GA.tornament(pop)
                    cnt += 1
                cnt = 0
                ds1 = pop[idx1].ds
                ds2 = pop[idx2].ds
                while True:
                    ds1, ds2 = GA.crossOver(ds1, ds2, fixedTrainSize, isCount=self.isCount)
                    if len(set(list(ds1[:, -1]))) >= 2 and len(set(list(ds2[:, -1]))) >= 2:
                        break
                    self.prnt('repeat cross')
                while True:
                    ds1 = GA.Mutate(ds1, isCount=self.isCount, mad=mad)
                    if len(set(list(ds1[:, -1]))) >= 2:
                        break
                    self.prnt('repeat mut ds1, because all elements are of type one class')
                while True:
                    ds2 = GA.Mutate(ds2, isCount=self.isCount, mad=mad)
                    if len(set(list(ds2[:, -1]))) >= 2:
                        break
                    self.prnt('repeat mut ds2, because all elements are of type one class')
                if self.isCount:
                    newPop.append(CHRM_GIS_Count(ds1, None, extraAsFitness='p-val'))
                    newPop.append(CHRM_GIS_Count(ds2, None, extraAsFitness='p-val'))
                else:
                    newPop.append(CHRM_GIS(ds1, None, extraAsFitness='p-val'))
                    newPop.append(CHRM_GIS(ds2, None, extraAsFitness='p-val'))
            spentISTime += Common.getCurrentTimeMil() - tempTime
            for i in range(len(newPop)):
                tempTime = Common.getCurrentTimeMil()
                pv, p_vals = DPLIB.checkSimilarity(newPop[i].ds[:, :-1], testPart[:, :-1])
                newPop[i].addToExtra('p-val', sum(p_vals))
                spentISTime += Common.getCurrentTimeMil() - tempTime
            tempTime = Common.getCurrentTimeMil()
            newPop = DPLIB.MySort(newPop)
            exit = False
            countComp = 0
            newPop, rdel = DPLIB.CombinePops(pop, newPop)
            if log:
                pass
                # retVal = ""
                # for i in range(len(rdel)):
                #     chrm = rdel[i]
                #     retVal = DPLIB.getStats(chrm.ds, False, False, False)
                #     self.prnt("#POPDELITNFO;;gn=" + str(g) + ";;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal +
                #               ";;rpaf=" + str(chrm.fitness).replace(", ", ",") +
                #               ";;conf=" + str(chrm.conf).replace(", ", ",") + ";;fit=" + str(chrm.getFitness()) +
                #               ";;TConf2=" + str(chrm.testConf).replace(", ", ",") +
                #               ";;TRpaf2=" + str(chrm.testFitness).replace(", ", ",") + "\n")
                #     retVal = None
            rdel = None
            diff = abs(GA.GetMeanFittness(pop, countComp) - GA.GetMeanFittness(newPop, countComp))
            if diff < 0.000001:
                exit = True
            diffp.append(diff)
            pop = newPop
            if (pop[0].getFitness() > 0.0) and exit:
                break
            exit = False
            spentISTime += Common.getCurrentTimeMil() - tempTime
        w = []
        if self.count == 0:
            self.count = len(pop)
        for i in range(self.count):
            l = GLOB(clfName).getClassifier()
            tds = pop[i].ds
            self.FinalLearners.append(l)
            self.FinalDatasets.append(tds)
            testPartI = testPart
            l.buildClassifier(tds)
            if self.isCount:
                actual = DPLIB.getActuals(testPartI)
                prr = l.evaluateModel(testPartI)
                # vals = DPLIB.getMeasuresCount(actual, prr)
                actall = None
                predall = None
                if len(actuals) == self.count:
                    actuals[i] = actuals[i] + actual
                    prrs[i] = prrs[i] + prr
                else:
                    actuals.append(actual)
                    prrs.append(prr)
            else:
                vec = l.evaluateModel(testPartI)
                if len(preds) == self.count:
                    preds[i] += list(vec)
                else:
                    preds.append(list(vec))
            if log:
                pass
                # retVal = DPLIB.getStats(tds, True, True, True)
                # self.prnt("#TRPRTNFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                # retVal = DPLIB.getStats(testPart, True, True, True)
                # self.prnt("#TSTPRTNFO;;prt=" + str(p) + ";;For=" + name + "@" + ":" + retVal + "\n")
                # vals = DPLIB.getConfMatrix(testPart[:, -1], vec)
                # self.prnt("#TSTPRTVALS;;prt=" + str(p) + ";;For=" + name + "@" + ":" +
                #           "rpaf=" + str(DPLIB.getMeasures(vals)).replace(", ", ",") +
                #           ";;conf=" + str(vals).replace(", ", ",") + "\n")
                # retVal = None
            w.append(pop[i].getFitness())
        isOK = True
        if not isOK:
            pass
        else:
            thresholds.append(pop[0].getFitness())
        self.prnt()
        self.prnt("Best Top Fitness:" + str(pop[0].fitness))
        self.prnt("Best Fitness (mean):", pop[0].getMeanFitness())
    if self.isCount:
        vals = DPLIB.getMeasuresCountSet(actuals, prrs)
    else:
        vals1 = DPLIB.getConfMatrixSet(testSet[:, -1], preds)
        vals = DPLIB.getMeasures(vals1)
    if isOK:
        if not self.isCount:
            if len(preds) == 1:
                auc = DPLIB.getAUC(testSet[:, -1], preds[0])
            else:
                auc = DPLIB.getAUCSet(testSet[:, -1], preds)
            vals['auc'] = auc
            self.prnt()
            self.prnt("#CONF-TEST:" + name + ":" + self.file + ": " + str(vals1))
            self.prnt()
            self.prnt(name + ":" + self.file + ": " + str(vals))
            self.prnt()
        else:
            self.prnt()
            self.prnt(name + ":" + self.file + ": " + str(vals))
            self.prnt()
    else:
        bestI = pop[0]
        rejectedFits.append(bestI.getFitness())
        rejVals = copy.deepcopy(bestI.fitness)
        rejectedPerfs.append(rejVals)
        testRejVals = copy.deepcopy(vals)
        rejectedTestPerfs.append(testRejVals)
        self.prnt("#NOTOKPREDS----" + name + ":" + self.file + ": " + str(vals))
        if not self.isCount:
            self.prnt()
            self.prnt("#NOTOKPREDS----" + "#CONF-TEST:" + name + ":" + self.file + ": " + str(vals1))
    time = Common.getCurrentTimeMil() - startTime
    self.prnt("#TIME-FOR:" + name + ":" + self.file + ": " + str(time))
    self.prnt("#TIME-FOR-IS:" + name + ":" + self.file + ": " + str(spentISTime))
    return isOK
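
# --- Illustrative sketch (not part of the original pipeline) ---
# run() evolves candidate training subsets with tournament selection (GA.tornament),
# crossover over the selected instances (GA.crossOver), and mutation (GA.Mutate),
# none of which are shown in this file. The helpers below sketch two of those
# primitives with numpy, assuming the population list is already sorted best-first
# (as after DPLIB.MySort) and that each chromosome is a 2-D array of selected rows.
# `tournament` and `crossover_rows` are hypothetical names, not the GA API.
import numpy as np


def tournament(pop_size, k=3, rng=None):
    # With a best-first population, the winner of a k-way tournament is simply the
    # smallest of k randomly drawn indices.
    rng = rng or np.random.default_rng()
    return int(np.min(rng.integers(0, pop_size, size=k)))


def crossover_rows(ds1, ds2, rng=None):
    # Single-point crossover on the row dimension: swap the tails of the two subsets.
    rng = rng or np.random.default_rng()
    cut1 = rng.integers(1, len(ds1))
    cut2 = rng.integers(1, len(ds2))
    child1 = np.vstack([ds1[:cut1], ds2[cut2:]])
    child2 = np.vstack([ds2[:cut2], ds1[cut1:]])
    return child1, child2
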
def NNFilter(trainSeti, testSet, file, fout, name, vecin, count, clfName, tunelrn, vSets, testCut):
    startTime = Common.getCurrentTimeMil()
    spentISTime = 0
    tempTime = 0
    bestFit = 0.0
    bestCount = 0
    btrainSet = None
    cfbf = DPLIB.DefaultCF
    if count == 0:
        # Search for the best neighbour count on the validation sets.
        for i in range(1, 11):
            tempTime = Common.getCurrentTimeMil()
            trainSet = DPLIB.NNFilter(trainSeti, testSet, i)
            spentISTime += Common.getCurrentTimeMil() - tempTime
            l = GLOB(clfName, tunelrn).getClassifier()
            if tunelrn:
                l = l.getTunedCLF(trainSet, vSets, fout, name, file)
            l.buildClassifier(trainSet)
            avgFit = 0.0
            j = 0
            for j in range(len(vSets)):
                vec = l.evaluateModel(vSets[j])
                tvals = DPLIB.getConfMatrix(vSets[j][:, -1], vec)
                measures = DPLIB.getExtMeasures(tvals)
                fit = measures["F"] * measures["GMean1"]
                avgFit += fit
            avgFit /= len(vSets)
            if avgFit > bestFit:
                bestFit = avgFit
                bestCount = i
                btrainSet = trainSet[:, :]
        if testCut:
            # Tune the classification cutoff on the validation sets.
            cf = 0
            j = 0
            trainSet = btrainSet
            l = GLOB(clfName, tunelrn).getClassifier()
            if tunelrn:
                l = l.getTunedCLF(trainSet, vSets, fout, name, file)
            l.buildClassifier(trainSet)
            avgFit = 0.0
            for j in range(len(vSets)):
                vec = l.evaluateModel(vSets[j])
                vCF = 0.1
                bestCF = 0
                bestCFVal = -1
                bestVals = None
                while True:
                    tvals = DPLIB.getConfMatrix(vSets[j][:, -1], vec, vCF)
                    measures = DPLIB.getExtMeasures(tvals)
                    fit = measures["F"] * measures["GMean1"]
                    if fit > bestCFVal or bestVals is None:
                        bestCFVal = fit
                        bestCF = vCF
                        bestVals = tvals
                    vCF += 0.1
                    if vCF >= 1:
                        break
                cf += bestCF
            cf /= len(vSets)
            cfbf = cf
    trainSet = None
    if count == 0:
        trainSet = btrainSet
    else:
        tempTime = Common.getCurrentTimeMil()
        trainSet = DPLIB.NNFilter(trainSeti, testSet, count)
        spentISTime = Common.getCurrentTimeMil() - tempTime
        bestCount = count
    l = GLOB(clfName, tunelrn).getClassifier()
    if tunelrn:
        l = l.getTunedCLF(trainSet, vSets, fout, name, file)
        print("#TUNE-LRN-PARAMS-" + name + ":" + file + ": " + str(l.selectedParams))
        fout.write("#TUNE-LRN-PARAMS-" + name + ":" + file + ": ")
        fout.write(str(l.selectedParams))
        fout.write("\n")
    sCheck = l.getCLFOptions()
    print("#SETSET-LRN-PARAMS-" + name + ":" + file + ": " + str(sCheck))
    fout.write("#SETSET-LRN-PARAMS-" + name + ":" + file + ": ")
    fout.write(str(sCheck))
    fout.write("\n")
    l.buildClassifier(trainSet)
    vec = l.evaluateModel(testSet)
    vecin = vec
    tvals = DPLIB.getConfMatrix(testSet[:, -1], vecin, cfbf)
    if count == 0:
        print("#BESTCOUNT-" + name + ":" + file + ": " + str(bestCount))
        fout.write("#BESTCOUNT-" + name + ":" + file + ": ")
        fout.write(str(bestCount))
        fout.write("\n")
        print("#BESTFIT-" + name + ":" + file + ": " + str(bestFit))
        fout.write("#BESTFIT-" + name + ":" + file + ": ")
        fout.write(str(bestFit))
        fout.write("\n")
    print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))
    fout.write("#CONF-TEST-" + name + ":" + file + ": ")
    fout.write(str(tvals))
    fout.write("\n")
    if testCut:
        print("#NN-BEST-CF-VALUE:" + name + ":" + file + ": " + str(cfbf))
        fout.write("#NN-BEST-CF-VALUE:" + name + ":" + file + ": ")
        fout.write(str(cfbf))
        fout.write("\n")
    vals = DPLIB.getMeasures(tvals)
    auc = DPLIB.getAUC(testSet[:, -1], vecin)
    print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))
    fout.write(name + ":" + file + ": ")
    fout.write(str(vals))
    fout.write(" AUC = ")
    fout.write(str(auc))
    fout.write("\n")
    time = Common.getCurrentTimeMil() - startTime
    print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
    fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")
    print("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime))
    fout.write("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime) + "\n")
    return vecin
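
# --- Illustrative sketch (not part of the original pipeline) ---
# When testCut is enabled, NNFilter() (like CreateBuckets()) scans candidate
# cutoffs 0.1, 0.2, ..., 0.9 and keeps the one maximising F * GMean1 on the
# validation predictions. The standalone sketch below mirrors that grid search,
# reusing the hypothetical conf_matrix()/measures() helpers defined near Basic();
# `best_cutoff` is an illustrative name, not part of the pipeline.
def best_cutoff(actual, scores):
    best_cf, best_fit = 0.1, -1.0
    for cf in [i / 10 for i in range(1, 10)]:  # cutoffs 0.1 .. 0.9
        m = measures(conf_matrix(actual, scores, cutoff=cf))
        fit = m["F"] * m["GMean1"]
        if fit > best_fit:
            best_fit, best_cf = fit, cf
    return best_cf
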