Python DPLIB.getMeasures Examples

Programming Language: Python

Namespace/Package Name: Lib

Class/Type: DPLIB

Method/Function: getMeasures

Examples at hotexamples.com: 8

Python DPLIB.getMeasures - 8 examples found. These are the top-rated real-world Python examples of Lib.DPLIB.getMeasures, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getMeasures(8)

getAUC(7)

getConfMatrix(7)

getMeasuresCount(4)

getExtMeasures(3)

FindAllSimilarInstancesIndexes(2)

CombinePops(2)

LoadCSV(2)

MySort(2)

NNFilter(2)

SaveToCsv(1)

getConfAndExtMeasures(1)

getStats(1)

getRandomSubSet(1)

getMeasuresCountSet(1)

MAD(1)

getConfMatrixSet(1)

getConfMatrixAndExtMeasures(1)

getActuals(1)

SetBugCountForMut(1)

getAUCSet(1)

getAUCCV(1)

NNFilterMulti(1)

fSelectSet(1)

fSelectInfoGain(1)

doReplaces(1)

checkSimilarity(1)

applyPCA(1)

SortPopulation(1)

iterativeInfoGainSubsetting(1)

Example #1

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def Basic(trainSet,
              testSet,
              file,
              fout,
              name,
              vecin,
              clfName,
              isCount=False):
        """Train the ``clfName`` learner on ``trainSet`` and report on ``testSet``.

        The classifier is obtained from ``GLOB(clfName)``, built on
        ``trainSet`` and evaluated on ``testSet``; the last column of
        ``testSet`` is used as the actual values. One result line is printed
        and one is written to ``fout``.

        Args:
            trainSet: Data passed to ``buildClassifier``.
            testSet: 2-D indexable data; ``testSet[:, -1]`` is the actuals.
            file: Label string included in every output line.
            fout: Object with a ``write`` method receiving the result line.
            name: Method label string included in every output line.
            vecin: Not used by this function.
            clfName: Classifier name handed to ``GLOB``.
            isCount: When true, report ``DPLIB.getMeasuresCount``; otherwise
                report ``DPLIB.getMeasures`` of the confusion matrix plus AUC.
        """
        learner = GLOB(clfName).getClassifier()
        learner.buildClassifier(trainSet)

        predictions = learner.evaluateModel(testSet)
        actual = testSet[:, -1]

        if isCount:
            vals = DPLIB.getMeasuresCount(actual, predictions)
            label = name + ":" + file + ": "
            print(label + str(vals))
            fout.write("\n" + label + "Vals=" + str(vals))
            return

        confMatrix = DPLIB.getConfMatrix(actual, predictions)
        vals = DPLIB.getMeasures(confMatrix)
        auc = DPLIB.getAUC(actual, predictions)

        label = name + ":" + file + ": "
        aucText = " AUC = " + str(auc)
        print(label + str(vals) + aucText)
        fout.write("\n" + label + aucText + ";" + "Vals=" + str(vals))

Example #2

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def NNFilterMulti(trainSeti, testSet, file, fout, name, vecin, count,
                      clfName, tunelrn, vSets):
        """Filter the training data with ``DPLIB.NNFilterMulti``, then train and score.

        The filtered training set is used to build the ``clfName`` learner
        (optionally tuned first when ``tunelrn`` is truthy). The confusion
        matrix, measures, AUC and elapsed time are printed and written to
        ``fout``.

        Args:
            trainSeti: Unfiltered training data.
            testSet: 2-D indexable data; ``testSet[:, -1]`` is the actuals.
            file: Label string included in every output line.
            fout: Object with a ``write`` method.
            name: Method label string included in every output line.
            vecin: Incoming value is ignored; it is overwritten by the
                predictions produced here.
            count: Forwarded to ``DPLIB.NNFilterMulti``.
            clfName: Classifier name handed to ``GLOB``.
            tunelrn: When truthy, the learner is replaced by
                ``getTunedCLF(...)`` and its parameters are logged.
            vSets: Forwarded to ``getTunedCLF``.

        Returns:
            The predictions returned by ``evaluateModel(testSet)``.
        """
        startTime = Common.getCurrentTimeMil()

        filteredTrain = DPLIB.NNFilterMulti(trainSeti, testSet, count)

        learner = GLOB(clfName, tunelrn).getClassifier()
        tag = name + ":" + file + ": "

        if tunelrn:
            learner = learner.getTunedCLF(filteredTrain, vSets, fout, name,
                                          file)

            print("#TUNE-LRN-PARAMS-" + tag + str(learner.selectedParams))
            fout.write("#TUNE-LRN-PARAMS-" + tag)
            fout.write(str(learner.selectedParams))
            fout.write("\n")

            options = learner.getCLFOptions()
            print("#SETSET-LRN-PARAMS-" + tag + str(options))
            fout.write("#SETSET-LRN-PARAMS-" + tag)
            fout.write(str(options))
            fout.write("\n")

        learner.buildClassifier(filteredTrain)
        predictions = learner.evaluateModel(testSet)
        vecin = predictions

        confMatrix = DPLIB.getConfMatrix(testSet[:, -1], vecin)
        print("#CONF-TEST-" + tag + str(confMatrix))
        fout.write("#CONF-TEST-" + tag)
        fout.write(str(confMatrix))
        fout.write("\n")

        auc = DPLIB.getAUC(testSet[:, -1], predictions)
        vals = DPLIB.getMeasures(confMatrix)
        print(tag + str(vals) + " AUC = " + str(auc))
        fout.write(tag)
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        time = Common.getCurrentTimeMil() - startTime
        timeLine = "#TIME-FOR:" + tag + str(time)
        print(timeLine)
        fout.write(timeLine + "\n")

        return vecin

Example #3

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def WMulti(files,
               file,
               testSet,
               fout,
               features,
               name,
               clfName,
               dp,
               convertToBinary=True):
        """Train on earlier same-prefix files and evaluate on ``testSet``.

        A file from ``files`` is used for training when its first three
        characters match those of ``file`` and its name sorts strictly before
        ``file``. If at least one such file exists, they are loaded with
        ``DPLIB.LoadCSV``, the ``clfName`` learner is built on them, and the
        measures and AUC on ``testSet`` are printed and written to ``fout``.
        Otherwise a ``!!!`` placeholder line is printed and written.

        Args:
            files: Iterable of file-name strings to pick training files from.
            file: Name of the file under test; also used as an output label.
            testSet: 2-D indexable data; ``testSet[:, -1]`` is the actuals.
            fout: Object with a ``write`` method.
            features: Forwarded to ``DPLIB.LoadCSV``.
            name: Method label string included in every output line.
            clfName: Classifier name handed to ``GLOB``.
            dp: Forwarded to ``DPLIB.LoadCSV``.
            convertToBinary: Forwarded to ``DPLIB.LoadCSV``.
        """
        # The original condition was ``file2 < file < 0``, a chained comparison
        # that also evaluates ``file < 0`` (str vs int) and raises TypeError as
        # soon as an earlier file is found. Only the name ordering is intended.
        train = [
            file2 for file2 in files
            if file2[0:3] == file[0:3] and file2 < file
        ]

        if train:
            trainSet = DPLIB.LoadCSV(train, dp, features, convertToBinary)

            if "infogain" in name.lower():
                # Feature selection for the "infogain" variants is not
                # implemented here; train and test sets are used unchanged.
                pass

            l = GLOB(clfName).getClassifier()
            l.buildClassifier(trainSet)
            vec = l.evaluateModel(testSet)

            tvals = DPLIB.getConfMatrix(testSet[:, -1], vec)
            auc = DPLIB.getAUC(testSet[:, -1], vec)
            vals = DPLIB.getMeasures(tvals)
            print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

            fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) +
                       ";" + "Vals=" + str(vals))

        else:
            # No earlier release with the same prefix: emit placeholders.
            print(name + ":" + file + ": " + "!!!" + " AUC = !!!")
            fout.write("\n" + name + ":" + file + ": !!!")

Example #4

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def LOC50(testSeti, file, fout, name, locIndex):
        """Score a median-split predictor on column ``locIndex`` and log it.

        Every row whose ``locIndex`` value is at or above the column median is
        predicted as 1, all others as 0. The predictions are compared with the
        last column of ``testSeti``; the confusion matrix, measures, AUC and
        elapsed time are printed and written to ``fout``.

        Args:
            testSeti: 2-D NumPy-style array; last column holds the actuals.
            file: Label string included in every output line.
            fout: Object with a ``write`` method.
            name: Method label string included in every output line.
            locIndex: Column index used for the median split.
        """
        startTime = Common.getCurrentTimeMil()
        # A second timestamp is taken here but never read afterwards.
        spentISTime = Common.getCurrentTimeMil()

        locColumn = testSeti[:, locIndex]
        cutoff = np.median(locColumn)

        predicted = []
        for loc in locColumn:
            predicted.append(1 if loc >= cutoff else 0)

        actual = testSeti[:, -1]
        confMatrix = DPLIB.getConfMatrix(actual, predicted)

        tag = name + ":" + file + ": "

        print("#CONF-TEST-" + tag + str(confMatrix))
        fout.write("#CONF-TEST-" + tag)
        fout.write(str(confMatrix))
        fout.write("\n")

        vals = DPLIB.getMeasures(confMatrix)
        auc = DPLIB.getAUC(actual, predicted)

        print(tag + str(vals) + " AUC = " + str(auc))
        fout.write(tag)
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        time = Common.getCurrentTimeMil() - startTime
        elapsed = str(time)

        # Both timing lines report the same total elapsed time.
        for prefix in ("#TIME-FOR:", "#TIME-FOR-IS:"):
            line = prefix + tag + elapsed
            print(line)
            fout.write(line + "\n")

Example #5

Show file

    def CreateBuckets(self, trainSet, testSet, vSets, name, testCut, iternum,
                      save, superbit, stages, buckets, doprint, clfName,
                      tunelrn):
        """Bucket ``trainSet`` with LSH, rank the buckets, and test the best one.

        Each training row is hashed with ``LSHSuperBit`` and grouped by the
        first element of its hash. A learner is built on every bucket and
        evaluated on each validation set in ``vSets``; the averaged measures
        are wrapped in a ``CHRM_GIS`` / ``CHRM_GIS_Count`` object. The objects
        are sorted with ``DPLIB.MySort`` and the first one ("top") is used to
        train the final learner that is evaluated on ``testSet``.

        Note that ``trainSet`` and ``testSet`` are shuffled in place.

        Args:
            trainSet: 2-D NumPy array of training rows (last column = label).
            testSet: 2-D NumPy array of test rows (last column = label).
            vSets: List of validation sets, or ``None`` to validate on
                ``trainSet`` itself.
            name: Method label used in log lines.
            testCut: When truthy (and not ``self.isCount``), cut-off values
                0.1, 0.2, ... below 1 are tried per validation set and the one
                maximising ``F * GMean1`` is kept.
            iternum: Iteration number, used only in saved CSV names.
            save: When truthy, train/test/validation sets are written with
                ``DPLIB.SaveToCsv``.
            superbit: When truthy, rows are hashed with ``LSHSuperBit``.
            stages: Forwarded to ``LSHSuperBit`` and logged.
            buckets: Forwarded to ``LSHSuperBit`` and logged.
            doprint: Not used by this method.
            clfName: Classifier name handed to ``GLOB``.
            tunelrn: Forwarded to ``GLOB``; when truthy the final learner is
                tuned with ``getTunedCLF``.

        Returns:
            ``(top, out)`` where ``top`` is the best-ranked bucket object and
            ``out`` is the list of log lines, or ``None`` if ``LSHSuperBit``
            could not be constructed.
        """
        out = []
        if self.isCount:
            keySet = list(
                DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
        else:
            keySet = list(
                DPLIB.getExtMeasures({
                    "tp": 1,
                    "tn": 2,
                    "fp": 3,
                    "fn": 4
                }).keys())

        out.append("#STARTED FOR-" + name + ":" + self.file + ": ")

        startTime = Common.getCurrentTimeMil()
        spentIsTime = 0
        tempTime = 0

        out.append("#Using also Label For train in LSH")

        if vSets is None:

            vSets = []
            vSets.append(trainSet)

        if (save):
            DPLIB.SaveToCsv(
                trainSet, "MAIN-TRAIN-FILE-" + "ITER=" + str(iternum) + "--" +
                "METHOD=" + name + "--FILE=" + self.file + "--")
            DPLIB.SaveToCsv(
                testSet, "MAIN-TEST-FILE-" + "ITER=" + str(iternum) + "--" +
                "METHOD=" + name + "--FILE=" + self.file + "--")
            for i in range(len(vSets)):
                # Save the i-th validation set itself; the original wrote
                # trainSet under every VSET file name.
                DPLIB.SaveToCsv(
                    vSets[i],
                    "VSET-FILE-" + "INDEX=" + str(i) + "ITER=" + str(iternum) +
                    "--" + "METHOD=" + name + "--FILE=" + self.file + "--")

        np.random.shuffle(trainSet)
        np.random.shuffle(testSet)
        tempTime = Common.getCurrentTimeMil()
        count = len(trainSet)
        bins = {}
        # R^n
        n = trainSet.shape[1] - 1

        binid = 0
        #lshmin = LSHMinHash(stages, buckets, n);
        try:
            lshsuper = LSHSuperBit(stages=stages,
                                   buckets=buckets,
                                   dimensions=n)
        except Exception as ex:
            print('##SuperBit with specified parameters failed:' + str(ex))
            return None
        sp = 0.75
        # Compute a SuperBit signature, and a LSH hash
        for i in range(count):
            # Columns 1..end are hashed, i.e. the label column is included.
            vector = trainSet[i, 1:].tolist()

            hash = None
            if (superbit):
                hash = lshsuper.hash(vector)
            else:
                ##Minhash support
                # #hash = lshmin.hash(vecBool);
                # NOTE(review): MinHash is not implemented, so with
                # superbit falsy ``hash`` stays None and the indexing below
                # raises TypeError.
                pass

            binid = hash[0]
            if not binid in bins.keys():
                bins[binid] = []

            bins[binid].append(trainSet[i])

        spentIsTime += Common.getCurrentTimeMil() - tempTime

        numBins = len(bins.keys())

        for binid in bins.keys():
            bins[binid] = np.array(bins[binid])

        out.append("#Number of BINS:" + name + ":" + self.file + ": " +
                   str(numBins))

        pop = []

        for i in bins.keys():

            trSet = bins[i]
            l = GLOB(clfName, tunelrn).getClassifier()

            #if (tunelrn):
            #    l = l.getTunedCLF(trSet, vSets,fout,name, file);

            l.buildClassifier(trSet)
            cf = 0
            j = 0

            allvecs = []
            confs = []
            allcfs = []
            allaucs = []
            valsA = None
            confsA = None
            aucA = 0.0
            for vSet in vSets:

                vec = None
                actuals = None

                vec = l.evaluateModel(vSet)
                actuals = vSet[:, -1]

                vals = None
                auc = 0
                if self.isCount:
                    vals = DPLIB.getMeasuresCount(actuals, vec)
                else:

                    auc = DPLIB.getAUC(actuals, vec)
                    aucA += auc
                    allaucs.append(auc)
                    if (testCut):
                        # Scan cut-offs in steps of 0.1 and keep the one with
                        # the highest F * GMean1 on this validation set.
                        vCF = 0.1
                        bestCF = 0
                        bestCFVal = -1
                        bestVals = None

                        while True:

                            tvals = DPLIB.getConfMatrix(actuals, vec, vCF)
                            measures = DPLIB.getMeasures(tvals)
                            fit = measures["F"] * measures["GMean1"]
                            if (fit > bestCFVal or bestVals is None):

                                bestCFVal = fit
                                bestCF = vCF
                                bestVals = tvals

                            vCF += 0.1

                            if (vCF >= 1):
                                break

                        if confsA is None:

                            confsA = {key: 0 for key in bestVals.keys()}

                        for j in confsA.keys():
                            confsA[j] += bestVals[j]

                        confs.append(bestVals)

                        vals = DPLIB.getMeasures(bestVals)
                        cf += bestCF
                        allcfs.append(bestCF)

                    else:

                        tvals = DPLIB.getConfMatrix(actuals, vec)

                        if confsA is None:

                            confsA = {key: 0 for key in tvals.keys()}

                        for j in confsA.keys():
                            confsA[j] += tvals[j]

                        confs.append(tvals)

                        vals = DPLIB.getMeasures(tvals)
                        allcfs.append(DPLIB.DefaultCF)

                allvecs.append(vals)

                if valsA is None:
                    valsA = {key: 0 for key in keySet}

                for j in keySet:
                    valsA[j] += vals[j]

            # Average the accumulated measures over the validation sets.
            for j in keySet:
                valsA[j] /= len(vSets)

            h = None
            if not self.isCount:
                for j in confsA.keys():
                    confsA[j] /= len(vSets)

                if (testCut):
                    cf /= len(vSets)

                aucA /= len(vSets)

                h = CHRM_GIS(trSet, valsA, aucA)
                h.fitnesses = allvecs
                h.aucs = allaucs
                h.conf = confsA
                h.confs = confs
                h.allcfs = allcfs
                if (testCut):
                    h.bestCF = cf
                else:
                    h.bestCF = DPLIB.DefaultCF
            else:

                h = CHRM_GIS_Count(trSet, valsA)
                h.fitnesses = allvecs

            pop.append(h)
            l = None

        tempTime = Common.getCurrentTimeMil()
        pop = DPLIB.MySort(pop)
        spentIsTime += Common.getCurrentTimeMil() - tempTime
        top = pop[0]

        out.append("#Instances in Top:" + str(len(top.ds)))

        out.append("#STAGES:" + name + ":" + self.file + ": " + str(stages))
        out.append("#BUCKETS:" + name + ":" + self.file + ": " + str(buckets))
        if not self.isCount:
            out.append("#BEST-CF-VALUE:" + name + ":" + self.file + ": " +
                       str(top.bestCF))

        l = GLOB(clfName, tunelrn).getClassifier()

        if (tunelrn):
            # The original passed an undefined ``file``; every other use in
            # this method is ``self.file``.
            # NOTE(review): ``fout`` is not a parameter or local of this
            # method either — confirm where the output handle should come
            # from (module global or an attribute of self).
            l = l.getTunedCLF(top.ds, vSets, fout, name, self.file)

            out.append("#TUNE-LRN-PARAMS-" + name + ":" + self.file + ": " +
                       str(l.selectedParams))
            sCheck = l.getCLFOptions()
            out.append("#SETSET-LRN-PARAMS-" + name + ":" + self.file + ": " +
                       str(sCheck))

        l.buildClassifier(top.ds)

        vec = l.evaluateModel(testSet)

        out.append("#LSH-FOR-TOP-ONLY")

        if self.isCount:
            vals = DPLIB.getMeasuresCount(testSet[:, -1], vec)
            out.append(name + ":" + self.file + ": " + str(vals))
        else:
            tvals = DPLIB.getConfMatrix(testSet[:, -1], vec, top.bestCF)
            out.append("#CONF-TEST-" + name + ":" + self.file + ": " +
                       str(tvals))
            vals = DPLIB.getMeasures(tvals)
            auc = DPLIB.getAUC(testSet[:, -1], vec)
            vals['auc'] = auc
            out.append(name + ":" + self.file + ": " + str(vals))

        # Drop references to the per-bucket objects and data.
        for i in range(len(pop)):

            pop[i] = None

        pop = None

        for i in bins.keys():
            bins[i] = None

        bins = None

        time = Common.getCurrentTimeMil() - startTime

        if (name.find("LSHTune") < 0):
            out.append("#TIME-FOR:" + name + ":" + self.file + ": " +
                       str(time))
            out.append("#TIME-FOR-IS:" + name + ":" + self.file + ": " +
                       str(spentIsTime))
            # Was ``self.output = +out`` (unary plus on a list -> TypeError);
            # the log lines are appended to the accumulated output instead.
            self.output += out

        top.addToExtra("SPENT-TIME-IS", float(spentIsTime))

        return top, out

Example #6

Show file

    def run(self, trainSeti, testSeti, name, fout, vSets, vSetType,
            fixedTrainSize, log, ignoreOK, threshold, thresholds, rejectedFits,
            rejectedPerfs, rejectedTestPerfs, clfName):
        """Evolve training subsets per test part and evaluate the final learners.

        The (copied and shuffled) test set is split into ``self.numParts``
        parts. For each part a population of random training subsets is
        created, scored with ``DPLIB.checkSimilarity`` against the part (the
        summed p-values are stored as the ``'p-val'`` extra), and evolved for
        up to ``self.numGens`` generations using ``GA.crossOver`` and
        ``GA.Mutate``. The first ``self.count`` members of the final
        population are used to build learners that predict the part; the
        predictions of all parts are then scored against the whole test set.

        Args:
            trainSeti: 2-D NumPy array of training rows (last column = label).
            testSeti: 2-D NumPy array of test rows (last column = label).
            name: Method label used in log lines.
            fout: Not used by this method.
            vSets: List of validation sets, or ``None``/empty to derive one
                according to ``vSetType``. A passed-in list is appended to.
            vSetType: One of ``'Train Set'``, ``'NN-Filter'``,
                ``'Multiple Random'``, ``'Single Random'``, ``'!!TEST!!'`` or
                ``'KS2'``.
            fixedTrainSize: When truthy every subset has ``self.chrmSize``
                rows; otherwise a random size is drawn.
            log: When truthy, option and validation-set statistics are
                emitted through ``self.prnt``.
            ignoreOK: Not used by this method.
            threshold: Not used by this method.
            thresholds: List that receives the best fitness of the final
                population.
            rejectedFits: Only touched on the (currently unreachable)
                rejection path.
            rejectedPerfs: Only touched on the rejection path.
            rejectedTestPerfs: Only touched on the rejection path.
            clfName: Classifier name handed to ``GLOB``.

        Returns:
            ``isOK``, which is always ``True`` in the current implementation.
        """
        mad = 0.0
        if self.isCount:
            keySet = list(
                DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
            mad = DPLIB.SetBugCountForMut(trainSeti)
        else:
            keySet = list(
                DPLIB.getExtMeasures({
                    "tp": 1,
                    "tn": 2,
                    "fp": 3,
                    "fn": 4
                }).keys())
        startTime = Common.getCurrentTimeMil()
        tempTime = 0
        spentISTime = 0

        #For Binary Prediction, isCount = False
        auc = 0
        preds = []
        pop = []

        # Work on copies so the caller's arrays are not shuffled.
        trainSet = np.copy(trainSeti)
        testSet = np.copy(testSeti)
        pop.clear()

        tstSize = len(testSet)
        partSize = int(tstSize / self.numParts)
        preds.clear()
        diffs = []
        auc = 0.0

        #For isCount = True
        actuals = []
        prrs = []

        if (log):
            self.prnt("#GIS-OPTIONS;;For=" + name + "@" + ":iters=" +
                      str(self.iters) + "-POPSIZE=" + str(self.popSize) +
                      "-NumParts=" + str(self.numParts) + "-NumGens=" +
                      str(self.numGens) + "-sizeTop=" + str(self.sizeTopP) +
                      "-Learner=" + clfName + "\n")

        isOK = True

        np.random.shuffle(testSet)
        self.FinalLearners = []
        self.FinalDatasets = []
        for p in range(self.numParts):

            diffp = []

            self.prnt("\n" + str(p) + ": ")

            tempTime = Common.getCurrentTimeMil()
            pop.clear()
            start = p * partSize
            end = (p + 1) * partSize
            if (end > tstSize):
                end = tstSize

            # The last part absorbs any remainder rows.
            if (p == self.numParts - 1):
                end = tstSize

            testPart = testSet[start:end, :]

            spentISTime += Common.getCurrentTimeMil() - tempTime

            uinds = set()
            if (vSets is None or len(vSets) == 0):

                if vSets is None:
                    vSets = []

                vSet = None
                retVal = ""
                if (vSetType == 'Train Set'):

                    vSet = trainSeti
                    if (log):
                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")

                        retVal = None

                elif (vSetType == 'NN-Filter'):
                    tempTime = Common.getCurrentTimeMil()
                    vSet = DPLIB.NNFilter(trainSet, testPart, 1)
                    spentISTime += Common.getCurrentTimeMil() - tempTime

                    if (log):

                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

                #If random, but not fed into the func, generate one randomly, with size of testPart
                elif (vSetType == 'Multiple Random'
                      or vSetType == 'Single Random'):

                    # Rows are drawn without replacement, so the sample can
                    # never be larger than the training set; capping the size
                    # keeps the loop below from spinning forever.
                    size = min(len(testPart), len(trainSet))
                    vSet = []
                    j = 0
                    while (j < size):
                        # trainSet is a NumPy array: use len(), it has no
                        # numInstances() method.
                        index = np.random.randint(len(trainSet))

                        if (not index in uinds):
                            uinds.add(index)
                        else:
                            continue

                        # Collect into the new validation set (the original
                        # appended the raw rows to vSets by mistake).
                        vSet.append(trainSet[index])

                        j += 1

                    # Convert before logging so getStats receives an array,
                    # as it does in the other branches.
                    vSet = np.array(vSet)

                    if (log):

                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

                elif (vSetType == '!!TEST!!'):

                    #Upper Bound. Should not be used.
                    self.prnt("Should not be used.")
                    vSet = testSeti
                    if (log):

                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")

                        retVal = None

                elif vSetType == 'KS2':
                    vSet = None
                vSets.append(vSet)

            else:

                retVal = ""
                for vSet in vSets:

                    if (log):
                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

            # Build the initial population of random training subsets.
            for i in range(self.popSize):
                tempTime = Common.getCurrentTimeMil()
                uinds.clear()

                size = 0

                if (fixedTrainSize):
                    size = self.chrmSize
                else:
                    size = np.random.randint(self.chrmSize) + 10

                # Resample until the subset contains at least two distinct
                # values in its last column.
                while True:
                    trSet = []
                    j = 0
                    while (j < size):
                        index = np.random.randint(len(trainSet))

                        trSet.append(trainSet[index])

                        if (not index in uinds):
                            uinds.add(index)

                        j += 1

                    spentISTime += Common.getCurrentTimeMil() - tempTime
                    trSet = np.array(trSet)
                    if len(set(list(trSet[:, -1]))) >= 2:
                        break

                tempTime = Common.getCurrentTimeMil()

                pv, p_vals = DPLIB.checkSimilarity(trSet[:, :-1],
                                                   testPart[:, :-1])

                if self.isCount:
                    h = CHRM_GIS_Count(trSet, None, extraAsFitness='p-val')
                    h.addToExtra('p-val', sum(p_vals))
                    pop.append(h)
                else:

                    h = CHRM_GIS(trSet, None, None, extraAsFitness='p-val')
                    h.addToExtra('p-val', sum(p_vals))
                    pop.append(h)

                spentISTime += Common.getCurrentTimeMil() - tempTime

            tempTime = Common.getCurrentTimeMil()
            pop = DPLIB.MySort(pop)
            spentISTime += Common.getCurrentTimeMil() - tempTime

            cnt = 0
            g = 0
            for g in range(self.numGens):
                self.prnt(str(g) + " ")
                if (log):
                    pass
                    #retVal = ""
                    #for i in range(len(pop)):

                    #    chrm = pop[i]
                    #    retVal = DPLIB.getStats(chrm.ds, False, False, False);
                    #    self.prnt("#POPITNFO;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");
                    #    self.prnt("#POPITVALS;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+"rpaf="+str(chrm.fitness).replace(", ", ",")
                    #            +";;conf="+str(chrm.conf).replace(", ", ",")+";;fit="+str(chrm.getFitness())+";;TConf2="+str(chrm.testConf).replace(", ", ",")+";;TRpaf2="+str(chrm.testFitness).replace(", ", ",")+"\n");
                    #    retVal = None;

                tempTime = Common.getCurrentTimeMil()
                # Carry the first sizeTopP members over unchanged.
                newPop = []
                for i in range(self.sizeTopP):
                    newPop.append(pop[i])

                i = 0
                for i in range(0, len(pop) - self.sizeTopP, 2):
                    idx1 = 0
                    idx2 = 0
                    # Pick two different parents; after three tournament
                    # attempts that yield the same index, fall back to
                    # uniform random picks.
                    while (idx1 == idx2):
                        if (cnt >= 3):
                            idx1 = np.random.randint(len(pop))
                            idx2 = np.random.randint(len(pop))
                        else:
                            idx1 = GA.tornament(pop)
                            idx2 = GA.tornament(pop)
                            cnt += 1

                    cnt = 0
                    ds1 = pop[idx1].ds
                    ds2 = pop[idx2].ds
                    while True:

                        ds1, ds2 = GA.crossOver(ds1,
                                                ds2,
                                                fixedTrainSize,
                                                isCount=self.isCount)
                        if len(set(list(ds1[:, -1]))) >= 2 and len(
                                set(list(ds2[:, -1]))) >= 2:
                            break
                        self.prnt('repeat cross')
                    while True:
                        ds1 = GA.Mutate(ds1, isCount=self.isCount, mad=mad)
                        if len(set(list(ds1[:, -1]))) >= 2:
                            break
                        self.prnt(
                            'repeat mut ds1, because all elements are of type one class'
                        )

                    while True:

                        ds2 = GA.Mutate(ds2, isCount=self.isCount, mad=mad)
                        if len(set(list(ds2[:, -1]))) >= 2:
                            break
                        self.prnt(
                            'repeat mut ds1, because all elements are of type one class'
                        )
                    if self.isCount:
                        newPop.append(
                            CHRM_GIS_Count(ds1, None, extraAsFitness='p-val'))
                        newPop.append(
                            CHRM_GIS_Count(ds2, None, extraAsFitness='p-val'))
                    else:
                        # NOTE(review): the initial population passes three
                        # positional arguments to CHRM_GIS, here only two are
                        # given — confirm the third has a default.
                        newPop.append(
                            CHRM_GIS(ds1, None, extraAsFitness='p-val'))
                        newPop.append(
                            CHRM_GIS(ds2, None, extraAsFitness='p-val'))

                spentISTime += Common.getCurrentTimeMil() - tempTime

                for i in range(len(newPop)):

                    tempTime = Common.getCurrentTimeMil()

                    pv, p_vals = DPLIB.checkSimilarity(newPop[i].ds[:, :-1],
                                                       testPart[:, :-1])

                    newPop[i].addToExtra('p-val', sum(p_vals))

                    spentISTime += Common.getCurrentTimeMil() - tempTime

                tempTime = Common.getCurrentTimeMil()

                newPop = DPLIB.MySort(newPop)
                exit = False
                countComp = 0

                newPop, rdel = DPLIB.CombinePops(pop, newPop)

                if (log):
                    pass
                    #retVal = ""
                    #for i in range(len(rdel)):

                    #    chrm = rdel[i];
                    #    retVal = DPLIB.getStats(chrm.ds, False, False, False);
                    #    self.prnt("#POPDELITNFO;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+retVal+";;rpaf="+str(chrm.fitness).replace(", ", ",")
                    #            +";;conf="+str(chrm.conf).replace(", ", ",")+";;fit="+str(chrm.getFitness())+";;TConf2="+str(chrm.testConf).replace(", ", ",")+";;TRpaf2="+str(chrm.testFitness).replace(", ", ",")
                    #            +"\n");

                    #    retVal = None;

                rdel = None

                # Stop early once the mean fitness no longer changes and the
                # best member has a positive fitness.
                diff = abs(
                    GA.GetMeanFittness(pop, countComp) -
                    GA.GetMeanFittness(newPop, countComp))
                if (diff < 0.000001):
                    exit = True

                diffp.append(diff)

                pop = newPop
                if (pop[0].getFitness() > 0.0) and (exit):
                    break

                exit = False
                spentISTime += Common.getCurrentTimeMil() - tempTime

            w = []
            if (self.count == 0):
                self.count = len(pop)

            # Train one learner per selected member and predict this part.
            for i in range(self.count):
                l = GLOB(clfName).getClassifier()
                tds = pop[i].ds
                self.FinalLearners.append(l)
                self.FinalDatasets.append(tds)
                testPartI = testPart

                l.buildClassifier(tds)

                if self.isCount:
                    actual = DPLIB.getActuals(testPartI)
                    prr = l.evaluateModel(testPartI)
                    #vals = DPLIB.getMeasuresCount(actual,prr)

                    actall = None
                    predall = None
                    if (len(actuals) == self.count):
                        # NOTE(review): ``+`` concatenates only if these are
                        # lists; for NumPy arrays it would add element-wise —
                        # confirm the return types of getActuals and
                        # evaluateModel.
                        actuals[i] = actuals[i] + actual
                        prrs[i] = prrs[i] + prr
                    else:
                        actuals.append(actual)
                        prrs.append(prr)

                else:

                    vec = l.evaluateModel(testPartI)

                    if (len(preds) == self.count):
                        preds[i] += list(vec)
                    else:
                        preds.append(list(vec))

                if (log):
                    pass
                    #retVal = DPLIB.getStats(tds, True, True, True);
                    #self.prnt("#TRPRTNFO;;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");

                    #retVal = DPLIB.getStats(testPart,true,true, True);
                    #self.prnt("#TSTPRTNFO;;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");
                    #vals = DPLIB.getConfMatrix(testPart[:,-1],vec)

                    #self.prnt("#TSTPRTVALS;;prt="+str(p)+";;For="+name+"@"+":"+
                    #        "rpaf="+str(DPLIB.getMeasures(vals)).replace(", ", ",")
                    #            +";;conf="+str(vals).replace(", ", ",")+"\n");

                    #retVal = None;

                w.append(pop[i].getFitness())

        # The acceptance check is currently disabled: results are always OK.
        isOK = True

        if not isOK:
            pass
        else:
            thresholds.append(pop[0].getFitness())

        self.prnt()
        self.prnt("Best Top Fitness:" + str(pop[0].fitness))
        self.prnt("Best Fitness (mean):", pop[0].getMeanFitness())

        if self.isCount:
            vals = DPLIB.getMeasuresCountSet(actuals, prrs)
        else:
            vals1 = DPLIB.getConfMatrixSet(testSet[:, -1], preds)
            vals = DPLIB.getMeasures(vals1)

        if (isOK):

            if not self.isCount:

                if (len(preds) == 1):
                    auc = DPLIB.getAUC(testSet[:, -1], preds[0])
                else:
                    auc = DPLIB.getAUCSet(testSet[:, -1], preds)

                vals['auc'] = auc
                self.prnt()
                self.prnt("#CONF-TEST:" + name + ":" + self.file + ": " +
                          str(vals1))

                self.prnt()
                self.prnt(name + ":" + self.file + ": " + str(vals))

                self.prnt()
            else:
                self.prnt()
                self.prnt(name + ":" + self.file + ": " + str(vals))
                self.prnt()
        else:

            bestI = pop[0]
            rejectedFits.append(bestI.getFitness())

            rejVals = copy.deepcopy(bestI.fitness)
            rejectedPerfs.append(rejVals)

            testRejVals = copy.deepcopy(vals)

            # Was ``eppend`` (typo), which would raise AttributeError.
            rejectedTestPerfs.append(testRejVals)

            self.prnt("#NOTOKPREDS----" + name + ":" + self.file + ": " +
                      str(vals))

            if not self.isCount:
                self.prnt()
                self.prnt("#NOTOKPREDS----" + "#CONF-TEST:" + name + ":" +
                          self.file + ": " + str(vals1))

        time = Common.getCurrentTimeMil() - startTime

        self.prnt("#TIME-FOR:" + name + ":" + self.file + ": " + str(time))

        self.prnt("#TIME-FOR-IS:" + name + ":" + self.file + ": " +
                  str(spentISTime))

        return isOK

Example #7

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def WCFolds(testSet, folds, file, fout, name, clfName):
        """Run stratified k-fold cross-validation within ``testSet`` and log it.

        A shuffled copy of ``testSet`` is split with
        ``StratifiedKFold(n_splits=folds)``. For every fold the ``clfName``
        learner is built on the training portion and evaluated on the held-out
        portion; the per-fold confusion matrices are summed key by key. The
        measures of the summed matrix and the AUC from ``DPLIB.getAUCCV`` are
        printed and written to ``fout``.

        Args:
            testSet: 2-D NumPy array; the last column holds the labels.
            folds: Number of splits passed to ``StratifiedKFold``.
            file: Label string included in the output line.
            fout: Object with a ``write`` method.
            name: Method label string included in the output line.
            clfName: Classifier name handed to ``GLOB``.
        """
        preds = []
        actuals = []
        vals = None

        # ``testSet[:, :]`` is only a view, so shuffling it would reorder the
        # caller's array as well; take a real copy before shuffling.
        tssCopy = np.copy(testSet)
        np.random.shuffle(tssCopy)

        skf = StratifiedKFold(n_splits=folds)
        X = tssCopy[:, :-1]
        y = tssCopy[:, -1]
        for train_index, test_index in skf.split(X, y):

            cvtrain, cvtest = X[train_index], X[test_index]
            cvtrainY, cvtestY = y[train_index], y[test_index]

            # Re-attach the label column: the learner takes features and
            # label together in one array.
            cvtrain = np.append(cvtrain,
                                cvtrainY.reshape((len(cvtrainY), 1)),
                                axis=1)

            cvtest = np.append(cvtest,
                               cvtestY.reshape((len(cvtestY), 1)),
                               axis=1)

            if "infogain" in name.lower():
                # Feature selection for the "infogain" variants is not
                # implemented here; the folds are used unchanged.
                pass

            m = GLOB(clfName).getClassifier()
            m.buildClassifier(cvtrain)

            vec = m.evaluateModel(cvtest)

            preds.append(vec)
            actuals.append(cvtestY)

            foldConf = DPLIB.getConfMatrix(cvtestY, vec)
            if vals is None:
                vals = foldConf
            else:
                for key in vals.keys():
                    vals[key] += foldConf[key]

        auc = DPLIB.getAUCCV(actuals, preds)
        vals1 = DPLIB.getMeasures(vals)
        print(name + ":" + file + ": " + str(vals1) + " AUC = " + str(auc))
        fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) +
                   ";" + "Vals=" + str(vals1))

Example #8

Show file

File: Benchmarks.py Project: rebvar/datasci.datalytikz.com

    def NNFilter(trainSeti, testSet, file, fout, name, vecin, count, clfName,
                 tunelrn, vSets, testCut):
        """Train on an NN-filtered training set and evaluate on ``testSet``.

        When ``count`` is 0 the neighbour count is searched over 1..10: for
        each candidate the training data is filtered with
        ``DPLIB.NNFilter``, a classifier is built and scored on every
        validation set in ``vSets`` as ``F * GMean1``; the candidate with the
        highest average score is kept. If ``testCut`` is also set, a
        confusion-matrix cutoff is then searched per validation set and the
        average of the best cutoffs is used for the final confusion matrix.
        When ``count`` is non-zero it is used directly and
        ``DPLIB.DefaultCF`` is used as the cutoff.

        Args:
            trainSeti: unfiltered training data.
            testSet: 2-D test data; the last column is used as the label.
            file: label included in every reported line.
            fout: writable object that receives the report lines.
            name: approach name included in every reported line.
            vecin: ignored; it is overwritten with the test predictions.
            count: neighbour count, or 0 to search for the best one.
            clfName: classifier name passed to ``GLOB``.
            tunelrn: when truthy, the classifier is tuned via
                ``getTunedCLF`` before being built.
            vSets: sequence of validation sets (label in the last column).
            testCut: when truthy (and ``count`` is 0), search the cutoff.

        Returns:
            The predictions of the final classifier on ``testSet``.
        """
        startTime = Common.getCurrentTimeMil()
        spentISTime = 0

        bestFit = 0.0
        bestCount = 0
        btrainSet = None
        cfbf = DPLIB.DefaultCF

        if count == 0:
            for i in range(1, 11):

                tempTime = Common.getCurrentTimeMil()
                trainSet = DPLIB.NNFilter(trainSeti, testSet, i)
                spentISTime += Common.getCurrentTimeMil() - tempTime

                l = GLOB(clfName, tunelrn).getClassifier()
                if tunelrn:
                    l = l.getTunedCLF(trainSet, vSets, fout, name, file)
                l.buildClassifier(trainSet)

                avgFit = 0.0
                for vSet in vSets:
                    vec = l.evaluateModel(vSet)
                    tvals = DPLIB.getConfMatrix(vSet[:, -1], vec)
                    measures = DPLIB.getExtMeasures(tvals)
                    avgFit += measures["F"] * measures["GMean1"]
                avgFit /= len(vSets)

                if avgFit > bestFit:
                    bestFit = avgFit
                    bestCount = i
                    btrainSet = trainSet[:, :]
                elif btrainSet is None:
                    # No candidate has beaten the initial 0.0 yet. Keep the
                    # first one as a fallback so btrainSet is never left as
                    # None; bestFit stays 0.0 so any later candidate with a
                    # positive score still replaces it.
                    bestCount = i
                    btrainSet = trainSet[:, :]

            if testCut:

                cf = 0
                trainSet = btrainSet

                l = GLOB(clfName, tunelrn).getClassifier()
                if tunelrn:
                    l = l.getTunedCLF(trainSet, vSets, fout, name, file)
                l.buildClassifier(trainSet)

                for vSet in vSets:

                    vec = l.evaluateModel(vSet)
                    vCF = 0.1
                    bestCF = 0
                    bestCFVal = -1
                    bestVals = None

                    # Sweep the cutoff in steps of 0.1. The step is
                    # accumulated in floating point, so the last value tried
                    # is just below 1.0 rather than 0.9.
                    while True:
                        tvals = DPLIB.getConfMatrix(vSet[:, -1], vec, vCF)
                        measures = DPLIB.getExtMeasures(tvals)
                        fit = measures["F"] * measures["GMean1"]
                        if fit > bestCFVal or bestVals is None:
                            bestCFVal = fit
                            bestCF = vCF
                            bestVals = tvals

                        vCF += 0.1
                        if vCF >= 1:
                            break
                    cf += bestCF

                # vSets is used with len() and indexing everywhere else; it
                # has no callable .size(), so average with len().
                cf /= len(vSets)
                cfbf = cf

        if count == 0:
            trainSet = btrainSet
        else:
            tempTime = Common.getCurrentTimeMil()
            trainSet = DPLIB.NNFilter(trainSeti, testSet, count)
            spentISTime = Common.getCurrentTimeMil() - tempTime
            bestCount = count

        l = GLOB(clfName, tunelrn).getClassifier()

        if tunelrn:
            l = l.getTunedCLF(trainSet, vSets, fout, name, file)

            print("#TUNE-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(l.selectedParams))
            fout.write("#TUNE-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(l.selectedParams))
            fout.write("\n")

            sCheck = l.getCLFOptions()
            print("#SETSET-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(sCheck))
            fout.write("#SETSET-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(sCheck))
            fout.write("\n")

        l.buildClassifier(trainSet)

        # The incoming vecin value is discarded; the test predictions are
        # what gets reported and returned.
        vecin = l.evaluateModel(testSet)

        tvals = DPLIB.getConfMatrix(testSet[:, -1], vecin, cfbf)
        if count == 0:

            print("#BESTCOUNT-" + name + ":" + file + ": " + str(bestCount))
            fout.write("#BESTCOUNT-" + name + ":" + file + ": ")
            fout.write(str(bestCount))
            fout.write("\n")

            print("#BESTFIT-" + name + ":" + file + ": " + str(bestFit))
            fout.write("#BESTFIT-" + name + ":" + file + ": ")
            fout.write(str(bestFit))
            fout.write("\n")

        print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))
        fout.write("#CONF-TEST-" + name + ":" + file + ": ")
        fout.write(str(tvals))
        fout.write("\n")

        if testCut:

            print("#NN-BEST-CF-VALUE:" + name + ":" + file + ": " + str(cfbf))
            fout.write("#NN-BEST-CF-VALUE:" + name + ":" + file + ": ")
            fout.write(str(cfbf))
            fout.write("\n")

        vals = DPLIB.getMeasures(tvals)
        auc = DPLIB.getAUC(testSet[:, -1], vecin)
        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

        fout.write(name + ":" + file + ": ")
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        elapsed = Common.getCurrentTimeMil() - startTime

        print("#TIME-FOR:" + name + ":" + file + ": " + str(elapsed))
        fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(elapsed) +
                   "\n")

        print("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime))
        fout.write("#TIME-FOR-IS:" + name + ":" + file + ": " +
                   str(spentISTime) + "\n")

        return vecin