コード例 #1
0
ファイル: StringLocator.py プロジェクト: ccreeper/weka-forpy
 def copyStringValues(cls, inst:Instance, a0=None, a1=None, a2:AttributeLocator=None, a3:Instances=None, a4:AttributeLocator=None):
     """Re-register the located values of ``inst`` in destination attributes.

     The call is dispatched on the runtime types of ``a0`` and ``a1``:

     * Short form -- ``a0`` is an ``Instances`` and ``a1`` is an
       ``AttributeLocator``: ``inst`` must have a dataset with the same
       number of attributes as ``a0``; the work is then handed to
       ``cls.copyStringValuesFromSrc`` with ``a1`` used as both locators.
     * Long form -- anything else: ``a0`` is a flag selecting whether the
       position inside ``inst`` is resolved through ``a2`` (truthy) or
       through ``a4`` (falsy); ``a1``/``a2`` supply the source attributes
       and ``a3``/``a4`` the destination attributes.  Nothing happens when
       ``a1 == a3``.

     In the long form every non-missing located value of ``inst`` is
     replaced by the index returned from ``dest.addStringValue``.

     Raises:
         Exception: if ``inst`` has no dataset or the attribute counts
             differ (short form), or if the two locators disagree on the
             number of attribute or locator indices (long form).
     """
     shortForm = isinstance(a0, Instances) and isinstance(a1, AttributeLocator)
     if shortForm:
         ownDataset = inst.dataset()
         if ownDataset is None:
             raise Exception("Instance has no dataset assigned!!")
         ownCount = ownDataset.numAttributes()
         destCount = a0.numAttributes()
         if ownCount != destCount:
             raise Exception("Src and Dest differ in # of attributes: "
                             + str(ownCount) + " != " + str(destCount))
         cls.copyStringValuesFromSrc(inst, True, ownDataset, a1, a0, a1)
         return

     # Long form: same source and destination means there is nothing to copy.
     if a1 == a3:
         return

     srcAttIndices = a2.getAttributeIndices()
     destAttIndices = a4.getAttributeIndices()
     if len(srcAttIndices) != len(destAttIndices):
         raise Exception("Src and Dest string indices differ in length: "
                         + str(len(srcAttIndices)) + " != "
                         + str(len(destAttIndices)))

     srcLocCount = len(a2.getLocatorIndices())
     destLocCount = len(a4.getLocatorIndices())
     if srcLocCount != destLocCount:
         raise Exception("Src and Dest locator indices differ in length: "
                         + str(srcLocCount) + " != " + str(destLocCount))

     # Both index lists have equal length here, so they can be walked in
     # lockstep.
     for srcAttIndex, destAttIndex in zip(srcAttIndices, destAttIndices):
         srcActual = a2.getActualIndex(srcAttIndex)
         destActual = a4.getActualIndex(destAttIndex)
         # a0 decides which locator describes the layout of ``inst``.
         instIndex = srcActual if a0 else destActual
         src = a1.attribute(srcActual)
         dest = a3.attribute(destActual)
         if inst.isMissing(instIndex):
             continue
         valIndex = dest.addStringValue(src, int(inst.value(instIndex)))
         inst.setValue(instIndex, valIndex)
コード例 #2
0
    def convertInstanceNumeric(self, instance: Instance):
        """Build the transformed instance for the numeric encoding and push it.

        When ``self.m_needToTransform`` is falsy the instance is pushed
        unchanged (without copying).  Otherwise every attribute that is not
        nominal, or that is the class attribute, is copied as a single value,
        and every other attribute occupies ``numValues() - 1`` output slots:
        slots are set to 1 up to the position where ``self.m_Indices[j]``
        matches the instance's value and to 0 from there on.  A missing
        value is written into all of the attribute's slots.
        """
        if not self.m_needToTransform:
            self.push(instance, False)
            return

        out = [0] * self.outputFormatPeek().numAttributes()
        cursor = 0  # next free slot in ``out``
        inputFormat = self.getInputFormat()
        for attIndex in range(inputFormat.numAttributes()):
            att = inputFormat.attribute(attIndex)
            if not att.isNominal() or attIndex == inputFormat.classIndex():
                out[cursor] = instance.value(attIndex)
                cursor += 1
                continue

            width = att.numValues() - 1
            if instance.isMissing(attIndex):
                for offset in range(width):
                    out[cursor + offset] = instance.value(attIndex)
            else:
                code = int(instance.value(attIndex))
                order = self.m_Indices[attIndex]
                offset = 0
                # Ones up to the position of the instance's value ...
                while code != order[offset]:
                    out[cursor + offset] = 1
                    offset += 1
                # ... zeros for the remaining slots of this attribute.
                while offset < width:
                    out[cursor + offset] = 0
                    offset += 1
            cursor += width

        converted = Instance(instance.weight(), out)
        self.copyValues(converted, False, instance.dataset(),
                        self.outputFormatPeek())
        self.push(converted)
コード例 #3
0
 def convertInstanceNominal(self, instance: Instance):
     """Build the transformed instance for the indicator encoding and push it.

     When ``self.m_needToTransform`` is falsy the instance is pushed
     unchanged (without copying).  Otherwise:

     * attributes that are not nominal, the class attribute, and nominal
       attributes with at most two values (unless ``self.m_TransformAll``
       is set) are copied as a single value;
     * every other attribute occupies ``numValues()`` output slots holding
       1 in the slot whose offset equals the instance's value and 0
       elsewhere; a missing value is written into all of its slots.
     """
     if not self.m_needToTransform:
         self.push(instance, False)
         return

     out = [0] * self.outputFormatPeek().numAttributes()
     cursor = 0  # next free slot in ``out``
     inputFormat = self.getInputFormat()
     for attIndex in range(inputFormat.numAttributes()):
         att = inputFormat.attribute(attIndex)
         if not att.isNominal() or attIndex == inputFormat.classIndex():
             out[cursor] = instance.value(attIndex)
             cursor += 1
             continue

         width = att.numValues()
         if width <= 2 and not self.m_TransformAll:
             out[cursor] = instance.value(attIndex)
             cursor += 1
             continue

         if instance.isMissing(attIndex):
             for offset in range(width):
                 out[cursor + offset] = instance.value(attIndex)
         else:
             for offset in range(width):
                 out[cursor + offset] = (
                     1 if offset == int(instance.value(attIndex)) else 0)
         cursor += width

     converted = Instance(instance.weight(), out)
     self.copyValues(converted, False, instance.dataset(),
                     self.outputFormatPeek())
     self.push(converted)
コード例 #4
0
ファイル: Filter.py プロジェクト: ccreeper/weka-forpy
 def push(self, instance: Instance, copyInstance: bool = True):
     """Attach ``instance`` to the output format and enqueue it.

     ``None`` is ignored.  If the instance already belongs to a dataset it
     is optionally deep-copied (``copyInstance``) and then passed to
     ``self.copyValues(instance, False)`` before being re-assigned to
     ``self.m_OutputFormat`` and put on ``self.m_OutputQueue``.
     """
     if instance is None:
         return

     belongsToDataset = instance.dataset() is not None
     if belongsToDataset:
         if copyInstance:
             instance = copy.deepcopy(instance)
         self.copyValues(instance, False)

     instance.setDataset(self.m_OutputFormat)
     self.m_OutputQueue.put(instance)
コード例 #5
0
 def convertInstance(self,instance:Instance):
     """Fill in missing values of ``instance`` and push the result.

     If the instance reports no missing value it is pushed as is, with
     copying requested.  Otherwise a new ``Instance`` is built in which
     every missing value of a nominal or numeric non-class attribute is
     replaced by the matching entry of ``self.m_ModesAndMeans``; all other
     values are taken over unchanged, and the new instance is pushed
     without copying.
     """
     hasMissing = instance.hasMissingValue()
     if not hasMissing:
         result = instance
     else:
         inputFormat = self.getInputFormat()
         filled = [0] * inputFormat.numAttributes()
         for idx in range(instance.numAttributes()):
             replaceable = (
                 instance.isMissing(idx)
                 and inputFormat.classIndex() != idx
                 and (inputFormat.attribute(idx).isNominal()
                      or inputFormat.attribute(idx).isNumeric())
             )
             if replaceable:
                 filled[idx] = self.m_ModesAndMeans[idx]
             else:
                 filled[idx] = instance.value(idx)
         result = Instance(instance.weight(), filled)

     result.setDataset(instance.dataset())
     # A freshly built instance does not need to be copied again by push().
     self.push(result, not hasMissing)
コード例 #6
0
    def evaluationForSingleInstance(self, a0, instance:Instance, storePredictions:bool):
        """Evaluate one instance and return the prediction.

        ``a0`` selects the behaviour:

        * a list -- treated as the prediction output.  For a nominal class
          the prediction is ``Utils.maxIndex(a0)`` (or the missing value
          when that entry is not positive); otherwise it is ``a0[0]``.
          Statistics are updated and, if ``storePredictions`` is set and
          predictions are not discarded, a prediction record is appended
          to ``self.m_Predictions``.
        * a ``Classifier`` -- the classifier is asked for its distribution
          on a deep copy of ``instance`` with the class set to missing, and
          that list is evaluated recursively.  For a non-nominal class with
          a known class value and a non-missing prediction, interval and
          conditional-density statistics are updated when the classifier
          supports them; otherwise the corresponding availability flag is
          cleared.

        Any other ``a0`` yields ``None``.
        """
        if isinstance(a0, List):
            nominal = self.m_ClassIsNominal
            if nominal:
                pred = Utils.maxIndex(a0)
                if a0[int(pred)] <= 0:
                    pred = Utils.missingValue()
                self.updateStatsForClassifier(a0, instance)
            else:
                pred = a0[0]
                self.updateStatsForPredictor(pred, instance)

            if storePredictions and not self.m_DiscardPredictions:
                if self.m_Predictions is None:
                    self.m_Predictions = []
                if nominal:
                    entry = NominalPrediction(instance.classValue(), a0, instance.weight())
                else:
                    entry = NumericPrediction(instance.classValue(), pred, instance.weight())
                self.m_Predictions.append(entry)
            return pred

        if isinstance(a0, Classifier):
            classMissing = copy.deepcopy(instance)
            classMissing.setDataset(instance.dataset())
            # TODO: no special handling for InputMappedClassifier yet.
            classMissing.setClassMissing()

            distribution = a0.distributionForInstance(classMissing)
            pred = self.evaluationForSingleInstance(distribution, instance, storePredictions)

            # The extra statistics only apply to a non-nominal class with a
            # known actual value and a usable prediction.
            if (self.m_ClassIsNominal
                    or instance.classIsMissing()
                    or Utils.isMissingValue(pred)):
                return pred

            if isinstance(a0, IntervalEstimator):
                self.updateStatsForIntervalEstimator(a0, classMissing, instance.classValue())
            else:
                self.m_CoverageStatisticsAvailable = False

            if isinstance(a0, ConditionalDensityEstimator):
                self.updateStatsForConditionalDensityEstimator(a0, classMissing, instance.classValue())
            else:
                self.m_ComplexityStatisticsAvailable = False
            return pred

        return None
コード例 #7
0
ファイル: Remove.py プロジェクト: ccreeper/weka-forpy
 def input(self, instance: Instance):
     """Feed one instance through the filter.

     Returns ``False`` without queuing anything when the output format has
     no attributes; otherwise queues the filtered instance and returns
     ``True``.  When the number of selected attributes equals the number
     of input attributes, a deep copy of the instance (detached from its
     dataset) is used; otherwise a new ``Instance`` is built from the
     values at ``self.m_SelectedAttributes``.

     Raises:
         Exception: if no input format has been defined.
     """
     if self.getInputFormat() is None:
         raise Exception("No input instance format defined")

     if self.m_NewBatch:
         self.resetQueue()
         self.m_NewBatch = False

     if self.getOutputFormat().numAttributes() == 0:
         return False

     selected = self.m_SelectedAttributes
     if len(selected) != self.getInputFormat().numAttributes():
         kept = [0] * self.getOutputFormat().numAttributes()
         for slot, sourceIndex in enumerate(selected):
             kept[slot] = instance.value(sourceIndex)
         inst = Instance(instance.weight(), kept)
     else:
         inst = copy.deepcopy(instance)
         inst.setDataset(None)

     self.copyValues(inst, False, instance.dataset(),
                     self.outputFormatPeek())
     self.push(inst)
     return True
コード例 #8
0
    def process(self, toPredict: Instance, classifier: Classifier,
                evaluation: Evaluation):
        """Evaluate ``toPredict`` and optionally record a plot point for it.

        For a nominal class attribute the class distribution is obtained
        from ``classifier.distributionForInstance`` and handed to
        ``evaluation.evaluationForSingleInstance``; otherwise the prediction
        comes from ``evaluation.evaluateModelOnceAndRecordPrediction``.

        If ``self.m_SaveForVisualization`` is truthy and
        ``self.m_PlotInstances`` is not ``None``, a new row is appended to
        ``self.m_PlotInstances`` and matching entries are appended to
        ``self.m_PlotShapes`` and ``self.m_PlotSizes``.  The row holds the
        instance's values with extra columns inserted at the class index:
        margin, prediction and actual value for a nominal class; prediction
        and actual value otherwise.

        Returns ``None``.
        """
        probActual = probNext = pred = 0
        # NOTE(review): despite its name, nothing in this method clears the
        # class value of this copy -- confirm that this is intended.
        classMissing = copy.deepcopy(toPredict)
        classMissing.setDataset(toPredict.dataset())

        if toPredict.classAttribute().isNominal():
            # Get the predicted class probability distribution.
            preds = classifier.distributionForInstance(classMissing)
            # If all probabilities are 0, the instance belongs to no class.
            val = 0
            if sum(preds) == 0:
                pred = Utils.missingValue()
                probActual = Utils.missingValue()
            else:
                # The predicted class is the index of the largest probability.
                pred = Utils.maxIndex(preds)
                if not Utils.isMissingValue(toPredict.classIndex()):
                    # If the value is not missing, this is not a sample to be
                    # predicted; use its actual class (val stays 0 otherwise).
                    if not Utils.isMissingValue(toPredict.classValue()):
                        val = int(toPredict.classValue())
                    probActual = preds[val]
                else:
                    probActual = preds[Utils.maxIndex(preds)]
            # probNext becomes the largest probability among the classes
            # other than val.
            for i in range(toPredict.classAttribute().numValues()):
                if i != val and preds[i] > probNext:
                    probNext = preds[i]
            evaluation.evaluationForSingleInstance(preds, toPredict, True)
        else:
            # Evaluate the single instance.
            pred = evaluation.evaluateModelOnceAndRecordPrediction(
                classifier, toPredict)
        if not self.m_SaveForVisualization:
            return
        # Save the data for visualization.
        if self.m_PlotInstances is not None:
            isNominal = toPredict.classAttribute().isNominal()
            values = [0] * self.m_PlotInstances.numAttributes()
            i = 0
            while i < self.m_PlotInstances.numAttributes():
                # Values before the class attribute are copied as they are.
                if i < toPredict.classIndex():
                    values[i] = toPredict.value(i)
                elif i == toPredict.classIndex():
                    if isNominal:
                        # Difference between the first-choice and the
                        # second-choice result.
                        values[i] = probActual - probNext
                        # Predicted value.
                        values[i + 1] = pred
                        # Original value.
                        values[i + 2] = toPredict.value(i)
                        # Skip the two extra columns just written.
                        i += 2
                    else:
                        values[i] = pred
                        values[i + 1] = toPredict.value(i)
                        # Skip the one extra column just written.
                        i += 1
                else:
                    # Columns after the inserted ones map back to the
                    # original attribute positions.
                    if isNominal:
                        values[i] = toPredict.value(i - 2)
                    else:
                        values[i] = toPredict.value(i - 1)
                i += 1
            # print("============")
            # for m in values:
            #     print("val:",m)
            # print("============")
            self.m_PlotInstances.add(Instance(1.0, values))
            if toPredict.classAttribute().isNominal():
                # Shape: missing, error (prediction differs from the actual
                # class) or the automatic shape.
                if toPredict.isMissing(
                        toPredict.classIndex()) or Utils.isMissingValue(pred):
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                elif pred != toPredict.classValue():
                    self.m_PlotShapes.append(Plot2D.ERROR_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                # Size: either the margin or the default size, enlarged by
                # one when the prediction differs from the actual class.
                if self.m_pointSizeProportionalToMargin:
                    self.m_PlotSizes.append(probActual - probNext)
                else:
                    sizeAdj = 0
                    if pred != toPredict.classValue():
                        sizeAdj = 1
                    self.m_PlotSizes.append(Plot2D.DEFAULT_SHAPE_SIZE.value +
                                            sizeAdj)
            else:
                # errd stays None when the actual class or the prediction is
                # missing; otherwise it is the signed prediction error.
                errd = None
                if not toPredict.isMissing(toPredict.classIndex(
                )) and not Utils.isMissingValue(pred):
                    errd = pred - toPredict.classValue()
                    self.m_PlotShapes.append(Plot2D.CONST_AUTOMATIC_SHAPE)
                else:
                    self.m_PlotShapes.append(Plot2D.MISSING_SHAPE)
                self.m_PlotSizes.append(errd)