Ejemplos de Attributes en Python, ejemplos de Help_Classes.Attributes.Attributes en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def printAsOriginal(self, out, int):
        """Print the @inputs/@outputs headers, "@data", and every instance.

        ``self.printInOut`` selects the headers: 1 prints the input header,
        2 the output header and 3 both.  When
        ``self.storeAttributesAsNonStatic`` is set and ``self.attributes`` is
        not None the headers come from ``self.attributes`` and are written with
        ``print``; otherwise they come from the ``Attributes`` class and are
        written with ``out.println``.

        Args:
            out: output object forwarded to each instance's ``printAsOriginal``
                and used for ``println`` in the static branch.
            int: unused by this method (the name shadows the builtin but is
                kept so existing callers are not broken).
                NOTE(review): presumably meant to carry the printInOut
                selector; the method reads ``self.printInOut`` instead.
        """
        use_instance_attrs = (self.storeAttributesAsNonStatic
                              and self.attributes is not None)
        show_inputs = self.printInOut == 1 or self.printInOut == 3
        show_outputs = self.printInOut == 2 or self.printInOut == 3

        # Exactly one source of headers is used; the instance-level branch is
        # only entered when self.attributes is known not to be None.
        if use_instance_attrs:
            if show_inputs:
                print(self.attributes.getInputHeader())
            if show_outputs:
                print(self.attributes.getOutputHeader())
        else:
            # NOTE(review): assumes `out` exposes println() - confirm.
            if show_inputs:
                out.println(Attributes.getInputHeader())
            if show_outputs:
                out.println(Attributes.getOutputHeader())

        print("@data")
        for instance in self.instanceSet:
            print()
            # The choice is made per instance (if/else inside the loop), not
            # once after the loop as a for/else clause would do.
            if use_instance_attrs:
                instance.printAsOriginal(self.attributes, out)
            else:
                instance.printAsOriginal(out)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def getNewHeader(self):
        """Build the dataset header text and return it as a string.

        The header consists of the "@relation" line, one line per input
        attribute, one line for the first output attribute, and finally the
        input and output header lines.  When
        ``self.storeAttributesAsNonStatic`` is set and ``self.attributes`` is
        not None everything is read from ``self.attributes``; otherwise it is
        read from the ``Attributes`` class.

        Returns:
            The header text, every line terminated by a newline.
        """
        use_instance_attrs = (self.storeAttributesAsNonStatic
                              and self.attributes is not None)

        # Relation name and input attribute definitions.
        if use_instance_attrs:
            line = "@relation " + self.attributes.getRelationName() + "\n"
            # Bound-method call: the instance is passed implicitly, so no
            # extra receiver argument is supplied here.
            attrs = self.attributes.getInputAttributes()
        else:
            line = "@relation " + Attributes.getRelationName() + "\n"
            attrs = Attributes.getInputAttributes(Attributes)

        for attr in attrs:
            line += attr.toString() + "\n"

        # First output attribute followed by the @inputs/@outputs lines.  Each
        # branch appends its own pair of header lines so they appear once.
        if use_instance_attrs:
            attrs = self.attributes.getOutputAttributes()
            line += attrs[0].toString() + "\n"
            line += self.attributes.getInputHeader() + "\n"
            line += self.attributes.getOutputHeader() + "\n"
        else:
            attrs = Attributes.getOutputAttributes()
            line += str(attrs[0]) + "\n"
            line += Attributes.getInputHeader() + "\n"
            line += Attributes.getOutputHeader() + "\n"

        return line

Ejemplo n.º 3

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

    def get_ranges(self):
        """Return a ``[lower, upper]`` pair for every variable.

        The result has ``self.get_nvars()`` rows.  Row ``i`` of each input is
        ``[0.0, n - 1]`` when the input attribute reports ``n > 0`` nominal
        values, otherwise its min/max attribute values.  The last row holds
        the min/max of output attribute 0.
        """
        total_vars = self.get_nvars()
        bounds = [[0.0, 0.0] for _ in range(total_vars)]

        # NOTE(review): a sibling method in this file calls getn_inputs();
        # confirm get_ninputs() exists on this class.
        for idx in range(self.get_ninputs()):
            attribute = Attributes.getInputAttribute(Attributes, idx)
            pair = bounds[idx]
            if attribute.getNumNominalValues() > 0:
                # Nominal input: range spans the nominal value indices.
                pair[0] = 0.0
                pair[1] = attribute.getNumNominalValues() - 1
            else:
                pair[0] = attribute.getMinAttribute()
                pair[1] = attribute.getMaxAttribute()

        # The output variable occupies the final row.
        lower = Attributes.getOutputAttribute(Attributes, 0).getMinAttribute()
        bounds[self.get_nvars() - 1][0] = lower
        upper = Attributes.getOutputAttribute(Attributes, 0).getMaxAttribute()
        bounds[self.get_nvars() - 1][1] = upper
        return bounds

Ejemplo n.º 4

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/pythonFARCHDShare

    def returnRanks(self):
        """Return a ``[lower, upper]`` pair per variable, tracing each step.

        The result has ``self.getnVars()`` rows.  An input whose attribute
        reports more than zero nominal values gets ``[0.0, n - 1]``; any other
        input gets the attribute's min/max.  The last row receives the min/max
        of output attribute 0.  Progress messages are printed to stdout.
        """
        print(f"self.getnVars(){self.getnVars()!s}")
        bounds = [[0.0, 0.0] for _ in range(self.getnVars())]

        print("rangos has two dimensions, first is self.getnVars()=="
              f"{self.getnVars()!s},second is 2")
        for idx in range(self.getnInputs()):
            print(f"self.getnInputs(){self.getnInputs()!s} i = {idx!s}")
            attribute = Attributes.getInputAttribute(Attributes, idx)
            print("attHere.getNumNominalValues()== "
                  f"{attribute.getNumNominalValues()!s}")
            if attribute.getNumNominalValues() > 0:
                bounds[idx][0] = 0.0
                bounds[idx][1] = attribute.getNumNominalValues() - 1
                prefix = " attHere.getNumNominalValues() > 0,rangos["
            else:
                bounds[idx][0] = attribute.getMinAttribute()
                bounds[idx][1] = attribute.getMaxAttribute()
                prefix = " attHere.getNumNominalValues() <= 0, rangos["
            print(f"{prefix}{idx!s}][0]=={bounds[idx][0]!s}"
                  f",rangos[i][1]== {bounds[idx][1]!s}")

        # The output attribute fills the final row.
        output_attribute = Attributes.getOutputAttribute(Attributes, 0)
        print(f"self.getnVars() -1{self.getnVars() - 1!s}")
        bounds[self.getnVars() - 1][0] = output_attribute.getMinAttribute()
        print(" rangos[self.getnVars() -1][0] "
              f"{bounds[self.getnVars() - 1][0]!s}")
        bounds[self.getnVars() - 1][1] = output_attribute.getMaxAttribute()
        print(" rangos[self.getnVars() -1][1] "
              f"{bounds[self.getnVars() - 1][1]!s}")
        return bounds

Ejemplo n.º 5

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

    def get_granularity_zone_ranges(self, data_set_x_array):
        """Return a ``[lower, upper]`` pair per variable for a data subset.

        Inputs whose attribute reports more than zero nominal values get
        ``[0.0, n - 1]``.  Other inputs get the values returned by the
        attribute's ``get_min_granularity_attribute`` /
        ``get_max_granularity_attribute`` for ``data_set_x_array`` and the
        input index.  The last row holds the min/max of output attribute 0.

        Args:
            data_set_x_array: forwarded unchanged to the granularity getters.
        """
        bounds = [[0.0, 0.0] for _ in range(self.get_nvars())]

        for idx in range(self.getn_inputs()):
            attribute = Attributes.getInputAttribute(Attributes, idx)
            if attribute.getNumNominalValues() > 0:
                lower = 0.0
                upper = attribute.getNumNominalValues() - 1
            else:
                lower = attribute.get_min_granularity_attribute(
                    data_set_x_array, idx)
                upper = attribute.get_max_granularity_attribute(
                    data_set_x_array, idx)
            bounds[idx][0] = lower
            bounds[idx][1] = upper

        # Output bounds always come from the full attribute definition.
        output_lower = Attributes.getOutputAttribute(Attributes,
                                                     0).getMinAttribute()
        output_upper = Attributes.getOutputAttribute(Attributes,
                                                     0).getMaxAttribute()
        bounds[self.get_nvars() - 1][0] = output_lower
        bounds[self.get_nvars() - 1][1] = output_upper
        return bounds

Ejemplo n.º 6

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def insertInputOutput(self, line, lineCount, collection, type, isTrain):
        """Add the attribute names listed in ``line`` to ``collection``.

        ``line`` is split on commas and each piece is stripped.  A name that
        ``Attributes.getAttributeByName`` does not know is reported through
        ``InstanceSet.errorLogger``; a known name is appended to ``collection``
        unless it is already there.

        Args:
            line: comma-separated attribute names.
            lineCount: line number recorded in the error report.
            collection: list of names, extended in place.
            type: text inserted after "@" in the error message.
            isTrain: flag passed through to ``ErrorInfo``.
        """
        for token in line.split(","):
            name = str(token.strip())
            known = Attributes.getAttributeByName(Attributes, name)
            # The original evaluates this lookup on every iteration even
            # though the result is unused; kept so behaviour is unchanged.
            Attributes.getAttributes(Attributes)

            if known is not None:
                if name not in collection:
                    collection.append(name)
                continue

            # Undeclared attribute: log it and move on to the next name.
            message = ("The attribute " + name + " defined in @" + type +
                       " in test, it has not been defined in @inputs in its train dataset. It will be ignored")
            report = ErrorInfo(ErrorInfo.InputTestAttributeNotDefined, 0,
                               lineCount, 0, 0, isTrain, message)
            InstanceSet.errorLogger.setError(report)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FARCHD_Negative_Rules

    def get_type(self, variable):
        """Map the type of the attribute at position ``variable`` to a constant.

        The attribute is read from ``self.attributes``; its type is compared
        with the INTEGER, REAL and NOMINAL constants exposed by
        ``Attributes.getAttributeByPos(0)``.

        Returns:
            ``self.INTEGER``, ``self.REAL`` or ``self.NOMINAL`` for a matching
            type, otherwise 0.
        """
        variable_type = self.attributes.getAttributeByPos(variable).getType()

        if variable_type == Attributes.getAttributeByPos(0).INTEGER:
            return self.INTEGER

        variable_type = self.attributes.getAttributeByPos(variable).getType()
        if variable_type == Attributes.getAttributeByPos(0).REAL:
            return self.REAL

        variable_type = self.attributes.getAttributeByPos(variable).getType()
        if variable_type == Attributes.getAttributeByPos(0).NOMINAL:
            return self.NOMINAL

        # No known type matched.
        return 0

Ejemplo n.º 8

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/pythonFARCHDShare

 def getClasses(self):
     """Return the nominal values of output attribute 0, one per class.

     Prints the class count, then asks the output attribute for the nominal
     value at each index below ``self.__nClasses``.
     """
     print(" getClasses,self.__nClasses: " + str(self.__nClasses))
     labels = []
     for index in range(self.__nClasses):
         output_attribute = Attributes.getOutputAttribute(Attributes, 0)
         labels.append(output_attribute.getNominalValue(index))
     return labels

Ejemplo n.º 9

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def printInsSet(self):
        """Print the attribute definitions followed by every instance.

        When ``self.storeAttributesAsNonStatic`` is set and ``self.attributes``
        is not None, the attributes are printed through ``self.attributes`` and
        each instance receives it as an argument; otherwise the ``Attributes``
        class is used and instances are printed without arguments.
        """
        use_instance_attrs = (self.storeAttributesAsNonStatic
                              and self.attributes is not None)

        print("------------- ATTRIBUTES --------------")
        if use_instance_attrs:
            self.attributes.printAttributes()
        else:
            Attributes.printAttributes()

        print("-------------- INSTANCES --------------")
        # enumerate() works on any sequence; Python lists have no `.length`.
        for index, instance in enumerate(self.instanceSet):
            print("\n> Instance " + str(index) + ":")
            # Chosen per instance inside the loop, not in a for/else clause.
            if use_instance_attrs:
                instance.printInsSet(self.attributes)
            else:
                instance.printInsSet()

Ejemplo n.º 10

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def get_classes(self):
     """Return the nominal values of output attribute 0, one per class.

     The list has ``self.nclasses`` entries; entry ``i`` is the value the
     output attribute returns for nominal index ``i``.
     """
     labels = []
     for index in range(self.nclasses):
         output_attribute = Attributes.getOutputAttribute(Attributes, 0)
         labels.append(output_attribute.getNominalValue(index))
     return labels

Ejemplo n.º 11

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def getOriginalHeaderWithoutInOut(self):
        """Return the "@relation" line followed by one line per attribute.

        The relation name and attribute list come from ``self.attributes``
        when ``self.storeAttributesAsNonStatic`` is set and
        ``self.attributes`` is not None, otherwise from the ``Attributes``
        class.  Each attribute is rendered with ``str`` and terminated by a
        newline; no @inputs/@outputs lines are added.
        """
        if self.storeAttributesAsNonStatic and self.attributes is not None:
            header = "@relation " + self.attributes.getRelationName() + "\n"
            definitions = self.attributes.getAttributes()
        else:
            header = "@relation " + Attributes.getRelationName() + "\n"
            definitions = Attributes.getAttributes()

        rendered = [str(definitions[pos]) + "\n"
                    for pos in range(len(definitions))]
        return header + "".join(rendered)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/pythonFARCHDShare

    def processInputsAndOutputs(self, isTrain, inputsDef, outputsDef,
                                outputAttrNames, inputAttrNames):
        """Decide which attributes are inputs and outputs after header parsing.

        Nothing beyond resetting ``self.outputInfered`` happens unless
        ``isTrain is True``.  For a training set:

        * neither list defined: the last attribute becomes the output (its
          name is appended to ``outputAttrNames``) and every other attribute
          becomes an input; ``self.outputInfered`` is set.
        * only outputs defined: inputs are all attributes except the outputs.
        * only inputs defined: outputs are all attributes except the inputs;
          ``self.outputInfered`` is set.

        The resulting lists are registered with
        ``Attributes.setOutputInputAttributes``.

        Args:
            isTrain: processing only happens when this is exactly ``True``.
            inputsDef: whether input names were declared (compared with ==).
            outputsDef: whether output names were declared (compared with ==).
            outputAttrNames: list of output names; may be appended to in place.
            inputAttrNames: list of input names; only rebound locally.
        """
        print("Processing inputs and outputs")
        self.outputInfered = False  # default until an inference is made
        if isTrain is not True:
            return

        if inputsDef == False and outputsDef == False:
            # Every Attributes call in this method passes the Attributes class
            # itself as the receiver; passing this InstanceSet (`self`) would
            # make the lookup run against the wrong object.
            posHere = Attributes.getNumAttributes(Attributes) - 1
            outputAttrNames.append(
                Attributes.getAttributeByPos(Attributes, posHere).getName())
            inputAttrNames = Attributes.getAttributesExcept(
                Attributes, outputAttrNames)
            self.outputInfered = True
        elif inputsDef == False and outputsDef == True:
            inputAttrNames = Attributes.getAttributesExcept(
                Attributes, outputAttrNames)
        elif inputsDef == True and outputsDef == False:
            outputAttrNames = Attributes.getAttributesExcept(
                Attributes, inputAttrNames)
            self.outputInfered = True

        Attributes.setOutputInputAttributes(Attributes, inputAttrNames,
                                            outputAttrNames)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

    def copy_header(self):
        """Return the dataset header text assembled from ``Attributes``.

        The pieces are, in order: the "@relation" line, the input attribute
        definitions, the output attribute definitions, the input header line,
        the output header line, and a closing "@data" line.
        """
        # List items are evaluated top to bottom, so the Attributes getters
        # run in the same order as the header is laid out.
        sections = [
            "@relation " + Attributes.getRelationName(Attributes) + "\n",
            Attributes.getInputAttributesHeader(Attributes),
            Attributes.getOutputAttributesHeader(Attributes),
            Attributes.getInputHeader(Attributes) + "\n",
            Attributes.getOutputHeader(Attributes) + "\n",
            "@data\n",
        ]
        return "".join(sections)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/pythonFARCHDShare

    def removeAttribute(self, tSet, inputAtt, whichAtt):
        """Remove one attribute from the definitions and from every instance.

        When ``self.storeAttributesAsNonStatic`` is set and ``self.attributes``
        is not None the attribute is removed from ``self.attributes`` (and from
        ``tSet.attributes`` when ``tSet`` is given); otherwise it is removed
        through the ``Attributes`` class.  If a removal reports failure (a
        falsy result) nothing else is touched.  On success every instance of
        this set, and of ``tSet`` when given, is told to drop the attribute.

        Args:
            tSet: optional second instance set to keep in sync, or None.
            inputAtt: truthy to remove an input attribute, falsy for an output.
            whichAtt: position of the attribute among the inputs/outputs.

        Returns:
            True when the attribute was removed, False when a removal failed.
        """
        use_instance_attrs = (self.storeAttributesAsNonStatic
                              and self.attributes is not None)

        # Keep a reference to the attribute before its definition disappears.
        if use_instance_attrs:
            if inputAtt:
                attToDel = self.attributes.getInputAttribute(whichAtt)
            else:
                attToDel = self.attributes.getOutputAttribute(whichAtt)
        else:
            if inputAtt:
                attToDel = Attributes.getInputAttribute(whichAtt)
            else:
                attToDel = Attributes.getOutputAttribute(whichAtt)

        # Remove the definition from exactly one source.  The tSet removal is
        # only attempted when tSet exists, so a missing tSet is not a failure.
        if use_instance_attrs:
            print("Removing the attribute")
            if not self.attributes.removeAttribute(inputAtt, whichAtt):
                return False
            if (tSet is not None
                    and not tSet.attributes.removeAttribute(inputAtt, whichAtt)):
                return False
        else:
            if not Attributes.removeAttribute(inputAtt, whichAtt):
                return False

        # Drop the attribute's value from every stored instance.
        rows = list(self.instanceSet)
        if tSet is not None:
            rows.extend(tSet.instanceSet)
        for row in rows:
            if use_instance_attrs:
                row.removeAttribute(self.attributes, attToDel, inputAtt,
                                    whichAtt)
            else:
                row.removeAttribute(attToDel, inputAtt, whichAtt)

        return True

Ejemplo n.º 15

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/FARCHD_Negative_Rules

    def removeAttribute(self, tSet, inputAtt, whichAtt):
        """Remove one attribute from the definitions and from every instance.

        With ``self.storeAttributesAsNonStatic`` set and ``self.attributes``
        not None, the attribute is removed from ``self.attributes`` and, when
        ``tSet`` is given, from ``tSet.attributes``.  Otherwise it is removed
        through the ``Attributes`` class.  A removal that reports failure (a
        falsy result) stops the method before any instance is modified.  After
        a successful removal each instance in this set, and in ``tSet`` when
        given, is asked to drop the attribute.

        Args:
            tSet: optional second instance set to keep in sync, or None.
            inputAtt: truthy to remove an input attribute, falsy for an output.
            whichAtt: position of the attribute among the inputs/outputs.

        Returns:
            True when the attribute was removed, False when a removal failed.
        """
        use_instance_attrs = (self.storeAttributesAsNonStatic
                              and self.attributes is not None)

        # Fetch the attribute first; instances need it after it is removed.
        if use_instance_attrs:
            getter = (self.attributes.getInputAttribute if inputAtt
                      else self.attributes.getOutputAttribute)
        else:
            getter = (Attributes.getInputAttribute if inputAtt
                      else Attributes.getOutputAttribute)
        attToDel = getter(whichAtt)

        # Exactly one definition source is updated: the instance-level one
        # (guarded against self.attributes being None) or the static one.
        if use_instance_attrs:
            print("Removing the attribute")
            removed = self.attributes.removeAttribute(inputAtt, whichAtt)
            if removed and tSet is not None:
                removed = tSet.attributes.removeAttribute(inputAtt, whichAtt)
        else:
            removed = Attributes.removeAttribute(inputAtt, whichAtt)
        if not removed:
            return False

        def _drop_from(instances):
            # Tell each instance to discard its value for the attribute.
            for instance in instances:
                if use_instance_attrs:
                    instance.removeAttribute(self.attributes, attToDel,
                                             inputAtt, whichAtt)
                else:
                    instance.removeAttribute(attToDel, inputAtt, whichAtt)

        _drop_from(self.instanceSet)
        if tSet is not None:
            _drop_from(tSet.instanceSet)

        # Success is reported whether or not a second set was supplied.
        return True

Ejemplo n.º 16

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/Chi-RW-Python

    def getType(self, variable):
        """Map the type of attribute ``variable`` to one of this class's constants.

        The attribute's type is compared in turn with the INTEGER, REAL and
        NOMINAL constants exposed by the attribute at position 0.

        Returns:
            ``self.INTEGER``, ``self.REAL`` or ``self.NOMINAL`` for a matching
            type, otherwise 0.
        """
        # NOTE(review): getAttribute is called without the Attributes class as
        # first argument, unlike getAttributeByPos below - confirm intended.
        observed = Attributes.getAttribute(variable).getType()
        if observed == Attributes.getAttributeByPos(Attributes, 0).INTEGER:
            return self.INTEGER

        observed = Attributes.getAttribute(variable).getType()
        if observed == Attributes.getAttributeByPos(Attributes, 0).REAL:
            return self.REAL

        observed = Attributes.getAttribute(variable).getType()
        if observed == Attributes.getAttributeByPos(Attributes, 0).NOMINAL:
            return self.NOMINAL

        # No known type matched.
        return 0

Ejemplo n.º 17

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

    def read_classification_set(self, dataset_file, train, file_path):
        """Read a classification dataset and fill this object's data tables.

        The file is parsed by ``self.instance_set.read_set``.  The values are
        then copied into plain lists (``x_array``, ``missing_array``,
        ``nominal_array``, ``integer_array``, ``output_integer_array``,
        ``output_array``), the per-input bounds ``emin``/``emax`` are set and
        ``nclasses`` becomes the largest integer output value plus one.
        A missing input value is stored as ``emin[j] - 1``.

        The process terminates with status 1 when the dataset declares more
        than one output attribute, or none.  Any other exception raised while
        loading is caught and only reported on stdout, after which
        ``self.compute_instances_per_class()`` is still called.  When
        ``self.instance_set`` is None nothing is loaded.

        Args:
            dataset_file: forwarded to ``instance_set.read_set``.
            train: forwarded to ``instance_set.read_set``.
            file_path: forwarded to ``instance_set.read_set``.
        """
        try:
            print("Inside read_classification_set, datasetFile :" +
                  str(dataset_file))

            if self.instance_set is None:
                print("self.instance_set is Null")
            else:
                print("self.instance_set is not None, train = " + str(train))
                self.instance_set.read_set(dataset_file, train, file_path)
                print(
                    "begin getNumInstances ...... in read_classification_set ")
                self.ndata = self.instance_set.getNumInstances()
                print(
                    "In readCread_classification_setlassificationSet , self.ndata is : "
                    + str(self.ndata))
                self.ninputs = Attributes.getInputNumAttributes(Attributes)
                print("In read_classification_set , self.ninputs is : " +
                      str(self.ninputs))
                num_outputs = Attributes.getOutputNumAttributes(Attributes)
                self.nvars = self.ninputs + num_outputs
                print("In read_classification_set , self.nvars is : " +
                      str(self.nvars))

                # Exactly one output variable is supported.  SystemExit is not
                # an Exception subclass, so the handler below does not swallow
                # it and the process really stops.
                if num_outputs > 1:
                    print(
                        "This algorithm can not process MIMO datasets !!! exit 1"
                    )
                    raise SystemExit(1)
                if num_outputs < 1:
                    print(
                        "This algorithm can not process datasets without outputs !!!!!!"
                    )
                    raise SystemExit(1)

                ndata_length = self.ndata
                ninput_length = self.ninputs
                print("nDataLength = " + str(ndata_length))

                # One row per instance, one column per input.
                self.x_array = [[0.0] * ninput_length
                                for _ in range(ndata_length)]
                self.missing_array = [[True] * ninput_length
                                      for _ in range(ndata_length)]

                self.nominal_array = [True] * ninput_length
                self.integer_array = [True] * ninput_length

                self.output_integer_array = [0] * ndata_length
                self.output_real_array = [0.0] * ndata_length
                self.output_array = [""] * ndata_length

                # Maximum and minimum of inputs
                self.emax = [0.0] * ninput_length
                self.emin = [0.0] * ninput_length

                for i in range(ninput_length):
                    attribute_instance: Attribute = Attributes.getInputAttribute(
                        Attributes, i)

                    nominal_count = attribute_instance.getNumNominalValues()
                    if nominal_count > 0:
                        # Nominal inputs range over their value indices.  The
                        # count is read from the attribute fetched above.
                        self.emin[i] = 0
                        self.emax[i] = nominal_count - 1
                    else:
                        # NOTE(review): bounds are looked up by overall
                        # attribute position, not by input position - confirm
                        # the two agree for these datasets.
                        positional = Attributes.getAttributeByPos(
                            Attributes, i)
                        self.emax[i] = positional.getMaxAttribute()
                        self.emin[i] = positional.getMinAttribute()

                    attribute_type = attribute_instance.getType()
                    if attribute_type == Attribute.NOMINAL:
                        self.nominal_array[i] = True
                        self.integer_array[i] = False
                    elif attribute_type == Attribute.INTEGER:
                        self.nominal_array[i] = False
                        self.integer_array[i] = True
                    else:
                        self.nominal_array[i] = False
                        self.integer_array[i] = False

                # Copy every instance into the numeric tables.
                self.nclasses = 0
                for i in range(ndata_length):
                    inst = self.instance_set.getInstance(i)
                    for j in range(ninput_length):
                        self.x_array[i][
                            j] = self.instance_set.getInputNumericValue(i, j)
                        self.missing_array[i][
                            j] = inst.getInputMissingValuesWithPos(j)
                        if self.missing_array[i][j]:
                            # Missing values are encoded just below the minimum.
                            self.x_array[i][j] = self.emin[j] - 1

                    self.output_integer_array[
                        i] = self.instance_set.getOutputNumericValue(i, 0)
                    self.output_array[
                        i] = self.instance_set.getOutputNominalValue(i, 0)
                    if self.output_integer_array[i] > self.nclasses:
                        self.nclasses = self.output_integer_array[i]

                # Class labels start at 0, so the count is the largest label + 1.
                self.nclasses = self.nclasses + 1
                print('Number of classes=' + str(self.nclasses))
        except Exception as error:
            # Best effort by design: the failure is reported, not re-raised.
            print(
                "read_classification_set: Exception in readSet, in read_classification_set:"
                + str(error))

        # self.computeStatistics()
        self.compute_instances_per_class()

Ejemplo n.º 18

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def get_output_value(self, int_value):
     """Return the nominal value of output attribute 0 at index ``int_value``."""
     return Attributes.getOutputAttribute(Attributes,
                                          0).getNominalValue(int_value)

Ejemplo n.º 19

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/FARCHD_Negative_Rules

class InstanceSet:
    """Holds the instances of one data set together with its header text.

    The names below are class-level defaults; several of them are replaced by
    per-object values in ``__init__`` and in the ``read_set*`` / header
    parsing methods.
    """
    # /////////////////////////////////////////////////////////////////////////////
    # //////////////// ATTRIBUTES OF THE INSTANCESET CLASS ////////////////////////
    # /////////////////////////////////////////////////////////////////////////////

    # List holding every instance of the data set. This class-level list is
    # only a default: read_set and removeInstance rebind self.instanceSet to a
    # new list.

    instanceSet = []

    # Text of the file header (rebuilt line by line in parseHeader).

    header = ""

    # Header text restricted to the attribute definitions, i.e. the header as
    # it was before the @inputs / @outputs lines.
    attHeader = ""
    # Collector for the format errors found while reading the train and test
    # data sets. It lives on the class: insertInputOutput records errors
    # through InstanceSet.errorLogger.
    errorLogger = FormatErrorKeeper()

    # Attribute definitions used when they are stored per object (non-static).
    # __init__ resets this to None; setAttributesAsNonStatic fills it in.

    attributes = InstanceAttributes()
    # Flag telling whether the attribute definitions are stored as non-static
    # (per object), which permits loading different data sets.
    storeAttributesAsNonStatic = None

    # Flag telling whether the output attribute was inferred rather than
    # declared in the header (set in processInputsAndOutputs).

    outputInfered = None

    # /////////////////////////////////////////////////////////////////////////////
    # ///////////////// METHODS OF THE INSTANCESET CLASS //////////////////////////
    # /////////////////////////////////////////////////////////////////////////////

    # Location of the data file. data_folder is not assigned by the methods
    # visible here; file_to_open is set in read_set and data_lines (all lines
    # returned by the parser) is set in parseHeader.
    data_folder = None
    file_to_open = None
    data_lines = None

    # added by rui
    # data_rows is filled by parse_header_from_data_row_array;
    # attributes_insance is the Attributes object created in __init__.
    data_rows = None
    attributes_insance = None

    def __init__(self):
        """Create an empty instance set.

        The set starts with no per-object attribute definitions
        (``attributes`` is None and ``storeAttributesAsNonStatic`` is False)
        and with a new ``Attributes`` object in ``attributes_insance``.
        """
        # Give every object its own list: the class-level ``instanceSet = []``
        # default is a single mutable list shared by all objects.
        self.instanceSet = []
        self.storeAttributesAsNonStatic = False
        self.attributes = None
        self.attributes_insance = Attributes()

    def InstanceSetWithNonSAtrr(self, nonStaticAttributes):
        """Re-initialise the set, choosing static or non-static attributes.

        :param nonStaticAttributes: value stored in
            ``storeAttributesAsNonStatic``.

        The per-object attribute definitions are cleared (set to None).
        """
        self.attributes = None
        self.storeAttributesAsNonStatic = nonStaticAttributes

    def InstanceSetWithIns(self, ins):
        """Initialise this set as a copy of another ``InstanceSet``.

        :param ins: the InstanceSet to copy from.

        The list of instances is copied (the Instance objects themselves are
        shared), the two header strings are copied, and the attribute
        definitions object is deep-copied so that later changes to one set do
        not alter the other.
        """
        import copy  # local import: only this copy-constructor needs it

        # Was ``ins.instanSet``: no such attribute exists, so the call always
        # raised AttributeError.
        self.instanceSet = list(ins.instanceSet)

        self.header = str(ins.header)
        self.attHeader = str(ins.attHeader)
        # Was ``str(ins.attributes)``, which replaced the definitions object
        # by its textual form and broke every later ``self.attributes.*`` call.
        self.attributes = copy.deepcopy(ins.attributes)
        self.storeAttributesAsNonStatic = ins.storeAttributesAsNonStatic

    # end InstanceSet

    # * InstanceSet
    # *
    # * This constructor permit define if the attribute's definition need to be
    # * stored as non-static (nonStaticAttributes = true). Otherwise, if
    # * nonStaticAttributes = false, using this constructor is equivalent to use
    # * the constructor by default.

    # * Creates a new InstanceSet with the header and the instances of the
    # * passed object. It performs a deep (newly allocated) copy.
    # * @param is Original InstanceSet

    # * setAttributesAsNonStatic
    # *
    # * It stores the static-defined attributes in the class Attributes as
    # * non static in the object attributes. After this it does not remove the
    # * static-definition of the Attributes; this is in that way to permit to
    # * call this functions for differents datasets from the same problem, such
    # * as, a train dataset and the correspondent test dataset.
    # */

    def setAttributesAsNonStatic(self):
        """Switch this set to per-object (non-static) attribute definitions.

        A new ``InstanceAttributes`` object is stored in ``self.attributes``,
        its ``copyStaticAttributes()`` is invoked, and the
        ``storeAttributesAsNonStatic`` flag is raised.
        """
        definitions = InstanceAttributes()
        self.attributes = definitions
        definitions.copyStaticAttributes()
        self.storeAttributesAsNonStatic = True

    # end setAttributesAsNonStatic

    # /**
    #  * getAttributeDefinitions
    #  *
    #  * It does return the definition of the attibutes contained in the dataset.
    #  *
    #  * @return InstanceAttributes contains the attribute's definitions.

    def getAttributeDefinitions(self):
        """Return the per-object attribute definitions (``self.attributes``).

        The value is None until ``setAttributesAsNonStatic`` (or another
        method that assigns ``self.attributes``) has been called.
        """
        return self.attributes

    # end InstanceAttributes

    # * This method reads all the information in a DB and loads it into memory.
    # * @param fileName is the database file name.
    # * @param isTrain is a flag that indicates whether the database is a train or a test set.
    # * @throws DatasetException if there is any semantic error in the input file.
    # * @throws HeaderFormatException if there is any lexical or syntactic error in the
    # * header of the input file

    def read_set(self, fileName, isTrain, file_path):
        """Read a data-set file and load its instances into memory.

        The header is handled by ``parseHeader``, which also stores every line
        returned by the parser in ``self.data_lines``. Each of those lines
        that contains none of the header keywords becomes one ``Instance``;
        the resulting list replaces ``self.instanceSet``.

        Any exception raised while reading is printed and swallowed, so this
        method never propagates an error to its caller.

        :param fileName: name of the data-set file.
        :param isTrain: True when the file is a training set. In that case the
            attribute statistics are initialised before, and finished after,
            reading the data, provided there is exactly one output attribute.
        :param file_path: folder of the file; it is joined onto the current
            working directory.
        """
        print("Before try in readSet of InstanceSet, file_path is :" +
              str(file_path) + ".")
        print("Opening the file in readSet of InstanceSet: " + str(fileName) +
              ".")
        header_tags = ("@relation", "@attribute", "@inputs", "@outputs",
                       "@data")
        try:
            self.file_to_open = Path.cwd() / file_path / fileName

            # Declaring an instance parser
            print("In readSet,file_to_open is:" + str(self.file_to_open))
            instance_parser = InstanceParser(self.file_to_open, isTrain)
            try:
                # Reading information in the header, i.e., @relation,
                # @attribute, @inputs and @outputs
                self.parseHeader(instance_parser, isTrain)
                # The attributes statistics are init if we are in train mode.
                print("In readSet, isTrain is " + str(isTrain))
                if isTrain and self.attributes_insance.getOutputNumAttributes(
                ) == 1:
                    print("Begin Attributes.initStatistics......")
                    self.attributes_insance.initStatistics()

                print("Reading the data")
                print("begin instance_parser.getLines()...... ")
                print("*********  There are : " + str(len(self.data_lines)) +
                      "In original Data lines ********* ")

                # Keep only the lines that carry no header keyword.
                new_data_lines = []
                for line in self.data_lines:
                    print(" The line is :" + line)
                    if not any(tag in line for tag in header_tags):
                        new_data_lines.append(line)
                print("*********  There are : " + str(len(new_data_lines)) +
                      " In new Data lines ********* ")

                # A temporary list is used to store the instances read; each
                # instance receives its position in that list.
                tempSet = []
                for line in new_data_lines:
                    newInstance = Instance()
                    newInstance.setThreeParameters(line, isTrain, len(tempSet))
                    tempSet.append(newInstance)

                print(" Number of instances read: " + str(len(tempSet)))
                self.instanceSet = tempSet
                print("After converting all instances")

                # Report the errors of the logger that actually collects them.
                # The previous code checked self.errorLogger but then iterated
                # a freshly created (always empty) local logger, so nothing
                # was ever printed.
                logger = self.errorLogger
                for k in range(0, logger.getNumErrors()):
                    logger.getError(k).printErrorInfo()

                # If being on a train dataset, the statistics are finished.
                # NOTE(review): initialisation above goes through
                # self.attributes_insance while this goes through the
                # Attributes class itself - confirm both see the same state.
                if isTrain and Attributes.getOutputNumAttributes(
                        Attributes) == 1:
                    Attributes.finishStatistics(Attributes)
            finally:
                # Close the stream even when reading failed half-way.
                instance_parser.close()
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" +
                  str(e))
        # end of InstanceSet constructor.

        # * It reads the information in the header of the file.
        # * It reads relation's name, attributes' names, and inputs and outputs.
        # *
        # * @param parser is the parser of the data set
        # * @param isTrain is a boolean indicating if this is a train set (and so
        # * parameters information must be read) or a test set (parameters information
        # * has not to be read).

        # read set from data row array for granularity
    def read_set_from_data_row_array(self, data_raw_array, isTrain):
        """Build the instance set from an in-memory array of data rows.

        A parser is created with
        ``InstanceParser.init_for_granularity_parser``;
        ``parse_header_from_data_row_array`` then stores the parser's rows in
        ``self.data_rows``. One ``Instance`` is built per stored row and the
        resulting list replaces ``self.instanceSet``.

        Any exception raised while reading is printed and swallowed.

        :param data_raw_array: rows handed to the parser.
        :param isTrain: True when the rows are training data. In that case the
            attribute statistics are initialised before, and finished after,
            reading the rows, provided there is exactly one output attribute.
        """
        try:
            instance_parser = InstanceParser.init_for_granularity_parser(
                data_raw_array, isTrain)
            try:
                self.parse_header_from_data_row_array(instance_parser, isTrain)
                # The attributes statistics are init if we are in train mode.
                if isTrain and Attributes.getOutputNumAttributes(
                        Attributes) == 1:
                    Attributes.initStatistics(Attributes)

                # The rows to convert are the ones the parser returned.
                rows = self.data_rows

                # The previous loop was guarded by
                # ``len(new_data_rows) != 0`` on a list that was always empty,
                # so no instance was ever created. Every row is converted now.
                tempSet = []
                for i in range(0, len(rows)):
                    newInstance = Instance()
                    newInstance.set_three_parameters_for_granularity_rules(
                        rows[i], isTrain, len(tempSet))
                    tempSet.append(newInstance)

                self.instanceSet = tempSet

                # Report the errors of the logger that actually collects them
                # (the old code iterated a fresh, always empty local logger).
                logger = self.errorLogger
                for k in range(0, logger.getNumErrors()):
                    logger.getError(k).printErrorInfo()

                # If being on a train dataset, the statistics are finished.
                if isTrain and Attributes.getOutputNumAttributes(
                        Attributes) == 1:
                    Attributes.finishStatistics(Attributes)
            finally:
                # Close the parser even when reading failed half-way.
                instance_parser.close()
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" +
                  str(e))
        # end of InstanceSet constructor.

    def parseHeader(self, parser, isTrain):
        """Parse the header lines delivered by ``parser``.

        All lines returned by ``parser.getLines()`` are kept in
        ``self.data_lines``. Lines are then processed until the ``@data``
        marker: ``@relation``, ``@attribute``, ``@inputs`` and ``@outputs``
        lines are interpreted (only when ``isTrain`` is true) and every
        processed line is appended to ``self.header``. ``self.attHeader``
        receives the header text as it was before the first ``@inputs`` /
        ``@outputs`` line, or the whole header when neither is present.
        Finally ``processInputsAndOutputs`` is called with what was found.

        :param parser: object providing ``getLines()``.
        :param isTrain: True when parsing a training set.
        """
        inputAttrNames = []
        outputAttrNames = []
        inputsDef = False
        outputsDef = False
        self.header = ""
        lineCount = 0
        self.attHeader = None

        print(
            "Begin to call the InstanceParser.getLines(),parser.getLines(), in InstanceSet."
        )
        lines = parser.getLines()
        self.data_lines = lines

        print(
            "************************Before for line in lines *************************"
        )
        for line in lines:
            line = str(line).strip()
            print("In parseHeader method of InstanceSet, the line is:" + line)
            # Case-insensitive end-of-header test. The previous code
            # lower-cased the literal "@data" instead of the line.
            if line.lower() == "@data":
                break

            lineCount = lineCount + 1
            if "@relation" in line:
                if isTrain:
                    relationName = line.replace("@relation", "").strip()
                    self.attributes_insance.setRelationName(relationName)
            elif "@attribute" in line:
                if isTrain:
                    self.insertAttribute(line)
            elif "@inputs" in line:
                self.attHeader = self.header
                inputsDef = True

                # Text after the "@inputs " prefix.
                aux = line[8:]

                if isTrain:
                    self.insertInputOutput(aux, lineCount, inputAttrNames,
                                           "inputs", isTrain)
            elif "@outputs" in line:
                if self.attHeader is None:
                    self.attHeader = self.header
                outputsDef = True
                # Everything after the keyword, so a comma-separated list
                # written with spaces ("a, b") is kept whole. The previous
                # split()[1] kept only the first whitespace token.
                aux = line.split(None, 1)[1]
                if isTrain:
                    self.insertInputOutput(aux, lineCount, outputAttrNames,
                                           "outputs", isTrain)

            self.header += line + "\n"
        print(
            "************************After for line in lines.************************"
        )
        if self.attHeader is None:
            self.attHeader = self.header
        self.processInputsAndOutputs(isTrain, inputsDef, outputsDef,
                                     outputAttrNames, inputAttrNames)

    # end headerParse

    # added by rui for granularity rules
    def parse_header_from_data_row_array(self, parser, isTrain):
        """Reset the header fields and cache the rows offered by ``parser``.

        ``self.header`` becomes an empty string, ``self.attHeader`` becomes
        None and ``self.data_rows`` receives ``parser.get_rows()``.

        :param parser: object providing ``get_rows()``.
        :param isTrain: not used by this method.
        """
        self.attHeader = None
        self.header = ""
        self.data_rows = parser.get_rows()

    # end parse_header_from_data_row_array

    def insertAttribute(self, line):
        """Parse one ``@attribute`` header line and register the attribute.

        The line is split into a name/type part and a value part by inserting
        a tab in front of the first kind of bracket found (``{`` is preferred
        over ``[``). Depending on what follows the name, the new ``Attribute``
        is set up as:

        * nominal without values - nothing follows the name part;
        * nominal with fixed bounds - a ``{v1, v2, ...}`` list is present;
        * integer or real - a type word and, optionally, ``[min, max]``.

        The attribute is finally added to ``self.attributes_insance``.

        :param line: the header line to parse.
        """
        # Decide which bracket opens the value part. A line may contain
        # neither; the previous code then failed with an unbound variable
        # instead of reaching the "nominal without values" case below.
        if "{" in line:
            token_str = "{"
        elif "[" in line:
            token_str = "["
        else:
            token_str = None

        if token_str is not None:
            line = line.replace(token_str, "\t" + token_str)

        # First part: "@attribute <name> [<type>]"; the rest: the values.
        st = line.split("\t")

        # Disregarding the first token. It is @attribute
        st[0] = st[0].replace("@attribute", "").strip()
        first_part = st[0].split()

        at = Attribute()
        at.setName(first_part[0])
        print("att set name as first_part[0] is:" + first_part[0])

        # Next action depends on the type of attribute: continuous or nominal
        if len(st) == 1:
            # Nominal attribute with no definition of values.
            at.setType(Attribute.NOMINAL)

        elif "{" in line:
            # Nominal attribute with an explicit list of values.
            at.setType(Attribute.NOMINAL)
            at.setFixedBounds(True)

            indexL = line.index("{") + 1
            indexR = line.index("}")
            print("indexL : " + str(indexL) + "indexR : " + str(indexR))
            for nominalStr in line[indexL:indexR].split(","):
                at.addNominalValue(nominalStr.strip())

        else:
            # Integer or real attribute.
            attType = first_part[1].lower()
            if attType == "integer":
                at.setType(Attribute.INTEGER)
            elif attType == "real":
                at.setType(Attribute.REAL)

            # find() returns -1 when the bracket is missing, which is what
            # the test below expects; index() would raise ValueError instead.
            indexL = line.find("[")
            indexR = line.find("]")

            if indexL != -1 and indexR != -1:
                st2 = line[indexL + 1:indexR].split(",")
                minBound = float(st2[0].strip())
                maxBound = float(st2[1].strip())
                at.setBounds(minBound, maxBound)

        self.attributes_insance.addAttribute(at)
        # print("insertAttribute is finished :::: ")

    # end insertAttribute

    def insertInputOutput(self, line, lineCount, collection, type, isTrain):
        """Add the attribute names listed in ``line`` to ``collection``.

        ``line`` is a comma-separated list of attribute names. A name known
        to ``self.attributes_insance`` is appended to ``collection`` unless
        it is already there; an unknown name is recorded as an
        ``ErrorInfo`` in ``InstanceSet.errorLogger``.

        :param line: comma-separated attribute names.
        :param lineCount: header line number, stored in the error record.
        :param collection: list of names, modified in place.
        :param type: "inputs" or "outputs"; only used in the error message.
        :param isTrain: stored in the error record.
        """
        for raw_name in line.split(","):
            attName = str(raw_name.strip())
            # The previous code also fetched getAttributes() on every
            # iteration and never used the result; that call is gone.
            if self.attributes_insance.getAttributeByName(attName) is None:
                # If this attribute has not been declared, generate error
                er = ErrorInfo(
                    ErrorInfo.InputTestAttributeNotDefined, 0, lineCount, 0, 0,
                    isTrain,
                    ("The attribute " + attName + " defined in @" + type +
                     " in test, it has not been defined in @inputs in its train dataset. It will be ignored"
                     ))
                InstanceSet.errorLogger.setError(er)
            elif attName not in collection:
                collection.append(attName)

    # end insertInputOutput

    def processInputsAndOutputs(self, isTrain, inputsDef, outputsDef,
                                outputAttrNames, inputAttrNames):
        """Fix the input and output attribute lists once the header is parsed.

        Nothing beyond resetting ``self.outputInfered`` happens for a
        non-train set. For a train set the missing side is derived:

        * no ``@outputs``: the outputs are inferred (``outputInfered`` becomes
          True) - from the attributes that are not inputs when ``@inputs``
          was given, otherwise the last attribute is taken as the output and
          all the others as inputs;
        * only ``@outputs``: the inputs are all the remaining attributes.

        The two lists are then handed to
        ``self.attributes_insance.setOutputInputAttributes``.
        """
        print("Processing inputs and outputs")
        self.outputInfered = False  # default until proven otherwise
        if not isTrain:
            return

        print("isTrain == True")
        registry = self.attributes_insance
        if not outputsDef:
            if inputsDef:
                # Outputs are whatever was not declared as input.
                outputAttrNames = registry.getAttributesExcept(inputAttrNames)
            else:
                # Neither list given: the last attribute is the output.
                last_position = registry.getNumAttributes() - 1
                last_name = registry.getAttributeByPos(last_position).getName()
                outputAttrNames.append(last_name)
                inputAttrNames = registry.getAttributesExcept(outputAttrNames)
            self.outputInfered = True
        elif not inputsDef:
            # Inputs are whatever was not declared as output.
            inputAttrNames = registry.getAttributesExcept(outputAttrNames)

        print("setOutputInputAttributes begin: ")
        registry.setOutputInputAttributes(inputAttrNames, outputAttrNames)

    # end of processInputsAndOutputs

    # '''
    #  * Test if the output attribute has been infered.
    #  * @return True if the output attribute has been infered. False if not.
    #  '''

    def isOutputInfered(self):
        """Return the ``outputInfered`` flag.

        The flag is assigned in ``processInputsAndOutputs``; it is True when
        the output attribute was inferred instead of declared.
        """
        return self.outputInfered

    # '''
    #  * It returns the number of instances.
    #  * @return an int with the number of instances.
    # '''

    def getNumInstances(self):
        """Return how many instances are stored (0 when there is no list).

        The count, or the absence of the list, is also printed.
        """
        if self.instanceSet is None:
            print("instanceSet is  None !!!")
            return 0

        instanceNumber = len(self.instanceSet)
        print("instanceSet is not None, instanceNumber = " +
              str(instanceNumber))
        return instanceNumber
        # end numInstances

    # '''
    #  * Gets the instance located at the cursor position.
    #  * @return the instance located at the cursor position.
    # '''

    def getInstance(self, whichInstance):
        """Return the instance stored at position ``whichInstance``.

        :param whichInstance: zero-based position of the instance.
        :return: the instance, or None when the position is out of range.
        """
        if 0 <= whichInstance < len(self.instanceSet):
            return self.instanceSet[whichInstance]
        return None

    # end getInstance

    #  * It returns all the instances of the class.
    #  * @return Instance[] with all the instances of the class.

    def getInstances(self):
        """Return the list that holds all the instances (not a copy)."""
        return self.instanceSet

    # //end getInstances
    # '''
    # '''
    #  * Returns the value of an integer or a real input attribute of an instance
    #  * in the instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the input attribute.
    #  * @return a String with the numeric value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getInputNumericValue(self, whichInst, whichAttr):
        """Return the numeric value of an input attribute of one instance.

        :param whichInst: position of the instance.
        :param whichAttr: position of the input attribute.
        :return: the value given by the instance's ``getInputRealValues``;
            0.0 when that call raises (the error is printed).
        :raises IndexError: when ``whichInst`` is outside the instance set.
        """
        instance_number = len(self.instanceSet)

        if whichInst < 0 or whichInst >= instance_number:
            # str() is required: concatenating the int directly raised a
            # TypeError in place of the intended IndexError.
            raise IndexError("You are trying to access to " + str(whichInst) +
                             " instance and there are only " +
                             str(instance_number) + ".")
        instanceHere = self.instanceSet[whichInst]
        numericValue = 0.0
        try:
            numericValue = instanceHere.getInputRealValues(whichAttr)
        except Exception as error:
            # Best effort: report the problem and fall back to 0.0.
            print("getInputRealValues has exception!! : " + str(error))

        return numericValue

    # end getInputNumericValue

    # '''
    #  * Returns the value of an integer or a real output attribute of an instance
    #  * in the instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the output attribute.
    #  * @return a String with the numeric value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getOutputNumericValue(self, whichInst, whichAttr):
        """Return the numeric value of an output attribute of one instance.

        :param whichInst: position of the instance.
        :param whichAttr: position of the output attribute.
        :return: the value given by the instance's ``getOutputRealValues``.
        :raises IndexError: when ``whichInst`` is outside the instance set.
        """
        instance_number = len(self.instanceSet)
        if whichInst < 0 or whichInst >= instance_number:
            # The previous code called self.ArrayIndexOutOfBoundsException and
            # list.length, neither of which exists, so an AttributeError was
            # raised here. Raise the same exception type as
            # getInputNumericValue instead.
            raise IndexError("You are trying to access to " + str(whichInst) +
                             " instance and there are only " +
                             str(instance_number) + ".")
        return self.instanceSet[whichInst].getOutputRealValues(whichAttr)
        # end getOutputNumericValue
        # end getOutputNumericValue

    #
    # '''
    #  * Returns the value of a nominal input attribute of an instance in the
    #  * instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the input attribute.
    #  * @return a String with the nominal value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getInputNominalValue(self, whichInst, whichAttr):
        """Return the nominal value of an input attribute of one instance.

        :param whichInst: position of the instance.
        :param whichAttr: position of the input attribute.
        :return: the value given by the instance's ``getInputNominalValues``.
        :raises IndexError: when ``whichInst`` is outside the instance set.
        """
        instance_number = len(self.instanceSet)
        if whichInst < 0 or whichInst >= instance_number:
            # The previous code called the non-existent
            # self.ArrayIndexOutOfBoundsException and so failed with an
            # AttributeError; raise IndexError like getInputNumericValue.
            raise IndexError("You are trying to access to " + str(whichInst) +
                             " instance and there are only " +
                             str(instance_number) + ".")
        # The previous code returned getOutputNominalValues here, i.e. the
        # OUTPUT attribute value for an input query.
        # NOTE(review): assumes Instance offers getInputNominalValues, the
        # counterpart of getInputRealValues / getOutputNominalValues - confirm.
        return self.instanceSet[whichInst].getInputNominalValues(whichAttr)
        # end getInputNominalValue
        # end getInputNominalValue

    #
    # '''
    #  * Returns the value of a nominal output attribute of an instance in the
    #  * instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the output attribute.
    #  * @return a String with the nominal value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getOutputNominalValue(self, whichInst, whichAttr):
        """Return the nominal value of an output attribute of one instance.

        :param whichInst: position of the instance.
        :param whichAttr: position of the output attribute.
        :return: the value given by the instance's ``getOutputNominalValues``.
        :raises IndexError: when ``whichInst`` is outside the instance set.
        """
        instance_number = len(self.instanceSet)
        if whichInst < 0 or whichInst >= instance_number:
            # The previous code built the message by adding the int to a str,
            # which raised a TypeError. Raise IndexError like
            # getInputNumericValue instead.
            raise IndexError("You are trying to access to " + str(whichInst) +
                             " instance and there are only " +
                             str(instance_number) + ".")
        return self.instanceSet[whichInst].getOutputNominalValues(whichAttr)
        # end getOutputNumericValue
        # end getOutputNumericValue

    # '''
    #  * It does remove the instance i from the instanceSet.
    #  * @param instNum is the instance removed from the instanceSet.
    # '''

    def removeInstance(self, instNum):
        """Remove the instance stored at position ``instNum``.

        ``self.instanceSet`` is rebound to a new list that holds every other
        instance in its original order. An out-of-range position leaves the
        set untouched.

        :param instNum: zero-based position of the instance to remove.
        """
        if instNum < 0 or instNum >= len(self.instanceSet):
            return
        # Build the shortened list directly instead of pre-filling it with
        # throwaway Instance() objects and overwriting them one by one.
        self.instanceSet = [
            kept for position, kept in enumerate(self.instanceSet)
            if position != instNum
        ]

    # end removeInstance

    # '''
    #  * It removes an attribute. To remove an attribute, both the train and the
    #  * test sets have to be passed to maintain the coherence of the system.
    #  * Otherwise, only the attribute of the train set would be removed, leaving
    #  * the instances of the test set inconsistent, because they would keep one
    #  * extra attribute that no longer exists.
    #  *
    #  * @param tSet is the test set.
    #  * @param inputAtt is a boolean that is true when the attribute to be
    #  * removed is an input attribute.
    #  * @param whichAtt is an integer that indicates the position of the
    #  * attribute to be deleted.
    #  * @return a boolean indicating whether the attribute has been deleted
    # '''

    def removeAttribute(self, tSet, inputAtt, whichAtt):
        """Remove one attribute from this set and, optionally, from a test set.

        The attribute is first removed from the definitions (the per-object
        ones of this set and of ``tSet`` in non-static mode, the ``Attributes``
        class otherwise) and then from every instance of this set and of
        ``tSet``.

        :param tSet: the matching test set, or None.
        :param inputAtt: True to remove an input attribute, False for an
            output attribute.
        :param whichAtt: position of the attribute to remove.
        :return: False when the definitions refuse the removal, True otherwise.

        The previous version had lost its block structure: the static removal
        also ran after a successful non-static one, ``self.attributes`` was
        dereferenced even when None, the test-set loop used the non-existent
        ``list.length`` and a ``for ... else``, and True was only returned
        when ``tSet`` was given.
        """
        non_static = (self.storeAttributesAsNonStatic
                      and self.attributes is not None)

        # Getting a reference to the attribute to delete.
        # NOTE(review): the static calls below are kept exactly as they were
        # (no receiver argument, unlike Attributes.x(Attributes, ...) used
        # elsewhere in this class) - confirm against the Attributes class.
        if inputAtt:
            if non_static:
                attToDel = self.attributes.getInputAttribute(whichAtt)
            else:
                attToDel = Attributes.getInputAttribute(whichAtt)
        else:
            if non_static:
                attToDel = self.attributes.getOutputAttribute(whichAtt)
            else:
                attToDel = Attributes.getOutputAttribute(whichAtt)

        # Removing the attribute from the definitions.
        if non_static:
            print("Removing the attribute")
            if not self.attributes.removeAttribute(inputAtt, whichAtt):
                return False
            if (tSet is not None
                    and not tSet.attributes.removeAttribute(inputAtt, whichAtt)):
                return False
        elif not Attributes.removeAttribute(inputAtt, whichAtt):
            return False

        # Removing the attribute from the instances of this set.
        for instance in self.instanceSet:
            if non_static:
                instance.removeAttribute(self.attributes, attToDel, inputAtt,
                                         whichAtt)
            else:
                instance.removeAttribute(attToDel, inputAtt, whichAtt)

        # Removing the attribute from the instances of the test set.
        if tSet is not None:
            for instance in tSet.instanceSet:
                if non_static:
                    instance.removeAttribute(self.attributes, attToDel,
                                             inputAtt, whichAtt)
                else:
                    instance.removeAttribute(attToDel, inputAtt, whichAtt)
        return True

    # end removeAttribute

    # '''
    #  * It returns the header.
    #  * @return a String with the header of the file.
    # '''

    def getHeader(self):
        """Return the header text stored on this set."""
        header_text = self.header
        return header_text

    # end getHeader

    def setHeader(self, copia):
        """Store ``copia``, converted to a string, as the header."""
        header_text = str(copia)
        self.header = header_text

    # end setHeader

    def getAttHeader(self):
        """Return the attribute header text stored on this set."""
        attribute_header = self.attHeader
        return attribute_header

    # end getAttHeader

    def setAttHeader(self, copia):
        """Store ``copia``, converted to a string, as the attribute header."""
        attribute_header = str(copia)
        self.attHeader = attribute_header

    # end setAttHeader

    # '''
    #  * It does return a new header (not necessary the same header as the
    #  * input file one). It only includes the valid attributes, those ones
    #  * defined in @inputs and @outputs (or taken as that role following the
    #  * keel format specification).
    #  * @return a String with the new header
    # '''

    def getNewHeader(self):
        """Build a new header from the current attribute definitions.

        The result holds the ``@relation`` line, one line per input
        attribute, the first output attribute, and finally the ``@inputs``
        and ``@outputs`` lines, each terminated by a newline.

        :return: a string with the new header.
        """
        pieces = []
        if self.storeAttributesAsNonStatic and self.attributes is not None:
            # Non-static definitions: everything comes from self.attributes,
            # and the static headers must NOT be appended afterwards.
            definitions = self.attributes
            pieces.append("@relation " + definitions.getRelationName())
            # Called without extra arguments, like the other self.attributes
            # accessors used in this class.
            for attribute in definitions.getInputAttributes():
                pieces.append(attribute.toString())
            pieces.append(definitions.getOutputAttributes()[0].toString())
            pieces.append(definitions.getInputHeader())
            pieces.append(definitions.getOutputHeader())
        else:
            pieces.append("@relation " + Attributes.getRelationName())
            for attribute in Attributes.getInputAttributes(Attributes):
                pieces.append(attribute.toString())
            pieces.append(str(Attributes.getOutputAttributes()[0]))
            pieces.append(Attributes.getInputHeader())
            pieces.append(Attributes.getOutputHeader())

        return "".join(piece + "\n" for piece in pieces)

    # end getNewHeader

    # '''
    #  * It returns the original header definition but
    #  * without @input and @output in there
    # '''

    def getOriginalHeaderWithoutInOut(self):
        """Return the ``@relation`` line followed by one line per attribute.

        No ``@inputs`` / ``@outputs`` lines are included. The per-set
        definitions are used when enabled and present; otherwise the static
        Attributes class is queried.
        """
        useOwnAttributes = (self.storeAttributesAsNonStatic
                            and self.attributes is not None)
        source = self.attributes if useOwnAttributes else Attributes

        header = "@relation " + source.getRelationName() + "\n"
        for attribute in source.getAttributes():
            header += str(attribute) + "\n"
        return header

    # '''
    #  * It prints the dataset to the specified PrintWriter
    #  * @param out is the PrintWriter where to print
    # '''

    def printOut(self, out):
        """Print every instance of the set.

        A ``> Instance <i>:`` banner is printed before each instance, and the
        instance is then asked to print itself.

        :param out: the writer handed on to each instance's ``printOut``.
        """
        useOwnAttributes = (self.storeAttributesAsNonStatic
                            and self.attributes is not None)
        for position, instance in enumerate(self.instanceSet):
            # str() is required: concatenating str and int raises TypeError.
            print("> Instance " + str(position) + ":")
            if useOwnAttributes:
                instance.printOut(self.attributes, out)
            else:
                instance.printOut(out)

    # end printOut
    #
    # '''
    #  * It prints the dataset to the specified PrintWriter.
    #  * The order of the attributes is the same as in the
    #  * original file
    #  * @param out is the PrintWriter where to print
    #  * @param printInOut indicates if the @inputs (1), @outputs(2),
    #  * both of them (3) or any (0) has to be printed
    # '''

    def printAsOriginal(self, out, int):
        """Print the dataset keeping the attribute order of the original file.

        :param out: the writer handed on to each instance's
            ``printAsOriginal``.
        :param int: print mode; 1 prints the ``@inputs`` header, 2 the
            ``@outputs`` header, 3 both and 0 neither. The name shadows the
            builtin but is kept so existing callers are unaffected.
        """
        # The mode is taken from the argument, as the parameter documentation
        # describes. NOTE(review): the headers and "@data" still go to stdout,
        # as in the previous non-static branch -- confirm whether they should
        # be written to ``out`` instead.
        printInputs = int in (1, 3)
        printOutputs = int in (2, 3)
        useOwnAttributes = (self.storeAttributesAsNonStatic
                            and self.attributes is not None)

        if useOwnAttributes:
            if printInputs:
                print(self.attributes.getInputHeader())
            if printOutputs:
                print(self.attributes.getOutputHeader())
        else:
            if printInputs:
                print(Attributes.getInputHeader())
            if printOutputs:
                print(Attributes.getOutputHeader())

        print("@data")
        for instance in self.instanceSet:
            print()
            if useOwnAttributes:
                instance.printAsOriginal(self.attributes, out)
            else:
                instance.printAsOriginal(out)

    # end printAsOriginal

    def printInsSet(self):
        """Print the attribute definitions followed by every instance.

        The attributes are printed through ``self.attributes`` when the
        non-static definitions are enabled and present, otherwise through the
        static ``Attributes`` class. Each instance is preceded by a
        ``> Instance <i>:`` banner.
        """
        useOwnAttributes = (self.storeAttributesAsNonStatic
                            and self.attributes is not None)

        print("------------- ATTRIBUTES --------------")
        if useOwnAttributes:
            self.attributes.printAttributes()
        else:
            Attributes.printAttributes()

        print("-------------- INSTANCES --------------")
        for position, instance in enumerate(self.instanceSet):
            print("\n> Instance " + str(position) + ":")
            if useOwnAttributes:
                instance.printInsSet(self.attributes)
            else:
                instance.printInsSet()

    # end printInsSet

    # Remove all instances from this InstanceSet

    def clearInstances(self):
        """Discard all instances by resetting ``instanceSet`` to None."""
        cleared = None
        self.instanceSet = cleared

    # '''
    #    * It adds the passed instance at the end of the present InstanceSet
    #    * @param inst the instance to be added
    # '''

    def addInstance(self, inst):
        """Append ``inst`` at the end of this InstanceSet.

        A new list is built and assigned to ``self.instanceSet``; a set that
        was cleared (``instanceSet`` is None) is treated as empty.

        :param inst: the instance to be added.
        """
        existing = self.instanceSet
        # Copy instead of mutating, so a list object shared with a caller is
        # left untouched (the previous implementation also built a new list).
        grown = [] if existing is None else list(existing)
        grown.append(inst)
        self.instanceSet = grown

    # '''
    #    * Clear the non-Static attributes. The static class Attributes is not modified.
    # '''

    def clearNonStaticAttributes(self):
        """Forget the per-set attribute definitions (``self.attributes``)."""
        cleared = None
        self.attributes = cleared

    # '''
    #    * Appends the given attribute to the non-static list of the current InstanceSet
    #    * @param at The Attribute to be Appended
    # '''

    def addAttribute(self, att):
        """Append ``att`` to the non-static attribute list of this set.

        The ``InstanceAttributes`` container is created on first use.
        """
        container = self.attributes
        if container is None:
            container = InstanceAttributes()
            self.attributes = container
        container.addAttribute(att)

Ejemplo n.º 20

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def has_numerical_attributes(self):
     """Report whether integer or real attributes are present.

     Returns the result of ``Attributes.hasIntegerAttributes`` when it is
     truthy, otherwise the result of ``Attributes.hasRealAttributes``.
     """
     has_integer = Attributes.hasIntegerAttributes(self)
     if has_integer:
         return has_integer
     return Attributes.hasRealAttributes(self)

Ejemplo n.º 21

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def number_values(self, attribute):
     """Return the number of nominal values of the given input attribute.

     ``attribute`` is forwarded to ``Attributes.getInputAttribute``.
     """
     input_attribute = Attributes.getInputAttribute(attribute)
     return input_attribute.getNumNominalValues(Attributes)

Ejemplo n.º 22

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def read_set_from_data_row_array(self, data_raw_array, isTrain):
        """Load the header and the instances from an in-memory array of rows.

        The header is parsed through ``parse_header_from_data_row_array``;
        afterwards one ``Instance`` is created for every row stored in
        ``self.data_rows`` and the result is assigned to ``self.instanceSet``.
        Any exception is caught and reported on stdout.

        :param data_raw_array: the raw rows handed to the instance parser.
        :param isTrain: True when the rows belong to the training set; the
            attribute statistics are then initialised and finished here.
        """
        try:
            errorLogger = FormatErrorKeeper()
            instance_parser = InstanceParser.init_for_granularity_parser(data_raw_array, isTrain)
            # Read @relation, @attribute, @inputs and @outputs.
            self.parse_header_from_data_row_array(instance_parser, isTrain)
            # The attribute statistics are only initialised in train mode.
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.initStatistics(Attributes)

            # Convert every stored row into an Instance. The previous guard
            # tested a list that was never filled, so nothing was ever read.
            data_raw_array = self.data_rows
            tempSet = []
            for row in data_raw_array:
                newInstance = Instance()
                newInstance.set_three_parameters_for_granularity_rules(row, isTrain, len(tempSet))
                tempSet.append(newInstance)
            self.instanceSet = tempSet

            # NOTE(review): the check reads self.errorLogger while the loop
            # reports from the local errorLogger created above -- confirm
            # which keeper is meant to collect the format errors.
            if self.errorLogger.getNumErrors() > 0:
                for k in range(0, errorLogger.getNumErrors()):
                    errorLogger.getError(k).printErrorInfo()

            # In train mode the statistics are finished once all rows are read.
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.finishStatistics(Attributes)
            # Close the stream.
            instance_parser.close()
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" + str(e))

Ejemplo n.º 23

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def has_real_attributes(self):
     """Return the result of ``Attributes.hasRealAttributes`` for this object."""
     has_real = Attributes.hasRealAttributes(self)
     return has_real

Ejemplo n.º 24

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def parseHeader(self, parser, isTrain):
        """Parse the header lines delivered by ``parser`` up to ``@data``.

        Every line before the ``@data`` marker is appended to ``self.header``.
        ``self.attHeader`` keeps the header as it was just before the first
        ``@inputs`` / ``@outputs`` line (or the whole header when neither is
        present). In train mode the relation name, the attributes and the
        input/output names are registered; finally
        ``processInputsAndOutputs`` is called with what was found.

        :param parser: object whose ``getLines()`` returns all file lines.
        :param isTrain: True when the training set is being read.
        """
        inputAttrNames = []
        outputAttrNames = []
        inputsDef = False
        outputsDef = False
        self.header = ""
        lineCount = 0
        self.attHeader = None

        lines = parser.getLines()
        # Kept so the data lines can be read later from the same list.
        self.data_lines = lines

        for line in lines:
            line = str(line).strip()
            # Case-insensitive marker check: the previous code lower-cased the
            # constant instead of the line, so "@DATA" never ended the header.
            if line.lower() == "@data":
                break

            lineCount = lineCount + 1
            if "@relation" in line:
                if isTrain:
                    relationName = str(line.replace("@relation", "")).strip()
                    # NOTE(review): passes this set, not Attributes, as the
                    # first argument -- confirm against Attributes.
                    Attributes.setRelationName(self, relationName)
            elif "@attribute" in line:
                if isTrain:
                    self.insertAttribute(line)
            elif "@inputs" in line:
                self.attHeader = self.header
                inputsDef = True
                # Everything after the "@inputs " prefix.
                aux = line[8:]
                if isTrain:
                    self.insertInputOutput(aux, lineCount, inputAttrNames, "inputs", isTrain)
            elif "@outputs" in line:
                if self.attHeader is None:
                    self.attHeader = self.header
                outputsDef = True
                # Second whitespace-separated token is the output name.
                sub_line = line.split()
                aux = sub_line[1]
                if isTrain:
                    self.insertInputOutput(aux, lineCount, outputAttrNames, "outputs", isTrain)

            self.header += line + "\n"

        if self.attHeader is None:
            self.attHeader = self.header
        self.processInputsAndOutputs(isTrain, inputsDef, outputsDef, outputAttrNames, inputAttrNames)

Ejemplo n.º 25

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

 def get_names(self):
     """Return the names of the first ``self.ninputs`` input attributes."""
     return [
         Attributes.getInputAttribute(Attributes, position).getName()
         for position in range(self.ninputs)
     ]

Ejemplo n.º 26

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def readSet(self, fileName, isTrain, file_path):
        """Read a dataset file: parse its header and load its instances.

        The file is looked up at ``<file_path>/dataset/<fileName>``. The
        header is parsed with ``parseHeader`` and every remaining line that
        carries no header tag becomes an ``Instance`` in ``self.instanceSet``.
        Any exception is caught and reported on stdout.

        :param fileName: name of the dataset file inside the dataset folder.
        :param isTrain: True when the training set is being read; the
            attribute statistics are then initialised and finished here.
        :param file_path: folder that contains the ``dataset`` directory; a
            string or a path-like object.
        """
        # Imported locally because the file's import block is not in view.
        import os

        print("Before try in readSet of InstanceSet, fileName is :" + str(fileName) + ".")
        print("Opening the file in readSet of InstanceSet: " + str(fileName) + ".")
        try:
            errorLogger = FormatErrorKeeper()
            self.data_folder = file_path
            # os.path.join uses the platform separator and, with str(), also
            # accepts pathlib paths, which the old "+" concatenation with
            # hard-coded backslashes could not handle.
            self.file_to_open = os.path.join(str(file_path), "dataset", str(fileName))
            print("In readSet,file_to_open is:" + str(self.file_to_open))
            instance_parser = InstanceParser(self.file_to_open, isTrain)
            # Read @relation, @attribute, @inputs and @outputs.
            print("In readSet finished read file " + str(self.file_to_open))
            self.parseHeader(instance_parser, isTrain)
            print(" The number of output attributes is: " + str(Attributes.getOutputNumAttributes(Attributes)))
            # The attribute statistics are only initialised in train mode.
            print("In readSet, isTrain is " + str(isTrain))
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                print("Begin Attributes.initStatistics......")
                Attributes.initStatistics(Attributes)

            print("Reading the data")
            tempSet = []
            print("begin instance_parser.getLines()...... ")
            lines = self.data_lines
            print("*********  There are : " + str(len(lines)) + "In original Data lines ********* ")
            # Data lines are the ones that carry none of the header tags.
            headerTags = ("@relation", "@attribute", "@inputs", "@outputs", "@data")
            new_data_lines = [line for line in lines
                              if not any(tag in line for tag in headerTags)]
            for line in new_data_lines:
                print("Data line: " + str(line))
                newInstance = Instance()
                newInstance.setThreeParameters(line, isTrain, len(tempSet))
                tempSet.append(newInstance)
            self.instanceSet = tempSet

            # NOTE(review): the check reads self.errorLogger while the loop
            # reports from the local errorLogger created above -- confirm
            # which keeper is meant to collect the format errors.
            if self.errorLogger.getNumErrors() > 0:
                for k in range(0, errorLogger.getNumErrors()):
                    errorLogger.getError(k).printErrorInfo()

            # In train mode the statistics are finished once all lines are read.
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.finishStatistics(Attributes)
            # Close the stream.
            instance_parser.close()
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" + str(e))

Ejemplo n.º 27

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FARCHD_Negative_Rules

 def __init__(self):
     """Create an InstanceSet and an Attributes object and store both."""
     instances = InstanceSet()
     self.instance_set = instances
     definitions = Attributes()
     self.attributes = definitions

Ejemplo n.º 28

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FARCHD_Negative_Rules

class MyDataSet:
    # Number to represent type of variable real or double.
    REAL = 0
    # *Number to represent type of variable integer.*
    INTEGER = 1
    # *Number to represent type of variable nominal.*
    NOMINAL = 2

    x_array = []  # examples array
    missing_array = []  # possible missing values
    output_integer_array = [
    ]  # output of the data - set as integer values private
    output_real_array = []  # output of the data - set as double values
    output_array = []  # output of the data - set as string values
    emax_array = []  # max value of an attribute private
    emin_array = []  # min value of an attribute

    ndata = None  # Number of examples
    nvars = None  # Numer of variables
    ninputs = None  # Number of inputs
    nclasses = None  # Number of outputs

    instance_set = None  # The whole instance set
    stdev_array = []
    average_array = []  # standard deviation and average of each attribute
    instances_cl = []

    # nominal  attributes bool array
    nominal_array = []
    # integer   attributes int array
    integer_array = []

    frequent_class_array = []
    attributes = None

    #  *Init a new set of instances

    def __init__(self):
        """Create an InstanceSet and an Attributes object and store both."""
        instances = InstanceSet()
        self.instance_set = instances
        definitions = Attributes()
        self.attributes = definitions

    # '''
    #    * Outputs an array of examples with their corresponding attribute values.
    #    * @return double[][] an array of examples with their corresponding attribute values
    #  '''
    def get_x(self):
        """Return the array of examples (``x_array``)."""
        examples = self.x_array
        return examples

    def set_x(self, x_parameter):
        """Replace the array of examples with ``x_parameter``."""
        examples = x_parameter
        self.x_array = examples

    # '''
    #    * Output a specific example
    #    * @param pos int position (id) of the example in the data-set
    #    * @return double[] the attributes of the given example
    # '''
    def get_example(self, pos):
        """Return the example stored at position ``pos`` of ``x_array``."""
        examples = self.x_array
        return examples[pos]

    # * Returns the output of the data-set as integer values
    # * @return int[] an array of integer values corresponding to the output values of the dataset

    def get_output_as_integer(self):
        """Return a copy of the integer output values of the data-set."""
        return list(self.output_integer_array)

    #    * Returns the output of the data-set as real values
    #    * @return double[] an array of real values corresponding to the output values of the dataset

    def get_output_as_real(self):
        """Return a copy of the real output values of the data-set.

        The values are taken from ``output_real_array``; the previous
        implementation sized the result from that array but filled it from
        ``output_integer_array``.

        :return: a new list with the real output values.
        """
        return list(self.output_real_array)

    #    * Returns the output of the data-set as nominal values
    #    * @return String[] an array of nominal values corresponding to the output values of the dataset
    #

    def get_output_as_string(self):
        """Return a copy of the output values stored as strings."""
        return list(self.output_array)

    #    * It returns the output value of the example "pos"
    #    * @param pos int the position (id) of the example
    #    * @return String a string containing the output value

    def get_output_as_string_with_pos(self, pos):
        """Return the string output value of the example at ``pos``."""
        outputs = self.output_array
        return outputs[pos]

    #    * It returns the output value of the example "pos"
    #    * @param pos int the position (id) of the example
    #    * @return int an integer containing the output value

    def get_output_as_integer_with_pos(self, pos):
        """Return the integer output value of the example at ``pos``."""
        outputs = self.output_integer_array
        return outputs[pos]

    def set_output_integer_array(self, integer_array):
        """Replace the integer output values with ``integer_array``."""
        outputs = integer_array
        self.output_integer_array = outputs

    def set_output_array(self, output_array):
        """Replace the string output values with ``output_array``."""
        outputs = output_array
        self.output_array = outputs

    #    * It returns the output value of the example "pos"
    #    * @param pos int the position (id) of the example
    #    * @return double a real containing the output value

    def get_output_as_real_with_pos(self, pos):
        """Return the real output value of the example at ``pos``."""
        outputs = self.output_real_array
        return outputs[pos]

        # *It returns an array with the maximum values of the attributes
        # * @ return double[] an array with the maximum values of the attributes
        #

    def get_emax(self):
        """Return the array with the maximum value of each attribute.

        NOTE(review): the visible part of read_classification_set fills
        ``self.emax`` rather than ``self.emax_array`` -- confirm that this
        getter reads the intended field.
        """
        maxima = self.emax_array
        return maxima

        # *It returns an array with the minimum values of the attributes
        # * @ return double[] an array with the minimum values of the attributes

    def get_emin(self):
        """Return the array with the minimum value of each attribute.

        NOTE(review): the visible part of read_classification_set fills
        ``self.emin`` rather than ``self.emin_array`` -- confirm that this
        getter reads the intended field.
        """
        minima = self.emin_array
        return minima

    # *It returns the maximum value of the given attribute
    # *
    # * @ param variable the index of the attribute
    # * @ return the maximum value of the given attribute

    def get_max(self, variable):
        """Return the maximum value stored for the attribute ``variable``."""
        maxima = self.emax_array
        return maxima[variable]

    # *It returns the minimum value of the given attribute
    #
    # * @ param variable the index of the attribute
    # * @ return the minimum value of the given attribute

    def get_min(self, variable):
        """Return the minimum value stored for the attribute ``variable``."""
        minima = self.emin_array
        return minima[variable]

    # *It gets the size of the data - set
    # * @ return int the number of examples in the data - set

    def get_ndata(self):
        """Return the number of examples in the data-set."""
        example_count = self.ndata
        return example_count

    def set_ndata(self, ndata):
        """Store ``ndata`` as the number of examples."""
        example_count = ndata
        self.ndata = example_count

    # *It gets the number of variables of the data - set(including the output)
    # * @ return int the number of variables of the data - set(including the output)

    # modified at 2020-08-14
    def get_nvars(self):
        """Return the stored number of variables (``nvars``)."""
        variable_count = self.nvars
        return variable_count

    #    * It gets the number of input attributes of the data-set
    #    * @return int the number of input attributes of the data-set

    def get_ninputs(self):
        """Return the stored number of input attributes (``ninputs``)."""
        input_count = self.ninputs
        return input_count

    def set_ninputs(self, ninputs_value):
        """Store ``ninputs_value`` as the number of input attributes."""
        input_count = ninputs_value
        self.ninputs = input_count

    #    * It gets the number of output attributes of the data-set (for example number of classes in classification)
    #    * @return int the number of different output values of the data-set

    def get_nclasses(self):
        """Return the stored number of classes (``nclasses``)."""
        class_count = self.nclasses
        return class_count

    def set_nclasses(self, nclasses_value):
        """Store ``nclasses_value`` as the number of classes."""
        class_count = nclasses_value
        self.nclasses = class_count

    # added by rui for granularity rule generation
    def calculate_nclasses_for_small_granularity_zone(self,
                                                      output_integer_array):
        """Count the distinct values found in ``output_integer_array``.

        Values are compared with ``==`` against those already seen, in order
        of first appearance.

        :param output_integer_array: sequence of output (class) values.
        :return: the number of different values in the sequence.
        """
        distinct_values = []
        for label in output_integer_array:
            already_seen = any(known == label for known in distinct_values)
            if not already_seen:
                distinct_values.append(label)
        return len(distinct_values)

    #  * This function checks if the attribute value is missing
    #  * @param i int Example id
    #  * @param j int Variable id
    #  * @return boolean True is the value is missing, else it returns false

    def is_missing(self, i, j):
        """Return the missing-value flag of example ``i``, variable ``j``."""
        example_flags = self.missing_array[i]
        return example_flags[j]

    #  * It reads the whole input data-set and it stores each example and its associated output value in
    #  * local arrays to ease their use.
    #  * @param datasetFile String name of the file containing the dataset
    #  * @param train boolean It must have the value "true" if we are reading the training data-set
    #  * @throws IOException If any problem occurs while reading the data-set

    def read_classification_set(self, dataset_file, train, file_path):
        try:
            # Load in memory a dataset that contains a classification problem
            print("Inside read_classification_set, datasetFile :" +
                  str(dataset_file))
            # print("train is :" + str(train))
            # print("object instanceSet is :" + str(self.instance_set))

            if self.instance_set is None:
                print("self.instance_set is Null")
            else:
                no_outputs = None
                print("self.instance_set is not None, train = " + str(train))
                self.instance_set.read_set(dataset_file, train, file_path)
                print(
                    "begin getNumInstances ...... in read_classification_set ")
                self.ndata = self.instance_set.getNumInstances()
                print(
                    "In readCread_classification_setlassificationSet , self.ndata is : "
                    + str(self.ndata))
                self.ninputs = self.attributes.getInputNumAttributes()
                print("In read_classification_set , self.ninputs is : " +
                      str(self.ninputs))
                self.nvars = self.ninputs + self.attributes.getOutputNumAttributes(
                )
                print("In read_classification_set , self.nvars is : " +
                      str(self.nvars))

                # outputInteger check that there is only one output variable
                if self.attributes.getOutputNumAttributes() > 1:
                    outAttrs = self.attributes.getOutputAttributes()
                    # print("Output Attributes number is bigger than 1")
                    i = 1
                    for outAtt in outAttrs:
                        # print("Att" + str(i) + str(outAtt.getName()))
                        i = i + 1
                    # print("" + Attributes.getOutputAttributesHeader(Attributes))
                    print(
                        "This algorithm can not process MIMO datasets !!! exit 1"
                    )
                    # print("All outputs but the first one will be removed")
                    exit(1)
                no_outputs = False
                if self.attributes.getOutputNumAttributes() < 1:
                    print(
                        "This algorithm can not process datasets without outputs !!!!!!"
                    )
                    # print("Zero-valued output generated")
                    no_outputs = True
                    exit(1)

                # print("define all the array in MyDataSet class......")
                # Initialice and fill our own tables
                # print("The two dimension array X, dimension 1 is :" + str(self.ndata) + " ,Dimension 2 is :" + str(self.ninputs))

                ndata_length = self.ndata
                ninput_length = self.ninputs
                print("nDataLength = " + str(ndata_length))
                # print("nInputLength = " + str(nInputLength))
                # [[0 for j in range(m)] for i in range(n)] first column, then row

                self.x_array = [[0.0 for y in range(ninput_length)]
                                for x in range(ndata_length)]

                self.missing_array = [[True for y in range(ninput_length)]
                                      for x in range(ndata_length)]

                self.nominal_array = [True for x in range(ninput_length)]
                self.integer_array = [True for x in range(ninput_length)]

                self.output_integer_array = [0 for x in range(ndata_length)]

                self.output_real_array = [0.0 for x in range(ndata_length)]
                self.output_array = ["" for x in range(ndata_length)]

                # Maximum and minimum of inputs
                self.emax = [0.0 for x in range(ninput_length)]
                self.emin = [0.0 for x in range(ninput_length)]

                for i in range(0, ninput_length):

                    attribute_instance = self.attributes.getInputAttribute(i)

                    if attribute_instance.getNumNominalValues() > 0:
                        self.emin[i] = 0
                        self.emax[i] = self.attributes.getInputAttribute(
                            i).getNumNominalValues() - 1
                    else:
                        self.emax[i] = self.attributes.getAttributeByPos(
                            i).getMaxAttribute()
                        self.emin[i] = self.attributes.getAttributeByPos(
                            i).getMinAttribute()

                    if attribute_instance.getType() == Attribute.NOMINAL:
                        self.nominal_array[i] = True
                        self.integer_array[i] = False
                    elif attribute_instance.getType() == Attribute.INTEGER:
                        self.nominal_array[i] = False
                        self.integer_array[i] = True
                    else:
                        self.nominal_array[i] = False
                        self.integer_array[i] = False

                    # print("self.emax[n]:" + str(self.emax[n]))
                    # print("self.emin[n]:" + str(self.emin[n]))
                # All values are casted into double/integer

                self.nclasses = 0
                for i in range(0, ndata_length):
                    inst = self.instance_set.getInstance(i)
                    for j in range(0, ninput_length):
                        input_Numeric_Value = self.instance_set.getInputNumericValue(
                            i, j)
                        # # print("self.x_array [i] = " + str(i) + ",[j] = " + str(j) + ",input_Numeric_Value:" + str(
                        #  input_Numeric_Value))

                        self.x_array[i][
                            j] = input_Numeric_Value  # inst.getInputRealValues(j);
                        # # print("after get self.x_array[i][j]")
                        self.missing_array[i][
                            j] = inst.getInputMissingValuesWithPos(j)
                        # # print("after self.missing_array[i][j]")
                        if self.missing_array[i][j]:
                            self.x_array[i][j] = self.emin[j] - 1

                    if no_outputs:
                        # print("no_outputs==True")
                        self.output_integer_array[i] = 0
                        # elf.output_real_array[i] = 0.0
                        self.output_array[i] = ""
                    else:
                        # print("no_outputs==False")
                        self.output_integer_array[
                            i] = self.instance_set.getOutputNumericValue(i, 0)
                        # print(" 202001-1 self.output_integer_array[ "+str(i)+"]"+ str( self.output_integer_array[i]))
                        # self.output_real_array[i] = self.instance_set.getOutputNumericValue(i, 0)
                        # print("self.output_integer_array[" + str(i) + "] = " + str(self.output_integer_array[i]))
                        self.output_array[
                            i] = self.instance_set.getOutputNominalValue(i, 0)
                    # print(" 202001-1 self.output_integer_array[ " + str(i) + "]" + str(self.output_integer_array[i]))
                    if self.output_integer_array[i] > self.nclasses:
                        self.nclasses = self.output_integer_array[i]

                self.nclasses = self.nclasses + 1
                print('Number of classes=' + str(self.nclasses))
        except Exception as error:
            print(
                "read_classification_set: Exception in readSet, in read_classification_set:"
                + str(error))

        # self.computeStatistics()
        self.compute_instances_per_class()

    #   * It reads the whole input data-set and it stores each example and its associated output value in
    #   * local arrays to ease their use.
    #   * @param datasetFile String name of the file containing the dataset
    #   * @param train boolean It must have the value "true" if we are reading the training data-set
    #   * @throws IOException If any problem occurs while reading the data-set

    # added by rui for granularity rule generation
    def read_classification_set_from_data_row_array(self, data_row_array):
        """Refresh the data-set summaries from an in-memory array of rows.

        The rows are handed, unchanged, first to
        ``compute_statistics_data_row_array`` and then to
        ``compute_instances_perclass_data_row_array``.

        :param data_row_array: the rows to process
        """
        rows = data_row_array
        self.compute_statistics_data_row_array(rows)
        self.compute_instances_perclass_data_row_array(rows)

    def readRegressionSet(self, datasetFile, train, file_path):
        """Load a regression data-set and copy it into the local arrays.

        After ``self.instance_set.readSet`` has parsed the file, this fills
        ``x_array``, ``missing_array``, ``emax_array``/``emin_array``,
        ``output_real_array`` and ``output_integer_array``. A missing input is
        stored as ``emin_array[j] - 1``. ``self.computeStatistics()`` is
        always called at the end.

        The process exits with status 1 when the data-set does not have
        exactly one output attribute. Any other exception is reported on
        standard output and loading stops at that point.

        :param datasetFile: name of the file containing the data-set
        :param train: forwarded to ``readSet`` (True for the training set)
        :param file_path: forwarded to ``readSet``
        """
        try:
            # Load in memory a dataset that contains a regression problem
            self.instance_set.readSet(datasetFile, train, file_path)
            self.ndata = self.instance_set.getNumInstances()
            self.ninputs = self.attributes.getInputNumAttributes()
            self.nvars = (self.ninputs +
                          self.attributes.getOutputNumAttributes())

            # Check that there is only one output variable
            if self.attributes.getOutputNumAttributes() > 1:
                exit(1)

            noOutputs = False
            if self.attributes.getOutputNumAttributes() < 1:
                noOutputs = True
                print("noOutputs = True, exit 1 !!!!!")
                exit(1)

            # Initialise and fill our own tables
            self.x_array = [[0.0 for y in range(self.ninputs)]
                            for x in range(self.ndata)]
            self.missing_array = [[False for y in range(self.ninputs)]
                                  for x in range(self.ndata)]
            self.output_integer_array = [0 for x in range(self.ndata)]
            # The real outputs are written by index below, so the list must
            # be sized up front just like the integer one.
            self.output_real_array = [0.0 for x in range(self.ndata)]

            # Maximum and minimum of inputs
            self.emax_array = [None for x in range(self.ninputs)]
            self.emin_array = [None for x in range(self.ninputs)]
            for i in range(0, self.ninputs):
                attribute = self.attributes.getAttributeByPos(i)
                self.emax_array[i] = attribute.getMaxAttribute()
                self.emin_array[i] = attribute.getMinAttribute()

            # All values are cast into double / integer
            self.nclasses = 0

            for i in range(0, self.ndata):
                inst = self.instance_set.getInstance(i)
                for j in range(0, self.ninputs):
                    self.x_array[i][
                        j] = self.instance_set.getInputNumericValue(i, j)
                    self.missing_array[i][j] = inst.getInputMissingValues(j)
                    if self.missing_array[i][j]:
                        # Flag a missing value with one below the minimum
                        self.x_array[i][j] = self.emin_array[j] - 1

                if noOutputs:
                    print("noOutputs self.output_real_array[i]" + str(i) +
                          "is 0 ")
                    self.output_real_array[i] = 0
                    self.output_integer_array[i] = 0
                else:
                    print("noOutputs else part:")
                    self.output_real_array[
                        i] = self.instance_set.getOutputNumericValue(i, 0)
                    print("self.output_real_array[i]" + str(i) +
                          str(self.output_real_array[i]))
                    self.output_integer_array[i] = int(
                        self.output_real_array[i])
        except OSError as error:
            print("OS error: {0}".format(error))
        except Exception as otherException:
            print(" In readRegressionSet other Exception  is :" +
                  str(otherException))

        self.computeStatistics()

    # *It copies the header of the dataset
    # * @ return String A string containing all the data - set information

    def copy_header(self):
        """Build the header of the data-set as one string.

        The pieces are, in order: the ``@relation`` line, the input and
        output attribute headers, the input header line, the output header
        line and a closing ``@data`` line.

        :return: the assembled header text
        """
        attrs = self.attributes
        pieces = [
            "@relation " + attrs.getRelationName() + "\n",
            attrs.getInputAttributesHeader(),
            attrs.getOutputAttributesHeader(),
            attrs.getInputHeader() + "\n",
            attrs.getOutputHeader() + "\n",
            "@data\n",
        ]
        return "".join(pieces)

    #    * It transform the input space into the [0,1] range

    def normalize(self):
        """Rescale every non-missing input value into the [0, 1] range.

        Input ``j`` of each example is replaced in place by
        ``(x - emin_array[j]) / (emax_array[j] - emin_array[j])``. Values
        flagged by ``isMissing`` are left untouched. An attribute whose
        minimum equals its maximum has no spread, so its values become 0.0
        instead of triggering a division by zero.
        """
        atts = self.getn_inputs()

        # One scale factor per input, computed once.
        scales = []
        for j in range(atts):
            width = self.emax_array[j] - self.emin_array[j]
            scales.append(1.0 / width if width != 0 else 0.0)

        for i in range(self.get_ndata()):
            row = self.x_array[i]
            for j in range(atts):
                if not self.isMissing(i, j):  # missing values are ignored
                    # The minima are stored in emin_array, the same list
                    # the scale factors above were derived from.
                    row[j] = (row[j] - self.emin_array[j]) * scales[j]

    # * It checks if the data-set has any real value
    # * @return boolean True if it has some real values, else false.

    def has_real_attributes(self):
        """Report whether the data-set has any real-valued attribute.

        The answer comes from ``Attributes.hasRealAttributes``, which is
        called with this object as its argument.

        :return: whatever ``Attributes.hasRealAttributes`` returns
        """
        has_real = Attributes.hasRealAttributes(self)
        return has_real

    #    * It checks if the data-set has any numerical (integer or real) value
    #    * @return boolean True if it has some integer or real values, else false.

    def has_numerical_attributes(self):
        """Report whether the data-set has any integer or real attribute.

        ``Attributes.hasIntegerAttributes`` is asked first; the real-valued
        check is only made when the integer check is falsy.

        :return: the first truthy answer, otherwise the answer of
            ``Attributes.hasRealAttributes``
        """
        integer_answer = Attributes.hasIntegerAttributes(self)
        if integer_answer:
            return integer_answer
        return Attributes.hasRealAttributes(self)

    #    * It checks if the data-set has any missing value
    #    * @return boolean True if it has some missing values, else false.

    def has_missing_attributes(self):
        """Report whether any example of the data-set has a missing value.

        :return: True when ``size_without_missing()`` is smaller than the
            number of examples given by ``get_ndata()``
        """
        complete_rows = self.size_without_missing()
        total_rows = self.get_ndata()
        return complete_rows < total_rows

    #    * It return the size of the data-set without having account the missing values
    #    * @return int the size of the data-set without having account the missing values

    def size_without_missing(self):
        """Count the examples that have no missing input value.

        Every input position ``0 .. ninputs - 1`` of every example is checked
        with ``is_missing``; an example is counted only when none of them is
        missing. With zero inputs every example counts.

        :return: the number of examples without missing values
        """
        tam = 0
        for i in range(self.ndata):
            # any() stops at the first missing value of the row
            if not any(self.is_missing(i, j) for j in range(self.ninputs)):
                tam += 1
        return tam

    #    * It returns the number of examples
    #    *
    #    * @return the number of examples

    def size(self):
        """Return the number of examples stored in ``self.ndata``."""
        example_count = self.ndata
        return example_count

    #    * It computes the average and standard deviation of the input attributes

    def compute_statistics(self):
        """Compute the average and standard deviation of every variable.

        Fills ``self.average_array`` and ``self.stdev_array`` with one slot
        per variable reported by ``get_nvars()``. The first ``getn_inputs()``
        slots describe the inputs held in ``self.x_array``; the last slot
        describes ``self.output_real_array``. Missing input values are left
        out of the sums, but the divisor is always the number of examples.

        The input deviations are computed whether or not
        ``output_real_array`` is empty.

        Any exception is caught and reported on standard output, so a failure
        can leave the two arrays partially filled.
        """
        try:
            print("Begin computeStatistics......")
            var_num = self.get_nvars()
            print("varNum = " + str(var_num))
            self.stdev_array = [0.0 for x in range(var_num)]
            self.average_array = [0.0 for x in range(var_num)]

            input_num = self.getn_inputs()
            data_num = self.get_ndata()
            print("inputNum = " + str(input_num) + ",dataNum = " +
                  str(data_num))

            # Average of each input attribute.
            for i in range(0, input_num):
                self.average_array[i] = 0
                for j in range(0, data_num):
                    if not self.isMissing(j, i):
                        self.average_array[i] += self.x_array[j][i]
                if data_num != 0:
                    self.average_array[i] /= data_num

            # Average of the output, kept in the last slot.
            average_length = len(self.average_array)
            print(" average_length is " + str(average_length))
            last = average_length - 1
            output_num = len(self.output_real_array)
            self.average_array[last] = 0
            if output_num == 0:
                print("len(self.output_real_array) is  0")
            else:
                for value in self.output_real_array:
                    self.average_array[last] += value
                self.average_array[last] /= output_num

            # Standard deviation of each input attribute. This does not
            # depend on the outputs, so it runs even when there are none.
            print("before the loop for inputNum")
            for i in range(0, input_num):
                sum_value = 0.0
                for j in range(0, data_num):
                    if not self.isMissing(j, i):
                        diff = self.x_array[j][i] - self.average_array[i]
                        sum_value += diff * diff
                if data_num != 0:
                    sum_value /= data_num
                self.stdev_array[i] = math.sqrt(sum_value)

            # Standard deviation of the output, kept in the last slot.
            sum_value = 0.0
            for value in self.output_real_array:
                diff = value - self.average_array[last]
                sum_value += diff * diff
            if output_num != 0:
                sum_value /= output_num
            self.stdev_array[len(self.stdev_array) - 1] = math.sqrt(sum_value)
            print("sum is :" + str(sum_value) + "  self.stdev_array :" +
                  str(self.stdev_array))
        except Exception as error:
            print("Exception in computeStatistics : " + str(error))

    #    * It return the standard deviation of an specific attribute
    #    * @param position int attribute id (position of the attribute)
    #    * @return double the standard deviation  of the attribute

    def std_dev(self, position):
        """Return the entry of ``self.stdev_array`` at ``position``.

        :param position: index of the attribute
        :return: the stored standard deviation for that index
        """
        deviation = self.stdev_array[position]
        return deviation

    #    * It return the average of an specific attribute
    #    * @param position int attribute id (position of the attribute)
    #    * @return double the average of the attribute

    def average(self, position):
        """Return the entry of ``self.average_array`` at ``position``.

        :param position: index of the attribute
        :return: the stored average for that index
        """
        mean_value = self.average_array[position]
        return mean_value

    #     *It computes the number of examples per class

    def compute_instances_per_class(self):
        """Count the examples of each class and their relative frequency.

        ``self.instances_cl[c]`` receives the number of examples whose entry
        in ``self.output_integer_array`` is ``c``. ``self.frequent_class_array[c]``
        receives that count divided by the number of examples, or 0 when the
        data-set is empty. Both lists have ``self.nclasses`` entries.
        """
        self.instances_cl = [0] * self.nclasses
        data_num = self.get_ndata()

        for i in range(data_num):
            self.instances_cl[self.output_integer_array[i]] += 1

        # Compare by value: identity tests on integers are not reliable.
        if data_num == 0:
            self.frequent_class_array = [0] * self.nclasses
        else:
            self.frequent_class_array = [
                1.0 * count / data_num for count in self.instances_cl
            ]

    #     *It returns the number of examples for a given class
    #     * @ param clas int the class label id
    #     * @ return int the number of examples
    #     for the class

    def number_instances(self, clas):
        """Return the number of examples counted for one class.

        :param clas: the class label id, used as index into ``self.instances_cl``
        :return: the stored count for that class
        """
        count = self.instances_cl[clas]
        return count

    # /**
    #  * It returns the number of labels for a nominal attribute
    #  * @param attribute int the attribute position in the data-set
    #  * @return int the number of labels for the attribute
    #  */
    #

    def number_values(self, attribute):
        """Return the number of labels of a nominal input attribute.

        The attribute is looked up through ``self.attributes``, the same way
        the other methods of this class reach their input attributes.

        :param attribute: position of the input attribute in the data-set
        :return: the value of ``getNumNominalValues()`` for that attribute
        """
        return self.attributes.getInputAttribute(
            attribute).getNumNominalValues()

    #    * It returns the class label (string) given a class id (int)
    #    * @param intValue int the class id
    #    * @return String the corrresponding class label
    #

    #    * It returns the class label (string) given a class id (int)
    #    * @param intValue int the class id
    #    * @return String the corrresponding class label

    def get_output_value(self, int_value):
        """Return the class label (string) for a class id.

        The first output attribute is obtained with
        ``Attributes.getOutputAttribute(Attributes, 0)`` and asked for the
        nominal value at ``int_value``.

        :param int_value: the class id
        :return: the result of ``getNominalValue(int_value)``
        """
        return Attributes.getOutputAttribute(Attributes,
                                             0).getNominalValue(int_value)

    #  * It returns the type of the variable
    #  * @param variable int the variable id
    #  * @return int a code for the type of the variable (INTEGER, REAL or NOMINAL)

    def get_type(self, variable):
        """Return this class's type code for a variable.

        The attribute at ``variable`` is fetched from ``self.attributes`` and
        its ``getType()`` result is compared with the ``INTEGER``, ``REAL``
        and ``NOMINAL`` constants read from that same attribute object.

        :param variable: position of the variable
        :return: ``self.INTEGER``, ``self.REAL`` or ``self.NOMINAL``; 0 when
            the attribute type matches none of them
        """
        att = self.attributes.getAttributeByPos(variable)
        att_type = att.getType()

        if att_type == att.INTEGER:
            return self.INTEGER
        if att_type == att.REAL:
            return self.REAL
        if att_type == att.NOMINAL:
            return self.NOMINAL
        return 0

    #  * It returns the discourse universe for the input and output variables
    #  * @return double[][] The minimum [0] and maximum [1] range of each variable
    def set_nvars(self, nvar_value):
        """Store ``nvar_value`` in ``self.nvars``.

        :param nvar_value: the new value for the number of variables
        """
        self.nvars = nvar_value

    # modified at 2020-08-14
    def get_ranges(self):
        """Return the [minimum, maximum] range of every variable.

        The result has ``get_nvars()`` rows of two values. For an input with
        nominal values the range is ``0.0 .. number_of_values - 1``; for any
        other input it is the attribute's own minimum and maximum. The last
        row is then set to the range of output attribute 0.

        :return: list of ``[min, max]`` pairs
        """
        total_vars = self.get_nvars()
        bounds = [[0.0, 0.0] for _ in range(total_vars)]

        for pos in range(self.get_ninputs()):
            att = self.attributes.getInputAttribute(pos)
            label_count = att.getNumNominalValues()
            if label_count > 0:
                low, high = 0.0, label_count - 1
            else:
                low = att.getMinAttribute()
                high = att.getMaxAttribute()
            bounds[pos][0] = low
            bounds[pos][1] = high

        # The output range always goes into the last row.
        output_att = self.attributes.getOutputAttribute(0)
        bounds[total_vars - 1][0] = output_att.getMinAttribute()
        bounds[total_vars - 1][1] = output_att.getMaxAttribute()
        return bounds

    def get_granularity_zone_ranges(self, data_set_x_array):
        """Return the [minimum, maximum] range of every variable for a zone.

        The result has ``get_nvars()`` rows of two values. For an input with
        nominal values the range is ``0.0 .. number_of_values - 1``; for any
        other input the bounds come from the attribute's
        ``get_min_granularity_attribute`` / ``get_max_granularity_attribute``
        called with ``data_set_x_array`` and the input position. The last row
        is set to the range of output attribute 0.

        :param data_set_x_array: data forwarded to the granularity bound
            helpers of each attribute
        :return: list of ``[min, max]`` pairs
        """
        total_vars = self.get_nvars()
        zone_ranges = [[0.0, 0.0] for _ in range(total_vars)]

        for pos in range(self.getn_inputs()):
            att = Attributes.getInputAttribute(Attributes, pos)
            label_count = att.getNumNominalValues()
            if label_count > 0:
                low, high = 0.0, label_count - 1
            else:
                low = att.get_min_granularity_attribute(data_set_x_array, pos)
                high = att.get_max_granularity_attribute(
                    data_set_x_array, pos)
            zone_ranges[pos][0] = low
            zone_ranges[pos][1] = high

        output_att = self.attributes.getOutputAttribute(0)
        last_min_value = output_att.getMinAttribute()
        last_max_value = output_att.getMaxAttribute()
        zone_ranges[total_vars - 1][0] = last_min_value
        zone_ranges[total_vars - 1][1] = last_max_value
        return zone_ranges

    #    * It returns the attribute labels for the input features
    #    * @return String[] the attribute labels for the input features

    def get_names(self):
        """Return the names of the input attributes, printing each one.

        :return: list with ``getName()`` of every input attribute, in order
        """
        names = []
        for pos in range(self.ninputs):
            label = self.attributes.getInputAttribute(pos).getName()
            names.append(label)
            print(" attributes' names[" + str(pos) + "]:" + label)
        return names

    #    * It returns the class labels
    #    * @return String[] the class labels

    def get_classes(self):
        """Return the class labels of the data-set.

        :return: list with the nominal value of output attribute 0 for every
            class id from 0 to ``self.nclasses - 1``
        """
        return [
            self.attributes.getOutputAttribute(0).getNominalValue(class_id)
            for class_id in range(self.nclasses)
        ]

    def is_nominal(self, index_i):
        """Return the entry of ``self.nominal_array`` at ``index_i``.

        :param index_i: position of the input attribute
        """
        nominal_flag = self.nominal_array[index_i]
        return nominal_flag

    def is_integer(self, index_i):
        """Return the entry of ``self.integer_array`` at ``index_i``.

        :param index_i: position of the input attribute
        """
        integer_flag = self.integer_array[index_i]
        return integer_flag

    def get_frequent_class(self, class_value):
        """Print the class index and return its stored frequency.

        :param class_value: index into ``self.frequent_class_array``
        :return: the entry stored for that class
        """
        print(f"class_value{class_value!s}")
        frequency = self.frequent_class_array[class_value]
        return frequency

    """

     * It gets the number of input attributes of the data-set
     * @return int the number of input attributes of the data-set
    """

    def get_ninputs(self):
        """Return the number of input attributes stored in ``self.ninputs``."""
        input_count = self.ninputs
        return input_count

    """
     * It returns the ratio of instances of the given class in the dataset
     *
     * @param clas the index of the class
     * @return the ratio of instances of the given class in the dataset
    """

    def frecuent_class(self, class_value):
        """Return the stored frequency of one class.

        :param class_value: index into ``self.frequent_class_array``
        :return: the entry stored for that class
        """
        ratio = self.frequent_class_array[class_value]
        return ratio

    def get_X(self):
        """Return a new NumPy array built from ``self.x_array``."""
        input_matrix = np.array(self.x_array)
        return input_matrix

    def get_y(self, type_name='integer'):
        """Return the outputs as a new NumPy array.

        :param type_name: ``"real"`` selects ``self.output_real_array``,
            ``"integer"`` (the default) selects ``self.output_integer_array``,
            and any other value selects ``self.output_array``
        :return: NumPy array built from the selected list
        """
        if type_name == "real":
            selected = self.output_real_array
        elif type_name == "integer":
            selected = self.output_integer_array
        else:
            selected = self.output_array
        return np.array(selected)

    def copyHeader(self):
        """Build the header of the data-set as one string.

        The text is the ``@relation`` line, the input and output attribute
        headers, the input header line, the output header line and a final
        ``@data`` line, in that order.

        :return: the assembled header text
        """
        attrs = self.attributes
        header = "@relation " + attrs.getRelationName() + "\n"
        header = (header + attrs.getInputAttributesHeader() +
                  attrs.getOutputAttributesHeader())
        header = header + attrs.getInputHeader() + "\n"
        header = header + attrs.getOutputHeader() + "\n"
        return header + "@data\n"

Ejemplo n.º 29

0

Mostrar archivo

Archivo: MyDataSet.py Proyecto: minminmail/FarcHD_scikit

    def readRegressionSet(self, datasetFile, train, file_path):
        """Load a regression data-set and copy it into the local arrays.

        After ``self.instance_set.readSet`` has parsed the file, this fills
        ``x_array``, ``missing_array``, ``emax_array``/``emin_array``,
        ``output_real_array`` and ``output_integer_array``. Attribute
        information is read through the ``Attributes`` class. A missing input
        is stored as ``emin_array[j] - 1``. ``self.computeStatistics()`` is
        always called at the end.

        The process exits with status 1 when the data-set does not have
        exactly one output attribute. Any other exception is reported on
        standard output and loading stops at that point.

        :param datasetFile: name of the file containing the data-set
        :param train: forwarded to ``readSet`` (True for the training set)
        :param file_path: forwarded to ``readSet``
        """
        try:
            # Load in memory a dataset that contains a regression problem
            self.instance_set.readSet(datasetFile, train, file_path)
            self.ndata = self.instance_set.getNumInstances()
            self.ninputs = Attributes.getInputNumAttributes(Attributes)
            self.nvars = self.ninputs + Attributes.getOutputNumAttributes(
                Attributes)

            # Check that there is only one output variable
            if Attributes.getOutputNumAttributes(Attributes) > 1:
                exit(1)

            noOutputs = False
            if Attributes.getOutputNumAttributes(Attributes) < 1:
                noOutputs = True
                print("noOutputs = True, exit 1 !!!!!")
                exit(1)

            # Initialise and fill our own tables
            self.x_array = [[0.0 for y in range(self.ninputs)]
                            for x in range(self.ndata)]
            self.missing_array = [[False for y in range(self.ninputs)]
                                  for x in range(self.ndata)]
            self.output_integer_array = [0 for x in range(self.ndata)]
            # The real outputs are written by index below, so the list must
            # be sized up front just like the integer one.
            self.output_real_array = [0.0 for x in range(self.ndata)]

            # Maximum and minimum of inputs
            self.emax_array = [None for x in range(self.ninputs)]
            self.emin_array = [None for x in range(self.ninputs)]
            for i in range(0, self.ninputs):
                attribute = Attributes.getAttributeByPos(Attributes, i)
                self.emax_array[i] = attribute.getMaxAttribute()
                self.emin_array[i] = attribute.getMinAttribute()

            # All values are cast into double / integer
            self.nclasses = 0

            for i in range(0, self.ndata):
                inst = self.instance_set.getInstance(i)
                for j in range(0, self.ninputs):
                    self.x_array[i][
                        j] = self.instance_set.getInputNumericValue(i, j)
                    self.missing_array[i][j] = inst.getInputMissingValues(j)
                    if self.missing_array[i][j]:
                        # Flag a missing value with one below the minimum
                        self.x_array[i][j] = self.emin_array[j] - 1

                if noOutputs:
                    print("noOutputs self.output_real_array[i]" + str(i) +
                          "is 0 ")
                    self.output_real_array[i] = 0
                    self.output_integer_array[i] = 0
                else:
                    print("noOutputs else part:")
                    self.output_real_array[
                        i] = self.instance_set.getOutputNumericValue(i, 0)
                    print("self.output_real_array[i]" + str(i) +
                          str(self.output_real_array[i]))
                    self.output_integer_array[i] = int(
                        self.output_real_array[i])
        except OSError as error:
            print("OS error: {0}".format(error))
        except Exception as otherException:
            print(" In readRegressionSet other Exception  is :" +
                  str(otherException))

        self.computeStatistics()

Ejemplo n.º 30

0

Mostrar archivo

Archivo: InstanceSet.py Proyecto: minminmail/Chi_negative_rules_python

    def insertAttribute(self, line):
        """Parse one ``@attribute`` header line and register the attribute.

        Three shapes of line are handled:

        * ``@attribute name {a, b, c}`` - nominal attribute with fixed values;
        * ``@attribute name integer|real [min, max]`` - numeric attribute,
          the bounds being applied when a ``[...]`` part is present;
        * ``@attribute name`` - nominal attribute without declared values.

        The resulting ``Attribute`` is added through
        ``Attributes.addAttribute``.

        :param line: the attribute definition line
        :raises ValueError: if an opening ``{`` or ``[`` has no closing
            counterpart, or a bound is not a number
        :raises IndexError: if the line has no attribute name, or the bounds
            do not contain two comma-separated values
        """
        # Put a tab in front of the value definition so the "<name> <type>"
        # part can be split off. A line with neither "{" nor "[" has no
        # value definition and is left as it is.
        if "{" in line:
            token_str = "{"
        elif "[" in line:
            token_str = "["
        else:
            token_str = None
        if token_str is not None:
            line = line.replace(token_str, "\t" + token_str)

        # First part: attribute name (and type); the rest is the definition
        st = line.split("\t")
        st[0] = st[0].replace("@attribute", "").strip()
        first_part = st[0].split()
        declared_type = first_part[1].lower() if len(first_part) > 1 else ""

        at = Attribute()
        at.setName(first_part[0])
        print("att set name as first_part[0] is:" + first_part[0])

        if "{" in line:
            # Nominal attribute with an explicit list of values
            at.setType(Attribute.NOMINAL)
            at.setFixedBounds(True)

            indexL = line.index("{") + 1
            indexR = line.index("}")
            print("indexL : " + str(indexL) + "indexR : " + str(indexR))
            for nominalStr in line[indexL:indexR].split(","):
                at.addNominalValue(nominalStr.strip())

        elif token_str == "[" or declared_type in ("integer", "real"):
            # Integer or real attribute, with optional bounds
            if declared_type == "integer":
                at.setType(Attribute.INTEGER)
            elif declared_type == "real":
                at.setType(Attribute.REAL)

            # find() returns -1 when there is no "[", so bounds are optional
            indexL = line.find("[")
            if indexL != -1:
                indexR = line.index("]")
                st2 = line[indexL + 1:indexR].split(",")
                minBound = float(st2[0].strip())
                maxBound = float(st2[1].strip())
                at.setBounds(minBound, maxBound)

        else:
            # Nominal attribute with no definition of values
            at.setType(Attribute.NOMINAL)

        Attributes.addAttribute(Attributes, at)