def getNewHeader(self):
    """Build the dataset header text and return it as a single string.

    The header consists of, in order: the ``@relation`` line, one line per
    input attribute, one line for the first output attribute, the input
    header line and the output header line.  Every line ends with ``"\\n"``.

    When ``self.storeAttributesAsNonStatic`` is truthy and
    ``self.attributes`` is not ``None`` the information is read from
    ``self.attributes``; otherwise it is read from the ``Attributes`` class.

    Returns:
        str: the assembled header.
    """
    # Decide once which attribute source to use so the relation/input part
    # and the output part of the header can never come from different places.
    use_instance = (self.storeAttributesAsNonStatic
                    and self.attributes is not None)

    # NOTE(review): the calls below mix two conventions (some pass
    # ``Attributes`` explicitly, some do not).  They are kept exactly as they
    # were because the Attributes API is not visible here -- confirm.
    if use_instance:
        relation = self.attributes.getRelationName()
        input_attrs = self.attributes.getInputAttributes(Attributes)
    else:
        relation = Attributes.getRelationName()
        input_attrs = Attributes.getInputAttributes(Attributes)

    parts = ["@relation " + relation + "\n"]

    # Iterate the sequence directly: Python sequences have no ``.length``.
    for attr in input_attrs:
        parts.append(attr.toString() + "\n")

    # Only the first output attribute is written to the header.
    if use_instance:
        output_attrs = self.attributes.getOutputAttributes()
        parts.append(output_attrs[0].toString() + "\n")
        # @inputs and @outputs lines
        parts.append(self.attributes.getInputHeader() + "\n")
        parts.append(self.attributes.getOutputHeader() + "\n")
    else:
        output_attrs = Attributes.getOutputAttributes()
        parts.append(str(output_attrs[0]) + "\n")
        # @inputs and @outputs lines
        parts.append(Attributes.getInputHeader() + "\n")
        parts.append(Attributes.getOutputHeader() + "\n")

    return "".join(parts)
def readClassificationSet(self, datasetFile, train):
    """Load a classification dataset and copy it into this object's tables.

    The instance set held by this object reads ``datasetFile``; afterwards
    the input values, missing-value flags, class labels and outputs are
    copied into per-example lists, the per-input minimum/maximum are taken
    from ``Attributes``, and the number of classes is set to the largest
    numeric output value plus one.  Finally ``computeStatistics()`` and
    ``computeInstancesPerClass()`` are called.

    Args:
        datasetFile: dataset location, forwarded unchanged to ``readSet``.
        train: flag forwarded unchanged to ``readSet``.

    Raises:
        SystemExit: with code 1 when the dataset declares more than one
            output attribute or none at all.  Any other exception raised
            while loading is caught and only reported on stdout.
    """
    # Function-scope import: ``sys.exit`` is used instead of the ``exit``
    # helper injected by the ``site`` module, which is not always available.
    import sys

    try:
        # Load in memory a dataset that contains a classification problem
        print("Inside readClassificationSet, datasetFile :" + str(datasetFile))
        print("train is :" + str(train))
        print("object instanceSet is :" + str(self.__instanceSet))
        if self.__instanceSet is None:
            print("self.__instanceSet is Null")
        else:
            print("self.__instanceSet is not None, train = " + str(train))
            self.__instanceSet.readSet(datasetFile, train)
        print("begin getNumInstances ...... in readClassificationSet ")
        self.__nData = self.__instanceSet.getNumInstances()
        print("In readClassificationSet , self.__nData is : " +
              str(self.__nData))
        self.__nInputs = Attributes.getInputNumAttributes(Attributes)
        print("In readClassificationSet , self.__nInputs is : " +
              str(self.__nInputs))
        self.__nVars = self.__nInputs + Attributes.getOutputNumAttributes(
            Attributes)
        print("In readClassificationSet , self.__nVars is : " +
              str(self.__nVars))

        # Check that there is only one output variable
        if Attributes.getOutputNumAttributes(Attributes) > 1:
            outAttrs = Attributes.getOutputAttributes(Attributes)
            print("Output Attributes number is bigger than 1")
            # Number the attributes 1, 2, 3, ... in the diagnostic output.
            for position, outAtt in enumerate(outAttrs, start=1):
                print("Att" + str(position) + str(outAtt.getName()))
            print("" + Attributes.getOutputAttributesHeader(Attributes))
            print("This algorithm can not process MIMO datasets")
            print("All outputs but the first one will be removed")
            # SystemExit is not an ``Exception`` subclass, so it passes
            # through the handler below and terminates the caller.
            sys.exit(1)

        noOutputs = False
        if Attributes.getOutputNumAttributes(Attributes) < 1:
            print("This algorithm can not process datasets without outputs")
            print("Zero-valued output generated")
            noOutputs = True
            sys.exit(1)

        print("define all the array in MyDataSet class......")
        # Initialise and fill our own tables
        print("The two dimension array X, dimension 1 is :" +
              str(self.__nData) + " ,Dimension 2 is :" + str(self.__nInputs))
        nDataLength = self.__nData
        nInputLength = self.__nInputs
        print("nDataLength = " + str(nDataLength))
        print("nInputLength = " + str(nInputLength))

        # Tables are indexed [example][input]
        self.__X = [[None for y in range(nInputLength)]
                    for x in range(nDataLength)]
        self.__y = [None for x in range(nDataLength)]
        self.__missing = [[None for y in range(nInputLength)]
                          for x in range(nDataLength)]
        self.__outputInteger = [None for x in range(nDataLength)]
        self.__outputReal = [None for x in range(nDataLength)]
        self.__output = ["" for x in range(nDataLength)]

        # Maximum and minimum of inputs
        self.emax = [0.0 for x in range(nInputLength)]
        self.emin = [0.0 for x in range(nInputLength)]
        for n in range(0, nInputLength):
            self.emax[n] = Attributes.getAttributeByPos(
                Attributes, n).getMaxAttribute()
            self.emin[n] = Attributes.getAttributeByPos(
                Attributes, n).getMinAttribute()
            print("self.emax[n]:" + str(self.emax[n]))
            print("self.emin[n]:" + str(self.emin[n]))

        # Copy every example into the tables
        self.__nClasses = 0
        for i in range(0, nDataLength):
            inst = self.__instanceSet.getInstance(i)
            # add class y from instance to y array here
            self.__y[i] = inst.y_class
            for j in range(0, nInputLength):
                input_Numeric_Value = self.__instanceSet.getInputNumericValue(
                    i, j)
                print("self.__X [i] = " + str(i) + ",[j] = " + str(j) +
                      ",input_Numeric_Value:" + str(input_Numeric_Value))
                self.__X[i][j] = input_Numeric_Value
                print("after get self.__X[i][j]")
                self.__missing[i][j] = inst.getInputMissingValuesWithPos(j)
                print("after self.__missing[i][j]")
                if self.__missing[i][j]:
                    # Missing inputs are stored as a value just below the
                    # attribute's minimum.
                    self.__X[i][j] = self.emin[j] - 1

            if noOutputs:
                print("noOutputs==True")
                self.__outputInteger[i] = 0
                self.__output[i] = ""
            else:
                print("noOutputs==False")
                self.__outputInteger[
                    i] = self.__instanceSet.getOutputNumericValue(i, 0)
                print("self.__outputInteger[" + str(i) + "] = " +
                      str(self.__outputInteger[i]))
                self.__output[i] = self.__instanceSet.getOutputNominalValue(
                    i, 0)

            # Track the largest numeric class value seen so far
            if self.__outputInteger[i] > self.__nClasses:
                self.__nClasses = self.__outputInteger[i]

        # Largest class value + 1 (class values are compared against a
        # starting value of 0 above).
        self.__nClasses = self.__nClasses + 1
        print('Number of classes=' + str(self.__nClasses))
    except Exception as error:
        print(
            "readClassificationSet: Exception in readSet, in readClassificationSet:"
            + str(error))

    # NOTE(review): these run even when loading failed above, in which case
    # the tables may be only partially initialised -- confirm this is intended.
    self.computeStatistics()
    self.computeInstancesPerClass()