# --- Start training the decision tree ---

# trainResultsArray holds binary labels (1 = Yes, 0 = No), so its sum is
# the count of positive training examples.
numLabelY = np.sum(trainResultsArray)
numLabelN = numTrainData - numLabelY

# Root node: carries the training/test data plus the attribute metadata.
trainTree = Tree(trainDataArray, trainResultsArray, classL, testDataArray, testResultsArray)

# Record the [positives, negatives] label distribution at the root.
trainTree.setLabelDist([numLabelY, numLabelN])

# Per-attribute counts of 1's (column sums), then empirical probabilities
# of a "Yes" for each attribute and for the label.
numAttrYArray = trainDataArray.sum(axis=0)
pAttrYArray = numAttrYArray / numTrainData
pLabelY = numLabelY / numTrainData

# Entropy per attribute via the helper H (defined elsewhere in this file —
# presumably binary entropy of a probability; TODO confirm), then the
# entropy of the label itself.
HAttrArray = np.zeros([classL.numAttr])
for attrIdx in range(classL.numAttr):
    HAttrArray[attrIdx] = H(pAttrYArray[attrIdx])
HLabel = H(pLabelY)

# For the root node, MI(Y;X) = H(Y) - H(Y|X), with X = label, Y = attributes.
# Next step: pick the attribute with maximum mutual information.