def testGenerateIndicatorVertices(self):
    """
    Check the statistics of the vertices returned by
    EgoGenerator.generateIndicatorVertices.

    A large sample is generated and stacked into a matrix with one row per
    vertex. The test then asserts that:

    * every entry is integer valued,
    * the mean and variance of the first two columns, as returned by
      Util.computeMeanVar, are close to the requested values,
    * the mean of the last column is close to p,
    * a non-symmetric variance matrix is rejected with ValueError.
    """
    egoGenerator = EgoGenerator()
    numVertices = 500000
    means = numpy.array([1, 10])
    # Named varMatrix rather than "vars" so the builtin vars() is not shadowed
    varMatrix = numpy.array([[5, 1], [1, 2]])
    p = 0.1

    vList = egoGenerator.generateIndicatorVertices(numVertices, means, varMatrix, p)

    # One row per vertex; each vertex has one more entry than there are means
    X = numpy.zeros((numVertices, means.shape[0]+1))
    for i in range(numVertices):
        X[i, :] = vList.getVertex(i)

    (means2, vars2) = Util.computeMeanVar(X)

    # Casting to int32 must not change any value, i.e. all entries are integers
    self.assertTrue((X.astype(numpy.int32) == X).all())
    # assertAlmostEqual is used because the assertAlmostEquals alias was
    # removed from unittest in Python 3.12
    self.assertAlmostEqual(numpy.linalg.norm(means2[0:2] - means), 0, places=1)
    self.assertAlmostEqual(numpy.linalg.norm(vars2[0:2][:, 0:2] - varMatrix), 0, places=0)
    self.assertAlmostEqual(p, means2[2], places=2)

    #Try non-symmetric variance matrix
    varMatrix = numpy.array([[5, 1], [8, 2]])
    self.assertRaises(ValueError, egoGenerator.generateIndicatorVertices, numVertices, means, varMatrix, p)
def checkDistributions(): matFileName = "../../data/EgoAlterTransmissions.mat" examplesList = ExamplesList.readFromMatFile(matFileName) numFeatures = examplesList.getDataFieldSize("X", 1) X = examplesList.getDataField("X")[:, 0:numFeatures/2] Z = examplesList.getDataField("X")[:, numFeatures/2:numFeatures] y = examplesList.getDataField("y") A = Z[y==-1, :] #Now load directly from the CSV file #Learn the distribution of the egos eCsvReader = EgoCsvReader() egoFileName = "../../data/EgoData.csv" alterFileName = "../../data/AlterData.csv" egoQuestionIds = eCsvReader.getEgoQuestionIds() alterQuestionIds = eCsvReader.getAlterQuestionIds() (X2, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds) X2[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(X2[:, eCsvReader.ageIndex]) (mu, sigma) = Util.computeMeanVar(X) (mu2, sigma2) = Util.computeMeanVar(X2) (mu3, sigma3) = Util.computeMeanVar(Z) (mu4, sigma4) = Util.computeMeanVar(A) #Seems okay. Next check alters print(("Mean " + str(mu - mu4))) print(("Variance " + str(numpy.diag(sigma - sigma4)))) """ Analysis between the Egos in EgoData.csv and those in EgoAlterTransmissions.mat reveals that the distributions match closely. The main differences are in the means and variances in Q44A - D, but this isn't too suprising. """ """
def testGenerateIndicatorVertices2(self):
    # Check EgoGenerator.generateIndicatorVertices2, which takes per-feature
    # minimum and maximum arrays in addition to means, variance matrix and p.
    #
    # Cases covered:
    #   * wide bounds: integer entries, mean/variance/p close to the requested
    #     values, all samples inside [mins, maxs]
    #   * non-symmetric variance matrix -> ValueError
    #   * a minimum larger than the matching maximum -> ValueError
    #   * minimum equal to maximum -> that feature always takes that value
    #   * narrow bounds: mean/variance/p still close, samples inside bounds
    #
    # NOTE: documented with comments rather than a docstring because this
    # method currently sits inside a triple-quoted string (it is disabled),
    # and a nested docstring delimiter would end that string early.
    egoGenerator = EgoGenerator()

    def sampleMatrix(numVertices, means, varMatrix, p, mins, maxs):
        # Generate the vertices and stack them as the rows of one array;
        # each vertex has one more entry than there are means
        vList = egoGenerator.generateIndicatorVertices2(numVertices, means, varMatrix, p, mins, maxs)
        X = numpy.zeros((numVertices, means.shape[0]+1))
        for i in range(numVertices):
            X[i, :] = vList.getVertex(i)
        return X

    numVertices = 500000
    means = numpy.array([1, 10])
    # Named varMatrix rather than "vars" so the builtin vars() is not shadowed
    varMatrix = numpy.array([[5, 1], [1, 2]])
    mins = numpy.array([-1000, -1000])
    maxs = numpy.array([1000, 1000])
    p = 0.1

    X = sampleMatrix(numVertices, means, varMatrix, p, mins, maxs)
    (means2, vars2) = Util.computeMeanVar(X)

    # Casting to int32 must not change any value, i.e. all entries are integers
    self.assertTrue((X.astype(numpy.int32) == X).all())
    # assertAlmostEqual is used because the assertAlmostEquals alias was
    # removed from unittest in Python 3.12
    self.assertAlmostEqual(numpy.linalg.norm(means2[0:2] - means), 0, places=1)
    self.assertAlmostEqual(numpy.linalg.norm(vars2[0:2][:, 0:2] - varMatrix), 0, places=0)
    self.assertAlmostEqual(p, means2[2], places=2)
    self.assertTrue((X[:, 0:2].min(0) >= mins).all())
    self.assertTrue((X[:, 0:2].max(0) <= maxs).all())

    #Try non-symmetric variance matrix
    varMatrix = numpy.array([[5, 1], [8, 2]])
    self.assertRaises(ValueError, egoGenerator.generateIndicatorVertices2, numVertices, means, varMatrix, p, mins, maxs)

    #Test min > max
    varMatrix = numpy.array([[5, 1], [1, 2]])
    mins = numpy.array([-2, 6])
    maxs = numpy.array([10, 5])
    self.assertRaises(ValueError, egoGenerator.generateIndicatorVertices2, numVertices, means, varMatrix, p, mins, maxs)

    #Test min == max
    numVertices = 1000
    varMatrix = numpy.array([[5, 1], [1, 2]])
    mins = numpy.array([-2, 5])
    maxs = numpy.array([10, 5])
    vList = egoGenerator.generateIndicatorVertices2(numVertices, means, varMatrix, p, mins, maxs)

    for i in range(numVertices):
        self.assertEqual(vList.getVertex(i)[1], 5)

    #Try a new example with small range of min and max - check the mean and var
    numVertices = 500000
    means = numpy.array([1, 10])
    varMatrix = numpy.array([[2, 0], [0, 2]])
    mins = numpy.array([-3, 6])
    maxs = numpy.array([5, 14])
    p = 0.1

    X = sampleMatrix(numVertices, means, varMatrix, p, mins, maxs)
    (means2, vars2) = Util.computeMeanVar(X)

    self.assertAlmostEqual(numpy.linalg.norm(means2[0:2] - means), 0, places=1)
    self.assertAlmostEqual(numpy.linalg.norm(vars2[0:2][:, 0:2] - varMatrix), 0, places=0)
    self.assertAlmostEqual(p, means2[2], places=2)

    logging.debug((X[:, 0:2].min(0)))
    logging.debug((X[:, 0:2].max(0)))

    self.assertTrue((X[:, 0:2].min(0) >= mins).all())
    self.assertTrue((X[:, 0:2].max(0) <= maxs).all())