Esempio n. 1
0
    def testReadFile(self): 
        eCsvReader = EgoCsvReader()
        #logging.debug(os.getcwd())
        dir = PathDefaults.getDataDir()
        fileName = dir + "test/TestData.csv"
        questionIds = [("Q14", 0), ("Q12", 1) , ("Q2", 0)]

        missing = 1
        (X, titles) = eCsvReader.readFile(fileName, questionIds, missing)
        
        X2 = numpy.zeros((10, 3))
        X2[0, :] = [0.621903386,0.608560354,0.33290608]
        X2[1, :] = [0.318548924,0.402390713,0.129956291]
        X2[2, :] = [0.956658404,0.344317772,0.680386616]
        X2[3, :] = [0.267607668,0.119647983,0.116893619]
        X2[4, :] = [0.686589498,0.402390713,0.426789174]
        X2[5, :] = [0.373575769,0.025846789,0.797125005]
        X2[6, :] = [0.493793948,0.402390713,0.990507109]
        X2[7, :] = [0.524534585,0.525169385,0.772917183]
        X2[8, :] = [0.339055395,0.402390713,0.684788001]
        X2[9, :] = [0.997774183,0.790801992,0.643252009]
        
        self.assertAlmostEquals(numpy.linalg.norm(X-X2),0, places=6)
Esempio n. 2
0
def checkDistributions():
    matFileName = "../../data/EgoAlterTransmissions.mat"
    examplesList = ExamplesList.readFromMatFile(matFileName)

    numFeatures = examplesList.getDataFieldSize("X", 1)
    X = examplesList.getDataField("X")[:, 0:numFeatures/2]
    Z = examplesList.getDataField("X")[:, numFeatures/2:numFeatures]
    y = examplesList.getDataField("y")
    A = Z[y==-1, :]

    #Now load directly from the CSV file
    #Learn the distribution of the egos
    eCsvReader = EgoCsvReader()
    egoFileName = "../../data/EgoData.csv"
    alterFileName = "../../data/AlterData.csv"
    egoQuestionIds = eCsvReader.getEgoQuestionIds()
    alterQuestionIds = eCsvReader.getAlterQuestionIds()
    (X2, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds)
    X2[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(X2[:, eCsvReader.ageIndex])

    (mu, sigma) = Util.computeMeanVar(X)
    (mu2, sigma2) = Util.computeMeanVar(X2)
    (mu3, sigma3) = Util.computeMeanVar(Z)
    (mu4, sigma4) = Util.computeMeanVar(A)

    #Seems okay. Next check alters
    print(("Mean " + str(mu - mu4)))
    print(("Variance " + str(numpy.diag(sigma - sigma4))))

    """
    Analysis between the Egos in EgoData.csv and those in EgoAlterTransmissions.mat
    reveals that the distributions match closely. The main differences are
    in the means and variances in Q44A - D, but this isn't too suprising.
    """

    """
Esempio n. 3
0
    
    #checkDistributions()

    """
    We will read ego and alters data, and check they have the same values.
    """

    egoFileName = "../../data/EgoData.csv"
    alterFileName = "../../data/AlterData.csv"

    eCsvReader = EgoCsvReader()
    egoQuestionIds = eCsvReader.getEgoQuestionIds()
    alterQuestionIds = eCsvReader.getAlterQuestionIds()

    missing = 0 
    (egoX, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds, missing)
    egoX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(egoX[:, eCsvReader.ageIndex])

    (alterX, titles) = eCsvReader.readFile(alterFileName, alterQuestionIds, missing)
    alterX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(alterX[:, eCsvReader.ageIndex])

    numFeatures = egoX.shape[1]
    numEgoExamples = egoX.shape[0]
    numAlterExamples = alterX.shape[0]

    for i in range(0, numFeatures):
        (histE, uniqElementsE) = Util.histogram(egoX[:, i])
        (histA, uniqElementsA) = Util.histogram(alterX[:, i])

        print((str(i) + " " + str(egoQuestionIds[i])))
        print(("Ego   " + str(uniqElementsE)))
Esempio n. 4
0
    def testReadFiles(self):
        p = 0.5
        eCsvReader = EgoCsvReader()
        eCsvReader.setP(p)

        dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
        egoFileName = dataDir + "EgoData3.csv"
        alterFileName = dataDir + "AlterData10.csv"
        examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR  = eCsvReader.readFiles(egoFileName, alterFileName)
        #logging.debug(examplesList.getDataField("X"))
        
        #Read in the ego and alter arrays 
        (egoArray, _) = eCsvReader.readFile(egoFileName, eCsvReader.getEgoQuestionIds())
        (alterArray, _) = eCsvReader.readFile(alterFileName, eCsvReader.getAlterQuestionIds())
        
        #Make up the correct results 
        numFeatures = examplesList.getDataFieldSize("X", 1)
        numPersonFeatures = numFeatures/2 

        #Note: no alters in this case 
        numTransmissons = 6
        X2 = numpy.zeros((numTransmissons, numFeatures))
        y2 = numpy.zeros((numTransmissons, 1))
        
        X2[0, 0:numPersonFeatures] = egoArray[0, :]
        X2[0, numPersonFeatures:numFeatures] = egoArray[1, :]
        y2[0, 0] = -1
        
        X2[1, 0:numPersonFeatures] = egoArray[0, :]
        X2[1, numPersonFeatures:numFeatures] = egoArray[2, :]
        y2[1, 0] = -1
        
        X2[2, 0:numPersonFeatures] = egoArray[1, :]
        X2[2, numPersonFeatures:numFeatures] = egoArray[0, :]
        y2[2, 0] = -1
        
        X2[3, 0:numPersonFeatures] = egoArray[1, :]
        X2[3, numPersonFeatures:numFeatures] = egoArray[2, :]
        y2[3, 0] = -1
        
        X2[4, 0:numPersonFeatures] = egoArray[2, :]
        X2[4, numPersonFeatures:numFeatures] = egoArray[0, :]
        y2[4, 0] = -1
        
        X2[5, 0:numPersonFeatures] = egoArray[2, :]
        X2[5, numPersonFeatures:numFeatures] = egoArray[1, :]
        y2[5, 0] = -1

        self.assertTrue((X2 == examplesList.getDataField("X")).all())
        self.assertTrue((y2 == examplesList.getDataField("y")).all())



        #Second test
        #================
        #I modified EgoData3 so that person 2 is the same age as person 1, and
        # hence a homophile of 1. She (2) is excluded from the non-receivers, since
        #she is a homophile of person 1.

        p = 0
        eCsvReader = EgoCsvReader()
        eCsvReader.setP(p)

        examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR  = eCsvReader.readFiles(egoFileName, alterFileName)

        numTransmissons = 5
        X2 = numpy.zeros((numTransmissons, numFeatures))
        y2 = numpy.zeros((numTransmissons, 1))

        X2[0, 0:numPersonFeatures] = egoArray[0, :]
        X2[0, numPersonFeatures:numFeatures] = egoArray[2, :]
        y2[0, 0] = -1

        X2[1, 0:numPersonFeatures] = egoArray[1, :]
        X2[1, numPersonFeatures:numFeatures] = egoArray[0, :]
        y2[1, 0] = -1

        X2[2, 0:numPersonFeatures] = egoArray[1, :]
        X2[2, numPersonFeatures:numFeatures] = egoArray[2, :]
        y2[2, 0] = -1

        X2[3, 0:numPersonFeatures] = egoArray[2, :]
        X2[3, numPersonFeatures:numFeatures] = egoArray[0, :]
        y2[3, 0] = -1

        X2[4, 0:numPersonFeatures] = egoArray[2, :]
        X2[4, numPersonFeatures:numFeatures] = egoArray[1, :]
        y2[4, 0] = -1

        self.assertTrue((X2 == examplesList.getDataField("X")).all())
        self.assertTrue((y2 == examplesList.getDataField("y")).all())