Ejemplo n.º 1
0
# Build the information-decay graph from the ego/alter test files.
# NOTE(review): dataDir, eCsvReader, the ego/alter index arrays, egoFileName,
# alterFileName and missing are not defined in this excerpt - they must come
# from earlier in the script.
egoTestFileName = dataDir + "EgoInfoTest.csv"
alterTestFileName = dataDir + "AltersInfoTest.csv"
decayGraph = eCsvReader.findInfoDecayGraph(egoTestFileName, alterTestFileName, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR, egoFileName, alterFileName, missing)

logging.info("Size of decay graph: " + str(decayGraph.getNumVertices()))
logging.info("Number of edges: " + str(decayGraph.getNumEdges()))

# Save the decay graph to disk (the original author describes the output as a
# "simplegraph" file).
decayFileName = dataDir + "EgoAlterDecays.dat"
decayGraph.save(decayFileName)
logging.info("Wrote decays to file " + decayFileName)

# Write out the transmissions twice: the full examples list and a random
# sub-sample of sampleSize examples drawn without replacement.
# NOTE(review): examplesList is defined outside this excerpt; presumably it
# must hold at least sampleSize examples for the sampling to succeed - confirm
# against Util.sampleWithoutReplacement.
sampleSize = 1000
indices = Util.sampleWithoutReplacement(sampleSize, examplesList.getNumExamples())
examplesListSample = examplesList.getSubExamplesList(indices)

# No file extension is given here - presumably writeToMatFile appends one.
outputFileName1 = dataDir + "EgoAlterTransmissions"
outputFileName2 = dataDir + "EgoAlterTransmissions1000"

examplesList.writeToMatFile(outputFileName1)
examplesListSample.writeToMatFile(outputFileName2)


# Also prepare the data for a csv file for analysis: fetch the default
# examples field and the labels field ...
X = examplesList.getDataField(examplesList.getDefaultExamplesName())
y = examplesList.getDataField(examplesList.getLabelsName())

# ... and append the labels to the examples as extra trailing column(s).
X = numpy.c_[X, y]
Ejemplo n.º 2
0
    def generateNonReceivers(self, egoArray, numContactsIndices, homophileIndexPairs, receiverCounts):
        """
        Generate a series of non-receivers for every ego, sampled from egoArray
        itself and split between "homophile" (similar) and other egos.

        For each ego the number of contacts to generate is the sum over
        numContactsIndices of int(2 * egoArray[i, index]), minus
        receiverCounts[i], floored at 0. A share round(self.p * total) of them
        is drawn from the homophile candidates and the remainder from all other
        egos; both draws use Util.sampleWithoutReplacement and are capped by the
        number of candidates available. The ego itself is never a candidate.

        Inputs
        ------
        egoArray - the 2D array of all egos (one row per ego)
        numContactsIndices - a list of indices of the number of various contacts (friends, family etc.)
        homophileIndexPairs - a list of pairs. The first is the index of the homophility and the second
        is the index of the variable.
        receiverCounts - the number of receivers for each ego

        Outputs
        -------
        contactsArray - the array of non-receivers (rows copied from egoArray)
        egoIndices - the corresponding 1D integer array of ego indices
        alterIndices - the corresponding 1D integer array giving the row of
        egoArray that each non-receiver was copied from
        """
        (numEgos, numEgoFeatures) = (egoArray.shape[0], egoArray.shape[1])
        allIndices = numpy.arange(numEgos)

        # Per-ego pieces are collected in lists and joined once at the end;
        # growing the arrays with numpy.r_ on every iteration copies all of the
        # previous rows each time. The leading empty arrays keep the shapes and
        # dtypes well defined when nothing is generated.
        contactChunks = [numpy.zeros((0, numEgoFeatures))]
        egoIndexChunks = [numpy.zeros(0, numpy.int32)]  # index of the ego for each contact
        alterIndexChunks = [numpy.zeros(0, numpy.int32)]  # index of the alter for each contact

        logging.info("Generating non-receivers for " + str(numEgos) + " egos")

        # Original author's note: assume number of contacts above 9 is just 10
        # (final category).
        # NOTE(review): the factor of 2 applied to each stored contact value
        # below is not explained anywhere in this method - confirm the encoding.
        for i in range(numEgos):
            Util.printIteration(i, self.printIterationStep, numEgos)

            totalContacts = 0
            for contactIndex in numContactsIndices:
                totalContacts += int(egoArray[i, contactIndex] * 2)

            # Contacts who already are receivers are not generated again.
            totalContacts = max(0, totalContacts - receiverCounts[i])

            # Every ego except the current one is a potential contact.
            otherIndices = numpy.setdiff1d(allIndices, numpy.array([i]))

            # Narrow the candidates down to egos that share the value of each
            # variable on which this ego is flagged as homophilous.
            # NOTE(review): the candidate set is only seeded when the FIRST
            # pair's flag is set (j == 0); if only later flags are set the
            # intersection below stays empty. Kept as in the original since
            # changing it alters the sampling - confirm this is intended.
            homophileIndices = numpy.array([], int)  # numpy.int was removed in NumPy 1.24

            for j, pair in enumerate(homophileIndexPairs):
                if egoArray[i, pair[0]] == 1:
                    if j == 0:
                        homophileIndices = otherIndices

                    subset = numpy.nonzero(egoArray[:, pair[1]] == egoArray[i, pair[1]])[0]
                    homophileIndices = numpy.intersect1d(homophileIndices, subset)

            nonHomophileIndices = numpy.setdiff1d(otherIndices, homophileIndices)

            numHomophileContacts = min(int(round(self.p * totalContacts)), homophileIndices.shape[0])
            numNonHomophilesContacts = min(totalContacts-numHomophileContacts, nonHomophileIndices.shape[0])
            numContacts = numHomophileContacts + numNonHomophilesContacts
            tempContacts = numpy.zeros((numContacts, numEgoFeatures))

            # Add homophiles (sampled first so the order of the random draws is
            # the same as before)
            perm = Util.sampleWithoutReplacement(numHomophileContacts, homophileIndices.shape[0])
            tempContacts[0:numHomophileContacts, :] = egoArray[homophileIndices[perm], :]
            alterIndexChunks.append(homophileIndices[perm])

            # Add non homophiles
            perm = Util.sampleWithoutReplacement(numNonHomophilesContacts, nonHomophileIndices.shape[0])
            tempContacts[numHomophileContacts:numContacts, :] = egoArray[nonHomophileIndices[perm], :]
            alterIndexChunks.append(nonHomophileIndices[perm])

            # Built as integers: numpy.ones(n) * i is float64, which silently
            # turned the returned index array into floats.
            egoIndexChunks.append(numpy.full(numContacts, i, dtype=numpy.int32))
            contactChunks.append(tempContacts)

        contactsArray = numpy.concatenate(contactChunks, axis=0)
        egoIndices = numpy.concatenate(egoIndexChunks)
        alterIndices = numpy.concatenate(alterIndexChunks)

        logging.info("Done - generated " + str(egoIndices.shape[0]) + " non-receivers")
        return contactsArray, egoIndices, alterIndices