Example #1
0
    def saveParams(paramsFile):
        """
        Run model selection for the ego and alter ridge regressors and save
        the selected parameters. Only need to do this once for the data.

        The graph is loaded from InfoExperiment.getGraphFileName() and reduced
        to the subgraph induced by the leading trees returned by findTrees(),
        taking trees until more than 10000 vertex indices are collected. Model
        selection is run first with a linear kernel over a grid of the two
        lambda values, then with a Gaussian kernel over the two lambdas and
        sigma. The parameter set with the lower error is evaluated and saved.

        :param paramsFile: name of the file the parameters are saved to. When
            None, InfoExperiment.getParamsFileName() is used instead.
        """
        graphFileName = InfoExperiment.getGraphFileName()
        graph = SparseGraph.load(graphFileName)

        logging.info("Find all ego networks")
        trees = graph.findTrees()
        subgraphSize = 10000
        subgraphIndices = []

        for i, tree in enumerate(trees):
            subgraphIndices.extend(tree)

            if len(subgraphIndices) > subgraphSize:
                # i is a zero-based index, so i + 1 trees have been taken.
                logging.info("Chose %d ego networks.", i + 1)
                break

        graph = graph.subgraph(subgraphIndices)
        logging.info("Taking random subgraph of size %s", graph.getNumVertices())

        folds = 3
        sampleSize = graph.getNumEdges()

        # Candidate values searched during model selection.
        lambda1s = 2.0**numpy.arange(-8, -2)
        lambda2s = 2.0**numpy.arange(-8, -2)
        sigmas = 2.0**numpy.arange(-6, 0)

        logging.info("lambda1s = %s", lambda1s)
        logging.info("lambda2s = %s", lambda2s)
        logging.info("sigmas = %s", sigmas)

        # Initial values only; model selection overrides lmbda through the
        # setter functions listed in paramFuncs.
        lmbda = 0.1
        alpha = 5.0  # Note that this is not the same as that used by the errorFunc
        kernel = LinearKernel()
        alterRegressor = PrimalWeightedRidgeRegression(lmbda, alpha)
        egoRegressor = KernelRidgeRegression(kernel, lmbda)
        predictor = EgoEdgeLabelPredictor(alterRegressor, egoRegressor)

        simulator = EgoNetworkSimulator(graph, predictor)
        errorFunc = Evaluator.weightedRootMeanSqError

        # First just use the linear kernel. Each entry of paramList lines up
        # positionally with the setters in paramFuncs.
        paramFuncs = [egoRegressor.setLambda, alterRegressor.setLambda]
        paramList = [[lambda1, lambda2]
                     for lambda1 in lambda1s
                     for lambda2 in lambda2s]

        params, paramFuncs, error = simulator.modelSelection(
            paramList, paramFuncs, folds, errorFunc, sampleSize)

        # Now try the RBF kernel
        kernel = GaussianKernel()
        egoRegressor.setKernel(kernel)

        paramFuncs2 = [egoRegressor.setLambda, alterRegressor.setLambda, kernel.setSigma]
        paramList2 = [[lambda1, lambda2, sigma]
                      for lambda1 in lambda1s
                      for lambda2 in lambda2s
                      for sigma in sigmas]

        params2, paramFuncs2, error2 = simulator.modelSelection(
            paramList2, paramFuncs2, folds, errorFunc, sampleSize)

        # Keep whichever kernel gave the lower model selection error.
        if error2 < error:
            params = params2
            paramFuncs = paramFuncs2

        # Use the caller's file name; fall back to the experiment default
        # only when none was supplied.
        if paramsFile is None:
            paramsFile = InfoExperiment.getParamsFileName()

        (means, variances) = simulator.evaluateClassifier(
            params, paramFuncs, folds, errorFunc, sampleSize)

        logging.info("Evaluated classifier with mean errors %s", means)
        simulator.getClassifier().saveParams(params, paramFuncs, paramsFile)
class EgoNetworkSimulatorTest(unittest.TestCase):
    """
    Tests for EgoNetworkSimulator using a random Erdos-Renyi graph whose edge
    labels are a linear function of the features of the second vertex of each
    edge (the alter).
    """

    def setUp(self):
        """Build the labelled random graph, the predictor and the simulator."""
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        # Directory holding the ego/alter CSV files used by the graph tests.
        self.dataDir = PathDefaults.getDataDir() + "infoDiffusion/"

        numVertices = 100
        numFeatures = 5

        # Random weight vector defining the edge labels.
        c = numpy.random.rand(numFeatures)

        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.random.rand(numVertices, numFeatures))
        graph = SparseGraph(vList)

        p = 0.1
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)

        # Now rewrite the edges to have labels which depend only on the alter.
        edges = graph.getAllEdges()

        for i in range(edges.shape[0]):
            edgeLabel = numpy.dot(graph.getVertex(edges[i, 1]), c)
            graph.addEdge(edges[i, 0], edges[i, 1], edgeLabel)

        # Create the predictor
        lmbda = 0.01

        self.alterRegressor = PrimalRidgeRegression(lmbda)
        self.egoRegressor = PrimalRidgeRegression(lmbda)
        predictor = EgoEdgeLabelPredictor(self.alterRegressor, self.egoRegressor)

        self.egoNetworkSimulator = EgoNetworkSimulator(graph, predictor)
        self.graph = graph

    def _generateRandomGraph(self):
        """Generate the small-world random graph shared by the graph tests."""
        egoFileName = self.dataDir + "EgoData.csv"
        alterFileName = self.dataDir + "AlterData.csv"
        numVertices = 1000
        infoProb = 0.1
        graphType = "SmallWorld"
        p = 0.1
        neighbours = 10

        self.egoNetworkSimulator.generateRandomGraph(
            egoFileName, alterFileName, numVertices, infoProb, graphType, p, neighbours)

    def testSampleExamples(self):
        """sampleEdges keeps the vertices and directedness, with numEdges edges."""
        numEdges = 100
        graph = self.egoNetworkSimulator.sampleEdges(numEdges)

        # assertEqual rather than the deprecated alias assertEquals, which is
        # no longer available in recent Python versions.
        self.assertEqual(graph.getNumEdges(), numEdges)
        self.assertEqual(graph.getNumVertices(), self.graph.getNumVertices())
        self.assertEqual(graph.isUndirected(), self.graph.isUndirected())

    def testModelSelection(self):
        """Smoke test: modelSelection runs on a small parameter grid."""
        paramList = [[0.1, 0.1], [0.2, 0.1], [0.1, 0.2]]
        paramFunc = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        folds = 3
        errorFunc = Evaluator.rootMeanSqError
        sampleSize = 100

        params, paramFuncs, errors = self.egoNetworkSimulator.modelSelection(
            paramList, paramFunc, folds, errorFunc, sampleSize)

        logging.debug(params)
        logging.debug(paramFuncs)
        logging.debug(errors)

    def testEvaluateClassifier(self):
        """Smoke test: evaluateClassifier runs for one parameter setting."""
        params = [0.1, 0.1]
        paramFunc = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        folds = 3
        errorFunc = Evaluator.rootMeanSqError
        sampleSize = 100

        (means, variances) = self.egoNetworkSimulator.evaluateClassifier(
            params, paramFunc, folds, errorFunc, sampleSize)

        logging.debug(means)
        logging.debug(variances)

    def testTrainClassifier(self):
        """Smoke test: trainClassifier runs for one parameter setting."""
        params = [0.1, 0.1]
        paramFuncs = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        sampleSize = 100

        self.egoNetworkSimulator.trainClassifier(params, paramFuncs, sampleSize)

    def testGenerateRandomGraph(self):
        """Smoke test: generateRandomGraph runs with the small-world settings."""
        self._generateRandomGraph()

    def testRunSimulation(self):
        """Smoke test: train, generate a random graph, then run the simulation."""
        params = [0.1, 0.1]
        paramFuncs = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        sampleSize = 100

        maxIterations = 5

        self.egoNetworkSimulator.trainClassifier(params, paramFuncs, sampleSize)
        self._generateRandomGraph()
        self.egoNetworkSimulator.runSimulation(maxIterations)