    def profileRunExperiment(self):
        def run():
            dataArgs = argparse.Namespace()
            dataArgs.maxIter = 3
            # Set iterStartTimeStamp to None to use all iterations
            # dataArgs.iterStartTimeStamp = None
            dataArgs.iterStartTimeStamp = time.mktime(datetime(2005, 1, 1).timetuple())
            generator = MovieLensDataset(maxIter=dataArgs.maxIter, iterStartTimeStamp=dataArgs.iterStartTimeStamp)

            defaultAlgoArgs = argparse.Namespace()
            defaultAlgoArgs.ks = numpy.array(2**numpy.arange(6, 7, 0.5), dtype=int)
            defaultAlgoArgs.svdAlgs = ["rsvd"]
            defaultAlgoArgs.runSoftImpute = True

            # add_help=False so -h is captured here rather than exiting the program
            dataParser = argparse.ArgumentParser(description="", add_help=False)
            dataParser.add_argument("-h", "--help", action="store_true", help="show this help message and exit")
            devNull, remainingArgs = dataParser.parse_known_args(namespace=dataArgs)

            dataArgs.extendedDirName = "MovieLensDataset"

            recommendExpHelper = RecommendExpHelper(generator.getTrainIteratorFunc, generator.getTestIteratorFunc, remainingArgs, defaultAlgoArgs, dataArgs.extendedDirName)
            recommendExpHelper.printAlgoArgs()
            # os.makedirs(resultsDir, exist_ok=True) is available from Python 3.2
            try:
                os.makedirs(recommendExpHelper.resultsDir)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise

            recommendExpHelper.runExperiment()

        ProfileUtils.profile('run()', globals(), locals())
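
    # ProfileUtils.profile appears throughout these examples: it evaluates the
    # command string in the given namespaces and reports hotspot statistics.
    # A minimal standalone sketch of the same pattern using only the standard
    # library (the apgl version may differ in its output formatting):
    def profileSketch(self):
        import cProfile
        import pstats

        def run():
            sum(i * i for i in range(10**6))   # stand-in workload

        profiler = cProfile.Profile()
        profiler.runctx('run()', globals(), locals())
        pstats.Stats(profiler).sort_stats("cumulative").print_stats(10)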
    def profileIterator(self):

        def run(): 
            subgraphIndicesList = []
            for W in self.iterator: 
                subgraphIndicesList.append(range(W.shape[0])) 

        ProfileUtils.profile('run()', globals(), locals())
    def profileModelSelection(self):
        dataset = ArnetMinerDataset(runLSI=False)
        dataset.overwrite = True
        dataset.overwriteVectoriser = True
        dataset.overwriteModel = True

        dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"

        ProfileUtils.profile('dataset.modelSelection()', globals(), locals())
    def profileMC2(self):
        numVals = 5000
        list1 = numpy.random.permutation(numVals).tolist()
        list2 = numpy.random.permutation(numVals).tolist()
        lists = [list1, list2]

        itemList = numpy.arange(numVals).tolist()

        ProfileUtils.profile('RankAggregator.MC2(lists, itemList)', globals(), locals())
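
    # RankAggregator.MC2 presumably implements the MC2 Markov-chain rank
    # aggregation of Dwork et al. (2001): from the current item, pick one of
    # the input rankings uniformly at random, then move to a uniformly chosen
    # item ranked at least as high in it; sorting by the stationary
    # distribution gives the aggregate ranking.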
    def profileSvd(self):
        n = 5000
        p = 0.1
        L = scipy.sparse.rand(n, n, p)
        L = L.T.dot(L)   # symmetric positive semi-definite test matrix

        k = 50
        q = 2
        ProfileUtils.profile('RandomisedSVD.svd(L, k, q)', globals(), locals())
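
    # RandomisedSVD.svd(L, k, q) presumably follows the randomised
    # range-finder of Halko, Martinsson and Tropp with q power iterations; a
    # minimal sketch of the idea (the library version likely differs, e.g. in
    # oversampling and re-orthogonalisation):
    def randomisedSvdSketch(self, L, k, q, p=10):
        n = L.shape[1]
        omega = numpy.random.randn(n, k + p)    # Gaussian test matrix, p extra columns
        Y = L.dot(omega)
        for i in range(q):                      # power iterations sharpen spectral decay
            Y = L.T.dot(Y)
            Y = L.dot(Y)
        Q, R = numpy.linalg.qr(Y)               # orthonormal basis for the range of L
        B = L.T.dot(Q).T                        # B = Q^T L, keeping the sparse L on the left
        U, s, Vt = numpy.linalg.svd(B, full_matrices=False)
        return Q.dot(U[:, 0:k]), s[0:k], Vt[0:k, :]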
    def profileClusterFromIterator(self):
        iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
        dataDir = PathDefaults.getDataDir() + "cluster/"
        #iterator = getBemolGraphIterator(dataDir)

        def run():
            clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
            print(timeList.cumsum(0))

        ProfileUtils.profile('run()', globals(), locals())
    def profileGreedyMethod2(self):

        n = 1000
        p = 0.1
        graph = igraph.Graph.Erdos_Renyi(n, p)
        print(graph.summary())

        k = 5
        numpy.random.seed(21)
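        # greedyMethod2 presumably implements the greedy influence
        # maximisation of Kempe, Kleinberg and Tardos: repeatedly add the
        # vertex with the largest marginal gain in expected spread, estimated
        # from numRuns Monte Carlo cascade simulations per candidate.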
        ProfileUtils.profile("MaxInfluence.greedyMethod2(graph, k, p=0.5, numRuns=1000)", globals(), locals())
    def profileTrainIterator(self):
        def run():
            dataset = NetflixDataset(maxIter=30)

            trainIterator = dataset.getTrainIteratorFunc()

            for trainX in trainIterator:
                print(trainX.shape)

        ProfileUtils.profile('run()', globals(), locals())
    def profileComputeLDA(self):
        field = "Boosting"
        dataset = ArnetMinerDataset(field)
        dataset.overwrite = True
        dataset.overwriteVectoriser = True
        dataset.overwriteModel = True
        dataset.maxRelevantAuthors = 100
        dataset.k = 200
        dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"

        ProfileUtils.profile('dataset.computeLDA()', globals(), locals())
    def profileDecisionTreeRegressor(self):
        numExamples = 1000
        numFeatures = 20
        minSplit = 10
        maxDepth = 20

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(numExamples, numFeatures)

        # min_split/min_density date from an old scikit-learn API; recent
        # versions spell it min_samples_split and have removed min_density
        regressor = DecisionTreeRegressor(min_samples_split=minSplit, max_depth=maxDepth)

        ProfileUtils.profile('regressor.fit(X, y)', globals(), locals())
    def profileSimulateCascades(self):
        n = 500
        p = 0.1
        graph = igraph.Graph.Erdos_Renyi(n, p)

        k = 50

        activeVertices = set(numpy.random.randint(0, n, 10))
        numRuns = 100

        ProfileUtils.profile(
            "MaxInfluence.simulateCascades(graph, activeVertices, numRuns, p=0.5)", globals(), locals()
        )
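
    # simulateCascades presumably estimates the expected spread of the
    # independent cascade model by averaging over numRuns runs; a single run
    # might look like this (names are illustrative, not the MaxInfluence API):
    def simulateCascadeOnce(self, graph, activeVertices, p):
        active = set(activeVertices)
        frontier = set(activeVertices)
        while frontier:
            newFrontier = set()
            for v in frontier:
                for u in graph.neighbors(v):
                    # each newly activated vertex gets one chance to activate u
                    if u not in active and numpy.random.rand() < p:
                        newFrontier.add(u)
            active |= newFrontier
            frontier = newFrontier
        return len(active)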
    def profileEigenRemove(self):
        k = 50
        n = 1000
        X = numpy.random.rand(n, n)
        m = 900

        XX = X.dot(X.T)
        self.omega, self.Q = numpy.linalg.eig(XX)

        def runEigenRemove():
            for i in range(10):
                EigenUpdater.eigenRemove(self.omega, self.Q, m, k)

        ProfileUtils.profile('runEigenRemove()', globals(), locals())
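
    # eigenRemove presumably updates a rank-k eigendecomposition after the
    # rows and columns beyond index m are deleted, avoiding a fresh O(n^3)
    # eigendecomposition each time. Note XX is symmetric, so
    # numpy.linalg.eigh would be the natural real-valued alternative to eig.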
    def profileEigenConcat(self):
        k = 10
        n = 1000
        m = 100
        X = numpy.random.rand(n, n)

        XX = X.dot(X.T)
        self.AA = XX[0:m, 0:m]
        self.AB = XX[0:m, m:]
        self.BB = XX[m:, m:]
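        # eigenConcat presumably updates the eigendecomposition of the block
        # matrix [[AA, AB], [AB^T, BB]] given only eig(AA), keeping the top k
        # eigenvectors, so the full n x n problem is never solved directly.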

        self.omega, self.Q = numpy.linalg.eig(self.AA)

        ProfileUtils.profile('EigenUpdater.eigenConcat(self.omega, self.Q, self.AB, self.BB, k)', globals(), locals())
    def profilePowerIteration2(self): 
                
        p = 100 
        q = 5
        omega = numpy.random.randn(self.X.shape[1], p)
        L = GeneralLinearOperator.asLinearOperator(self.X, parallel=True)
        
        def run():
            # power iteration: build Y = (X X^H)^q X omega, assuming matmat
            # applies the operator and rmatmat its adjoint
            Y = L.matmat(omega)

            for i in range(q):
                Y = L.rmatmat(Y)
                Y = L.matmat(Y)
                
        ProfileUtils.profile('run()', globals(), locals())
    def profileModelSelect(self):
        learner = LibSVM()
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)*2 - 1

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelModelSelect(X, Y, idx, self.paramDict)

        ProfileUtils.profile('run()', globals(), locals())
    def learnModel(self, X, Y):
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)

        if numpy.unique(Y).shape[0] < 2:
            raise ValueError("Vector of labels must be binary, currently numpy.unique(Y) = " + str(numpy.unique(Y)))

        #If Y is 1D make it 2D
        if Y.ndim == 1:
            Y = numpy.array([Y]).T
        
        XY = self._getDataFrame(X, Y)
        formula = robjects.Formula('class ~ .')
        self.learnModelDataFrame(formula, XY)

        # collect garbage on both the Python and R sides so rpy2 releases R memory
        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if self.printMemStats:
            logging.debug(self.getLsos()())
            logging.debug(ProfileUtils.memDisplay(locals()))
    def profileEigenAdd2(self):
        k = 10
        n = 1000
        m = 200
        X = numpy.random.rand(n, n)
        Y = numpy.random.rand(n, m)

        XX = X.dot(X.T)

        self.omega, self.Q = numpy.linalg.eig(XX)

        def runEigenAdd2():
            for i in range(10):
                EigenUpdater.eigenAdd2(self.omega, self.Q, Y, Y, k)

        ProfileUtils.profile('runEigenAdd2()', globals(), locals())
    def profileLearnModel(self):
        numExamples = 1000
        numFeatures = 50
        minSplit = 10
        maxDepth = 20

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(numExamples, numFeatures)
        y = numpy.array(y, dtype=float)

        learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth, pruneType="REP-CV")
        #learner.learnModel(X, y)
        #print("Done")
        ProfileUtils.profile('learner.learnModel(X, y)', globals(), locals())

        print(learner.getTree().getNumVertices())
    def profilePredict(self):
        # Make the prediction function faster
        numExamples = 1000
        numFeatures = 20
        minSplit = 1
        maxDepth = 20

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(numExamples, numFeatures)

        learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth)
        learner.learnModel(X, y)

        print(learner.getTree().getNumVertices())
        ProfileUtils.profile('learner.predict(X)', globals(), locals())

        # print again to check the tree was not modified by predict
        print(learner.getTree().getNumVertices())
    def profileParallelPen(self): 
        learner = LibSVM(processes=8)
        learner.setChunkSize(2)
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)*2 - 1
        Cvs = [self.folds-1]

        def run():
            for i in range(2):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelPen(X, Y, idx, self.paramDict, Cvs)

        ProfileUtils.profile('run()', globals(), locals())
    def profileLearnModel(self):
        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(10)
        treeRank.setMinSplit(50)

        numExamples = 5000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)*2 - 1

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                treeRank.learnModel(X, Y)
                #print(treeRank.getTreeSize())
                #print(treeRank.getTreeDepth())

        ProfileUtils.profile('run()', globals(), locals())
    def profileModelSelect(self):
        learner = DecisionTreeLearner(minSplit=5, maxDepth=30, pruneType="CART") 
        numExamples = 1000
        numFeatures = 10
        
        folds = 5
        
        paramDict = {} 
        paramDict["setGamma"] =  numpy.array(numpy.round(2**numpy.arange(1, 7.5, 0.5)-1), dtype=numpy.int)

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)*2 - 1

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(folds, numExamples)
                learner.parallelModelSelect(X, Y, idx, paramDict)

        ProfileUtils.profile('run()', globals(), locals())
    def profileLearnModel(self):
        treeRankForest = TreeRankForestR()
        treeRankForest.printMemStats = True
        treeRankForest.setMaxDepth(2)
        treeRankForest.setNumTrees(5)

        numExamples = 650
        numFeatures = 950

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)

        def run():
            for i in range(10):
                print("Iteration " + str(i))
                treeRankForest.learnModel(X, Y)
                #print(treeRankForest.getTreeSize())
                #print(treeRankForest.getTreeDepth())

        ProfileUtils.profile('run()', globals(), locals())
    def profileLearnModel(self):
        treeRank = TreeRankR()
        treeRank.printMemStats = True
        treeRank.setMaxDepth(2)
        treeRank.setMinSplit(50)
        treeRank.setLeafRank(treeRank.getLrLinearSvmPlain())

        numExamples = 650
        numFeatures = 950 

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, dtype=int)

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                treeRank.learnModel(X, Y)
                #print(treeRank.getTreeSize())
                #print(treeRank.getTreeDepth())

        ProfileUtils.profile('run()', globals(), locals())
    def profileFindBestSplit(self):
        numExamples = 1000
        numFeatures = 100
        minSplit = 1
        maxDepth = 20

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(numExamples, numFeatures)
        X = numpy.array(X, order="F")

        nodeInds = numpy.arange(X.shape[0])
        argsortX = numpy.zeros(X.shape, dtype=int, order="F")

        for i in range(X.shape[1]):
            # double argsort turns the sort order into the rank of each example
            argsortX[:, i] = numpy.argsort(X[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])

        def run():
            for i in range(10):
                findBestSplit3(minSplit, X, y, nodeInds, argsortX)

        ProfileUtils.profile('run()', globals(), locals())
    def profileCluster2(self):
        numVertices = 250
        graph = SparseGraph(GeneralVertexList(numVertices))

        p = 0.1
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)
        
        W = graph.getWeightMatrix()

        WList = []

        for i in range(50):
            s = numpy.random.randint(0, numVertices)
            t = numpy.random.randint(0, numVertices)
            logging.info("%d %d", s, t)
            W[s, t] += 0.5
            W[t, s] += 0.5 
            WList.append(W.copy())

        iterator = iter(WList)

        ProfileUtils.profile('self.clusterer.cluster(iterator)', globals(), locals())
    def safeSvd(A, eps=10**-8, tol=10**-8):
        """
        Compute the SVD of a matrix using scipy.linalg.svd, and if convergence fails
        revert to Util.svd.
        """
        # check input matrix
        if __debug__:
            if not Parameter.checkArray(A, softCheck = True):
                logging.info("... in Util.safeSvd")

        try:
            # run scipy.linalg.svd
            try:
                P, sigma, Qh = scipy.linalg.svd(A, full_matrices=False)
            except scipy.linalg.LinAlgError as e:
                logging.warning(str(e))
                raise Exception('SVD decomposition has to be computed from EVD decomposition')
                
            # --- only when the SVD decomposition comes from scipy.linalg.svd ---
            # clean output singular values (sometimes scipy.linalg.svd returns NaN or negative singular values, let's remove them)
            inds = numpy.arange(sigma.shape[0])[sigma > tol]
            if inds.shape[0] < sigma.shape[0]:
                P, sigma, Q = Util.indSvd(P, sigma, Qh, inds)
                Qh = Q.conj().T
                # an expensive check but we really need it
                # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
                if not numpy.allclose(A, (P*sigma).dot(Qh)):
                    logging.warning(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition is too far from the original matrix")
#                    numpy.savez("matrix_leading_to_bad_SVD.npz", A)
                    raise Exception('SVD decomposition has to be computed from EVD decomposition')
                    
            # check scipy.linalg.svd output matrices (expensive)
            if __debug__:
                badAnswerFromScipySvd = False
                if not Parameter.checkArray(P, softCheck=True, arrayInfo="P in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if not Parameter.checkArray(sigma, softCheck = True, arrayInfo="sigma in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if not Parameter.checkArray(Qh, softCheck = True, arrayInfo="Qh in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if badAnswerFromScipySvd:
                    logging.warn(" After cleaning singular values from scipy.linalg.svd, the SVD decomposition still contains 'NaN', 'inf' or complex values")
                    raise Exception('SVD decomposition has to be computed from EVD decomposition')

        except Exception as inst:
            if inst.args != ('SVD decomposition has to be computed from EVD decomposition',):
                raise
            logging.warn(" Using EVD method to compute the SVD.")
            P, sigma, Qh = Util.svd(A, eps, tol)

            # check Util.svd output matrices (expensive)
            if __debug__:
                badAnswerFromUtilSvd = False
                if not Parameter.checkArray(P, softCheck = True):
                    logging.info("... in P in Util.safeSvd")
                    badAnswerFromUtilSvd = True
#                        print nan_rows in P: numpy.isnan(P).sum(0).nonzero()
                if not Parameter.checkArray(sigma, softCheck = True):
                    logging.info("... in sigma in Util.safeSvd")
                    badAnswerFromUtilSvd = True
#                        print numpy.isnan(sigma).nonzero()
                if not Parameter.checkArray(Qh, softCheck = True):
                    logging.info("... in Q in Util.safeSvd")
                    badAnswerFromUtilSvd = True
#                        blop = numpy.isnan(Qh).sum(1)
#                        print blop.nonzero()
#                        print blop[blop.nonzero()]
                if badAnswerFromUtilSvd:
                    logging.warn(" SVD decomposition obtained from EVD decomposition contains 'NaN', 'inf' or real values")

        from apgl.util.ProfileUtils import ProfileUtils
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        return P, sigma, Qh
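
    # Minimal usage sketch: safeSvd is a drop-in replacement for
    # scipy.linalg.svd(A, full_matrices=False), so the factors reconstruct A
    # (assumes Util is importable, e.g. from apgl.util.Util):
    #
    #     A = numpy.random.rand(50, 30)
    #     P, sigma, Qh = Util.safeSvd(A)
    #     print(numpy.linalg.norm(A - (P * sigma).dot(Qh)))  # ~1e-14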
    def testMemory(self):
        logging.info(ProfileUtils.memory())
    def profileCluster(self):
        iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)

        ProfileUtils.profile('self.clusterer.cluster(iterator)', globals(), locals())
    def profileVectoriseDocuments(self):
        field = "Boosting"
        dataset = ArnetMinerDataset(field)

        ProfileUtils.profile('dataset.vectoriseDocuments()', globals(), locals())