Example #1
 def profileDot2(self): 
     density = 0.01
     m = 10000
     n = 10000
     a_sppy = sppy.rand((m, n), density, storagetype='row')
     a_sppy_T = sppy.csarray(a_sppy.T, storagetype="col")
     ProfileUtils.profile('a_sppy.dot(a_sppy_T)', globals(), locals())
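Every example below funnels through ProfileUtils.profile(statement, globals(), locals()). As a rough mental model only, the helper can be thought of as a thin wrapper around the standard-library profiler; the sketch below is an assumption about its behaviour, not the library's actual implementation, and the numStats argument is invented for illustration.

import cProfile
import pstats

def profile(statement, globalVars, localVars, numStats=30):
    # Run the statement under cProfile with the caller's namespaces,
    # then print the calls that dominate cumulative time.
    profiler = cProfile.Profile()
    profiler.runctx(statement, globalVars, localVars)
    pstats.Stats(profiler).sort_stats("cumulative").print_stats(numStats)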
Example #2
 def profileDot(self): 
     #Create random sparse matrix and numpy array 
     #Test speed of array creation 
     numpy.random.seed(21)
     m = 1000000
     n = 1000000      
     numInds = 10000000
     
     inds = numpy.random.randint(0, m*n, numInds)
     inds = numpy.unique(inds)
     vals = numpy.random.randn(inds.shape[0])
     
     rowInds, colInds = numpy.unravel_index(inds, (m, n), order="F")
     rowInds = numpy.array(rowInds, numpy.int32)
     colInds = numpy.array(colInds, numpy.int32)
             
     A = csarray((m, n), storageType="rowMajor")
     A.put(vals, rowInds, colInds, True)
     A.compress()
     
     p = 500
     W = numpy.random.rand(n, p)
     
     
     ProfileUtils.profile('A.dot(W)', globals(), locals())
     
     #Compare versus scipy 
     #B = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), (m, n))        
     #ProfileUtils.profile('B.dot(W)', globals(), locals())
     
     #Compare versus pdot       
     ProfileUtils.profile('A.pdot(W)', globals(), locals())
Example #3
 def profileModelSelect(self):
     lmbdas = numpy.linspace(1.0, 0.01, 5)
     softImpute = IterativeSoftImpute(k=500)
     
     folds = 5
     cvInds = Sampling.randCrossValidation(folds, self.X.nnz)
     ProfileUtils.profile('softImpute.modelSelect(self.X, lmbdas, cvInds)', globals(), locals())
Example #4
    def profileObjective(self):

        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        for i in range(self.m):
            colIndsProbabilities[indPtr[i]:indPtr[i + 1]] /= colIndsProbabilities[indPtr[i]:indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i]:indPtr[i + 1]] = numpy.cumsum(colIndsProbabilities[indPtr[i]:indPtr[i + 1]])

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r,
                                numAucSamples, lmbda, rho, False)

        ProfileUtils.profile('run()', globals(), locals())
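The loop above turns the uniform weights in colIndsProbabilities into a cumulative distribution over the observed columns of each row. A small sketch of how such a per-row CDF could be inverted to sample a column; the helper name and its use are assumptions for illustration, not part of the library.

import numpy

def sampleColumn(indPtr, colInds, colIndsProbabilities, i):
    # Invert the cumulative distribution of row i with a binary search.
    start, end = indPtr[i], indPtr[i + 1]
    cdf = colIndsProbabilities[start:end]
    j = numpy.searchsorted(cdf, numpy.random.rand())
    j = min(j, cdf.shape[0] - 1)  # guard against float round-off at the top of the CDF
    return colInds[start + j]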
Example #5
    def profileDerivativeUiApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 10
            for j in range(numRuns):
                for i in range(self.m):
                    learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)

        ProfileUtils.profile("run()", globals(), locals())
Example #6
    def profileObjective(self):

        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        for i in range(self.m):
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] /= colIndsProbabilities[indPtr[i] : indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] = numpy.cumsum(
                colIndsProbabilities[indPtr[i] : indPtr[i + 1]]
            )

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r, numAucSamples, lmbda, rho, False)

        ProfileUtils.profile("run()", globals(), locals())
Example #7
    def profileLearnModel(self):
        #Profile full gradient descent
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        #X, U, V = DatasetUtils.syntheticDataset1()
        #X, U, V = DatasetUtils.syntheticDataset1(u=0.2, sd=0.2)
        #X = DatasetUtils.flixster()

        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        maxLocalAuc = MaxLocalAUC(self.k,
                                  w,
                                  alpha=alpha,
                                  eps=eps,
                                  stochastic=True)
        maxLocalAuc.maxNormU = 10
        maxLocalAuc.maxNormV = 10
        maxLocalAuc.maxIterations = 100
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "constant"
        maxLocalAuc.parallelSGD = True
        maxLocalAuc.numProcesses = 8
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.numRowSamples = 30
        maxLocalAuc.scaleAlpha = False
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0.0
        print(maxLocalAuc)

        ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
Example #8
 def profileRunExperiment(self):
     
     def run(): 
         dataArgs = argparse.Namespace()
         dataArgs.maxIter = 3 
         #Set iterStartDate to None for all iterations 
         #dataArgs.iterStartTimeStamp = None 
         dataArgs.iterStartTimeStamp = time.mktime(datetime(2005,1,1).timetuple())
         generator = MovieLensDataset(maxIter=dataArgs.maxIter, iterStartTimeStamp=dataArgs.iterStartTimeStamp)        
         
         defaultAlgoArgs = argparse.Namespace()
         defaultAlgoArgs.ks = numpy.array(2**numpy.arange(6, 7, 0.5), int)
         defaultAlgoArgs.svdAlgs = ["rsvd"]   
         defaultAlgoArgs.runSoftImpute = True
         
         dataParser = argparse.ArgumentParser(description="", add_help=False)
         dataParser.add_argument("-h", "--help", action="store_true", help="show this help message and exit")
         devNull, remainingArgs = dataParser.parse_known_args(namespace=dataArgs)
         
         dataArgs.extendedDirName = ""
         dataArgs.extendedDirName += "MovieLensDataset"
         
         recommendExpHelper = RecommendExpHelper(generator.getTrainIteratorFunc, generator.getTestIteratorFunc, remainingArgs, defaultAlgoArgs, dataArgs.extendedDirName)
         recommendExpHelper.printAlgoArgs()
         #    os.makedirs(resultsDir, exist_ok=True) # for python 3.2
         try:
             os.makedirs(recommendExpHelper.resultsDir)
         except OSError as err:
             if err.errno != errno.EEXIST:
                 raise
         
         recommendExpHelper.runExperiment()
         
     ProfileUtils.profile('run()', globals(), locals())    
Example #9
 def profileEigpsd(self):
     n = 1000 
     p = 0.1 
     L = scipy.sparse.rand(n, n, p)            
     L = L.T.dot(L)
         
     cols = 500
     ProfileUtils.profile('Nystrom.eigpsd(L, cols)', globals(), locals())
Example #10
    def profileModelSelect(self):
        lmbdas = numpy.linspace(1.0, 0.01, 5)
        softImpute = IterativeSoftImpute(k=500)

        folds = 5
        cvInds = Sampling.randCrossValidation(folds, self.X.nnz)
        ProfileUtils.profile('softImpute.modelSelect(self.X, lmbdas, cvInds)',
                             globals(), locals())
Example #11
 def profilePutPySparse(self): 
     
     def runPut(): 
         A = spmatrix.ll_mat(self.N, self.N)
         for i in range(self.k):         
             A.put(self.val, self.rowInds, self.colInds)
     
     ProfileUtils.profile('runPut()', globals(), locals())
Example #12
    def profileEigpsd(self):
        n = 1000
        p = 0.1
        L = scipy.sparse.rand(n, n, p)
        L = L.T.dot(L)

        cols = 500
        ProfileUtils.profile('Nystrom.eigpsd(L, cols)', globals(), locals())
Example #13
    def profileSvd(self):
        n = 5000
        p = 0.1
        L = scipy.sparse.rand(n, n, p)
        L = L.T.dot(L)

        k = 50
        q = 2
        ProfileUtils.profile('RandomisedSVD.svd(L, k, q)', globals(), locals())
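RandomisedSVD.svd(L, k, q) computes a rank-k approximate SVD using q power iterations. For context, a generic randomised SVD in the style of Halko et al. can be sketched with plain numpy as below; the function name, the oversampling parameter p, and the exact return convention are assumptions, not the library's API.

import numpy

def randomisedSvd(A, k, q=2, p=10):
    # Sample the range of A with a Gaussian test matrix, sharpen it with
    # q power iterations, then take a small dense SVD of the projection.
    Omega = numpy.random.randn(A.shape[1], k + p)
    Y = A.dot(Omega)
    for _ in range(q):
        Y = A.dot(A.T.dot(Y))
    Q, _ = numpy.linalg.qr(Y)
    B = A.T.dot(Q).T
    Uhat, s, Vt = numpy.linalg.svd(B, full_matrices=False)
    return Q.dot(Uhat)[:, :k], s[:k], Vt[:k, :].T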
Example #14
 def profileGenerateSparseBinaryMatrixPL(self): 
     m = 500 
     n = 200 
     k = 10
     density = 0.2
     numpy.random.seed(21)
     #X = SparseUtils.generateSparseBinaryMatrixPL((m,n), k, density=density, csarray=True)   
     
     ProfileUtils.profile('SparseUtilsCython.generateSparseBinaryMatrixPL((m,n), k, density=density, csarray=True)', globals(), locals()) 
Example #15
 def profileSvd(self):
     n = 5000 
     p = 0.1 
     L = scipy.sparse.rand(n, n, p)            
     L = L.T.dot(L)
         
     k = 50 
     q = 2
     ProfileUtils.profile('RandomisedSVD.svd(L, k, q)', globals(), locals())
Example #16
 def profileMC2(self): 
     numVals = 10000
     list1 = numpy.random.permutation(numVals).tolist()      
     list2 = numpy.random.permutation(numVals).tolist()   
     lists = [list1, list2]
     
     itemList = numpy.arange(numVals).tolist()
     
     ProfileUtils.profile('RankAggregator.MC2(lists, itemList)', globals(), locals())  
Example #17
 def profileModelSelection(self):
     dataset = ArnetMinerDataset(runLSI=False)   
     dataset.overwrite = True
     dataset.overwriteVectoriser = True
     dataset.overwriteModel = True
     
     dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"
     
     ProfileUtils.profile('dataset.modelSelection()', globals(), locals())
Example #18
 def profilePartialReconstructValsPQ(self):
     shape = 5000, 10000
     r = 100 
     U, s, V = SparseUtils.generateLowRank(shape, r)
     
     k = 1000000 
     inds = numpy.unravel_index(numpy.random.randint(0, shape[0]*shape[1], k), shape)
     
     ProfileUtils.profile('SparseUtilsCython.partialReconstructValsPQ(inds[0], inds[1], U, V)', globals(), locals())
Example #19
 def profilePut2(self):
     def runPut(): 
         
         for i in range(self.k):         
             A = csarray((self.N, self.N))
             #A[(self.rowInds, self.colInds)] = self.val 
             A.put(self.val, self.rowInds, self.colInds)
     
     ProfileUtils.profile('runPut()', globals(), locals())
Example #20
 def profileGreedyMethod2(self):
      
     n = 1000 
     p = 0.1
     graph = igraph.Graph.Erdos_Renyi(n, p)
     print(graph.summary())
         
     k = 5
     numpy.random.seed(21) 
     ProfileUtils.profile('MaxInfluence.greedyMethod2(graph, k, p=0.5, numRuns=1000)', globals(), locals())  
Example #21
 def profileSliceSpa(self): 
     A = csarray((self.N, self.N))
     A.put(self.val, self.rowInds, self.colInds)
     
     def runSlice():     
         for i in range(10):  
             sliceInds = numpy.array(numpy.random.randint(0, self.M, self.N), dtype=int)
             B = A[:, sliceInds]
         
     ProfileUtils.profile('runSlice()', globals(), locals())
Example #22
 def profileSumPys(self): 
     A = spmatrix.ll_mat(self.N, self.N)  
     A.put(self.val, self.rowInds, self.colInds)
     
     def runSum():     
         for i in range(1000):
             total = PySparseUtils.sum(A)
         print(total)
         
     ProfileUtils.profile('runSum()', globals(), locals())
Example #23
    def profileRandomChoice(self):
        a = numpy.random.randint(0, 10, 100)
        b = numpy.random.rand(100)
        b /= b.sum()

        def run():
            for i in range(10000):
                numpy.random.choice(a, 1000, p=b)

        ProfileUtils.profile('run()', globals(), locals())
Example #24
 def profileSumSpa(self): 
     A = csarray((self.N, self.N))
     A.put(self.val, self.rowInds, self.colInds)
     
     def runSum():     
         for i in range(1000):
             total = A.sum()
         print(total)
         
     ProfileUtils.profile('runSum()', globals(), locals())
Example #25
 def profileClusterFromIterator(self):
     iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
     dataDir = PathDefaults.getDataDir() + "cluster/"
     #iterator = getBemolGraphIterator(dataDir)
     
     def run(): 
         clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
         print(timeList.cumsum(0))
         
     ProfileUtils.profile('run()', globals(), locals())
Example #26
 def profileRowSlice(self): 
     numpy.random.seed(21)
     m = 100000
     n = 1000000
     #numInds = 10000000
     
     X = sppy.rand((m, n), density=0.001, storagetype="row")
     
     
     #ProfileUtils.profile('X[0:1000, :] ', globals(), locals())
     ProfileUtils.profile('X.submatrix(0, 0, 1000, n)', globals(), locals())
Example #27
 def profileComputeLDA(self):
     field = "Boosting"
     dataset = ArnetMinerDataset(field)
     dataset.overwrite = True
     dataset.overwriteVectoriser = True
     dataset.overwriteModel = True
     dataset.maxRelevantAuthors = 100
     dataset.k = 200
     dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"
     
     ProfileUtils.profile('dataset.computeLDA()', globals(), locals()) 
Example #28
 def profileGetNonZerosSpa(self): 
     A = csarray((self.N, self.N)) 
     A.put(self.val, self.rowInds, self.colInds)
     
     def runNonZeros(): 
         for i in range(1000):
             rows, cols = A.nonzero()
             vals = A[rows, cols]
         print(numpy.sum(vals))
         
     ProfileUtils.profile('runNonZeros()', globals(), locals())
Example #29
    def profileGetOmegaList(self):
        shape = (20000, 15000)
        r = 50
        k = 1000000

        X = SparseUtils.generateSparseLowRank(shape, r, k)
        import sppy
        X = sppy.csarray(X)

        ProfileUtils.profile('SparseUtils.getOmegaList(X)', globals(),
                             locals())
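SparseUtils.getOmegaList(X) presumably returns, for every row of X, the indices of its non-zero columns (the observed entries). For a scipy CSR matrix the same per-row index sets can be read straight from the indptr/indices arrays; the sketch below is an equivalent under that assumption, not the library's code.

import scipy.sparse

def omegaList(X):
    # Per-row non-zero column indices of a sparse matrix, via CSR internals.
    X = scipy.sparse.csr_matrix(X)
    return [X.indices[X.indptr[i]:X.indptr[i + 1]] for i in range(X.shape[0])]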
Example #30
 def profileGetNonZerosPys(self): 
     A = spmatrix.ll_mat(self.N, self.N)  
     A.put(self.val, self.rowInds, self.colInds)
     
     def runNonZeros(): 
         for i in range(1000):
             (rows, cols) = PySparseUtils.nonzero(A)
             nzVals = numpy.zeros(len(rows))
             A.take(nzVals, rows, cols)
         
     ProfileUtils.profile('runNonZeros()', globals(), locals())
Example #31
 def profileTrainIterator(self):
     
     def run(): 
         dataset = NetflixDataset(maxIter=30)
 
         trainIterator = dataset.getTrainIteratorFunc()        
         
         for trainX in trainIterator: 
             print(trainX.shape)
         
     ProfileUtils.profile('run()', globals(), locals())    
Example #32
    def profileSubmatrix(self):
        shape = (100000, 15000)
        r = 50
        k = 5000000

        X = SparseUtils.generateSparseLowRank(shape, r, k)
        print(X.nnz, type(X))

        inds = numpy.random.permutation(X.nnz)[0:1000000]

        ProfileUtils.profile("SparseUtils.submatrix(X, inds)", globals(), locals())
Example #33
 def profileSimulateCascades(self): 
     n = 500 
     p = 0.1
     graph = igraph.Graph.Erdos_Renyi(n, p)
         
     k = 50
     
     activeVertices = set(numpy.random.randint(0, n, 10))  
     numRuns = 100
     
     ProfileUtils.profile('MaxInfluence.simulateCascades(graph, activeVertices, numRuns, p=0.5)', globals(), locals())  
Example #34
    def profileGetOmegaList(self):
        shape = (20000, 15000)
        r = 50
        k = 1000000

        X = SparseUtils.generateSparseLowRank(shape, r, k)
        import sppy

        X = sppy.csarray(X)

        ProfileUtils.profile("SparseUtils.getOmegaList(X)", globals(), locals())
Example #35
    def profileRandom2Choice(self):
        n = 1000
        m = 1000

        V = numpy.random.rand(n, 2)

        def runRandom2Choice():
            reps = 100
            for i in range(reps):
                Util.randomChoice(V, m)

        ProfileUtils.profile('runRandom2Choice()', globals(), locals())
Example #36
    def profileInverseChoice(self):

        n = 100000
        v = numpy.array(numpy.random.choice(n, 100), numpy.int32)
        v = numpy.sort(v)

        def run():
            numRuns = 2000000
            for i in range(numRuns):
                inverseChoicePy(v, n)

        ProfileUtils.profile('run()', globals(), locals())
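inverseChoicePy(v, n) appears to draw a uniform index from [0, n) that is not present in the sorted array v. A plain-numpy rejection-sampling equivalent is sketched below under that assumption; since v holds only 100 of the 100000 possible indices here, the rejection rate is negligible.

import numpy

def inverseChoice(v, n):
    # Rejection sample: redraw whenever the candidate index occurs in v.
    while True:
        i = numpy.random.randint(0, n)
        j = numpy.searchsorted(v, i)
        if j == v.shape[0] or v[j] != i:
            return i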
Example #37
    def profileSubmatrix(self):
        shape = (100000, 15000)
        r = 50
        k = 5000000

        X = SparseUtils.generateSparseLowRank(shape, r, k)
        print(X.nnz, type(X))

        inds = numpy.random.permutation(X.nnz)[0:1000000]

        ProfileUtils.profile('SparseUtils.submatrix(X, inds)', globals(),
                             locals())
Example #38
 def profileLearnModel(self, useProfiler=True, eps=10**(-6)):
     k = 100
     lmbda = 0.001
     tmax=10**7
     gamma = 1
     
     learner = SGDNorm2Reg(k, lmbda, eps, tmax)
     
     if useProfiler:
         ProfileUtils.profile('learner.learnModel(self.X, storeAll=False)', globals(), locals())
     else:
         learner.learnModel(self.X, storeAll=False)
Example #39
 def profileParallelSparseOp2(self):
     L = LinOperatorUtils.parallelSparseOp(self.X)
     
     p = 300
     W = numpy.random.rand(self.X.shape[1], p)        
     
     def run(): 
         numRuns = 1
         for i in range(numRuns): 
             L.matmat(W)
     
     ProfileUtils.profile('run()', globals(), locals())
Example #40
 def profileDecisionTreeRegressor(self): 
     numExamples = 1000
     numFeatures = 20
     minSplit = 10
     maxDepth = 20
     
     generator = ExamplesGenerator()
     X, y = generator.generateBinaryExamples(numExamples, numFeatures)   
         
     regressor = DecisionTreeRegressor(min_split=minSplit, max_depth=maxDepth, min_density=0.0)
     
     ProfileUtils.profile('regressor.fit(X, y)', globals(), locals())
Example #41
 def profileAsLinearOperator2(self):
     L = scipy.sparse.linalg.aslinearoperator(self.X)
     
     p = 300
     W = numpy.random.rand(self.X.shape[1], p)
     
     def run(): 
         numRuns = 1 
         for i in range(numRuns): 
             L.matmat(W)
     
     ProfileUtils.profile('run()', globals(), locals())
Example #42
    def profileClusterFromIterator(self):
        iterator = IncreasingSubgraphListIterator(self.graph,
                                                  self.subgraphIndicesList)
        dataDir = PathDefaults.getDataDir() + "cluster/"

        #iterator = getBemolGraphIterator(dataDir)

        def run():
            clusterList, timeList, boundList = self.clusterer.clusterFromIterator(
                iterator, verbose=True)
            print(timeList.cumsum(0))

        ProfileUtils.profile('run()', globals(), locals())
Example #43
    def profileRestrictOmega(self):
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        m, n = X.shape
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        colIndsSubset = numpy.random.choice(n, 500, replace=False)

        def run():
            for i in range(100):
                newIndPtr, newColInds = restrictOmega(indPtr, colInds,
                                                      colIndsSubset)

        ProfileUtils.profile('run()', globals(), locals())
Example #44
 def profileParallelSparseOp(self):
     L = LinOperatorUtils.parallelSparseOp(self.X)
     
     def run(): 
         numRuns = 10 
         for i in range(numRuns): 
             p = numpy.random.rand(self.X.shape[0])
             q = numpy.random.rand(self.X.shape[1])
             
             L.matvec(q)
             L.rmatvec(p)
     
     ProfileUtils.profile('run()', globals(), locals())
Example #45
 def profileAsLinearOperator(self):
     L = scipy.sparse.linalg.aslinearoperator(self.X)
     
     def run(): 
         numRuns = 10 
         for i in range(numRuns): 
             p = numpy.random.rand(self.X.shape[0])
             q = numpy.random.rand(self.X.shape[1])
             
             L.matvec(q)
             L.rmatvec(p)
     
     ProfileUtils.profile('run()', globals(), locals())
Example #46
    def profileRandomChoice(self):
        n = 10000
        m = 1000

        maxInt = 20
        v = numpy.random.randint(0, maxInt, n)

        def runRandomChoice():
            reps = 10000
            for i in range(reps):
                Util.randomChoice(v, m)

        ProfileUtils.profile('runRandomChoice()', globals(), locals())
Example #47
    def profileEigenRemove(self):
        k = 50
        n = 1000
        X = numpy.random.rand(n, n)
        m = 900

        XX = X.dot(X.T)
        self.omega, self.Q = numpy.linalg.eig(XX)

        def runEigenRemove():
            for i in range(10):
                EigenUpdater.eigenRemove(self.omega, self.Q, m, k)

        ProfileUtils.profile('runEigenRemove()', globals(), locals())
Example #48
    def profileEigenConcat(self):
        k = 10
        n = 1000
        m = 100
        X = numpy.random.rand(n, n)

        XX = X.dot(X.T)
        self.AA = XX[0:m, 0:m]
        self.AB = XX[0:m, m:]
        self.BB = XX[m:, m:]

        self.omega, self.Q = numpy.linalg.eig(self.AA)

        ProfileUtils.profile('EigenUpdater.eigenConcat(self.omega, self.Q, self.AB, self.BB, k)', globals(), locals())
Example #49
    def profileFitDiscretePowerLaw(self):
        #Test with a large vector x
        alpha = 2.5
        exponent = (1 / (alpha - 1))
        numPoints = 50000
        x = 10 * numpy.random.rand(numPoints)**-exponent
        x = numpy.array(numpy.round(x), int)
        x = x[x <= 500]
        x = x[x >= 1]

        xmins = numpy.arange(1, 20)

        ProfileUtils.profile('Util.fitDiscretePowerLaw(x, xmins)', globals(),
                             locals())
Example #50
 def profileLearnModel(self):
     #Profile full gradient descent 
     u = 0.2
     w = 1-u
     eps = 10**-6
     alpha = 0.5
     learner = BprRecommender(self.k)
     learner.maxIterations = 10
     learner.recordStep = 10
     learner.numAucSamples = 5
     print(learner)
     print(self.X.nnz)
             
     ProfileUtils.profile('learner.learnModel(self.X)', globals(), locals())
Example #51
    def profileAltRandomChoice(self):
        n = 10000
        m = 1000
        maxInt = 20
        v = numpy.random.randint(0, maxInt, n)

        def runRandomChoice():
            #can just do non-zero entries
            w = Util.expandIntArray(v)

            reps = 10000
            for i in range(reps):
                w[numpy.random.randint(0, w.shape[0])]

        ProfileUtils.profile('runRandomChoice()', globals(), locals())
Example #52
 def profileLearnModel(self):
     numExamples = 1000
     numFeatures = 50
     minSplit = 10
     maxDepth = 20
     
     generator = ExamplesGenerator()
     X, y = generator.generateBinaryExamples(numExamples, numFeatures)   
     y = numpy.array(y, float)
         
     learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth, pruneType="REP-CV") 
     #learner.learnModel(X, y)
     #print("Done")
     ProfileUtils.profile('learner.learnModel(X, y) ', globals(), locals())
     
     print(learner.getTree().getNumVertices())
Example #53
    def learnModel(self, X, Y):
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)

        if numpy.unique(Y).shape[0] < 2:
            raise ValueError(
                "Vector of labels must be binary, currently numpy.unique(Y) = "
                + str(numpy.unique(Y)))

        #If Y is 1D make it 2D
        if Y.ndim == 1:
            Y = numpy.array([Y]).T

        XY = self._getDataFrame(X, Y)
        formula = robjects.Formula('class ~ .')
        self.learnModelDataFrame(formula, XY)

        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if self.printMemStats:
            logging.debug(self.getLsos()())
            logging.debug(ProfileUtils.memDisplay(locals()))
Example #54
    def profileEigenAdd2(self):
        k = 10
        n = 1000
        m = 200
        X = numpy.random.rand(n, n)
        Y = numpy.random.rand(n, m)

        XX = X.dot(X.T)

        self.omega, self.Q = numpy.linalg.eig(XX)

        def runEigenAdd2():
            for i in range(10):
                EigenUpdater.eigenAdd2(self.omega, self.Q, Y, Y, k)

        ProfileUtils.profile('runEigenAdd2()', globals(), locals())
Example #55
    def profileModelSelect(self):
        learner = LibSVM()
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelModelSelect(X, Y, idx, self.paramDict)

        ProfileUtils.profile('run()', globals(), locals())
Example #56
 def profilePredict(self): 
     #Make the prediction function faster
     numExamples = 1000
     numFeatures = 20
     minSplit = 1
     maxDepth = 20
     
     generator = ExamplesGenerator()
     X, y = generator.generateBinaryExamples(numExamples, numFeatures)   
         
     learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth) 
     learner.learnModel(X, y)
     
     print(learner.getTree().getNumVertices())
     ProfileUtils.profile('learner.predict(X)', globals(), locals())
     
     print(learner.getTree().getNumVertices())
Example #57
    def profileShuffleSplitRows(self):
        m = 10000
        n = 5000
        k = 5
        u = 0.1
        w = 1 - u
        X, U, s, V = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                            k,
                                                            w,
                                                            csarray=True,
                                                            verbose=True,
                                                            indsPerRow=200)

        k2 = 10
        testSize = 2

        ProfileUtils.profile('Sampling.shuffleSplitRows(X, k2, testSize)',
                             globals(), locals())