Exemple #1
0
def recommend(learner): 
    """
    Cross-validate ``learner`` on the "Toy" coauthor matrix and save the mean error.

    The matrix X is read in Matrix Market format from the "erasm/" output
    directory; X_ij = k means author i has worked with author j, k times.
    Only the first ``numExamples`` rows are kept and the result is passed
    through ``preprocess``. The non-zero entries are then shuffled and split
    into ``numFolds`` train/test folds, ``computeTestError`` is evaluated on
    every fold in a process pool, and the mean of the returned errors is
    written with ``numpy.savez`` to "results_<learner.name()>" in the same
    directory.

    :param learner: the model to evaluate. It is handed unchanged to
        ``computeTestError`` and must provide a ``name()`` method, which is
        used to build the results file name.
    """
    outputDir = PathDefaults.getOutputDir() + "erasm/" 
    matrixFileName = outputDir + "Toy"
    
    numExamples = 50 
    numFolds = 5    
      
    # csr_matrix() already yields CSR, so no further conversion is required
    X = scipy.sparse.csr_matrix(scipy.io.mmread(matrixFileName))
    logging.debug("Loaded matrix " + str(X.shape) + " with " + str(X.getnnz()) + " non zeros")
    X = X[0:numExamples, :]
    # The second value returned by preprocess is not needed here
    X, _ = preprocess(X)

    # Take out some ratings to form a training set: shuffle the positions of
    # the non-zero entries, then let crossValidation partition those positions
    rowInds, colInds = X.nonzero()
    numNonZeros = rowInds.shape[0]
    randInds = numpy.random.permutation(numNonZeros)
    indexList = Sampling.crossValidation(numFolds, numNonZeros)
    
    paramList = [] 
    for trnIdx, tstIdx in indexList: 
        trainInds = randInds[trnIdx]
        testInds = randInds[tstIdx]
        
        trainX = SparseUtils.selectMatrix(X, rowInds[trainInds], colInds[trainInds]).tocsr()
        testX = SparseUtils.selectMatrix(X, rowInds[testInds], colInds[testInds]).tocsr()
        
        paramList.append((trainX, testX, learner))
        
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try: 
        results = pool.map(computeTestError, paramList)
    finally: 
        # Always release the worker processes, even if a fold raised
        pool.close()
        pool.join()
    
    testErrors = numpy.array(results)
    meanTestErrors = testErrors.mean()
    logging.debug("Test errors = " + str(meanTestErrors))
    
    errorFileName = outputDir + "results_" + learner.name()
    numpy.savez(errorFileName, meanTestErrors)   
    logging.debug("Saved results as " + errorFileName)
    def testSelectMatrix(self): 
        """
        Check SparseUtils.selectMatrix when the whole first row is selected.

        A random 10 x 10 sparse matrix is generated with density 0.5 and every
        (0, j) position is selected. The test asserts that the first row of
        the result equals the first row of the input, and that every entry in
        the remaining rows of the result is zero.
        """
        numRows = 10
        numCols = 10  
        A = scipy.sparse.rand(numRows, numCols, 0.5, "csr")
        
        # Select first row; the indices are integers so that they are valid
        # for element access on a sparse matrix
        rowInds = numpy.zeros(numCols, dtype=int)
        colInds = numpy.arange(numCols)

        newA = SparseUtils.selectMatrix(A, rowInds, colInds)
        
        for i in range(numCols): 
            self.assertEqual(A[0, i], newA[0, i])
            
        # Nothing outside the first row was selected
        for i in range(1, numRows): 
            for j in range(numCols): 
                self.assertEqual(newA[i, j], 0)