Example #1
    def __init__(self, field):
        numpy.random.seed(21)        
        
        dataDir = PathDefaults.getDataDir() + "dblp/"
        self.xmlFileName = dataDir + "dblp.xml"
        self.xmlCleanFilename = dataDir + "dblpClean.xml"        

        resultsDir = PathDefaults.getDataDir() + "reputation/" + field + "/"
        self.expertsFileName = resultsDir + "experts.txt"
        self.expertMatchesFilename = resultsDir + "experts_matches.csv"
        self.trainExpertMatchesFilename = resultsDir + "experts_train_matches.csv"
        self.testExpertMatchesFilename = resultsDir + "experts_test_matches.csv"
        self.coauthorsFilename = resultsDir + "coauthors.csv"
        self.publicationsFilename = resultsDir + "publications.csv"
        
        self.stepSize = 100000
        self.numLines = 33532888
        self.publicationTypes = set(["article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"])
        self.p = 0.5     
        self.matchCutoff = 0.95
        
        
        self.cleanXML()
        self.matchExperts()
        logging.warning("Now you must disambiguate the matched experts if not already done")
Example #2
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    numpy.random.seed(21)
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = dataDir + name + ext
    
    print("Loading data from file " + fileName)
    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    XY = numpy.loadtxt(fileName, delimiter=delimiter, skiprows=skiprows, usecols=usecols, converters=converters)
    X = XY[:, :-1]
    y = XY[:, -1]
    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)
    preprocessSave(X, y, outputDir, idx)
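
Sampling.shuffleSplit and preprocessSave are project helpers, so the snippet is not self-contained. As a rough sketch of what a shuffle-split plausibly produces (an assumption, not the project's implementation), each realisation permutes the row indices and cuts them into train and test parts:

import numpy

def shuffleSplitSketch(numRealisations, numExamples, split):
    # One (trainInds, testInds) pair per realisation; split is the train fraction.
    idx = []
    for _ in range(numRealisations):
        perm = numpy.random.permutation(numExamples)
        cut = int(split * numExamples)
        idx.append((perm[:cut], perm[cut:]))
    return idx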
Example #3
    def testGenerateRandomGraph(self):
        egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        alterFileName = PathDefaults.getDataDir()  + "infoDiffusion/AlterData.csv"
        numVertices = 1000
        infoProb = 0.1

        
        p = 0.1
        neighbours = 10
        generator = SmallWorldGenerator(p, neighbours)
        graph = SparseGraph(VertexList(numVertices, 0))
        graph = generator.generate(graph)

        self.svmEgoSimulator.generateRandomGraph(egoFileName, alterFileName, infoProb, graph)
Example #4
 def __init__(self):
     self.labelNames = ["Cortisol.val", "Testosterone.val", "IGF1.val"]
     self.dataDir = PathDefaults.getDataDir() +  "metabolomic/"
     self.boundsDict = {}
     self.boundsDict["Cortisol"] = numpy.array([0, 89, 225, 573])
     self.boundsDict["Testosterone"] = numpy.array([0, 3, 9, 13])
     self.boundsDict["IGF1"] = numpy.array([0, 200, 441, 782])
Example #5
    def testComputeIdealPenalty(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        fullX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        C = 1.0
        gamma = 1.0
        args = (trainX, trainY, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
        penalty = computeIdealPenalty(args)


        #Now compute penalty using data
        args = (trainX, trainY, testX, testY, C, gamma)
        penalty2 = computeIdealPenalty2(args)

        self.assertAlmostEquals(penalty2, penalty, 2)
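
The double loop above builds the Cartesian product of gridPoints with itself, with column 0 varying fastest. A minimal numpy sketch of the same construction (the gridPoints values here are a hypothetical stand-in):

import numpy

gridPoints = numpy.linspace(-3, 3, 5)
xs, ys = numpy.meshgrid(gridPoints, gridPoints)
fullX = numpy.column_stack([xs.ravel(), ys.ravel()])  # column 0 varies fastest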
Example #6
 def main(argv=None):
     if argv is None:
         argv = sys.argv
     try:
         # read options
         try:
             opts, args = getopt.getopt(argv[1:], "hd:n:D", ["help", "dir=", "nb_user=", "debug"])
         except getopt.error as msg:
             raise RGUsage(msg)
         # apply options
         dir = PathDefaults.getDataDir() + "cluster/"
         nb_user = None
         log_level = logging.INFO
         for o, a in opts:
             if o in ("-h", "--help"):
                 print(__doc__)
                 return 0
             elif o in ("-d", "--dir"):
                 dir = a
             elif o in ("-n", "--nb_user"):
                 nb_user = int(a)
             elif o in ("-D", "--debug"):
                 log_level = logging.DEBUG
         logging.basicConfig(stream=sys.stdout, level=log_level, format='%(levelname)s (%(asctime)s):%(message)s')
         # process: generate data files
         BemolData.generate_data_file(dir, nb_user)
     except RGUsage as err:
         logging.error(err.msg)
         logging.error("for help use --help")
         return 2
Example #7
    def testToyData(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]


        pxSum = 0
        pY1XSum = 0
        pYminus1XSum = 0

        px2Sum = 0 
        squareArea = (gridPoints[1]-gridPoints[0])**2

        for i in range(gridPoints.shape[0]-1):
            for j in range(gridPoints.shape[0]-1):
                px = (pdfX[i,j]+pdfX[i+1,j]+pdfX[i, j+1]+pdfX[i+1, j+1])/4
                pxSum += px*squareArea

                pY1X = (pdfY1X[i,j]+pdfY1X[i+1,j]+pdfY1X[i, j+1]+pdfY1X[i+1, j+1])/4
                pY1XSum += pY1X*squareArea

                pYminus1X = (pdfYminus1X[i,j]+pdfYminus1X[i+1,j]+pdfYminus1X[i, j+1]+pdfYminus1X[i+1, j+1])/4
                pYminus1XSum += pYminus1X*squareArea

                px2Sum += px*pY1X*squareArea + px*pYminus1X*squareArea

        self.assertAlmostEquals(pxSum, 1)
        print(pY1XSum)
        print(pYminus1XSum)

        self.assertAlmostEquals(px2Sum, 1)
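
The nested loop integrates each density over the grid with a midpoint rule: the average of a cell's four corners times the cell area, summed over all cells. A vectorised sketch of the same sum, assuming pdf is a square grid of density values:

import numpy

def midpointIntegral(pdf, gridPoints):
    squareArea = (gridPoints[1] - gridPoints[0])**2
    # Average the four corners of every grid cell, then sum cell * area.
    cellMeans = (pdf[:-1, :-1] + pdf[1:, :-1] + pdf[:-1, 1:] + pdf[1:, 1:]) / 4
    return cellMeans.sum() * squareArea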
Example #8
    def testPredict2(self):
        #Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]

        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        maxDepths = range(3, 10)
        trainAucs = numpy.array([0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508])
        testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])
        i = 0
        
        #The results are approximately the same, but not exactly 
        for maxDepth in maxDepths:
            treeRank = TreeRank(self.leafRanklearner)
            treeRank.setMaxDepth(maxDepth)
            treeRank.learnModel(X, y)
            trainScores = treeRank.predict(X)
            testScores = treeRank.predict(testX)

            self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 2)
            self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
            i+=1 
Example #9
    def testEdgeFile(self):
        """
        Figure out the problem with the edge file 
        """
        dataDir = PathDefaults.getDataDir() + "cluster/"
        edgesFilename = dataDir + "Cit-HepTh.txt"

        edges = {}
        file = open(edgesFilename, 'r')
        file.readline()
        file.readline()
        file.readline()
        file.readline()

        vertices = {}

        for line in file:
            (vertex1, sep, vertex2) = line.partition("\t")
            vertex1 = vertex1.strip()
            vertex2 = vertex2.strip()
            edges[(vertex1, vertex2)] = 0
            vertices[vertex1] = 0
            vertices[vertex2] = 0

        #It says there are 352807 edges in paper and 27770 vertices
        self.assertEquals(len(edges), 352807)
        self.assertEquals(len(vertices), 27770)
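
The four readline() calls skip the file header; an equivalent sketch using itertools.islice (the file name follows the test above, the rest is illustrative):

import itertools

edges = set()
with open("Cit-HepTh.txt") as f:
    for line in itertools.islice(f, 4, None):  # skip the 4 header lines
        vertex1, _, vertex2 = line.partition("\t")
        edges.add((vertex1.strip(), vertex2.strip()))
print(len(edges))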
Example #10
 def testGraphFromMatFile(self):
     matFileName = PathDefaults.getDataDir() +  "infoDiffusion/EgoAlterTransmissions1000.mat"
     sGraph = EgoUtils.graphFromMatFile(matFileName)
     
     examplesList = ExamplesList.readFromMatFile(matFileName)
     numFeatures = examplesList.getDataFieldSize("X", 1)
     
     self.assertEquals(examplesList.getNumExamples(), sGraph.getNumEdges())
     self.assertEquals(examplesList.getNumExamples()*2, sGraph.getNumVertices())
     self.assertEquals(numFeatures//2+1, sGraph.getVertexList().getNumFeatures())
     
     #Every even vertex has information, odd does not 
     for i in range(0, sGraph.getNumVertices()): 
         vertex = sGraph.getVertex(i)
         
         if i%2 == 0: 
             self.assertEquals(vertex[sGraph.getVertexList().getNumFeatures()-1], 1)
         else: 
             self.assertEquals(vertex[sGraph.getVertexList().getNumFeatures()-1], 0)
             
     #Test the first few vertices are the same 
     for i in range(0, 10): 
         vertex1 = sGraph.getVertex(i*2)[0:numFeatures//2]
         vertex2 = sGraph.getVertex(i*2+1)[0:numFeatures//2]
         vertexEx1 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[0:numFeatures//2]
         vertexEx2 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[numFeatures//2:numFeatures]
         
         self.assertTrue((vertex1 == vertexEx1).all())
         self.assertTrue((vertex2 == vertexEx2).all())
Example #11
    def __init__(self, maxIter=None, iterStartTimeStamp=None): 
        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        if not os.path.exists(outputDir): 
            os.mkdir(outputDir)
            
        #iterStartDate is the starting date of the iterator 
        if iterStartTimeStamp is not None: 
            self.iterStartTimeStamp = iterStartTimeStamp
        else: 
            self.iterStartTimeStamp = 1286229600
            
        self.timeStep = timedelta(30).total_seconds()             
                
        self.ratingFileName = outputDir + "data.npz"          
        self.userDictFileName = outputDir + "userIdDict.pkl"   
        self.groupDictFileName = outputDir + "groupIdDict.pkl" 
        self.isTrainRatingsFileName = outputDir + "is_train.npz"
    
        self.dataDir = PathDefaults.getDataDir() + "erasm/"
        self.dataFileName = self.dataDir + "groupMembers-29-11-12" 
        
        self.maxIter = maxIter 
        self.trainSplit = 4.0/5 
        
        self.processRatings()
        self.splitDataset()        
        self.loadProcessedData()
Example #12
    def processRatings(self): 
        """
        Convert the dataset into a matrix and save the results for faster 
        access. 
        """
        if not os.path.exists(self.ratingFileName) or not os.path.exists(self.custDictFileName): 
            dataDir = PathDefaults.getDataDir() + "netflix/training_set/"

            logging.debug("Processing ratings given in " + dataDir)

            custIdDict = {} 
            custIdSet = set([])        
            
            movieIds = array.array("I")
            custIds = array.array("I")
            ratings = array.array("B")
            dates = array.array("L")
            j = 0
            
            for i in range(self.startMovieID, self.endMovieID+1): 
                Util.printIteration(i-1, 1, self.endMovieID-1)
                ratingsFile = open(dataDir + "mv_" + str(i).zfill(7) + ".txt")
                ratingsFile.readline()
                
                for line in ratingsFile: 
                    vals = line.split(",")
                    
                    custId = int(vals[0])
                    
                    if custId not in custIdSet: 
                        custIdSet.add(custId)
                        custIdDict[custId] = j
                        custInd = j 
                        j += 1 
                    else: 
                        custInd = custIdDict[custId]
                    
                    rating = int(vals[1])     
                    t = datetime.strptime(vals[2].strip(), "%Y-%m-%d")
                
                    movieIds.append(i-1)
                    custIds.append(custInd)   
                    ratings.append(rating)
                    dates.append(int(time.mktime(t.timetuple()))) 
                    
            movieIds = numpy.array(movieIds, numpy.uint32)
            custIds = numpy.array(custIds, numpy.uint32)
            ratings = numpy.array(ratings, numpy.uint8)
            dates = numpy.array(dates, numpy.uint32)
            
            assert ratings.shape[0] == self.numRatings            
            
            numpy.savez(self.ratingFileName, movieIds, custIds, ratings, dates) 
            logging.debug("Saved ratings file as " + self.ratingFileName)
            
            pickle.dump(custIdDict, open(self.custDictFileName, 'wb'))
            logging.debug("Saved custIdDict as " + self.custDictFileName)
        else: 
            logging.debug("Ratings file " + str(self.ratingFileName) + " already processed")
Example #13
    def loadData():
        """
        Return the raw spectra and the MDS transformed data as well as the DataFrame
        for the MDS data. 
        """
        utilsLib = importr('utils')

        dataDir = PathDefaults.getDataDir() +  "metabolomic/"
        fileName = dataDir + "data.RMN.total.6.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        maxNMRIndex = 951
        X = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
        X = numpy.array(X).T

        #Load age and normalise (missing values are assigned the mean) 
        ages = numpy.array(df.rx(robjects.StrVector(["Age"]))).ravel()
        meanAge = numpy.mean(ages[numpy.logical_not(numpy.isnan(ages))])
        ages[numpy.isnan(ages)] = meanAge
        ages = Standardiser().standardiseArray(ages)

        Xs = X.copy()
        standardiser = Standardiser()
        Xs = standardiser.standardiseArray(X)

        fileName = dataDir + "data.sportsmen.log.AP.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        maxNMRIndex = 419
        X2 = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
        X2 = numpy.array(X2).T

        #Load the OPLS corrected files
        fileName = dataDir + "IGF1.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 22
        maxNMRIndex = 441
        Xopls1 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls1 = numpy.array(Xopls1).T

        fileName = dataDir + "cort.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 20
        maxNMRIndex = 439
        Xopls2 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls2 = numpy.array(Xopls2).T

        fileName = dataDir + "testo.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 22
        maxNMRIndex = 441
        Xopls3 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls3 = numpy.array(Xopls3).T

        #Let's load all the label data here
        labelNames = MetabolomicsUtils.getLabelNames()
        YList = MetabolomicsUtils.createLabelList(df, labelNames)
        
        return X, X2, Xs, (Xopls1, Xopls2, Xopls3), YList, ages, df
Example #14
 def getIterator(): 
     dataDir = PathDefaults.getDataDir() + "cluster/"
     
     nbUser = 10000 # set to 'None' to have all users
     nbPurchasesPerIt = 500 # set to 'None' to take all the purchases per date
     startingIteration = 300
     endingIteration = 600 # set to 'None' to have all iterations
     stepSize = 1    
     
     return itertools.islice(BemolData.getGraphIterator(dataDir, nbUser, nbPurchasesPerIt), startingIteration, endingIteration, stepSize)
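
getIterator windows the graph iterator with itertools.islice; the slicing semantics, on a toy iterable:

import itertools

# start=4, stop=12, step=2 lazily selects items 4, 6, 8 and 10.
print(list(itertools.islice(range(20), 4, 12, 2)))  # [4, 6, 8, 10]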
Example #15
 def profileClusterFromIterator(self):
     iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
     dataDir = PathDefaults.getDataDir() + "cluster/"
     #iterator = getBemolGraphIterator(dataDir)
     
     def run(): 
         clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
         print(timeList.cumsum(0))
         
     ProfileUtils.profile('run()', globals(), locals())
Example #16
    def testReadGraph(self):
        fileName = PathDefaults.getDataDir() + "test/simpleGraph.txt"

        graphReader = SimpleGraphReader()
        graph = graphReader.readFromFile(fileName)

        logging.debug((graph.getAllEdges()))

        self.assertEquals(graph.isUndirected(), True)
        self.assertEquals(graph.getNumVertices(), 5)
        self.assertEquals(graph.getNumEdges(), 4)

        self.assertEquals(graph.getEdge(0, 1), 1)
        self.assertEquals(graph.getEdge(2, 4), 1)
        self.assertEquals(graph.getEdge(2, 2), 1)
        self.assertEquals(graph.getEdge(4, 0), 1)

        #Now test reading a file with the same graph but vertices indexed differently
        fileName = PathDefaults.getDataDir() + "test/simpleGraph2.txt"
        graph = graphReader.readFromFile(fileName)

        self.assertEquals(graph.isUndirected(), True)
        self.assertEquals(graph.getNumVertices(), 5)
        self.assertEquals(graph.getNumEdges(), 4)

        self.assertEquals(graph.getEdge(0, 1), 1.1)
        self.assertEquals(graph.getEdge(2, 4), 1)
        self.assertEquals(graph.getEdge(2, 2), 1.6)
        self.assertEquals(graph.getEdge(4, 0), 1)

        #Now test a file with directed edges
        fileName = PathDefaults.getDataDir() + "test/simpleGraph3.txt"
        graph = graphReader.readFromFile(fileName)

        self.assertEquals(graph.isUndirected(), False)
        self.assertEquals(graph.getNumVertices(), 5)
        self.assertEquals(graph.getNumEdges(), 4)

        self.assertEquals(graph.getEdge(0, 1), 1)
        self.assertEquals(graph.getEdge(2, 4), 1)
        self.assertEquals(graph.getEdge(2, 2), 1)
        self.assertEquals(graph.getEdge(4, 0), 1)
Example #18
 def __init__(self):
     dataDir = PathDefaults.getDataDir() + "cluster/"
     nbUser = 2000 # set to 'None' to have all users
     nbPurchasesPerIt = 50 # set to 'None' to take all the purchases per date
     startingIteration = 20
     endingIteration = None # set to 'None' to have all iterations
     stepSize = 10    
     
     iterator = itertools.islice(BemolData.getGraphIterator(dataDir, nbUser, nbPurchasesPerIt), startingIteration, endingIteration, stepSize)
     self.iterator = iterator 
Example #19
    def testGetTrainIteratorFunc(self):
        dataFilename = PathDefaults.getDataDir() + "reference/author_document_count" 
        dataset = Static2IdValDataset(dataFilename)

        trainIterator = dataset.getTrainIteratorFunc()()      
        testIterator = dataset.getTestIteratorFunc()()
        
        for trainX in trainIterator: 
            testX = next(testIterator) 
            
            print(trainX.shape, trainX.nnz, testX.nnz)
            self.assertEquals(trainX.shape, testX.shape)
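
getTrainIteratorFunc returns a factory rather than an iterator, so each call yields a fresh pass over the data. A sketch of the pattern (the names here are illustrative, not the dataset's API):

def makeIteratorFunc(n):
    def iteratorFunc():
        return iter(range(n))
    return iteratorFunc

trainIteratorFunc = makeIteratorFunc(3)
print(list(trainIteratorFunc()))  # [0, 1, 2]
print(list(trainIteratorFunc()))  # a fresh iterator on every call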
Example #20
    def testCreateIndicatorLabels(self):
        metaUtils = MetabolomicsUtils()
        X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()
        
        YCortisol = YCortisol[numpy.logical_not(numpy.isnan(YCortisol))]
        YCortisolIndicators = metaUtils.createIndicatorLabel(YCortisol, metaUtils.boundsDict["Cortisol"])
        
        YTesto = YTesto[numpy.logical_not(numpy.isnan(YTesto))]
        YTestoIndicators = metaUtils.createIndicatorLabel(YTesto, metaUtils.boundsDict["Testosterone"])
        
        YIgf1 = YIgf1[numpy.logical_not(numpy.isnan(YIgf1))]
        YIgf1Indicators = metaUtils.createIndicatorLabel(YIgf1, metaUtils.boundsDict["IGF1"])

        s = numpy.sum(YCortisolIndicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        s = numpy.sum(YTestoIndicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        s = numpy.sum(YIgf1Indicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        #Now compare to those labels in the file
        X, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()
        dataDir = PathDefaults.getDataDir() +  "metabolomic/"
        fileName = dataDir + "data.RMN.total.6.txt"
        data = pandas.read_csv(fileName, delimiter=",") 

        YCortisolIndicators = metaUtils.createIndicatorLabel(YCortisol, metaUtils.boundsDict["Cortisol"])
        YCortisolIndicators2 = numpy.array(data[["Ind.Cortisol.1", "Ind.Cortisol.2", "Ind.Cortisol.3"]])
        
        for i in range(YCortisolIndicators.shape[0]): 
            if not numpy.isnan(YCortisol[i]) and not numpy.isnan(YCortisolIndicators2[i, :]).any(): 
                #nptst.assert_almost_equal(YCortisolIndicators2[i, :], YCortisolIndicators[i, :])
                pass 
        
        YTestoIndicators = metaUtils.createIndicatorLabel(YTesto, metaUtils.boundsDict["Testosterone"])
        YTestoIndicators2 = numpy.array(data[["Ind.Testo.1", "Ind.Testo.2", "Ind.Testo.3"]])
        
        for i in range(YTestoIndicators.shape[0]): 
            if not numpy.isnan(YTesto[i]) and not numpy.isnan(YTestoIndicators2[i, :]).any(): 
                #print(i, YTesto[i])
                nptst.assert_almost_equal(YTestoIndicators2[i, :], YTestoIndicators[i, :])
                
        YIgf1Indicators = metaUtils.createIndicatorLabel(YIgf1, metaUtils.boundsDict["IGF1"])
        YIgf1Indicators2 = numpy.array(data[["Ind.IGF1.1", "Ind.IGF1.2", "Ind.IGF1.3"]])
        
        for i in range(YIgf1Indicators.shape[0]): 
            if not numpy.isnan(YIgf1[i]) and not numpy.isnan(YIgf1Indicators2[i, :]).any(): 
                #print(i, YIgf1[i])
                #nptst.assert_almost_equal(YIgf1Indicators2[i, :], YIgf1Indicators[i, :])
                pass
Example #21
def processParkinsonsDataset(name, numRealisations):
    numpy.random.seed(21)
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = dataDir + name + ".data"
    

    XY = numpy.loadtxt(fileName, delimiter=",", skiprows=1)
    inds = sorted(set(range(XY.shape[1])) - {5, 6})
    X = XY[:, inds]

    y1 = XY[:, 5]
    y2 = XY[:, 6]
    #We don't keep whole collections of patients
    split = 0.5

    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)

    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "-motor/"
    preprocessSave(X, y1, outputDir, idx)
    
    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "-total/"
    preprocessSave(X, y2, outputDir, idx)
Example #22
    def testBayesError(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        Cs = 2**numpy.arange(-5, 5, dtype=float)
        gammas = 2**numpy.arange(-5, 5, dtype=float)

        bestError = 1 

        for C in Cs:
            for gamma in gammas:
                svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
                svm.learnModel(trainX, trainY)
                predY, decisionsY = svm.predict(gridX, True)
                decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

                predY, decisionsY = svm.predict(testX, True)
                error2 = Evaluator.binaryError(testY, predY)
                print(error, error2)

                if error < bestError:
                    bestError = error
                    bestC = C
                    bestGamma = gamma

        svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
        svm.learnModel(trainX, trainY)
        predY, decisionsY = svm.predict(gridX, True)

        plt.figure(0)
        plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
        plt.colorbar()

        plt.figure(1)
        plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
        plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
        plt.legend()
        plt.show()
Example #23
    def testReadFromCsvFile(self):
        dir = PathDefaults.getDataDir() + "test/"
        fileName = dir + "examplesList1.csv"

        examplesList = ExamplesList.readFromCsvFile(fileName)

        X = examplesList.getDataField(examplesList.getDefaultExamplesName())
        y = examplesList.getDataField(examplesList.getLabelsName())

        X2 = numpy.array([[10, 2], [4, -6], [24, 6]])
        y2 = numpy.array([[-1], [1], [-1]])

        self.assertTrue((X==X2).all())
        self.assertTrue((y==y2).all())
Example #24
def cluster():
    k1 = 20 # numCluster to learn
    k2 = 40 # numEigenVector kept

    dir = PathDefaults.getDataDir() + "cluster/"
    graphIterator = getBemolGraphIterator(dir)
    #===========================================
    # cluster
    print("compute clusters")
    clusterer = IterativeSpectralClustering(k1, k2)
    clustersList = clusterer.clusterFromIterator(graphIterator, True)

    for i in range(len(clustersList)):
        clusters = clustersList[i]
        print(clusters)
Example #25
    def testRunSimulation(self):
        egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        alterFileName = PathDefaults.getDataDir()  + "infoDiffusion/AlterData.csv"
        numVertices = 1000
        infoProb = 0.1
        p = 0.1
        neighbours = 10

        generator = SmallWorldGenerator(p, neighbours)
        graph = SparseGraph(VertexList(numVertices, 0))
        graph = generator.generate(graph)
        
        CVal = 1.0
        kernel = "linear"
        kernelParamVal = 0.0
        errorCost = 0.5
        folds = 6
        sampleSize = 1000

        maxIterations = 5

        self.svmEgoSimulator.trainClassifier(CVal, kernel, kernelParamVal, errorCost, sampleSize)
        self.svmEgoSimulator.generateRandomGraph(egoFileName, alterFileName, infoProb, graph)
        self.svmEgoSimulator.runSimulation(maxIterations)
Example #26
    def readHIVGraph(self, undirected=True, indicators=True):
        """
        We will use pacdate5389.csv which contains the data of infection. The undirected
        parameter instructs whether to create an undirected graph. If indicators
        is true then categorical variables are turned into collections of indicator
        ones. 
        """
        converters = {1: CsvConverters.dateConv, 3:CsvConverters.dateConv, 5:CsvConverters.detectionConv, 6:CsvConverters.provConv, 8: CsvConverters.dateConv }
        converters[9] = CsvConverters.genderConv
        converters[10] = CsvConverters.orientConv
        converters[11] = CsvConverters.numContactsConv
        converters[12] = CsvConverters.numContactsConv
        converters[13] = CsvConverters.numContactsConv

        def nanProcessor(X):
            means = numpy.zeros(X.shape[1])
            for i in range(X.shape[1]):
                if numpy.sum(numpy.isnan(X[:, i])) > 0:
                    logging.info("No. missing values in " + str(i) + "th column: " + str(numpy.sum(numpy.isnan(X[:, i]))))
                means[i] = numpy.mean(X[:, i][numpy.isnan(X[:, i]) == False])
                X[numpy.isnan(X[:, i]), i] = means[i]
            return X 

        idIndex = 0
        featureIndices = list(converters.keys())
        multiGraphCsvReader = MultiGraphCsvReader(idIndex, featureIndices, converters, nanProcessor)

        dataDir = PathDefaults.getDataDir()
        vertexFileName = dataDir + "HIV/alldata.csv"
        edgeFileNames = [dataDir + "HIV/grafdet2.csv", dataDir + "HIV/infect2.csv"]

        sparseMultiGraph = multiGraphCsvReader.readGraph(vertexFileName, edgeFileNames, undirected, delimiter="\t")

        #For learning purposes we will convert categorical variables into a set of
        #indicator features
        if indicators: 
            logging.info("Converting categorial features")
            vList = sparseMultiGraph.getVertexList()
            V = vList.getVertices(list(range(vList.getNumVertices())))
            catInds = [2, 3]
            generator = FeatureGenerator()
            V = generator.categoricalToIndicator(V, catInds)
            vList.replaceVertices(V)

        logging.info("Created " + str(sparseMultiGraph.getNumVertices()) + " examples with " + str(sparseMultiGraph.getVertexList().getNumFeatures()) + " features")

        return sparseMultiGraph
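
nanProcessor imputes each column's missing values with the mean of that column's observed entries. A vectorised numpy sketch of the same idea:

import numpy

def imputeColumnMeans(X):
    means = numpy.nanmean(X, axis=0)          # per-column mean, ignoring NaNs
    rows, cols = numpy.where(numpy.isnan(X))
    X[rows, cols] = means[cols]
    return X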
Example #27
    def __init__(self, maxIter=None, iterStartTimeStamp=None): 
        """
        Return a training and test set for netflix based on the time each 
        rating was made. There are 62 iterations. 
        """ 
        self.timeStep = timedelta(30).total_seconds()  
        
        #startDate is used to convert dates into ints 
        #self.startDate = datetime(1998,1,1)
        #self.endDate = datetime(2005,12,31)
        
        #iterStartDate is the starting date of the iterator 
        if iterStartTimeStamp is not None: 
            self.iterStartTimeStamp = iterStartTimeStamp
        else: 
            self.iterStartTimeStamp = time.mktime(datetime(2001,1,1).timetuple()) 

        self.startMovieID = 1 
        self.endMovieID = 17770
        
        self.numMovies = 17770
        self.numRatings = 100480507
        self.numProbeMovies = 16938
        self.numProbeRatings = 1408395
        self.numCustomers = 480189
        
        outputDir = PathDefaults.getOutputDir() + "recommend/netflix/"

        if not os.path.exists(outputDir): 
            os.mkdir(outputDir)
                
        self.ratingFileName = outputDir + "data.npz"  
        self.custDictFileName = outputDir + "custIdDict.pkl"
        self.probeFileName = PathDefaults.getDataDir() + "netflix/probe.txt"    
        self.testRatingsFileName = outputDir + "test_data.npz"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"
        
        self.maxIter = maxIter 
        self.trainSplit = 4.0/5 

        self.processRatings()
        #self.processProbe()
        self.splitDataset()        
        self.loadProcessedData()
        
        if self.maxIter is not None: 
            logging.debug("Maximum number of iterations: " + str(self.maxIter))
Example #28
    def testPredict2(self):
        #Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]
        
        y = y*2 - 1 

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]
        
        testY = testY*2-1

        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        numTrees = 5
        minSplit = 50 
        maxDepths = range(3, 10)
        trainAucs = numpy.array([0.7252582, 0.7323278, 0.7350289, 0.7372529, 0.7399985, 0.7382176, 0.7395104, 0.7386347])
        testAucs = numpy.array([0.6806122, 0.6851614, 0.6886183, 0.6904147, 0.6897266, 0.6874600, 0.6875980, 0.6878801])

        i = 0
        
        #The results are approximately the same, but not exactly 
        for maxDepth in maxDepths:
            treeRankForest = TreeRankForest(self.leafRanklearner)
            treeRankForest.setMaxDepth(maxDepth)
            treeRankForest.setMinSplit(minSplit)
            treeRankForest.setNumTrees(numTrees)
            treeRankForest.learnModel(X, y)
            trainScores = treeRankForest.predict(X)
            testScores = treeRankForest.predict(testX)

            print(Evaluator.auc(trainScores, y), Evaluator.auc(testScores, testY))

            self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 1)
            self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
            i+=1
Example #29
    def testPredict2(self):
        #We play around with parameters to maximise AUC on the IGF1_0-Haar data
        dataDir = PathDefaults.getDataDir()
        fileName = dataDir + "IGF1_0-Haar.npy"

        XY = numpy.load(fileName)
        X = XY[:, 0:XY.shape[1]-1]
        y = XY[:, XY.shape[1]-1].ravel()

        weight = numpy.bincount(numpy.array(y, int))[0]/float(y.shape[0])
        #weight = 0.5
        #weight = 0.9

        folds = 3
        randomForest = RandomForest()
        randomForest.setWeight(weight)
        randomForest.setMaxDepth(50)
        #randomForest.setMinSplit(100)
        mean, var = randomForest.evaluateCv(X, y, folds, Evaluator.auc)
        logging.debug("AUC = " + str(mean))
        logging.debug("Var = " + str(var))
Example #30
def computeLearningRates(datasetNames, numProcesses, fileNameSuffix, learnerName, sampleSizes, foldsSet): 
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    learner, loadMethod, dataDir, outputDir, paramDict = getSetup(learnerName, dataDir, outputDir, numProcesses)
    
    for i in range(len(datasetNames)):
        logging.debug("Learning using dataset " + datasetNames[i][0])
        outfileName = outputDir + datasetNames[i][0] + fileNameSuffix

        fileLock = FileLock(outfileName + ".npz")
        if not fileLock.isLocked() and not fileLock.fileExists():
            fileLock.lock()
            
            numRealisations = datasetNames[i][1]  
            gridShape = [numRealisations, sampleSizes.shape[0]]
            gridShape.extend(list(learner.gridShape(paramDict)))   
            gridShape = tuple(gridShape)            
            
            betaGrids = numpy.zeros(gridShape) 
            
            for k in range(sampleSizes.shape[0]):
                sampleSize = sampleSizes[k]
                
                logging.debug("Using sample size " + str(sampleSize))
                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    trainX, trainY, testX, testY = loadMethod(dataDir, datasetNames[i][0], j)

                    numpy.random.seed(21)
                    trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
                    validX = trainX[trainInds,:]
                    validY = trainY[trainInds]

                    betaGrids[j, k, :] = learner.learningRate(validX, validY, foldsSet, paramDict)
            
            numpy.savez(outfileName, betaGrids)
            logging.debug("Saved results as file " + outfileName + ".npz")
            fileLock.unlock()
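
FileLock is a project helper that stops two processes from computing the same results file. The underlying lock-file idea, sketched with only the standard library:

import os

lockPath = "results.npz.lock"  # hypothetical path
try:
    fd = os.open(lockPath, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except FileExistsError:
    print("locked by another process; skipping")
else:
    try:
        pass  # compute and save results here
    finally:
        os.close(fd)
        os.remove(lockPath)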
Example #31
    def testReadFile(self): 
        eCsvReader = EgoCsvReader()
        #logging.debug(os.getcwd())
        dir = PathDefaults.getDataDir()
        fileName = dir + "test/TestData.csv"
        questionIds = [("Q14", 0), ("Q12", 1) , ("Q2", 0)]

        missing = 1
        (X, titles) = eCsvReader.readFile(fileName, questionIds, missing)
        
        X2 = numpy.zeros((10, 3))
        X2[0, :] = [0.621903386,0.608560354,0.33290608]
        X2[1, :] = [0.318548924,0.402390713,0.129956291]
        X2[2, :] = [0.956658404,0.344317772,0.680386616]
        X2[3, :] = [0.267607668,0.119647983,0.116893619]
        X2[4, :] = [0.686589498,0.402390713,0.426789174]
        X2[5, :] = [0.373575769,0.025846789,0.797125005]
        X2[6, :] = [0.493793948,0.402390713,0.990507109]
        X2[7, :] = [0.524534585,0.525169385,0.772917183]
        X2[8, :] = [0.339055395,0.402390713,0.684788001]
        X2[9, :] = [0.997774183,0.790801992,0.643252009]
        
        self.assertAlmostEquals(numpy.linalg.norm(X-X2),0, places=6)
Example #32
    def testReadGraph(self):

        dir = PathDefaults.getDataDir()
        vertexFileName = dir + "test/deggraf10.csv"
        edgeFileNames = [dir + "test/testEdges1.csv", dir + "test/testEdges2.csv"]

        def genderConv(x):
            genderDict = {'"M"': 0, '"F"': 1}
            return genderDict[x]

        def orientConv(x):
            orientDict = {'"HT"': 0, '"HB"': 1}
            return orientDict[x]

        def fteConv(x):
            fteDict = {'"INTER"': 0, '"CONTA"': 1}
            return fteDict[x]

        def provConv(x):
            provDict = {'"CH"': 0, '"SC"': 1, '"SS"': 2, '"LH"' : 3, '"GM"' : 4}
            return provDict[x]

        converters = {3: genderConv, 4: orientConv, 5:fteConv, 6:provConv}

        idIndex = 0
        featureIndices = list(range(1,11))
        multiGraphCsvReader = MultiGraphCsvReader(idIndex, featureIndices, converters)
        sparseMultiGraph = multiGraphCsvReader.readGraph(vertexFileName, edgeFileNames)

        vertexValues = numpy.zeros((10, 10))
        vertexValues[0, :] = numpy.array([1986, 32, 0, 0, 0, 0, 0, 3, 3, 1])
        vertexValues[1, :] = numpy.array([1986, 27, 0, 0, 0, 1, 0, 4, 4, 1])
        vertexValues[2, :] = numpy.array([1986, 20, 0, 0, 0, 1, 0, 1, 1, 0])
        vertexValues[3, :] = numpy.array([1986, 20, 0, 0, 0, 1, 0, 2, 2, 0])
        vertexValues[4, :] = numpy.array([1986, 20, 0, 0, 0, 2, 0, 5, 5, 0])
        vertexValues[5, :] = numpy.array([1986, 28, 0, 0, 0, 3, 0, 1, 1, 1])
        vertexValues[6, :] = numpy.array([1986, 26, 1, 0, 1, 3, 6, 1, 1, 1])
        vertexValues[7, :] = numpy.array([1986, 35, 0, 0, 0, 2, 0, 0, 0, 0])
        vertexValues[8, :] = numpy.array([1986, 37, 0, 1, 0, 3, 0, 5, 3, 0])
        vertexValues[9, :] = numpy.array([1986, 40, 0, 1, 0, 4, 0, 3, 3, 0])

        #Check if the values of the vertices are correct 
        for i in range(sparseMultiGraph.getNumVertices()):
            self.assertTrue((sparseMultiGraph.getVertex(i) == vertexValues[i]).all())

        #Now check edges
        edges = numpy.zeros((10, 3))
        edges[0, :] = numpy.array([4, 0, 0])
        edges[1, :] = numpy.array([4, 1, 0])
        edges[2, :] = numpy.array([5, 3, 0])
        edges[3, :] = numpy.array([7, 1, 0])
        edges[4, :] = numpy.array([8, 0, 0])
        edges[5, :] = numpy.array([4, 1, 1])
        edges[6, :] = numpy.array([8, 1, 1])
        edges[7, :] = numpy.array([8, 2, 1])
        edges[8, :] = numpy.array([8, 4, 1])
        edges[9, :] = numpy.array([9, 0, 1])

        self.assertTrue((sparseMultiGraph.getAllEdges() == edges).all())

        #Now test directed graphs
        sparseMultiGraph = multiGraphCsvReader.readGraph(vertexFileName, edgeFileNames, False)

        for i in range(sparseMultiGraph.getNumVertices()):
            self.assertTrue((sparseMultiGraph.getVertex(i) == vertexValues[i]).all())


        edges = numpy.zeros((10, 3))
        edges[0, :] = numpy.array([0, 4, 0])
        edges[1, :] = numpy.array([1, 7, 0])
        edges[2, :] = numpy.array([3, 5, 0])
        edges[3, :] = numpy.array([4, 1, 0])
        edges[4, :] = numpy.array([8, 0, 0])
        edges[5, :] = numpy.array([0, 9, 1])
        edges[6, :] = numpy.array([1, 8, 1])
        edges[7, :] = numpy.array([2, 8, 1])
        edges[8, :] = numpy.array([4, 1, 1])
        edges[9, :] = numpy.array([8, 4, 1])
        
        self.assertTrue((sparseMultiGraph.getAllEdges() == edges).all())
Example #33
    def testMDLGraphsReader(self):
        reader = MDLGraphsReader()
        dir = PathDefaults.getDataDir()
        fileName = dir + "test/testGraphs.mdl"

        graphs = reader.readFromFile(fileName)
        self.assertEquals(len(graphs), 2)

        #Check the first graph
        self.assertEquals(graphs[0].getNumVertices(), 26)
        self.assertEquals(graphs[0].getNumEdges(), 28)

        def getEdge(graph, i, j):
            return graph.getEdge(i - 1, j - 1)

        self.assertEquals(getEdge(graphs[0], 1, 6), 1)
        self.assertEquals(getEdge(graphs[0], 1, 2), 1)
        self.assertEquals(getEdge(graphs[0], 1, 18), 1)
        self.assertEquals(getEdge(graphs[0], 2, 3), 1)
        self.assertEquals(getEdge(graphs[0], 2, 19), 1)
        self.assertEquals(getEdge(graphs[0], 3, 4), 1)
        self.assertEquals(getEdge(graphs[0], 3, 20), 1)
        self.assertEquals(getEdge(graphs[0], 4, 10), 1)
        self.assertEquals(getEdge(graphs[0], 4, 5), 1)
        self.assertEquals(getEdge(graphs[0], 5, 6), 1)
        self.assertEquals(getEdge(graphs[0], 5, 7), 1)
        self.assertEquals(getEdge(graphs[0], 6, 21), 1)
        self.assertEquals(getEdge(graphs[0], 7, 8), 1)
        self.assertEquals(getEdge(graphs[0], 7, 22), 1)
        self.assertEquals(getEdge(graphs[0], 8, 9), 1)
        self.assertEquals(getEdge(graphs[0], 8, 23), 1)
        self.assertEquals(getEdge(graphs[0], 9, 14), 1)
        self.assertEquals(getEdge(graphs[0], 9, 10), 1)
        self.assertEquals(getEdge(graphs[0], 10, 11), 1)
        self.assertEquals(getEdge(graphs[0], 11, 12), 1)
        self.assertEquals(getEdge(graphs[0], 11, 24), 1)
        self.assertEquals(getEdge(graphs[0], 12, 13), 1)
        self.assertEquals(getEdge(graphs[0], 12, 25), 1)
        self.assertEquals(getEdge(graphs[0], 13, 14), 1)
        self.assertEquals(getEdge(graphs[0], 13, 15), 1)
        self.assertEquals(getEdge(graphs[0], 14, 26), 1)
        self.assertEquals(getEdge(graphs[0], 15, 16), 1)
        self.assertEquals(getEdge(graphs[0], 15, 17), 1)

        #Check the second graph
        self.assertEquals(graphs[1].getNumVertices(), 19)
        self.assertEquals(graphs[1].getNumEdges(), 20)

        self.assertEquals(getEdge(graphs[1], 1, 10), 1)
        self.assertEquals(getEdge(graphs[1], 1, 2), 1)
        self.assertEquals(getEdge(graphs[1], 1, 14), 1)
        self.assertEquals(getEdge(graphs[1], 2, 3), 1)
        self.assertEquals(getEdge(graphs[1], 2, 15), 1)
        self.assertEquals(getEdge(graphs[1], 3, 8), 1)
        self.assertEquals(getEdge(graphs[1], 3, 4), 1)
        self.assertEquals(getEdge(graphs[1], 4, 5), 1)
        self.assertEquals(getEdge(graphs[1], 4, 16), 1)
        self.assertEquals(getEdge(graphs[1], 5, 6), 1)
        self.assertEquals(getEdge(graphs[1], 5, 17), 1)
        self.assertEquals(getEdge(graphs[1], 6, 7), 1)
        self.assertEquals(getEdge(graphs[1], 6, 18), 1)
        self.assertEquals(getEdge(graphs[1], 7, 8), 1)
        self.assertEquals(getEdge(graphs[1], 8, 9), 1)
        self.assertEquals(getEdge(graphs[1], 9, 10), 1)
        self.assertEquals(getEdge(graphs[1], 9, 11), 1)
        self.assertEquals(getEdge(graphs[1], 10, 19), 1)
        self.assertEquals(getEdge(graphs[1], 11, 12), 1)
        self.assertEquals(getEdge(graphs[1], 11, 13), 1)
Example #34
 def testGetDataDir(self):
     print((PathDefaults.getDataDir()))
Example #35
    def testReadFromFile(self):
        vertex1Indices = [0, 2, 3, 4, 5]
        vertex2Indices = [1, 6, 7, 8, 9]

        def genderConv(x):
            genderDict = {'"M"': 0, '"F"': 1}
            return genderDict[x]

        def orientConv(x):
            orientDict = {'"HT"': 0, '"HB"': 1}
            return orientDict[x]

        converters = {2: genderConv, 6: genderConv, 3:orientConv, 7:orientConv}

        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, converters)

        dir = PathDefaults.getDataDir()
        fileName = dir + "test/infect5.csv"

        graph = csvGraphReader.readFromFile(fileName)

        self.assertTrue((graph.getVertex(0) == numpy.array([0, 0, 28, 1])).all())
        self.assertTrue((graph.getVertex(1) == numpy.array([1, 0, 26, 1])).all())
        self.assertTrue((graph.getVertex(2) == numpy.array([0, 1, 42, 2])).all())
        self.assertTrue((graph.getVertex(3) == numpy.array([1, 0, 33, 1])).all())
        self.assertTrue((graph.getVertex(4) == numpy.array([0, 1, 35, 37])).all())

        self.assertTrue(graph.getEdge(0, 1) == 1)
        self.assertTrue(graph.getEdge(2, 3) == 1)
        self.assertTrue(graph.getEdge(4, 6) == 1)
        self.assertTrue(graph.getEdge(6, 7) == 1)
        self.assertTrue(graph.getEdge(5, 8) == 1)

        self.assertEquals(graph.getNumEdges(), 5)
        self.assertTrue(graph.isUndirected())

        #Test a directed graph
        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, converters, undirected=False)
        graph = csvGraphReader.readFromFile(fileName)

        self.assertTrue(graph.getEdge(1, 0) == None)
        self.assertTrue(graph.getEdge(3, 2) == None)
        self.assertTrue(graph.getEdge(6, 4) == None)
        self.assertTrue(graph.getEdge(7, 6) == None)
        self.assertTrue(graph.getEdge(8, 5) == None)

        self.assertEquals(graph.getNumEdges(), 5)
        self.assertFalse(graph.isUndirected())

        #Test graph with no vertex information
        vertex1Indices = [0]
        vertex2Indices = [1]
        fileName = dir + "test/infect5-0.csv"
        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, {})
        graph = csvGraphReader.readFromFile(fileName)

        self.assertTrue(graph.getEdge(0, 1) == 1)
        self.assertTrue(graph.getEdge(2, 3) == 1)
        self.assertTrue(graph.getEdge(4, 6) == 1)
        self.assertTrue(graph.getEdge(6, 7) == 1)
        self.assertTrue(graph.getEdge(5, 8) == 1)

        self.assertEquals(graph.getNumEdges(), 5)
        self.assertTrue(graph.isUndirected())
        self.assertEquals(graph.getVertexList().getNumFeatures(), 0)