def testLoadData(self):
    """Checks that ARFFAdapter.loadData() reproduces the known first rows
    (points and target values) of the bundled liver-disorders dataset."""
    expectedPoints = [[0.307143, 0.130137, 0.050000],
                      [0.365584, 0.105479, 0.050000],
                      [0.178571, 0.201027, 0.050000],
                      [0.272078, 0.145548, 0.050000],
                      [0.318831, 0.065411, 0.050000],
                      [0.190260, 0.086986, 0.050000],
                      [0.190260, 0.062329, 0.072500],
                      [0.120130, 0.068493, 0.072500],
                      [0.225325, 0.056164, 0.072500],
                      [0.213636, 0.050000, 0.072500]]
    expectedValues = [-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0]
    container = ARFFAdapter(
        pathlocal + '/datasets/liver-disorders_normalized.arff.gz').loadData()
    points = container.getPoints()
    values = container.getValues()
    dim = len(expectedPoints[0])
    rowVector = DataVector(dim)
    for rowIdx, expectedRow in enumerate(expectedPoints):
        points.getRow(rowIdx, rowVector)
        for colIdx, expected in enumerate(expectedRow):
            # exact comparison is only valid for double-precision builds
            if cvar.USING_DOUBLE_PRECISION:
                self.assertEqual(rowVector[colIdx], expected)
            else:
                self.assertAlmostEqual(rowVector[colIdx], expected)
        self.assertEqual(values[rowIdx], expectedValues[rowIdx])
def testLoadData(self):
    """Verifies that loadData() returns the known points and target values
    of the bundled liver-disorders sample file."""
    referencePoints = [[0.307143, 0.130137, 0.050000],
                       [0.365584, 0.105479, 0.050000],
                       [0.178571, 0.201027, 0.050000],
                       [0.272078, 0.145548, 0.050000],
                       [0.318831, 0.065411, 0.050000],
                       [0.190260, 0.086986, 0.050000],
                       [0.190260, 0.062329, 0.072500],
                       [0.120130, 0.068493, 0.072500],
                       [0.225325, 0.056164, 0.072500],
                       [0.213636, 0.050000, 0.072500]]
    referenceValues = [-1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                       -1.0, -1.0, -1.0, -1.0]
    container = ARFFAdapter(
        pathlocal + '/datasets/liver-disorders_normalized.arff.gz').loadData()
    points = container.getPoints()
    values = container.getValues()
    dim = len(referencePoints[0])
    rowVector = DataVector(dim)
    # exact equality only holds when the library was built with doubles
    compare = (self.assertEqual if cvar.USING_DOUBLE_PRECISION
               else self.assertAlmostEqual)
    for rowIdx in range(len(referencePoints)):
        points.getRow(rowIdx, rowVector)
        for colIdx in range(dim):
            compare(rowVector[colIdx], referencePoints[rowIdx][colIdx])
        self.assertEqual(values[rowIdx], referenceValues[rowIdx])
def fromJson(cls, jsonObject):
    """Restore a data container from its JSON dictionary representation.

    @param jsonObject: dict mapping category names (e.g. 'train', 'test')
           to specifications containing at least a 'filename' entry
    @return: the combined data container built from all category files
    """
    # FIX: the original imported ARFFAdapter inside the loop body although
    # the name is already needed for the mandatory 'train' category; the
    # import is hoisted so the function does not rely on an outer binding.
    from pysgpp.extensions.datadriven.data.ARFFAdapter import ARFFAdapter
    # initiate with train data, because they are always there
    specification = jsonObject['train']
    resultContainer = ARFFAdapter.ARFFAdapter(
        specification['filename']).loadData('train')
    # load data for the remaining categories and merge them in
    for category, specification in jsonObject.items():
        if category not in ('module', 'train'):
            container = ARFFAdapter.ARFFAdapter(
                specification['filename']).loadData(category)
            resultContainer = resultContainer.combine(container)
    return resultContainer
def testSave(self):
    """Round-trip test: write known data with save() and check that
    loadData() reads back exactly the same points and values."""
    filename = pathlocal + '/datasets/saving.arff.gz'
    referencePoints = [[0.307143, 0.130137, 0.050000],
                       [0.365584, 0.105479, 0.050000],
                       [0.178571, 0.201027, 0.050000],
                       [0.272078, 0.145548, 0.050000],
                       [0.318831, 0.065411, 0.050000],
                       [0.190260, 0.086986, 0.050000],
                       [0.190260, 0.062329, 0.072500],
                       [0.120130, 0.068493, 0.072500],
                       [0.225325, 0.056164, 0.072500],
                       [0.213636, 0.050000, 0.072500]]
    referenceValues = [-1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                       -1.0, -1.0, -1.0, -1.0]
    attributes = {"x0": "NUMERIC", "x1": "NUMERIC",
                  "x2": "NUMERIC", "class": "NUMERIC"}
    size = len(referencePoints)
    dim = len(referencePoints[0])
    # build the DataMatrix that will be written out
    rowVector = DataVector(dim)
    matrix = DataMatrix(size, dim)
    for rowIdx in xrange(size):
        for colIdx in xrange(dim):
            rowVector[colIdx] = referencePoints[rowIdx][colIdx]
        matrix.setRow(rowIdx, rowVector)
    adapter = ARFFAdapter(filename)
    adapter.save(matrix, referenceValues, attributes)
    # read the file back and compare against the reference data
    (points, values) = adapter.loadData().getPointsValues()
    readVector = DataVector(dim)
    for rowIdx in xrange(size):
        points.getRow(rowIdx, readVector)
        for colIdx in xrange(dim):
            if cvar.USING_DOUBLE_PRECISION:
                self.assertEqual(readVector[colIdx],
                                 referencePoints[rowIdx][colIdx])
            else:
                self.assertAlmostEqual(readVector[colIdx],
                                       referencePoints[rowIdx][colIdx])
        self.assertEqual(values[rowIdx], referenceValues[rowIdx])
    os.remove(filename)
def testLoadSpecification(self):
    """Checks that loadSpecification() returns the expected attribute
    names and types of the liver-disorders file."""
    expected = {"x0": "NUMERIC", "x1": "NUMERIC",
                "x2": "NUMERIC", "class": "NUMERIC"}
    adapter = ARFFAdapter(
        pathlocal + '/datasets/liver-disorders_normalized.arff.gz')
    loaded = adapter.loadSpecification().getAttributes()
    self.assertEqual(len(loaded), len(expected))
    for key, value in loaded.items():
        self.assertEqual(value, expected[key])
def withTestingDataFromARFFFile(self, filename):
    """
    Signals to use data from ARFF file for testing dataset
    @param filename: Filename where to read the data from
    @return: LearnerBuilder object itself
    """
    newContainer = ARFFAdapter(filename).loadData(DataContainer.TEST_CATEGORY)
    existing = self._learner.dataContainer
    # merge with any data already attached to the learner
    if existing is not None:
        newContainer = existing.combine(newContainer)
    self._learner.setDataContainer(newContainer)
    return self
def testLoadSpecification(self):
    """Loads the dataset specification and compares the attribute map
    against the known schema of the liver-disorders file."""
    knownSchema = {"x0": "NUMERIC",
                   "x1": "NUMERIC",
                   "x2": "NUMERIC",
                   "class": "NUMERIC"}
    spec = ARFFAdapter(
        pathlocal + '/datasets/liver-disorders_normalized.arff.gz'
    ).loadSpecification()
    actual = spec.getAttributes()
    self.assertEqual(len(actual), len(knownSchema))
    for name in actual.keys():
        self.assertEqual(actual[name], knownSchema[name])
def setUp(self):
    """Load the three fold files, merge them into one data container and
    build the FilesFoldingPolicy under test.

    Also prepares the flat reference sequences self.points / self.values
    (values are the points reversed) used by the test methods.
    """
    files = ['/datasets/foldf_fold0.arff',
             '/datasets/foldf_fold1.arff',
             '/datasets/foldf_fold2.arff']
    # FIX: removed the unused local `datasets` list and replaced the
    # manual fileCounter bookkeeping with enumerate().
    self.dataContainer = ARFFAdapter(
        pathlocal + files[0]).loadData("train0")
    for fileCounter, fname in enumerate(files[1:], start=1):
        self.dataContainer = self.dataContainer.combine(
            ARFFAdapter(pathlocal + fname).loadData(
                "train" + str(fileCounter)))
    self.policy = FilesFoldingPolicy(self.dataContainer)
    self.points = list(range(9))
    self.values = list(range(9))
    self.values.reverse()
def setUp(self):
    """Wire up a Classifier with training data, folding policy, a regular
    level-2 linear grid, a Laplace-regularized training specification, a
    non-adaptive stop policy and a reusable CG solver."""
    self.classifier = Classifier()
    dataContainer = ARFFAdapter(
        pathlocal + "/datasets/classifier.train.arff").loadData()
    self.classifier.setDataContainer(dataContainer)
    self.classifier.setFoldingPolicy(FoldingPolicy(dataContainer))
    dim = 2
    level = 2
    grid = Grid.createLinearGrid(dim)
    grid.createGridGenerator().regular(level)
    self.classifier.setGrid(grid)
    self.classifier.setLearnedKnowledge(LearnedKnowledge())
    spec = TrainingSpecification()
    spec.setL(0.00001)  # regularization parameter lambda
    spec.setCOperator(createOperationLaplace(grid))
    self.classifier.setSpecification(spec)
    stopPolicy = TrainingStopPolicy()
    stopPolicy.setAdaptiveIterationLimit(0)
    self.classifier.setStopPolicy(stopPolicy)
    solver = CGSolver()
    # solver.attachEventController(InfoToScreen())
    solver.setImax(500)
    solver.setReuse(True)
    self.classifier.setSolver(solver)
def withTrainingDataFromARFFFile(self, filename, name="train"):
    """
    Signals to use data from ARFF file for training dataset
    @param filename: Filename where to read the data from
    @param name: Category name, default: "train"
    @return: builder object itself
    """
    dataContainer = ARFFAdapter(filename).loadData(name)
    # FIX: compare against None with `is not` (PEP 8), not `!=`
    if self.__learner.dataContainer is not None:
        self.__learner.setDataContainer(
            self.__learner.dataContainer.combine(dataContainer))
    else:
        self.__learner.setDataContainer(dataContainer)
    return self
def withTestingDataFromARFFFile(self, filename):
    """
    Signals to use data from ARFF file for testing dataset
    @param filename: Filename where to read the data from
    @return: builder object itself
    """
    dataContainer = ARFFAdapter(filename).loadData(DataContainer.TEST_CATEGORY)
    # FIX: compare against None with `is not` (PEP 8), not `!=`
    if self.__learner.dataContainer is not None:
        self.__learner.setDataContainer(
            self.__learner.dataContainer.combine(dataContainer))
    else:
        self.__learner.setDataContainer(dataContainer)
    return self
def testSave(self):
    """Writes reference data through ARFFAdapter.save(), reloads the file
    and asserts the round trip preserves points and values exactly."""
    filename = pathlocal + '/datasets/saving.arff.gz'
    expectedPoints = [[0.307143, 0.130137, 0.050000],
                      [0.365584, 0.105479, 0.050000],
                      [0.178571, 0.201027, 0.050000],
                      [0.272078, 0.145548, 0.050000],
                      [0.318831, 0.065411, 0.050000],
                      [0.190260, 0.086986, 0.050000],
                      [0.190260, 0.062329, 0.072500],
                      [0.120130, 0.068493, 0.072500],
                      [0.225325, 0.056164, 0.072500],
                      [0.213636, 0.050000, 0.072500]]
    expectedValues = [-1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                      -1.0, -1.0, -1.0, -1.0]
    attributes = {"x0": "NUMERIC", "x1": "NUMERIC",
                  "x2": "NUMERIC", "class": "NUMERIC"}
    size = len(expectedPoints)
    dim = len(expectedPoints[0])
    # assemble the matrix to write
    scratch = DataVector(dim)
    matrix = DataMatrix(size, dim)
    for rowIdx in xrange(size):
        for colIdx in xrange(dim):
            scratch[colIdx] = expectedPoints[rowIdx][colIdx]
        matrix.setRow(rowIdx, scratch)
    adapter = ARFFAdapter(filename)
    adapter.save(matrix, expectedValues, attributes)
    # reload and verify
    (points, values) = adapter.loadData().getPointsValues()
    for rowIdx in xrange(size):
        points.getRow(rowIdx, scratch)
        for colIdx in xrange(dim):
            # exact comparison only for double-precision builds
            if cvar.USING_DOUBLE_PRECISION:
                self.assertEqual(scratch[colIdx],
                                 expectedPoints[rowIdx][colIdx])
            else:
                self.assertAlmostEqual(scratch[colIdx],
                                       expectedPoints[rowIdx][colIdx])
        self.assertEqual(values[rowIdx], expectedValues[rowIdx])
    os.remove(filename)
def withTrainingDataFromARFFFile(self, filename, name="train"):
    """
    Signals to use data from ARFF file for training dataset
    @param filename: Filename where to read the data from
    @param name: Category name, default: "train"
    """
    newContainer = ARFFAdapter(filename).loadData(name)
    existing = self._learner.dataContainer
    # merge with data already attached to the learner, if any
    if existing is not None:
        newContainer = existing.combine(newContainer)
    self._learner.setDataContainer(newContainer)
    return self
def setUp(self):
    """Load the three fold files, merge them into one data container and
    build the FilesFoldingPolicy under test.

    self.points / self.values hold the flat reference sequences used by
    the test methods (values are points reversed).
    """
    files = ['/datasets/foldf_fold0.arff',
             '/datasets/foldf_fold1.arff',
             '/datasets/foldf_fold2.arff']
    # FIX: removed the unused local `datasets` list and replaced the
    # manual fileCounter bookkeeping with enumerate().
    self.dataContainer = ARFFAdapter(
        pathlocal + files[0]).loadData("train0")
    for fileCounter, fname in enumerate(files[1:], start=1):
        self.dataContainer = self.dataContainer.combine(
            ARFFAdapter(pathlocal + fname).loadData(
                "train" + str(fileCounter)))
    self.policy = FilesFoldingPolicy(self.dataContainer)
    # NOTE: Python 2 style — range() returns a list here, so reverse() works
    self.points = range(9)
    self.values = range(9)
    self.values.reverse()
def toString(self):
    """Serialize the container's specifications to a JSON-like string,
    saving each category's data to its ARFF file first if needed.

    @return: string of the form "{'module' : '...', '<category>' : ..., ...}\n"
    """
    # FIX: the import was inside the loop body; hoisted. The loop also
    # re-indexed self.specifications[category] although the items()
    # iteration already yields the specification object.
    from pysgpp.extensions.datadriven.data.ARFFAdapter import ARFFAdapter
    # save the data as a file, if it's not saved yet
    for category, specification in list(self.specifications.items()):
        if not specification.isSaved():
            ARFFAdapter.ARFFAdapter(specification.getFilename())\
                .save(self.getPoints(category), self.getValues(category),
                      specification.getAttributes())
            specification.setSaved()
    # FIX: build the body with join() instead of quadratic `+=` concatenation
    entries = ["'module' : '" + self.__module__ + "',"]
    for category in self.specifications:
        entries.append("'" + category + "' : "
                       + self.specifications[category].toString() + ",")
    serializedString = "\n".join(entries) + "\n"
    return "{" + serializedString.rstrip(",\n") + "}\n"
class TestFilesFoldingPolicy(unittest.TestCase):
    """Tests FilesFoldingPolicy against three pre-split ARFF fold files."""

    ## Set up the variables
    def setUp(self):
        files = ['/datasets/foldf_fold0.arff',
                 '/datasets/foldf_fold1.arff',
                 '/datasets/foldf_fold2.arff']
        # FIX: removed the unused local `datasets` list and replaced the
        # manual fileCounter bookkeeping with enumerate().
        self.dataContainer = ARFFAdapter(
            pathlocal + files[0]).loadData("train0")
        for fileCounter, fname in enumerate(files[1:], start=1):
            self.dataContainer = self.dataContainer.combine(
                ARFFAdapter(pathlocal + fname).loadData(
                    "train" + str(fileCounter)))
        self.policy = FilesFoldingPolicy(self.dataContainer)
        # NOTE: Python 2 style — range() returns a list, so reverse() works
        self.points = range(9)
        self.values = range(9)
        self.values.reverse()

    ##
    # Tests the function @link python.learner.folding.FoldingPolicy.FoldingPolicy.next() FilesFoldingPolicy.next() @endlink
    def testNext(self):
        # Each fold's training set must contain everything except the
        # 3-element slice belonging to the current fold.
        step = 0
        for fold in self.policy:
            points = fold.getPoints()
            testPoints = self.points[:step * 3] + self.points[step * 3 + 3:]
            values = fold.getValues()
            testValues = self.values[:step * 3] + self.values[step * 3 + 3:]
            self.assertEqual(points.getNrows(), len(testPoints))
            self.assertEqual(len(values), len(testValues))
            for i in xrange(points.getSize()):
                self.assertEqual(points.get(i, 0), testPoints[i])
                self.assertEqual(values[i], testValues[i])
            step += 1
def testLearnDataWithFolding(self):
    """Runs 10-fold sequential cross-validation on the liver-disorders
    data and compares per-fold train/test accuracies against reference
    values (alternating train, test)."""
    referenceAccuracies = [
        0.6612903226, 0.1428571429, 0.5741935484, 0.9142857143,
        0.6193548387, 0.5142857143, 0.5870967742, 0.7714285714,
        0.6032258065, 0.5714285714, 0.6387096774, 0.4000000000,
        0.5935483871, 0.7428571429, 0.6193548387, 0.5142857143,
        0.5903225806, 0.7714285714, 0.6063492063, 0.5666666667]
    dim = 6
    self.classifier = Classifier()
    dataContainer = ARFFAdapter(
        pathlocal +
        "/../../../datasets/liver/liver-disorders_normalized.arff"
    ).loadData()
    self.classifier.setDataContainer(dataContainer)
    self.classifier.setFoldingPolicy(
        SequentialFoldingPolicy(dataContainer, 10))
    grid = Grid.createLinearGrid(dim)
    grid.getGenerator().regular(2)  # regular grid of level 2
    self.classifier.setGrid(grid)
    self.classifier.setLearnedKnowledge(LearnedKnowledge())
    spec = TrainingSpecification()
    spec.setL(0.00001)  # regularization parameter lambda
    spec.setCOperator(createOperationLaplace(grid))
    spec.setCOperatorType('laplace')
    self.classifier.setSpecification(spec)
    stopPolicy = TrainingStopPolicy()
    stopPolicy.setAdaptiveIterationLimit(0)
    self.classifier.setStopPolicy(stopPolicy)
    self.classifier.setSolver(CGSolver())
    self.classifier.learnDataWithFolding()
    for fold in range(10):
        self.assertAlmostEqual(referenceAccuracies[2 * fold],
                               self.classifier.trainAccuracy[fold])
        self.assertAlmostEqual(referenceAccuracies[2 * fold + 1],
                               self.classifier.testAccuracy[fold])
class TestFilesFoldingPolicy(unittest.TestCase):
    """Tests FilesFoldingPolicy against three pre-split ARFF fold files."""

    ## Set up the variables
    def setUp(self):
        files = ['/datasets/foldf_fold0.arff',
                 '/datasets/foldf_fold1.arff',
                 '/datasets/foldf_fold2.arff']
        # FIX: dropped the unused local `datasets` list; the manual
        # fileCounter bookkeeping is replaced by enumerate().
        self.dataContainer = ARFFAdapter(
            pathlocal + files[0]).loadData("train0")
        for fileCounter, fname in enumerate(files[1:], start=1):
            self.dataContainer = self.dataContainer.combine(
                ARFFAdapter(pathlocal + fname).loadData(
                    "train" + str(fileCounter)))
        self.policy = FilesFoldingPolicy(self.dataContainer)
        # NOTE: Python 2 style — range() returns a list, so reverse() works
        self.points = range(9)
        self.values = range(9)
        self.values.reverse()

    ##
    # Tests the function @link python.learner.folding.FoldingPolicy.FoldingPolicy.next() FilesFoldingPolicy.next() @endlink
    def testNext(self):
        # Each fold's training set must contain everything except the
        # 3-element slice belonging to the current fold.
        step = 0
        for fold in self.policy:
            points = fold.getPoints()
            testPoints = self.points[:step * 3] + self.points[step * 3 + 3:]
            values = fold.getValues()
            testValues = self.values[:step * 3] + self.values[step * 3 + 3:]
            self.assertEqual(points.getNrows(), len(testPoints))
            self.assertEqual(len(values), len(testValues))
            for i in xrange(points.getSize()):
                self.assertEqual(points.get(i, 0), testPoints[i])
                self.assertEqual(values[i], testValues[i])
            step += 1
def constructObjectsFromOptions(cls, options):
    """Build a learner from parsed command-line options and perform the
    action selected by options.mode.

    @param options: parsed option object (attribute access, optparse-style)
    @raise Exception: on missing training data, bad grid arguments, an
           unknown regularization operator, or an unknown mode
    """
    # Create builder for specified learner
    builder = LearnerBuilder()
    if options.regression:
        builder.buildRegressor()
    else:
        builder.buildClassifier()

    # load alpha file
    if options.alpha:
        builder.withInitialAlphaFromARFFFile(options.alpha)

    # dataset options
    if len(options.data) == 1:
        builder.withTrainingDataFromARFFFile(options.data[0])
    elif len(options.data) > 1:
        for fileCounter, filename in enumerate(options.data):
            builder.withTrainingDataFromARFFFile(
                filename, DataContainer.TRAIN_CATEGORY + str(fileCounter))
    else:
        raise Exception('Define the path to the training data set')
    if options.test:
        builder.withTestingDataFromARFFFile(options.test)

    # grid options
    builder = builder.withGrid()
    if options.grid:
        builder.fromFile(options.grid)
    else:
        try:
            if options.level:
                builder.withLevel(options.level)
            if options.polynom:
                builder.withPolynomialBase(options.polynom)
            if options.trapezoidboundary:
                builder.withBorder(BorderTypes.TRAPEZOIDBOUNDARY)
            elif options.completeboundary:
                builder.withBorder(BorderTypes.COMPLETEBOUNDARY)
            # @fixme (khakhutv)the name "NONE" for the border type should be changed to something more meaningful
            elif options.border:
                builder.withBorder(BorderTypes.NONE)
        except Exception:
            # FIX: narrowed the bare `except:` so KeyboardInterrupt and
            # SystemExit are not swallowed
            raise Exception('Grid configuration arguments incorrect')
    if options.adapt_start:
        builder.withStartingIterationNumber(options.adapt_start)

    # training specification
    builder = builder.withSpecification()
    if options.adapt_rate:
        builder.withAdaptRate(options.adapt_rate)
    elif options.adapt_points:
        builder.withAdaptPoints(options.adapt_points)
    if options.regparam:
        builder.withLambda(options.regparam)
    if options.zeh:
        if options.zeh == 'laplace':
            builder.withLaplaceOperator()
        elif options.zeh == 'identity':
            builder.withIdentityOperator()
        else:
            raise Exception('Incorrect regulariation operator type')
    if options.adapt_threshold:
        builder.withAdaptThreshold(options.adapt_threshold)

    # stop policy (withAdaptiveItarationLimit is the builder's actual,
    # misspelled API name — kept for compatibility)
    builder = builder.withStopPolicy()
    if options.adaptive:
        builder.withAdaptiveItarationLimit(options.adaptive)
    if options.grid_limit:
        builder.withGridSizeLimit(options.grid_limit)
    if options.mse_limit:
        # BUG FIX: the original read `options.mse_limi` (missing 't'),
        # raising AttributeError whenever an MSE limit was configured.
        builder.withMSELimit(options.mse_limit)
    if options.epochs_limit:
        builder.withEpochsLimit(options.epochs_limit)

    # linear solver
    builder = builder.withCGSolver()
    if options.r:
        builder.withAccuracy(options.r)
    if options.max_r:
        builder.withThreshold(options.max_r)
    if options.imax:
        builder.withImax(options.imax)

    # presentor
    if options.verbose:  # print to the screen
        if options.regression:
            builder.withProgressPresenter(InfoToScreenRegressor())
        else:
            builder.withProgressPresenter(InfoToScreen())
    if options.stats:
        builder.withProgressPresenter(InfoToFile(options.stats))

    # checkpoint
    if options.checkpoint:
        title = os.path.basename(options.checkpoint)
        path = os.path.dirname(options.checkpoint)
        checkpointController = CheckpointController(title, path)
        builder.withCheckpointController(checkpointController)

    # Folding
    if options.mode in ['fold', 'folds', 'foldstratified', 'foldf', 'foldr']:
        if options.mode == 'fold':
            builder.withRandomFoldingPolicy()
        elif options.mode == 'folds':
            builder.withSequentialFoldingPolicy()
        elif options.mode in ['foldstratified', 'foldr']:
            builder.withStratifiedFoldingPolicy()
        elif options.mode == 'foldf':
            builder.withFilesFoldingPolicy()
        if options.seed:
            builder.withSeed(options.seed)
        if options.level:
            builder.withLevel(options.level)

    # Get Results and perform wanted action
    learner = builder.andGetResult()
    options.mode = options.mode.lower()
    if options.mode == 'normal':
        learner.learnData()
    elif options.mode == 'test':
        learner.learnDataWithTest()
    elif options.mode == 'apply':
        # NOTE(review): options.data is a list (see dataset handling above);
        # passing the whole list to ARFFAdapter looks suspicious — confirm
        # whether options.data[0] was intended.
        learner.applyData(ARFFAdapter(options.data).loadData().getPoints())
    elif options.mode in ['fold', 'folds', 'foldstratified', 'foldf', 'foldr']:
        builder.getCheckpointController().generateFoldValidationJob(
            'PUT_YOUR_EMAIL_HERE')
    elif options.mode in ['eval', 'evalstdin']:
        raise Exception('This action is not implemented yet')
    else:
        raise Exception('Incorrect action configuration')
def constructObjectsFromFile(cls, filename):
    """Build a learner from a job configuration file and perform the
    action configured in its [learner] section.

    @param filename: path to the INI-style job configuration file
    @raise Exception: on invalid or missing configuration entries
    """
    configuration = configparser.ConfigParser()
    # FIX: the original used the deprecated readfp() and leaked the file
    # handle returned by open()
    with open(filename, 'r') as configFile:
        configuration.read_file(configFile)

    # Create builder for specified learner
    builder = LearnerBuilder()
    learner_type = configuration.get('learner', 'type')
    if learner_type == 'classification':
        builder.buildClassifier()
    elif learner_type == 'regression':
        builder.buildRegressor()
    else:
        raise Exception('Wrong learner type in job configuration file')

    # learner created later, either by the checkpoint controller or the builder
    learner = None

    # dataset options
    options = TerminalController.itemsToDict(configuration.items('data'))
    if options['file_type'] == 'arff':
        if 'train_file' in options:
            # FIX: isinstance() instead of type() comparison
            if not isinstance(options['train_file'], list):
                builder.withTrainingDataFromARFFFile(options['train_file'])
            else:
                for fileCounter, train_file in enumerate(options['train_file']):
                    builder.withTrainingDataFromARFFFile(
                        train_file,
                        DataContainer.TRAIN_CATEGORY + str(fileCounter))
        else:
            raise Exception(
                'Path to file with training data set is not defined in configurationfile')
        if 'test_file' in options:
            builder.withTestingDataFromARFFFile(options['test_file'])
    else:
        raise Exception('Unsupported data type in job configuration file')

    # grid options
    builder = builder.withGrid()
    options = TerminalController.itemsToDict(configuration.items('grid'))
    if 'grid_file' in options:
        builder.fromFile(options['grid_file'])
    else:
        try:
            if 'level' in options:
                builder.withLevel(int(options['level']))
            if 'polynomial' in options:
                builder.withPolynomialBase(int(options['polynomial']))
            if 'border' in options:
                builder.withBorder(options['border'])
        except Exception:
            # FIX: narrowed the bare `except:`
            raise Exception('Grid configuration in job file is incorrect')

    # training specification
    builder = builder.withSpecification()
    options = TerminalController.itemsToDict(configuration.items('refinement'))
    if 'points' in options:
        # a decimal point means a rate, otherwise an absolute point count
        if '.' in options['points']:
            builder.withAdaptRate(float(options['points']))
        else:
            builder.withAdaptPoints(int(options['points']))
    options = TerminalController.itemsToDict(configuration.items('learner'))
    if 'regularization_parameter' in options:
        builder.withLambda(float(options['regularization_parameter']))
    if 'regularization_operator' in options:
        if options['regularization_operator'] == 'laplace':
            builder.withLaplaceOperator()
        # BUG FIX: the original compared against the misspelling 'idenitty',
        # so a correctly spelled 'identity' entry hit the error branch.
        elif options['regularization_operator'] == 'identity':
            builder.withIdentityOperator()
        else:
            raise Exception('Incorrect regulariation operator type')
    if 'threshold' in options:
        builder.withAdaptThreshold(float(options['threshold']))

    # stop policy (withAdaptiveItarationLimit is the builder's actual,
    # misspelled API name — kept for compatibility)
    builder = builder.withStopPolicy()
    options = TerminalController.itemsToDict(configuration.items('refinement'))
    if 'iterations' in options:
        builder.withAdaptiveItarationLimit(int(options['iterations']))
    if 'gridsize' in options:
        builder.withGridSizeLimit(int(options['gridsize']))
    if 'mse' in options:
        builder.withMSELimit(float(options['mse']))
    if 'epochs' in options:
        builder.withEpochsLimit(int(options['epochs']))

    # linear solver
    builder = builder.withCGSolver()
    options = TerminalController.itemsToDict(configuration.items('solver'))
    if 'accuracy' in options:
        builder.withAccuracy(float(options['accuracy']))
    if 'imax' in options:
        builder.withImax(int(options['imax']))
    if 'max_threshold' in options:
        builder.withThreshold(float(options['max_threshold']))

    # presentor
    options = TerminalController.itemsToDict(configuration.items('output'))
    if 'type' in options:
        # FIX: renamed the loop variable — the original shadowed the
        # builtin `type` — and normalized the if/if/elif chain to elif
        for presenterType in options['type'].split(','):
            presenterType = presenterType.strip()
            if presenterType == 'InfoToScreen':
                builder.withProgressPresenter(InfoToScreen())
            elif presenterType == 'InfoToScreenRegressor':
                builder.withProgressPresenter(InfoToScreenRegressor())
            elif presenterType == 'InfoToFile':
                if 'filename' in options:
                    builder.withProgressPresenter(
                        InfoToFile(options['filename']))
                else:
                    raise Exception(
                        'Define filename in order to use InfoToFile output')

    # checkpoint
    options = TerminalController.itemsToDict(
        configuration.items('checkpoints'))
    if 'name' in options:
        path = options['path'] if 'path' in options else None
        # BUG FIX: the original tested for the key 'inte rval' (stray
        # space), so a configured interval was never picked up
        interval = options['interval'] if 'interval' in options else None
        checkpointController = CheckpointController(
            options['name'], path, interval)
        if 'restore_iteration' in options:
            learner = checkpointController.loadAll(
                options['restore_iteration'])
        builder.withCheckpointController(checkpointController)

    # Get Results and perform wanted action.
    # BUG FIX: the original tested `learner not in dir()`, which compares
    # the *value* of `learner` against local variable names (and raises
    # NameError when unbound, silently caught); use an explicit sentinel.
    if learner is None:
        learner = builder.andGetResult()

    # Folding
    options = TerminalController.itemsToDict(configuration.items('folding'))
    # BUG FIX: itemsToDict() yields a dict everywhere else in this function,
    # so the original attribute accesses (options.type, options.seed,
    # options.level) would raise AttributeError; use key lookups.
    if options['type'] in ['fold', 'folds', 'foldstratified', 'foldr', 'foldf']:
        if options['type'] == 'fold':
            builder.withRandomFoldingPolicy()
        elif options['type'] == 'folds':
            builder.withSequentialFoldingPolicy()
        elif options['type'] in ['foldstratified', 'foldr']:
            builder.withStratifiedFoldingPolicy()
        elif options['type'] == 'foldf':
            builder.withFilesFoldingPolicy()
        if options.get('seed'):
            builder.withSeed(options['seed'])
        if options.get('level'):
            builder.withLevel(options['level'])
    else:
        raise Exception('Unknown folding type')

    options = TerminalController.itemsToDict(configuration.items('learner'))
    if options['action'] == 'learn':
        if ('with_testing' not in options) \
                or options['with_testing'].lower() == 'no':
            learner.learnData()
        elif options['with_testing'].lower() == 'yes':
            learner.learnDataWithTest()
        else:
            # FIX: message typo 'with_testion' corrected
            raise Exception('with_testing can only be set to "yes" or "no"')
    elif options['action'] == 'apply':
        points_file = configuration.get('data', 'points_file')
        if points_file is not None:
            learner.applyData(
                ARFFAdapter(points_file).loadData().getPoints())
        else:
            raise Exception(
                'To evaluate value of points define file path "points_file" in the section "data"')
    elif options['action'] == 'fold':
        builder.getCheckpointController().generateFoldValidationJob(
            'PUT_YOUR_EMAIL_HERE')
    else:
        raise Exception('Incorrect action in job configuration file')