Example #1
def setUp(self):
    # Initialize all objects
    self.cos = CaffeOnSpark(sc)
    cmdargs = conf.get('spark.pythonargs')
    self.args = dict(self.grouper(cmdargs.split(), 2))
    self.cfg = Config(sc, self.args)
    self.train_source = DataSource(sc).getSource(self.cfg, True)
    self.validation_source = DataSource(sc).getSource(self.cfg, False)
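This snippet calls `self.grouper` without showing it; the helper (defined in full in Example #3 below) walks the `spark.pythonargs` string two tokens at a time, turning it into a flag-to-value dict. A standalone sketch of that pairing with a made-up argument string (on Python 3 the import is `itertools.zip_longest`):

from itertools import izip_longest  # Python 2; use zip_longest on Python 3

def grouper(iterable, n, fillvalue=None):
    # Walk the same iterator n times in lockstep: "a b c d" -> (a, b), (c, d)
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

# Hypothetical value for spark.pythonargs:
cmdargs = "-conf lenet_memory_solver.prototxt -model file:/tmp/lenet.model"
args = dict(grouper(cmdargs.split(), 2))
# args == {'-conf': 'lenet_memory_solver.prototxt',
#          '-model': 'file:/tmp/lenet.model'}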
Example #3
class PythonApiTest(unittest.TestCase):
    def grouper(self, iterable, n, fillvalue=None):
        args = [iter(iterable)] * n
        return izip_longest(fillvalue=fillvalue, *args)

    def setUp(self):
        #Initialize all objects
        self.cos = CaffeOnSpark(sc)
        cmdargs = conf.get('spark.pythonargs')
        self.args = dict(self.grouper(cmdargs.split(), 2))
        self.cfg = Config(sc, self.args)
        self.train_source = DataSource(sc).getSource(self.cfg, True)
        self.validation_source = DataSource(sc).getSource(self.cfg, False)

    def testTrain(self):
        self.cos.train(self.train_source)
        self.assertTrue(
            os.path.isfile(self.args.get('-model').split(":")[1][3:]))
        result = self.cos.features(self.validation_source)
        self.assertTrue('accuracy' in result.columns)
        self.assertTrue('ip1' in result.columns)
        self.assertTrue('ip2' in result.columns)
        self.assertTrue(result.count() > 100)
        self.assertTrue(result.first()['SampleID'] == '00000000')
        result = self.cos.test(self.validation_source)
        self.assertTrue(result.get('accuracy') > 0.9)

    def testTrainWithValidation(self):
        result = self.cos.trainWithValidation(self.train_source,
                                              self.validation_source)
        self.assertEqual(len(result.columns), 2)
        self.assertEqual(result.columns[0], 'accuracy')
        self.assertEqual(result.columns[1], 'loss')
        result.show(2)

        row_count = result.count()
        last_row = result.rdd.zipWithIndex().filter(
            lambda (row, index): index == (row_count - 1)).collect()[0][0]
        finalAccuracy = last_row[0][0]
        self.assertTrue(finalAccuracy > 0.8)
        finalLoss = last_row[1][0]
        self.assertTrue(finalLoss < 0.5)
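One portability note: `lambda (row, index): ...` relies on Python 2's tuple-parameter unpacking, which PEP 3113 removed in Python 3. An equivalent last-row lookup that works on both, assuming the same `result` DataFrame, is to index into the pair instead of unpacking it:

# Python 3-compatible version of the last-row extraction above
row_count = result.count()
last_row = (result.rdd.zipWithIndex()
            .filter(lambda pair: pair[1] == row_count - 1)
            .map(lambda pair: pair[0])
            .collect()[0])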
Example #5
class PythonApiTest(unittest.TestCase):
    def grouper(self, iterable, n, fillvalue=None):
        args = [iter(iterable)] * n
        return izip_longest(fillvalue=fillvalue, *args)

    def setUp(self):
        #Initialize all objects
        self.cos = CaffeOnSpark(sc, sqlContext)
        cmdargs = conf.get('spark.pythonargs')
        self.args = dict(self.grouper(cmdargs.split(), 2))
        self.cfg = Config(sc, self.args)
        self.train_source = DataSource(sc).getSource(self.cfg, True)
        self.validation_source = DataSource(sc).getSource(self.cfg, False)

    def testTrain(self):
        self.cos.train(self.train_source)
        self.assertTrue(
            os.path.isfile(self.args.get('-model').split(":")[1][3:]))
        result = self.cos.features(self.validation_source)
        self.assertTrue('accuracy' in result.columns)
        self.assertTrue('ip1' in result.columns)
        self.assertTrue('ip2' in result.columns)
        result = self.cos.test(self.validation_source)
        self.assertTrue(result.get('accuracy') > 0.9)

    def testTrainWithValidation(self):
        result = self.cos.trainWithValidation(self.train_source,
                                              self.validation_source)
        self.assertEqual(self.cfg.solverParameter.getTestIter(0), len(result))
        finalAccuracy = 0
        finalLoss = 0
        for i in range(self.cfg.solverParameter.getTestIter(0)):
            finalAccuracy += result[i][0]
            finalLoss += result[i][1]

        self.assertTrue(
            finalAccuracy / self.cfg.solverParameter.getTestIter(0) > 0.8)
        self.assertTrue(
            finalLoss / self.cfg.solverParameter.getTestIter(0) < 0.5)
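Unlike Example #3, where `trainWithValidation` yields a DataFrame, this variant returns an indexable sequence with one (accuracy, loss) entry per test iteration, so the final numbers are plain Python averages. A self-contained illustration with made-up values:

# Hypothetical per-test-iteration results, shaped like result[i] above:
# result[i][0] -> accuracy, result[i][1] -> loss
result = [[0.91, 0.31], [0.88, 0.35], [0.93, 0.28]]
test_iter = len(result)  # stands in for cfg.solverParameter.getTestIter(0)
mean_accuracy = sum(r[0] for r in result) / float(test_iter)
mean_loss = sum(r[1] for r in result) / float(test_iter)
assert mean_accuracy > 0.8 and mean_loss < 0.5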
"""
This function calls CaffeOnSpark to train the model.  It is similar in 
structure to the LeNext example, e.g., see
https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_python
In fact, the Python interface for CaffeOnSpark currently (July 2016)
allows for very little deviation from this format. 
"""

if __name__ == '__main__':

    sparkConf = SparkConf().setAppName("BeijingTomorrow").setMaster("local")
    sc=SparkContext(conf=sparkConf)
    registerContext(sc)
    sqlContext = SQLContext(sc)
    registerSQLContext(sqlContext)
    cos=CaffeOnSpark(sc,sqlContext)
    cfg=Config(sc)
    this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
    project_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
    visualProtoFile= os.path.join(project_dir,"resources/caffe_prototxt/beijing_pollution_solver_visual.prototxt")
    visualModelFile= os.path.join(project_dir,"resources/caffe_models/beijing_pollution_model_visual.model")
    aerosolProtoFile= os.path.join(project_dir,"resources/caffe_prototxt/beijing_pollution_solver_aerosol.prototxt")
    aerosolModelFile= os.path.join(project_dir,"resources/caffe_models/beijing_pollution_model_aerosol.model")

Example #7
"""
This function calls CaffeOnSpark to train the model.  It is similar in 
structure to the LeNext example, e.g., see
https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_python
In fact, the Python interface for CaffeOnSpark currently (July 2016)
allows for very little deviation from this format. 
"""

if __name__ == '__main__':

    sparkConf = SparkConf().setAppName("BeijingTomorrow").setMaster("local")
    sc = SparkContext(conf=sparkConf)
    registerContext(sc)
    sqlContext = SQLContext(sc)
    registerSQLContext(sqlContext)
    cos = CaffeOnSpark(sc, sqlContext)
    cfg = Config(sc)
    this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
    project_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file)))
    visualProtoFile = os.path.join(
        project_dir,
        "resources/caffe_prototxt/beijing_pollution_solver_visual.prototxt")
    visualModelFile = os.path.join(
        project_dir,
        "resources/caffe_models/beijing_pollution_model_visual.model")
    aerosolProtoFile = os.path.join(
        project_dir,
        "resources/caffe_prototxt/beijing_pollution_solver_aerosol.prototxt")
    aerosolModelFile = os.path.join(
        project_dir,
        "resources/caffe_models/beijing_pollution_model_aerosol.model")

    cfg.protoFile = visualProtoFile
    cfg.modelPath = 'file:' + visualModelFile
    cfg.devices = 1
    cfg.isFeature = True
    cfg.label = 'label'
    cfg.features = ['ip1']
    cfg.outputFormat = 'json'