Example 1: distributed evaluation
 def evaluate(self, data_rdd, steps, model_dir):
     """Run distributed evaluation and return the results as a DataFrame."""
     md = ModelDir(model_dir, 'evaluate*')
     # Use the full dataset size when no step count is given, then split
     # the total evenly across the workers.
     steps_per_epoch = data_rdd.count() if steps <= 0 else steps
     steps_per_epoch = math.ceil(steps_per_epoch / self.num_workers)
     worker = EvaluateWorker(steps_per_epoch=steps_per_epoch, **md.to_dict())
     # Clear any result files left over from a previous run.
     md.delete_result_file()
     cluster = TFCluster.run(self.sc, worker, self.tf_args, self.cluster_size, self.num_ps,
                             input_mode=self.input_mode)
     # Feed the data through the cluster for a single epoch, then shut it down.
     cluster.train(data_rdd.rdd, num_epochs=1)
     cluster.shutdown()
     # Collect the per-worker results and expose them as a DataFrame.
     results = md.read_result()
     return self.sqlc.createDataFrame(results)
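EvaluateWorker itself is not shown in this example. TFCluster.run uses its second argument as the map function executed on every executor, so the worker object is presumably callable as worker(args, ctx). Below is a minimal, hypothetical sketch of that shape for InputMode.SPARK, where cluster.train feeds the RDD partitions through TFNode.DataFeed; the class name, batch size, and the omitted evaluation logic are assumptions, not the project's actual code.

 from tensorflowonspark import TFNode

 class EvaluateWorkerSketch(object):
     """Hypothetical worker; only the Spark data-feeding skeleton is shown."""

     def __init__(self, steps_per_epoch, **kwargs):
         self.steps_per_epoch = steps_per_epoch
         self.kwargs = kwargs

     def __call__(self, args, ctx):
         # ctx describes this executor's role in the TensorFlow cluster.
         if ctx.job_name == "ps":
             # A real worker would start a tf.train.Server here and block on
             # server.join(); that part is omitted from this sketch.
             return
         # DataFeed pulls the partitions that cluster.train() pushes from Spark.
         data_feed = TFNode.DataFeed(ctx.mgr, train_mode=False)
         step = 0
         while not data_feed.should_stop() and step < self.steps_per_epoch:
             batch = data_feed.next_batch(batch_size=128)
             # ... run the evaluation graph on `batch` and write its output to
             # the model directory so ModelDir.read_result() can pick it up ...
             step += 1
         data_feed.terminate()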
Example 2: recurrent prediction
 def recurrent_predict(self, data_rdd, units, steps, feature_type, model_dir):
     """Run recurrent prediction across the cluster and return a single-column DataFrame."""
     md = ModelDir(model_dir, 'recurrent_predict*')
     worker = RecurrentPredictWorker(units=units,
                                     steps=steps,
                                     feature_type=feature_type,
                                     **md.to_dict())
     # Clear any result files left over from a previous run.
     md.delete_result_file()
     cluster = TFCluster.run(self.sc, worker, self.tf_args, self.cluster_size, self.num_ps,
                             input_mode=self.input_mode)
     # Feed the data for one epoch; use a long feed timeout since prediction
     # over the full RDD can take a while.
     cluster.train(data_rdd.rdd, num_epochs=1, feed_timeout=6000)
     cluster.shutdown()
     # read_result(True) yields plain values here, so each one is wrapped
     # into a single-column row before building the DataFrame.
     results = md.read_result(True)
     return self.sqlc.createDataFrame([{"result": result} for result in results])
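Passing a list of dicts to createDataFrame works, but newer PySpark versions emit an "inferring schema from dict is deprecated" warning for it. A small, self-contained sketch of the equivalent construction with Row objects; the result values below are placeholders standing in for whatever md.read_result(True) returns.

 from pyspark import SparkContext
 from pyspark.sql import Row, SQLContext

 sc = SparkContext(appName="result-wrapping-demo")
 sqlc = SQLContext(sc)

 results = [0.1, 0.7, 0.2]  # placeholder for md.read_result(True)
 # Wrap each value in a Row so the single-column schema is explicit.
 df = sqlc.createDataFrame([Row(result=r) for r in results])
 df.show()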
Example 3: distributed prediction
 def predict(self, data_rdd, steps, model_dir, output_prob=False):
     """Run distributed prediction and return the results as a DataFrame."""
     md = ModelDir(model_dir, 'predict*')
     # Use the full dataset size when no step count is given, then split
     # the total evenly across the workers.
     steps_per_epoch = data_rdd.count() if steps <= 0 else steps
     steps_per_epoch = math.ceil(steps_per_epoch / self.num_workers)
     worker = PredictWorker(steps_per_epoch=steps_per_epoch,
                            output_prob=output_prob,
                            **md.to_dict())
     # Clear any result files left over from a previous run.
     md.delete_result_file()
     cluster = TFCluster.run(self.sc, worker, self.tf_args, self.cluster_size, self.num_ps,
                             input_mode=self.input_mode)
     # Feed the data for one epoch with a long feed timeout, then shut down.
     cluster.train(data_rdd.rdd, num_epochs=1, feed_timeout=6000)
     cluster.shutdown()
     # Collect the per-worker results and expose them as a DataFrame.
     results = md.read_result()
     return self.sqlc.createDataFrame(results)
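All three methods lean on the same ModelDir helper, whose implementation is not shown. Judging only from the calls made above (to_dict, delete_result_file, and read_result with an optional raw flag), it could look roughly like the local-filesystem sketch below; the real helper presumably targets HDFS, and every name and detail here is an assumption.

 import glob
 import json
 import os

 class LocalModelDirSketch(object):
     """Hypothetical stand-in for ModelDir, mirroring only the interface used above."""

     def __init__(self, model_dir, result_pattern):
         self.model_dir = model_dir
         self.result_pattern = result_pattern  # e.g. 'evaluate*' or 'predict*'

     def to_dict(self):
         # Workers are built with **md.to_dict(), so expose the fields as kwargs.
         return {"model_dir": self.model_dir, "result_pattern": self.result_pattern}

     def delete_result_file(self):
         # Remove result files left behind by a previous run.
         for path in glob.glob(os.path.join(self.model_dir, self.result_pattern)):
             os.remove(path)

     def read_result(self, raw=False):
         # Concatenate the per-worker result files; each line is assumed to be
         # one JSON record (raw=True returns the lines unparsed).
         results = []
         for path in sorted(glob.glob(os.path.join(self.model_dir, self.result_pattern))):
             with open(path) as f:
                 for line in f:
                     results.append(line.strip() if raw else json.loads(line))
         return results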