def test_train(self):
    """Smoke-test: Trainer.train runs on the bundled training data without raising."""
    model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
    r = Resource(self.TEST_DIR)
    trainer = Trainer(model, r)
    # NOTE: the original created an unused DataProcessor here; removed.
    data, target = r.load_training_data()
    print("Test Train the model")
    trainer.train(data, target, epoch=5)
def post(self):
    """Save submitted feedback data; reply with an error message on bad input.

    Bug fix: the error message used to be assigned to a local variable
    (`result`) that was never used, so the client always received an
    empty result even when the payload was malformed.
    """
    resp = {"result": ""}
    data = self.get_arguments("data[]")
    if len(data) > 0:
        r = Resource()
        r.save_data(DATA_PATH, data)
    else:
        # Surface the failure to the client instead of dropping it.
        resp["result"] = "feedback format is wrong."
    self.write(resp)
def test_normalization_parameter(self):
    """Normalization params round-trip through save/load within tolerance.

    Bug fix: the original asserted `means[i] - loaded_means[i] < 1e-10`,
    which is trivially true whenever the loaded value is LARGER than the
    expected one (the difference is negative). The comparison must use
    the absolute difference.
    """
    means = (0.0, 1.0, 0.2)
    stds = (0.5, 0.2, 3.0)
    r = Resource(self.TEST_DIR)
    r.save_normalization_params(means, stds)
    self.assertTrue(os.path.isfile(r.param_file))
    loaded_means, loaded_stds = r.load_normalization_params()
    for i in range(len(means)):
        self.assertTrue(abs(means[i] - loaded_means[i]) < 1e-10)
        self.assertTrue(abs(stds[i] - loaded_stds[i]) < 1e-10)
def test_model(self):
    """Saving a model twice and loading it back must not raise."""
    network = NumberRecognizeNN(10, 10)
    resource = Resource(self.TEST_DIR)
    resource.save_model(network)
    # Pause so the second save happens at a later timestamp.
    time.sleep(1)
    resource.save_model(network)
    resource.load_model(network)
def test_save_data(self):
    """Two appended rows round-trip through save_data / load_data."""
    resource = Resource(self.TEST_DIR)
    data_file = self.TEST_DIR + "/data_file.txt"
    rows = [
        ["0"] + ["0"] * 6400,  # label + feature
        ["9"] + ["1"] * 6400,  # label + feature
    ]
    for row in rows:
        resource.save_data(data_file, row)
    x, y = resource.load_data(data_file)
    self.assertEqual(2, len(x))
    self.assertEqual(2, len(y))
    self.assertEqual(0, y[0])
    self.assertEqual(9, y[1])
    self.assertEqual(0, x[0][0])
    self.assertEqual(1, x[1][0])
def test_model_api(self):
    """Train on most of the data, then score ModelAPI.predict on a held-out slice."""
    model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
    r = Resource(self.TEST_DIR)
    trainer = Trainer(model, r)
    # NOTE: the original created an unused DataProcessor here; removed.
    data, target = r.load_training_data()
    api_test_size = 200
    print("Train the model for API Test.")
    trainer.train(data[:-api_test_size], target[:-api_test_size], epoch=5)
    model_api = ModelAPI(r)
    predicted = model_api.predict(data[-api_test_size:])
    teacher = target[-api_test_size:]
    score = accuracy_score(teacher, predicted)
    print("Model API score is {}".format(score))
def test_baseline(self):
    """Fit a plain SVC on the formatted data as a reference ("baseline") score."""
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score
    resource = Resource(self.TEST_DIR)
    processor = DataProcessor()
    raw_x, raw_y = resource.load_training_data()
    processor.set_normalization_params(raw_x)
    features = processor.format_x(raw_x)
    labels = processor.format_y(raw_y)
    test_size = 200
    classifier = SVC()
    classifier.fit(features[:-test_size], labels[:-test_size])
    predicted = classifier.predict(features[-test_size:])
    teacher = labels[-test_size:]
    score = accuracy_score(teacher, predicted)
    print("Baseline score is {}".format(score))
def test_batch_iter(self):
    """batch_iter yields full-size batches and flags only the final batch of the epoch."""
    batch_size = 10
    processor = DataProcessor()
    resource = Resource()
    train_x, train_y = resource.load_training_data()
    batch_count = math.ceil(len(train_y) / batch_size)
    seen = 0
    batches = processor.batch_iter(train_x, train_y, batch_size)
    for index, (x_batch, y_batch, epoch_end) in enumerate(batches):
        self.assertEqual(batch_size, len(x_batch))
        self.assertEqual(batch_size, len(y_batch))
        if index < batch_count - 1:
            self.assertFalse(epoch_end)
        else:
            self.assertTrue(epoch_end)
        seen += 1
    self.assertEqual(seen, batch_count)
def post(self):
    """Answer a digit-prediction request, training a model first if none is stored."""
    resp = {"result": str(-1)}
    payload = self.get_arguments("data[]")
    resource = Resource()
    if not os.path.isdir(resource.model_path):
        # No saved model directory yet: train one from scratch.
        from ml.model import NumberRecognizeNN
        from ml.trainer import Trainer
        network = NumberRecognizeNN(resource.INPUT_SIZE, resource.OUTPUT_SIZE)
        trainer = Trainer(network, resource)
        train_x, train_y = resource.load_training_data()
        trainer.train(train_x, train_y)
    api = ModelAPI(resource)
    if len(payload) > 0:
        features = [float(value) for value in payload]
        predicted = api.predict(features)
        resp["result"] = str(predicted[0])
    self.write(resp)
def post(self):
    """Handle a prediction request, training a model on demand if none is stored."""
    # Build the response map; default result is -1 (no prediction).
    resp = {"result": str(-1)}
    # Receive the submitted data.
    data = self.get_arguments("data[]")
    r = Resource()
    # When the model directory does not exist yet, train a model first.
    # (The original comment claimed the opposite condition; the code
    # clearly tests `not os.path.isdir(...)`.)
    if not os.path.isdir(r.model_path):
        # Import the network definition (Chainer model).
        from ml.model import NumberRecognizeNN
        # Import the trainer.
        from ml.trainer import Trainer
        # Instantiate the network.
        model = NumberRecognizeNN(r.INPUT_SIZE, r.OUTPUT_SIZE)
        # Instantiate the trainer.
        trainer = Trainer(model, r)
        # Load the training data.
        x, y = r.load_training_data()
        # Run training.
        trainer.train(x, y)
    # Wrap the stored model with the prediction API.
    api = ModelAPI(r)
    if len(data) > 0:
        _data = [float(d) for d in data]
        # Predict from the submitted data.
        predicted = api.predict(_data)
        # Store the prediction in the response.
        resp["result"] = str(predicted[0])
    # Send the response.
    self.write(resp)
def train(data_file, batch_size, epoch, test_size):
    """Train the number-recognition network, resuming from a saved model if one exists.

    Args:
        data_file: optional path to a data file; when falsy, the bundled
            training data is used instead.
        batch_size: minibatch size passed through to Trainer.train.
        epoch: number of epochs passed through to Trainer.train.
        test_size: held-out size passed through to Trainer.train.
    """
    r = Resource()
    dp = DataProcessor()
    model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
    try:
        # Best-effort resume: restore normalization params and model weights.
        dp.means, dp.stds = r.load_normalization_params()
        r.load_model(model)
        print("load the model")
    except Exception:
        # Expected on a first run — start from a fresh model.
        # (Unused `as ex` binding removed.)
        print("trained model does not exist.")

    # Dead `x = None; y = None` initializations removed: both branches
    # below assign x and y unconditionally.
    if data_file:
        x, y = r.load_data(data_file)
    else:
        x, y = r.load_training_data()

    trainer = Trainer(model, r)
    print("begin training")
    trainer.train(x, y, batch_size=batch_size, epoch=epoch, test_size=test_size)