    def test_format_x_resize(self):
        dp = DataProcessor()
        data = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
        x = dp.format_x(data, size=4)
        v = x[0].tolist()
        self.assertEqual(v[0], 6)
        self.assertEqual(v[1], 8)
        self.assertEqual(v[2], 14)
        self.assertEqual(v[3], 16)
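The expected values line up with 2x2 max pooling over the 4x4 grid: 6, 8, 14 and 16 are the maxima of the four 2x2 blocks of the input row. A standalone NumPy sketch of that reduction (an assumption about how format_x might shrink a row to `size` elements, not the actual DataProcessor code):

import numpy as np

def downsample_max(row, size):
    # Hypothetical helper: treat the flat row as a square image and
    # max-pool it down to `size` elements (also a square).
    src = int(np.sqrt(row.shape[0]))   # 16 -> 4x4
    dst = int(np.sqrt(size))           # 4  -> 2x2
    block = src // dst
    image = row.reshape(src, src)
    pooled = image.reshape(dst, block, dst, block).max(axis=(1, 3))
    return pooled.reshape(-1)

row = np.arange(1, 17)
print(downsample_max(row, 4).tolist())  # [6, 8, 14, 16]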
    def test_format_x(self):
        means = np.array([0, 0.1, 0.2])
        stds = np.array([1, 1.5, 0.5])
        dp = DataProcessor(means=means, stds=stds)
        data = np.array([[1, 2, 3], [4, 5, 6]])
        x = dp.format_x(data)
        _x = (data - means) / stds
        for i in range(x.shape[0]):
            for j in range(x.shape[1]):
                self.assertEqual(x[i][j], _x[i][j])
    def save_data(self, path, data):
        with open(path, "ab") as f:
            label = int(data[0])
            features = [float(d) for d in data[1:]]
            if len(features) > self.INPUT_SIZE:
                dp = DataProcessor()
                features = dp.adjust(np.array([features]), self.INPUT_SIZE).tolist()[0]
            elif len(features) < self.INPUT_SIZE:
                raise Exception("Size mismatch when saving the data.")
            line = "\t".join([str(e) for e in [label] + features]) + "\n"
            f.write(line.encode("utf-8"))
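For reference, a hedged sketch of appending one sample with this method; the file path is hypothetical and the feature values are placeholders:

r = Resource()
sample = [3] + [0.0] * Resource.INPUT_SIZE   # label first, then one value per input feature
r.save_data("data/extra_training.txt", sample)
# appends one tab-separated line: "3\t0.0\t0.0\t..."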
    def test_train(self):
        model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
        r = Resource(self.TEST_DIR)
        trainer = Trainer(model, r)
        dp = DataProcessor()
        data, target = r.load_training_data()
        print("Training the model for the test.")
        trainer.train(data, target, epoch=5)
    def test_batch_iter(self):
        batch_size = 10
        dp = DataProcessor()
        r = Resource()
        train_x, train_y = r.load_training_data()
        batch_count = math.ceil(len(train_y) / batch_size)

        i = 0
        for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size):
            self.assertEqual(batch_size, len(x_batch))
            self.assertEqual(batch_size, len(y_batch))
            if i < batch_count - 1:
                self.assertFalse(epoch_end)
            else:
                self.assertTrue(epoch_end)
            i += 1
        self.assertEqual(i, batch_count)
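The test pins down the batch_iter contract: every yielded batch has exactly batch_size rows, epoch_end is False for all but the last batch of an epoch, and ceil(len(train_y) / batch_size) batches are produced per epoch. A minimal generator that satisfies this contract (an illustrative sketch, not necessarily the DataProcessor implementation; it wraps the short final slice around so batch sizes stay constant):

import math
import numpy as np

def batch_iter(x, y, batch_size, epoch=1):
    data_size = len(y)
    batch_count = math.ceil(data_size / batch_size)
    for _ in range(epoch):
        order = np.random.permutation(data_size)  # reshuffle at every epoch
        for b in range(batch_count):
            index = order[b * batch_size:(b + 1) * batch_size]
            if len(index) < batch_size:
                # wrap around so the last batch also has batch_size rows
                index = np.concatenate([index, order[:batch_size - len(index)]])
            yield x[index], y[index], b == batch_count - 1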
    def train(self, data, target, batch_size=100, epoch=5, test_size=0.3, report_interval_epoch=1):
        '''Method that trains the model.'''
        dp = DataProcessor()
        dp.set_normalization_params(data)
        self.resource.save_normalization_params(dp.means, dp.stds)
        _data = dp.format_x(data)
        _target = dp.format_y(target)
        # split the data into a training set and a test set
        train_x, test_x, train_y, test_y = train_test_split(_data, _target, test_size=test_size)

        optimizer = chainer.optimizers.Adam()
        optimizer.use_cleargrads()
        optimizer.setup(self.model)
        loss = lambda pred, teacher: softmax_cross_entropy.softmax_cross_entropy(pred, teacher)
        for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size, epoch):
            predicted = self.model(x_batch)
            optimizer.update(loss, predicted, y_batch)
            if epoch_end:
                train_acc = accuracy.accuracy(predicted, y_batch)
                predicted_to_test = self.model(test_x)
                test_acc = accuracy.accuracy(predicted_to_test, test_y)
                print("train accuracy={}, test accuracy={}".format(train_acc.data, test_acc.data))
                self.resource.save_model(self.model)
def train(data_file, batch_size, epoch, test_size):
    r = Resource()
    dp = DataProcessor()
    model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
    try:
        dp.means, dp.stds = r.load_normalization_params()
        r.load_model(model)
        print("load the model")
    except Exception as ex:
        print("trained model does not exist.")

    x = None
    y = None
    if data_file:
        x, y = r.load_data(data_file)
    else:
        x, y = r.load_training_data()
    
    trainer = Trainer(model, r)
    print("begin training")
    trainer.train(x, y, batch_size=batch_size, epoch=epoch, test_size=test_size)
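A hedged invocation sketch; the argument values below are placeholders, not defaults taken from the project:

if __name__ == "__main__":
    # retrain on the bundled training data, holding out 30% for evaluation
    train(data_file="", batch_size=100, epoch=5, test_size=0.3)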
    def test_model_api(self):
        model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
        r = Resource(self.TEST_DIR)
        trainer = Trainer(model, r)
        dp = DataProcessor()
        data, target = r.load_training_data()
        api_test_size = 200

        print("Train the model for API Test.")
        trainer.train(data[:-api_test_size], target[:-api_test_size], epoch=5)

        model_api = ModelAPI(r)
        predicted = model_api.predict(data[-api_test_size:])
        teacher = target[-api_test_size:]
        score = accuracy_score(teacher, predicted)
        print("Model API score is {}".format(score))
class ModelAPI():
    def __init__(self, resource):
        self.resource = resource
        self.model = NumberRecognizeNN(resource.INPUT_SIZE,
                                       resource.OUTPUT_SIZE)
        resource.load_model(self.model)

        means, stds = resource.load_normalization_params()
        self.dp = DataProcessor(means, stds)

    def predict(self, data):
        _data = data
        if isinstance(data, (tuple, list)):
            _data = np.array([data], dtype=np.float32)

        f_data = self.dp.format_x(_data, size=self.resource.INPUT_SIZE)
        predicted = self.model(f_data)
        number = np.argmax(predicted.data, axis=1)
        return number
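A minimal usage sketch, assuming a trained model and its normalization parameters have already been saved through Resource; the all-zero feature vector is only a placeholder for one flattened digit image:

r = Resource()
api = ModelAPI(r)
digit = [0.0] * Resource.INPUT_SIZE   # placeholder pixel values for one image
print(api.predict(digit))             # e.g. array([0]); one predicted class per input row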
    def test_baseline(self):
        from sklearn.svm import SVC
        from sklearn.metrics import accuracy_score
        r = Resource(self.TEST_DIR)
        dp = DataProcessor()
        data, target = r.load_training_data()
        dp.set_normalization_params(data)
        f_data, f_target = dp.format_x(data), dp.format_y(target)

        test_size = 200
        model = SVC()
        model.fit(f_data[:-test_size], f_target[:-test_size])

        predicted = model.predict(f_data[-test_size:])
        teacher = f_target[-test_size:]
        score = accuracy_score(teacher, predicted)
        print("Baseline score is {}".format(score))