Example #1
0
def test(db_worker, model):
    """Run a previously saved model against the imported test data.

    The pipeline comes from ``build_test_validation_pipeline(db_worker)``
    and the data from ``import_test_data(db_worker)``.  When ``model`` is
    not ``None`` the saved scalers are loaded into the shared
    ``DataPreprocessor`` instance and ``test_existing_model`` is invoked.

    NOTE(review): the model that gets tested is selected by ``model_name``,
    which is not a parameter of this function -- presumably a module-level
    name.  ``model`` itself is only used as an on/off switch.  Confirm that
    this is intended.
    """
    # Named ``pipeline`` (not ``test``) so the function name is not shadowed.
    pipeline = build_test_validation_pipeline(db_worker)
    data = import_test_data(db_worker)

    # Disabled training path, kept for reference:
    # confs = [Conf([20, 20], [ActivationFunction.Relu, ActivationFunction.Relu])]
    # model = pipeline.train_test(data)
    # model.save_model(paths.get_models_path()+"model_novo5")
    # DataPreprocessor.get_instance().save_scalars(paths.get_scalars_path())

    # Identity comparison: ``!= None`` can be hijacked by a custom __ne__.
    if model is not None:
        DataPreprocessor.get_instance().load_scalers(paths.get_scalars_path())
        pipeline.test_existing_model(model_name, data)
Example #2
0
def validate_and_test(db_worker):
    """Train, validate and test a set of configurations, then persist.

    One ``Conf`` is built for every entry of ``confs_parameters`` (a pair
    of layer sizes and per-layer activation functions).  The pipeline's
    ``train_validate_test`` is called with the test data and those
    configurations; the model it returns is saved under the models path
    as ``"model_novo5"`` and the preprocessor's scalers are saved to the
    scalars path.
    """
    # Named ``pipeline`` (not ``test``) to avoid a confusing generic name.
    pipeline = build_test_validation_pipeline(db_worker)
    data = import_test_data(db_worker)

    # Each entry is (layer sizes, activation per layer).  The first entry
    # previously lacked the comma between the two lists, which made Python
    # index ``[20, 20]`` with a tuple and raise TypeError.
    confs_parameters = [
        ([20, 20], [ActivationFunction.Relu, ActivationFunction.Relu]),
        ([20, 20], [ActivationFunction.Tanh, ActivationFunction.Tanh]),
        ([10, 10], [ActivationFunction.Relu, ActivationFunction.Relu]),
        ([10, 10], [ActivationFunction.Tanh, ActivationFunction.Tanh]),
    ]

    confs = [Conf(*conf_parameters) for conf_parameters in confs_parameters]

    model = pipeline.train_validate_test(data, confs)
    model.save_model(paths.get_models_path() + "model_novo5")
    DataPreprocessor.get_instance().save_scalars(paths.get_scalars_path())
Example #3
0
    def build_trading_env(self, agent_state, ticker):
        """Assemble and return a trading environment for ``ticker``.

        The shared ``DataPreprocessor`` instance has its scalers loaded
        from the scalars path, and is handed both to a
        ``FakeRealTimeTradingDataGetter`` (constructed with
        ``self.db_worker`` and a fixed timestamp string) and to the
        environment builder, together with ``self.action_performer`` and
        ``agent_state``.
        """
        preprocessor = DataPreprocessor.get_instance()
        preprocessor.load_scalers(paths.get_scalars_path())

        data_getter = FakeRealTimeTradingDataGetter(
            self.db_worker, preprocessor, '2018-11-24 23:00:00')
        builder = EnvironmentBuilder(Reward())

        return builder.build_trading_environment(
            data_getter, ticker, self.action_performer, agent_state,
            preprocessor)
Example #4
0
def train(db_worker):
    """Train a DeepQ agent on imported data, report results, save the net.

    Steps, in order: import the data and plot its ``'Price'`` column,
    build the environment, replay memory, statistics and network, run
    ``DeepQ.train()``, plot/evaluate the run, print a summary of the final
    agent state and save the network under the models path.

    NOTE(review): ``model_name`` is not defined in this function --
    presumably a module-level name; confirm.
    """
    num_of_actions = Action.num_of_actions

    data = import_data(db_worker)

    data['Price'].plot()
    data_preprocessor = DataPreprocessor.get_instance()
    env = create_env(data, data_preprocessor)
    # data_preprocessor.save_scalars(paths.get_scalars_path())
    mem = ReplayMemory(1000)
    evaluation = Evaluation()
    statistics = DeepQStatistics(env.get_num_of_states_per_training_episode())
    nn = create_net(env.num_of_features, num_of_actions)

    num_of_iterations = 1

    epsilon_strategy = LinearGreedyStrategy(
        num_of_actions, num_of_iterations,
        env.get_num_of_states_per_training_episode())

    deep_q = DeepQ(nn, env, mem, statistics, num_of_actions,
                   env.num_of_features, epsilon_strategy, num_of_iterations)

    # Only consumed by the disabled FixedTarget wrapper below.
    # NOTE(review): kept because create_net may have side effects; drop it
    # together with the commented line if it does not.
    target_net = create_net(env.num_of_features, num_of_actions)
    # deep_q = FixedTarget(deep_q, target_net)

    deep_q.train()

    evaluation.plot_actions_during_time(data['Price'],
                                        statistics.actions_for_last_iteration,
                                        model_name)
    evaluation.evaluate(statistics, model_name)

    agent_state = env.agent_state
    stats = '''Budget: {},
                Num of stocks: {},
                Reward: {}, 
                Reward for last 5 iterations: {},
                Num of stocks bought: {}, 
                Num of stocks sold: {}, 
                Actions: {}, 
          '''.format(
        agent_state.budget,
        agent_state.num_of_stocks,
        statistics.rewards_history[-1],
        sum(statistics.rewards_history[-5:]),
        agent_state.num_of_stocks_bought,
        agent_state.num_of_stocks_sold,
        statistics.all_actions[-1],
    )

    # Call form works on both Python 2 (parenthesised expression) and
    # Python 3 (print function); the statement form is a Python 3 syntax error.
    print(stats)

    nn.save_model(paths.get_models_path() + model_name)
Example #5
0
    def train(self, confs, data):
        """Return one model per entry of ``confs``, in the same order.

        For every configuration a fresh batch environment is built from
        ``self.init_num_of_of_stocks``, ``self.init_budget``, ``data`` and
        the shared ``DataPreprocessor`` instance, and passed to
        ``self.get_model`` along with the configuration and ``data``.
        """
        def train_single(conf):
            # A new environment per configuration, as in a plain loop.
            env = self.env_builder.build_batch_environment(
                self.init_num_of_of_stocks, self.init_budget, data,
                DataPreprocessor.get_instance())
            return self.get_model(conf, env, data)

        return [train_single(conf) for conf in confs]
Example #6
0
    def validate(self, models, data):
        """Return the model from ``models`` with the highest reward.

        Each model is evaluated with ``self.evaluator.test`` on its own
        freshly built batch environment.  The evaluator result is unpacked
        into three values, of which only the first (the reward) is used
        for the ranking.  The winner is picked with ``np.argmax``.
        """
        def reward_of(candidate):
            batch_env = self.env_builder.build_batch_environment(
                self.init_num_of_of_stocks, self.init_budget, data,
                DataPreprocessor.get_instance())
            # Three-way unpack kept so a malformed result still raises.
            reward, _budget, _stocks = self.evaluator.test(
                candidate, batch_env, data)
            return reward

        rewards = np.array([reward_of(candidate) for candidate in models])
        return models[np.argmax(rewards)]
Example #7
0
    def get_reward(self, state, action, new_state):
        """Compute the reward for taking ``action`` in ``state``.

        ``state`` and ``new_state`` are unpacked into ``State`` tuples
        (``new_state`` is only unpacked, never read afterwards).  The
        shared ``DataPreprocessor`` instance is stored on
        ``self.preprocessor`` and used to inverse-transform the state's
        ``profit``, ``stocks`` and ``inv`` fields.

        Reward rules:
          * Sell with a non-zero inverse-transformed ``inv``:
            ``max(state.data - state.inv, 0)``; otherwise the base is 0.
          * A further 0.2 is subtracted for a Buy when the
            inverse-transformed profit is <= 0, or for a Sell when the
            inverse-transformed stock count is <= 0.
        """
        # Unpacked first and kept even though unused: State(*new_state)
        # raises on a malformed new_state.
        next_snapshot = State(*new_state)
        snapshot = State(*state)

        self.preprocessor = DataPreprocessor.get_instance()
        scaler = self.preprocessor

        budget = scaler.inverse_transform_budget(snapshot.profit)
        holdings = scaler.inverse_transform_stocks(snapshot.stocks)
        invested = scaler.inverse_transform_price(snapshot.inv)

        reward = 0

        # NOTE(review): the zero check uses the inverse-transformed value
        # while the difference uses the raw state fields -- confirm intended.
        if action == Action.Sell and not (invested == 0):
            reward = max(snapshot.data - snapshot.inv, 0)

        broke_buy = budget <= 0 and action == Action.Buy
        if broke_buy or (holdings <= 0 and action == Action.Sell):
            reward -= 0.2

        return reward
Example #8
0
    def test(self, net, data):
        """Evaluate ``net`` on a freshly built batch environment.

        The environment is built from ``self.init_num_of_of_stocks``,
        ``self.init_budget``, ``data`` and the shared ``DataPreprocessor``
        instance.  Returns whatever ``self.evaluator.test`` returns.
        """
        builder = self.env_builder
        env = builder.build_batch_environment(
            self.init_num_of_of_stocks, self.init_budget, data,
            DataPreprocessor.get_instance())

        outcome = self.evaluator.test(net, env, data)
        return outcome