Example #1
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                            W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss3')

        lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss4')

        lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25)  # cross
        self.model = Model(input=financial_time_series_input,
                           output=[time_series_predictions1, time_series_predictions2,
                                   time_series_predictions3, time_series_predictions4,
                                   time_series_predictions5],
                           name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = [custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy']
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

        self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                       shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0,i,0]*y_hat[0,i,0]>0 else count_true
            print(y[0,i,0],y_hat[0,i,0])
        print(count_all,count_true)
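
A minimal usage sketch for the multi-task model above, assuming the module-level constants it relies on (nb_hidden_units, dropout, l2_norm_alpha, nb_labels) and the two custom objectives are defined as in the original project; the random arrays and the weights path are illustrative only and show the expected input/target shapes:

import numpy as np

nb_time_step, dim_data = 20, 8                      # assumed sequence length and feature size
X = np.random.randn(1, nb_time_step, dim_data)      # (batch, timesteps, features)
y = np.random.randn(1, nb_time_step, 1)             # per-step regression target
y_label = np.zeros((1, nb_time_step, nb_labels))    # per-step one-hot class target
y_label[0, np.arange(nb_time_step), np.random.randint(nb_labels, size=nb_time_step)] = 1

m = FinancialTimeSeriesAnalysisModel(nb_time_step, dim_data, batch_size=1,
                                     model_path='fts_weights.h5')  # hypothetical weights file
m.compile_model(lr=1e-4)
m.fit_model(X, y, y_label, epoch=10)                # targets expand to [y, y, y, y > 0, y_label]
m.model_eval(X, y)
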
class PolicyNet():
    """policy network """
    def __init__(self,
                 board_width,
                 board_height,
                 model_file=None,
                 pretrained_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.build_net()
        self._loss_train_op(0.001)

        if model_file:
            self.model.load_weights(model_file)
        if pretrained_file:
            self.model.load_weights(pretrained_file, by_name=True)

    def build_net(self):
        """create the policy value network """
        in_x = network = Input((2, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)

        self.model = Model(in_x, self.policy_net)

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs = self.policy_value(
            current_state.reshape(
                (-1, 2, self.board_width, self.board_height)))
        act_probs = list(
            zip(legal_positions,
                act_probs.flatten()[legal_positions]))
        return act_probs

    def _loss_train_op(self, initial_learning_rate):
        """
        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        """

        # get the train op
        # opt = Adam()
        self.session = K.get_session()
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(initial_learning_rate, global_step,
                                        10000, 0.95, True)
        opt = tf.train.AdamOptimizer(learning_rate=lr)
        one_hot_move_ph = tf.placeholder(
            tf.float32, (None, self.board_width * self.board_height), "moves")
        reward_ph = tf.placeholder(tf.float32, (None, ), "rewards")

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def loss_op():

            # the policy head already applies softmax, so take the log of its output directly
            objective = tf.log(self.model.output + 1e-10) * one_hot_move_ph
            objective = tf.reduce_sum(objective, axis=-1, keepdims=False)
            objective = objective * reward_ph
            return -1 * objective

        self.loss_op = loss_op()
        self.minimize_op = opt.minimize(self.loss_op, global_step=global_step)

        def train_step(states, reward, moves):
            np_state_input = np.array(states)

            np_reward = np.array(reward)
            np_moves = np.eye(self.board_height *
                              self.board_width)[np.array(moves)]

            # K.set_value(self.model.optimizer.lr, learning_rate)

            # loss = self.model.train_on_batch(np_state_input, [np_winner])
            feed_dict = {
                self.model.input: np_state_input,
                one_hot_move_ph: np_moves,
                reward_ph: np_reward
            }
            _, loss, new_probs = self.session.run(
                [self.minimize_op, self.loss_op, self.model.output], feed_dict)
            entropy = self_entropy(new_probs)
            return loss, entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_path):
        """ save model params to file """
        # net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
        self.model.save_weights(model_path)

    def load_model(self, model_path):
        self.model.load_weights(model_path)
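
A hedged sketch of driving PolicyNet's TensorFlow-based train_step with plain NumPy arrays; the board size and the random batch of states, rewards and move indices are illustrative assumptions, not values from the original project:

import numpy as np

w, h = 8, 8
net = PolicyNet(board_width=w, board_height=h)

states = np.random.rand(16, 2, w, h)        # (batch, planes, width, height)
moves = np.random.randint(w * h, size=16)   # flat board index of the move played in each state
rewards = np.random.randn(16)               # one scalar return per sample

loss, entropy = net.train_step(states, rewards, moves)
print(np.mean(loss), entropy)
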
Example #3
class AdditionNPIModel(NPIStep):
    model = None
    f_enc = None

    def __init__(self,
                 system: RuntimeSystem,
                 model_path: str = None,
                 program_set: AdditionProgramSet = None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1
        self.build()
        self.weight_loaded = False
        if model_path:
            self.load_weights()

    def build(self):
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size),
                               name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size,
                                                  argument_size),
                               name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE,
                              output_dim=PROGRAM_KEY_VEC_SIZE,
                              input_length=1,
                              batch_input_shape=(self.batch_size, 1))

        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(MaxoutDense(128, nb_feature=4))
        self.f_enc = f_enc

        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)

        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))

        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        f_lstm.add(
            LSTM(256,
                 return_sequences=False,
                 stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(
            LSTM(256,
                 return_sequences=False,
                 stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)

        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(1, W_regularizer=l2(0.001)))
        f_end.add(Activation('sigmoid', name='sigmoid_end'))

        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu"))
        f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001)))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)

        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num + 1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001)))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
        # plot(f_arg, to_file='f_arg.png', show_shapes=True)

        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] +
                           [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'
                ] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        # filter out some of the questions
        def filter_question(condition_func):
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        if not self.weight_loaded:
            self.train_f_enc(
                filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100),
                epoch=100)
        self.f_enc.trainable = False

        self.update_learning_rate(0.0001)

        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(
            filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        while True:
            if self.test_and_learn([10, 100, 1000]):
                break

            q_type = "training questions of ALL"
            print(q_type)

            q_num = 100
            skip_correct = False
            pr = 1.0
            questions = filter_question(lambda a, b: True)
            np.random.shuffle(questions)
            questions = questions[:q_num]
            all_ok = self.fit_to_subset(questions,
                                        pass_rate=pr,
                                        skip_correct=skip_correct)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False):
        for i in range(10):
            all_ok = self.do_learn(steps_list,
                                   100,
                                   pass_rate=pass_rate,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
        return False

    def test_and_learn(self, num_questions):
        for num in num_questions:
            print("test all type of %d questions" % num)
            cc, wc, wrong_questions = self.test_to_subset(
                create_random_questions(num))
            acc_rate = cc / (cc + wc)
            print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc))
            if wc > 0:
                self.fit_to_subset(wrong_questions,
                                   pass_rate=1.0,
                                   skip_correct=False)
                return False
        return True

    def test_to_subset(self, questions):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        teacher = AdditionTeacher(self.program_set)
        npi_runner = TerminalNPIRunner(None, self)
        teacher_runner = TerminalNPIRunner(None, teacher)
        correct_count = wrong_count = 0
        wrong_steps_list = []
        for idx, question in enumerate(questions):
            question = copy(question)
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                self.question_test(addition_env, teacher_runner, question)
                wrong_steps_list.append({
                    "q": question,
                    "steps": teacher_runner.step_list
                })
                wrong_count += 1
        return correct_count, wrong_count, wrong_steps_list

    @staticmethod
    def dict_to_str(d):
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch + 1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                          (ep, idx, self.dict_to_str(question),
                           correct_count[question_key]))
                    ok_rate.append(1)
                    cc = correct_count[question_key]
                    if skip_correct or int(math.sqrt(cc))**2 != cc:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print(
                            "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0"
                            % (ep, idx, self.dict_to_str(question),
                               correct_count[question_key]))
                        correct_count[question_key] = 0
                        wrong_new += 1

                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" %
                              ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                    last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print(
                    "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)"
                    % (ep, np.average(ok_rate) * 100, correct_new, wrong_new,
                       cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0

                if math.fabs(1 - cur_loss /
                             last_loss) < 0.001 and no_change_count > 5:
                    print(
                        "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:"
                    )
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output],
                          name="env_model")
        env_model.compile(optimizer='adam',
                          loss=['categorical_crossentropy'] * 2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))
            if np.average(losses) < 1e-06:
                break

    def question_test(self, addition_env, npi_runner, question):
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        x_pg = np.array((p_in.program.program_id, ))
        x = [
            xx.reshape((self.batch_size, -1))
            for xx in (p_in.env, p_in.arguments.values, x_pg)
        ]
        return x

    def convert_output(self, p_out: StepOutput):
        y = [np.array((p_out.r, ))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]

        for v in arg_values:  # split by each args
            y += [v]
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program,
             arguments: IntegerArguments) -> StepOutput:
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(
            x, batch_size=1)  # if batch_size==1, returns single row

        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program,
                         IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
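
For orientation, the per-step target and sample-weight layout that convert_output hands to train_on_batch can be reproduced with plain NumPy; the sizes used below (program vector size, maximum argument count, argument depth) are illustrative assumptions, not the project's actual constants:

import numpy as np

batch_size = 1
PROGRAM_VEC, MAX_ARGS, ARG_DEPTH = 10, 3, 10   # assumed sizes

r = np.array((1.0,))                           # f_end target: probability that the program should stop
prog = np.eye(PROGRAM_VEC)[2]                  # f_prog target: one-hot id of the next sub-program
args = [np.eye(ARG_DEPTH)[0] for _ in range(MAX_ARGS)]   # f_arg targets, one per argument slot

ys = [y.reshape((batch_size, -1)) for y in [r, prog] + args]
ws = [np.array([1.])] * 2 + [np.array([1.])] * MAX_ARGS   # unused heads get weight 1e-10 in convert_output

for y, w in zip(ys, ws):
    print(y.shape, w)
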
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data))

        lstm_layer_1 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True)
        lstm_layer_2 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True)

        h1 = lstm_layer_1(financial_time_series_input)
        h2 = lstm_layer_2(h1)
        time_series_predictions = TimeDistributedDense(1)(h2)
        self.model = Model(
            financial_time_series_input,
            time_series_predictions,
            name="deep rnn for financial time series forecasting")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=3):
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=3,
                                       verbose=0)
        if X_val is None:
            self.model.fit(X,
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_split=0.2,
                           shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X,
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_data=(X_val, y_val),
                           shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[
                0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
class PolicyValueNet():
    """策略价值网络"""

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty 
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            if platform.python_version().split('.')[0] == '3': #python3
                net_params = pickle.load(open(model_file, 'rb'), encoding='iso-8859-1')
            else:
                net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """创建policy-value网络"""
        # 输入层
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net)

        # build the model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return the move probabilities and the state value
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """使用模型预测棋盘所有可落子位置价值概率"""
        # 棋盘所有可落子位置
        legal_positions = board.availables
        # 当前玩家角度的棋盘方格状态
        current_state = board.current_state()
        # 使用模型预测走子策略和价值概率
        act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, probability)] pairs and the value of the position for the current player
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """初始化损失
        3个损失函数因子
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value损失函数 + policy损失函数 + 惩罚项
        """
        # define the optimizer and loss functions
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """输出训练过程中的结果"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            # predict
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """获得模型参数"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """保存模型参数到文件"""
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height 
        self.l2_const = 1e-4  # coef of l2 penalty 
        self.create_policy_value_net()   
        self._loss_train_op()

        if model_file:
            net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)
        
    def create_policy_value_net(self):
        """create the policy value network """   
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width*self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value
        
    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """

        # get the train op   
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            return loss[0], entropy
        
        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()        
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
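
A minimal training-call sketch for the PolicyValueNet variants above, with random arrays standing in for real self-play data; the board size, batch size and learning rate are arbitrary assumptions chosen only to show the expected shapes:

import numpy as np

w, h = 8, 8
net = PolicyValueNet(board_width=w, board_height=h)

states = np.random.rand(32, 4, w, h)                         # (batch, planes, width, height)
mcts_probs = np.random.dirichlet(np.ones(w * h), size=32)    # MCTS visit-count distributions
winners = np.random.choice([-1.0, 1.0], size=32)             # game outcome z from the current player's view

loss, entropy = net.train_step(states, mcts_probs, winners, learning_rate=2e-3)
print(loss, entropy)

probs, value = net.policy_value(states[:1])                  # single-state inference
print(probs.shape, float(value))
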
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha),
                    activation='tanh', name='h1')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat', name='h1')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)
                           #metrics=['mse'])
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                           shuffle=True, callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val),
                           shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i,0]*y_hat[i,0]>0 else count_true
            print(y[i,0], y_hat[i,0])
        print(count_all, count_true)
Example #8
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        if model_file:
            #   net_params = pickle.load(open(model_file, 'rb'))
            #   self.model.set_weights(net_params)
            self.model = load_model(model_file)
        else:
            # self.create_policy_value_net()
            self.create_policy_value_resnet()
        self._loss_train_op()

    def create_policy_value_resnet(self):
        def _conv_bn_relu(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size,
                              filters=filters,
                              padding="same",
                              data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return Activation("relu")(norm)

            return f

        def _conv_bn(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size,
                              filters=filters,
                              padding="same",
                              data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return norm

            return f

        def _basic_block(nb_filters):
            def f(input):
                conv1 = _conv_bn_relu(nb_filters, (3, 3))(input)
                conv2 = _conv_bn(nb_filters, (3, 3))(conv1)
                shortcut = keras.layers.add([conv1, conv2])
                return Activation("relu")(shortcut)

            return f

        in_x = network = Input((4, self.board_width, self.board_height))

        network = _basic_block(64)(network)
        network = _basic_block(128)(network)
        '''
        layer1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        '''

        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        '''
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        '''

        layer1 = Conv2D(filters=64,
                        kernel_size=(3, 3),
                        padding="same",
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64,
                        kernel_size=(3, 3),
                        padding="same",
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)

        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        '''
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value
        '''

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        # net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
        # self.model.save_weights(model_file)
        self.model.save(model_file)

    @staticmethod
    def _shortcut(input, residual):
        stride_width = input._keras_shape[2] // residual._keras_shape[2]
        stride_height = input._keras_shape[3] // residual._keras_shape[3]
        equal_channels = residual._keras_shape[1] == input._keras_shape[1]

        shortcut = input
        if stride_width > 1 or stride_height > 1 or not equal_channels:
            shortcut = Conv2D(nb_filter=residual._keras_shape[1],
                              nb_row=1,
                              nb_col=1,
                              subsample=(stride_width, stride_height),
                              init="he_normal",
                              border_mode="valid")(input)

        return merge([shortcut, residual], mode="sum")

    @staticmethod
    def _residual_block(block_function,
                        nb_filters,
                        repetations,
                        is_first_layer=False):
        def f(input):
            for i in range(repetations):
                init_subsample = (1, 1)
                if i == 0 and not is_first_layer:
                    init_subsample = (2, 2)
                input = block_function(nb_filters=nb_filters,
                                       init_subsample=init_subsample)(input)
            return input

        return f

    def resnet(self):
        from keras.layers.convolutional import MaxPooling2D, AveragePooling2D

        input = Input(shape=(3, 224, 224))

        conv1 = self._conv_bn_relu(nb_filter=64,
                                   nb_row=7,
                                   nb_col=7,
                                   subsample=(2, 2))(input)
        pool1 = MaxPooling2D(pool_size=(3, 3),
                             strides=(2, 2),
                             border_mode="same")(conv1)

        # Build residual blocks..
        block_fn = self._basic_block
        block1 = self._residual_block(block_fn,
                                      nb_filters=64,
                                      repetations=3,
                                      is_first_layer=True)(pool1)
        block2 = self._residual_block(block_fn, nb_filters=128,
                                      repetations=4)(block1)
        block3 = self._residual_block(block_fn, nb_filters=256,
                                      repetations=6)(block2)
        block4 = self._residual_block(block_fn, nb_filters=512,
                                      repetations=3)(block3)

        # Classifier block
        pool2 = AveragePooling2D(pool_size=(7, 7),
                                 strides=(1, 1),
                                 border_mode="same")(block4)
        flatten1 = Flatten()(pool2)
        dense = Dense(output_dim=1000, init="he_normal",
                      activation="softmax")(flatten1)

        model = Model(input=input, output=dense)
        return model
Example #9
class Network:
    def __init__(self, conf):
        # All hyperparameters used in the model
        self._board_size = conf['board_size']  # the size of the playing board
        self._lr = conf['learning_rate']  # learning rate of SGD (2e-3)
        self._momentum = conf['momentum']  # nesterov momentum (1e-1)
        self._l2_coef = conf['l2']  # coefficient of L2 penalty (1e-4)
        self._mini_batch_size = conf['mini_batch_size']  # the size of batch when training the network
        self._fit_epochs = conf['fit_epochs']  # the number of iteration

        # Define Network
        self._build_network()

        # The location of the file which stores the parameters of the network
        self._net_para_file = conf['net_para_file']
        self._fit_history_file = conf['fit_history_file']

        # Whether we use previous model or not
        self._use_previous_model = conf['use_previous_model']
        if self._use_previous_model:
            if os.path.exists(self._net_para_file):
                self._model.load_weights(self._net_para_file)
            else:
                print('> error: [use_previous_model] = True, ' + self._net_para_file + ' not found')

    @log
    def _build_network(self):
        # Input_Layer
        init_x = Input((3, self._board_size, self._board_size))  # the input is a tensor with the shape 3*(15*15)
        x = init_x

        # First Convolutional Layer with 32 filters
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Three Residual Blocks
        x = self._residual_block(x)
        x = self._residual_block(x)
        x = self._residual_block(x)

        # Policy Head for generating prior probability vector for each action
        policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same',
                        data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x)
        policy = BatchNormalization()(policy)
        policy = Activation('relu')(policy)
        policy = Flatten()(policy)
        policy = Dense(self._board_size*self._board_size, kernel_regularizer=l2(self._l2_coef))(policy)
        self._policy = Activation('softmax')(policy)

        # Value Head for generating value of each action
        value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same',
                       data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        value = BatchNormalization()(value)
        value = Activation('relu')(value)
        value = Flatten()(value)
        value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value)
        value = Activation('relu')(value)
        value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value)
        self._value = Activation('tanh')(value)

        # Define Network
        self._model = Model(inputs=init_x, outputs=[self._policy, self._value])

        # Define the Loss Function
        opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True)  # stochastic gradient descent with momentum
        losses_type = ['categorical_crossentropy', 'mean_squared_error']  # cross-entropy and MSE are weighted equally
        self._model.compile(optimizer=opt, loss=losses_type)

    def _residual_block(self, x):
        x_shortcut = x
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x) 
        x = Activation('relu')(x)
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x) 
        x = add([x, x_shortcut])  # Skip Connection
        x = Activation('relu')(x)
        return x
        
    def predict(self, board, color, last_move):
        if sum(sum(board)) == 0 and color == WHITE:
            print('error: network.predict')
        if sum(sum(board)) == 1 and color == BLACK:
            print('error: network.predict')
        tensor = board2tensor(board, color, last_move)
        policy, value_tensor = self._model.predict_on_batch(tensor)
        value = value_tensor[0][0]
        return policy, value

    def train(self, board_list, color_list, last_move_list, pi_list, z_list):
        size = len(color_list)
        for i in range(size):
            if sum(sum(board_list[i])) == 0 and color_list[i] == WHITE:
                print('error: network.train')
            if sum(sum(board_list[i])) == 1 and color_list[i] == BLACK:
                print('error: network.train')

        # Data augmentation through symmetry and rotation transformations
        board_aug = []
        color_aug = []
        last_move_aug = []
        pi_aug = []
        z_aug = []
        for i in range(len(board_list)):
            new_board, new_color, new_last_move, new_pi, new_z = \
                data_augmentation(board_list[i], color_list[i], last_move_list[i], pi_list[i], z_list[i])
            board_aug.extend(new_board)
            color_aug.extend(new_color)
            last_move_aug.extend(new_last_move)
            pi_aug.extend(new_pi)
            z_aug.extend(new_z)
        board_list.extend(board_aug)
        color_list.extend(color_aug)
        last_move_list.extend(last_move_aug)
        pi_list.extend(pi_aug)
        z_list.extend(z_aug)

        # Regularize Data
        board_list = np.array([board2tensor(board_list[i], color_list[i], last_move_list[i], reshape_flag=False)
                               for i in range(len(board_list))])
        pi_list = np.array(pi_list)
        z_list = np.array(z_list)

        # Training
        hist = self._model.fit(board_list, [pi_list, z_list], epochs=self._fit_epochs, batch_size=self._mini_batch_size, verbose=1)
        hist_path = self._fit_history_file + '_' + str(self._fit_epochs) + '_' + str(self._mini_batch_size) + '.txt'
        with open(hist_path, 'a') as f:
            f.write(str(hist.history))
            return hist.history['loss'][0]  # only sample loss of first epoch
        
    def get_para(self):
        net_para = self._model.get_weights() 
        return net_para

    def save_model(self):
        """ save model para to file """
        self._model.save_weights(self._net_para_file)

    def load_model(self):
        if os.path.exists(self._net_para_file):
            self._model.load_weights(self._net_para_file)
        else:
            print('> error: ' + self._net_para_file + ' not found')
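
A minimal usage sketch for the Network class above, assuming a hypothetical conf dict whose keys mirror the ones read in __init__; board, color and last_move would come from the surrounding Gomoku project, so the concrete values below are illustrative only.

# Hypothetical configuration; keys match those consumed by Network.__init__.
conf = {
    'board_size': 15,
    'learning_rate': 2e-3,
    'momentum': 1e-1,
    'l2': 1e-4,
    'mini_batch_size': 128,
    'fit_epochs': 1,
    'net_para_file': 'net_para.h5',
    'fit_history_file': 'fit_history',
    'use_previous_model': False,
}
net = Network(conf)
# board: a board_size x board_size numpy array; color and last_move as expected by board2tensor
# policy, value = net.predict(board, color, last_move)
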
Example #10
0
class PolicyValueNet(object):
    """ AlphaGoZero-like Policy Value Net. """
    def __init__(self, size, saved_weights=None):
        """ Initialize Attributes. """
        self.size = size  # board edge size
        self.l2_const = 1e-4  # coef of l2 penalty
        self.build_network()  # build neural network
        if saved_weights:
            self.model.set_weights(pickle.load(open(saved_weights, 'rb')))

    def build_network(self):
        """ Build the Policy Value Neural Net using Keras. """
        inputs = Input(shape=(4, self.size, self.size))

        # 3 common conv layers
        c_conv1 = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(inputs)
        c_conv2 = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(c_conv1)
        c_conv3 = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(c_conv2)

        # policy head
        p_conv = Conv2D(filters=4,
                        kernel_size=(1, 1),
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(c_conv3)
        p_flat = Flatten()(p_conv)
        self.policy_net = Dense(self.size * self.size,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(p_flat)

        # value head
        v_conv = Conv2D(filters=2,
                        kernel_size=(1, 1),
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(c_conv3)
        v_flat = Flatten()(v_conv)
        v_dense = Dense(64, kernel_regularizer=l2(self.l2_const))(v_flat)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(v_dense)

        # connect and build the model
        self.model = Model(inputs, [self.policy_net, self.value_net])
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=Adam(), loss=losses)

    def get_state(self, go):
        """ Convert the go board data to a state of 4 boards.
            The 4 boards are: the agent's pieces, the opponent's pieces,
            difference from previous board, move first or not.
        Params: go: a GO object.
        Returns: a (4, 5, 5) numpy array.
        """
        piece_type = 1 if go.X_move else 2
        cur_board = np.array(go.board)
        state = np.zeros((4, self.size, self.size))
        if go.previous_board:
            pre_board = np.array(go.previous_board)
            state[0] = (cur_board == piece_type).astype(float)
            state[1] = (cur_board == 3 - piece_type).astype(float)
            state[2] = (cur_board != pre_board).astype(float)
        if piece_type == 1:
            state[3][:, :] = 1.0
        return state[:, ::-1, :]

    def policy(self, go):
        """ Policy function for current go board.
        Params: go: a go object.
        Returns: (move, prob) tuples and corresponding values.
        """
        piece_type = 1 if go.X_move else 2
        candidates = []
        for i in range(go.size**2):
            row, col = i // go.size, i % go.size
            if go.valid_place_check(row, col, piece_type):
                candidates.append(i)
        cur_state = self.get_state(go)
        # expand dimension to predict
        move_probs, value = self.model.predict_on_batch(
            np.array(cur_state.reshape(-1, 4, self.size, self.size)))
        move_probs = zip(candidates, move_probs.flatten()[candidates])
        return move_probs, value[0][0]

    def get_entropy(self, probs):
        """ Return entropy according to move probabilities. """
        return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

    def train_core(self, states, mcts_probs, winners, lr):
        """ Training core function, performs one step of training.
        Params:
            states: list or numpy array, training data.
            mcts_probs: list or numpy array, training labels.
            winners: list or numpy array, training labels.
            lr: float, learning rate.
        Returns: tuple of floats, loss and entropy
        """
        states = np.array(states)
        mcts_probs = np.array(mcts_probs)
        winners = np.array(winners)
        loss = self.model.evaluate(states, [mcts_probs, winners],
                                   batch_size=states.shape[0],
                                   verbose=0)
        move_probs, _ = self.model.predict_on_batch(states)
        entropy = self.get_entropy(move_probs)
        K.set_value(self.model.optimizer.lr, lr)
        self.model.fit(states, [mcts_probs, winners],
                       batch_size=states.shape[0],
                       verbose=0)
        return loss[0], entropy

    def get_weights(self):
        """ Return model weights. """
        return self.model.get_weights()

    def save_weights(self, data_path='best_model.model'):
        """ Save model weights. """
        pickle.dump(self.get_weights(), open(data_path, 'wb'), protocol=2)
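
As an illustration only, one evaluation-then-update step with the PolicyValueNet above might look like the sketch below; the random arrays stand in for real self-play data and follow the (4, size, size) state layout used by build_network.

import numpy as np

net = PolicyValueNet(size=5)
states = np.random.rand(8, 4, 5, 5)                    # 8 placeholder board states
mcts_probs = np.random.dirichlet(np.ones(25), size=8)  # 8 placeholder MCTS move distributions
winners = np.random.choice([-1.0, 1.0], size=8)        # 8 placeholder game outcomes
loss, entropy = net.train_core(states, mcts_probs, winners, lr=2e-3)
net.save_weights('best_model.model')
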
Example #11
0
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data))
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)  # metrics=['mse']
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                           shuffle=True, callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val),
                           shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
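
A hedged end-to-end sketch of driving FinancialNewsAnalysisModel; it assumes the module-level constants nb_hidden_units, dropout and l2_norm_alpha exist in the surrounding project, and X_train/y_train are hypothetical arrays of shape (samples, time steps, features) and (samples, 1).

news_model = FinancialNewsAnalysisModel(nb_time_step=30, dim_data=100,
                                        batch_size=32, model_path='news_model.h5')
news_model.compile_model(lr=1e-4)
news_model.fit_model(X_train, y_train, epoch=500)  # X_train: (n, 30, 100), y_train: (n, 1)
news_model.save()
news_model.model_eval(X_test, y_test)              # prints per-sample sign agreement
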
Example #12
0
class CombinedAnalysisModel(object):
    model = None

    def __init__(self,
                 dim_input_x1,
                 time_step_x1,
                 dim_input_x2,
                 time_step_x2,
                 batch_size=1,
                 model_path=None,
                 fa_model_path=None,
                 ta_model_path=None):
        self.model_path = model_path
        self.fa_model_path = fa_model_path
        self.ta_model_path = ta_model_path
        self.batch_size = batch_size
        self.dim_input_x1 = dim_input_x1
        self.time_step_x1 = time_step_x1
        self.dim_input_x2 = dim_input_x2
        self.time_step_x2 = time_step_x2
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        news_input = Input(shape=(self.time_step_x1, self.dim_input_x1),
                           name='x1')
        financial_time_series_input = Input(shape=(self.time_step_x2,
                                                   self.dim_input_x2),
                                            name='x2')
        lstm = LSTM(output_dim=nb_hidden_units,
                    dropout_U=dropout,
                    dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha),
                    b_regularizer=l2(l2_norm_alpha),
                    activation='tanh',
                    name='h1',
                    trainable=False)
        bi_lstm = Bidirectional(lstm,
                                input_shape=(self.time_step_x1,
                                             self.dim_input_x1),
                                merge_mode='concat',
                                name='h1',
                                trainable=False)
        h1 = bi_lstm(news_input)

        lstm_layer_1 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True,
                            name='lstm_layer1',
                            trainable=False)
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=False,
                             name='lstm_layer2_loss3',
                             trainable=False)
        h2_layer_1 = lstm_layer_1(financial_time_series_input)
        h2_layer_2 = lstm_layer_23(h2_layer_1)
        h_3 = Merge(mode='concat', name='h3')([h1, h2_layer_2])
        h_4 = Dense(nb_hidden_units, name='h4')(h_3)
        prediction = Dense(1, name='y3')(h_4)
        self.model = Model(input=[news_input, financial_time_series_input],
                           output=prediction,
                           name='combined model for financial analysis')
        plot(self.model, to_file='model.png')

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self,
                  X1,
                  X2,
                  y,
                  X1_val=None,
                  X2_val=None,
                  y_val=None,
                  epoch=50):
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=3,
                                       verbose=0)
        if X1_val is None:
            self.model.fit([X1, X2],
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_split=0.2,
                           shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit([X1, X2],
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_data=([X1_val, X2_val], y_val),
                           shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if self.model_path is not None and os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True
        if self.ta_model_path is not None and os.path.exists(
                self.ta_model_path):
            self.model.load_weights(self.ta_model_path, by_name=True)
        if self.fa_model_path is not None and os.path.exists(
                self.fa_model_path):
            self.model.load_weights(self.fa_model_path, by_name=True)

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X1, X2, y):
        y_hat = self.model.predict([X1, X2], batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i, 0] * y_hat[
                i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data),
                                            name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True,
                            name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=True,
                             name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=True,
                             name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=True,
                             name='lstm_layer2_loss3')

        lstm_layer_24 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=True,
                             name='lstm_layer2_loss4')

        lstm_layer_25 = LSTM(output_dim=nb_hidden_units,
                             dropout_U=dropout,
                             dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha),
                             activation='tanh',
                             return_sequences=True,
                             name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1),
                                                   name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1),
                                                   name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1),
                                                   name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1,
                                                         activation='sigmoid'),
                                                   name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels,
                                                         activation='softmax'),
                                                   name="p5")(h25)  # cross
        self.model = Model(
            input=financial_time_series_input,
            output=[
                time_series_predictions1, time_series_predictions2,
                time_series_predictions3, time_series_predictions4,
                time_series_predictions5
            ],
            name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = [
            custom_objective1, custom_objective2, 'mse', 'binary_crossentropy',
            'categorical_crossentropy'
        ]
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=3,
                                       verbose=0)

        self.model.fit(X, [y] * 3 + [y > 0] + [y_label],
                       batch_size=self.batch_size,
                       nb_epoch=epoch,
                       validation_split=0.3,
                       shuffle=True,
                       callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[
                0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
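
For the multi-task model above, a hedged training sketch could look like the following; X, y and the one-hot y_label are hypothetical arrays shaped (samples, time steps, features), (samples, time steps, 1) and (samples, time steps, nb_labels), feeding the five outputs that share one LSTM trunk.

ts_model = FinancialTimeSeriesAnalysisModel(nb_time_step=50, dim_data=4,
                                            batch_size=16, model_path='ts_model.h5')
ts_model.compile_model(lr=1e-4)               # two custom losses, mse, logloss, cross-entropy
ts_model.fit_model(X, y, y_label, epoch=300)  # targets: [y, y, y, y > 0, y_label]
ts_model.save()
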
Example #14
0
class AdditionNPIModel(NPIStep):
    model = None
    f_enc = None

    def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1,
                              batch_input_shape=(self.batch_size, 1))

        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(Dense(256))
        f_enc.add(Dense(32))
        f_enc.add(Activation('relu', name='relu_enc'))
        self.f_enc = f_enc

        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)

        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))

        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        # f_lstm.add(Activation('relu', name='relu_lstm_0'))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)

        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(10))
        f_end.add(Dense(1))
        f_end.add(Activation('hard_sigmoid', name='hard_sigmoid_end'))
        # plot(f_end, to_file='f_end.png', show_shapes=True)

        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE))
        f_prog.add(Dense(PROGRAM_VEC_SIZE))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)

        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num+1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(32))
            f_arg.add(Dense(IntegerArguments.depth))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
        # plot(f_arg, to_file='f_arg.png', show_shapes=True)

        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] + [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        """

        :param int epoch:
        :param typing.List[typing.Dict[q=dict, steps=typing.List[StepInOut]]] steps_list:
        :return:
        """

        def filter_question(condition_func):
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        # self.print_weights()
        if not self.weight_loaded:
            self.train_f_enc(filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), epoch=100)
        self.f_enc.trainable = False

        q_type = "training questions of a+b < 10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a+b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b< 10 and 10 <= a+b"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a<10 and b<10 and a + b >= 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b<10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 10 and b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        while True:
            print("test all type of questions")
            cc, wc = self.test_to_subset(create_questions(1000))
            print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc))
            if wc == 0:
                break

            q_type = "training questions of ALL"
            print(q_type)
            pr = 1.0
            self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr)
            all_ok = self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr, skip_correct=True)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, epoch=3000, pass_rate=1.0, skip_correct=False):
        learning_rate = 0.0001
        for i in range(30):
            all_ok = self.do_learn(steps_list, 30, learning_rate=learning_rate, pass_rate=pass_rate, arg_weight=1.,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
            learning_rate *= 0.95
        return False

    def test_to_subset(self, questions):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        correct_count = wrong_count = 0
        for idx, question in enumerate(questions):
            question = copy(question)
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                wrong_count += 1
        return correct_count, wrong_count

    @staticmethod
    def dict_to_str(d):
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, learning_rate=None, pass_rate=1.0, arg_weight=1., skip_correct=False):
        if learning_rate is not None:
            self.update_learning_rate(learning_rate, arg_weight)
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch+1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                    ok_rate.append(1)
                    if skip_correct or int(math.sqrt(correct_count[question_key])) ** 2 != correct_count[question_key]:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                        correct_count[question_key] = 0
                        wrong_new += 1

                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                    last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" %
                      (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0

                if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:
                    print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:")
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model")
        env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num %  10)+1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))

    def question_test(self, addition_env, npi_runner, question):
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        x_pg = np.array((p_in.program.program_id,))
        x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)]
        return x

    def convert_output(self, p_out: StepOutput):
        y = [np.array((p_out.r,))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]

        for v in arg_values:  # one output per argument value
            y += [v]
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(x, batch_size=1)  # if batch_size==1, returns single row

        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
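
A rough, assumption-laden sketch of how AdditionNPIModel might be driven; every name besides the class itself comes from the surrounding NPI project, the constructors are shown without arguments they may actually require, and the steps_list layout follows the fit docstring above.

# steps_list items look like {'q': {'in1': int, 'in2': int, ...}, 'steps': [StepInOut, ...]}
system = RuntimeSystem()                 # hypothetical: constructor arguments not shown here
program_set = AdditionProgramSet()       # hypothetical: constructor arguments not shown here
npi_model = AdditionNPIModel(system, model_path='npi.h5', program_set=program_set)
npi_model.fit(steps_list, epoch=3000)    # trains f_enc first, then curriculum subsets
npi_model.save()
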
class PolicyValueNet():
    """策略价值网络"""

    #def __init__(self, board_width, board_height, model_file=None):
    def __init__(self, policy_infer_size, model_file=None):
        #self.board_width = board_width
        #self.board_height = board_height
        self.policy_infer_size = policy_infer_size
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        self.load_model_done = True
        if model_file and os.path.exists(model_file):
            self.load_model_done = False
            self.load_model(model_file)

    def load_model(self, model_file):
        """重新加载模型(仅用于selfplay时load new model)"""
        try:
            #net_params = pickle.load(open(model_file, 'rb'), encoding='bytes') #iso-8859-1')
            net_params = utils.pickle_load(model_file)
            self.model.set_weights(net_params)
            self.load_model_done = True
        except:
            logging.error("load_model fail! {}\t{}".format(
                model_file, utils.get_trace()))
            self.load_model_done = False
        if os.path.exists(model_file) and self.load_model_done is False:
            # the model file exists but loading failed, so stop running
            exit(-1)
        return self.load_model_done

    def create_policy_value_net(self):
        """创建policy-value网络"""
        # 输入层
        #in_x = network = Input((4, self.board_width, self.board_height))
        in_x = network = Input((4, 1, self.policy_infer_size))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        # infer self.board_width * self.board_height action_probs
        #self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        self.policy_net = Dense(self.policy_infer_size,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        # infer one current state score
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        # build the network model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return the move policy and state value
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            #print(state_input_union)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """使用模型预测棋盘所有actionid的价值概率"""
        # 棋盘所有可移动action_ids
        legal_positions = board.availables
        #print(legal_positions)
        # action history from the current player's perspective
        current_actions = board.current_actions()
        #print(current_actions)
        # predict the move policy and value with the model
        #print(self.policy_infer_size)
        #act_probs, value = self.policy_value(current_actions.reshape(-1, 4, self.board_width, self.board_height))
        act_probs, value = self.policy_value(
            current_actions.reshape(-1, 4, 1, self.policy_infer_size))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, probability)] pairs and the value of the current player's position
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """初始化损失
        3个损失函数因子
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value损失函数 + policy损失函数 + 惩罚项
        """
        # 定义优化器和损失函数
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """输出训练过程中的结果"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate the current loss
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            # predict to compute the policy entropy
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """获得模型参数"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """保存模型参数到文件"""
        net_params = self.get_policy_param()
        #pickle.dump(net_params, open(model_file, 'wb'), protocol=4)
        utils.pickle_dump(net_params, model_file)
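
A minimal sketch of one predict/train cycle with the PolicyValueNet above; board is a hypothetical object exposing availables and current_actions() as policy_value_fn expects, and the batch arrays are illustrative placeholders shaped to match the (4, 1, policy_infer_size) input.

net = PolicyValueNet(policy_infer_size=64)
act_probs, value = net.policy_value_fn(board)      # [(action, prob)] pairs and a scalar value
loss, entropy = net.train_step(state_batch,        # (n, 4, 1, 64) states
                               mcts_probs_batch,   # (n, 64) target policies
                               winner_batch,       # (n,) game outcomes
                               learning_rate=2e-3)
net.save_model('policy_value_net.pkl')
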
Example #16
0
class Network():
    def __init__(self, conf):
        # Some Hyperparameters
        self._board_size = conf['board_size']  # the size of the playing board
        self._lr = conf['learning_rate']  # learning rate of SGD (2e-3)
        self._momentum = conf['momentum']  # nesterov momentum (1e-1)
        self._l2_coef = conf['l2']  # coefficient of L2 penalty (1e-4)
        # Define Network
        self._build_network()
        # File Location
        self._net_para_file = conf['net_para_file']
        # If we use previous model or not
        self._use_previous_model = conf['use_previous_model']
        if self._use_previous_model:
            self._model.load_weights(self._net_para_file)  # load_weights restores the weights in place

    def _build_network(self):
        # Input_Layer
        init_x = Input((3, self._board_size, self._board_size))
        x = init_x
        # Convolutional Layer
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format='channels_first',
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        # Residual Layer
        x = self._residual_block(x)
        x = self._residual_block(x)
        x = self._residual_block(x)
        # Policy Head
        policy = Conv2D(filters=2,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        padding='same',
                        data_format='channels_first',
                        kernel_regularizer=l2(self._l2_coef))(x)
        policy = BatchNormalization()(policy)
        policy = Activation('relu')(policy)
        policy = Flatten()(policy)
        policy = Dense(self._board_size * self._board_size,
                       kernel_regularizer=l2(self._l2_coef))(policy)
        self._policy = Activation('softmax')(policy)
        # Value Head
        value = Conv2D(filters=1,
                       kernel_size=(1, 1),
                       strides=(1, 1),
                       padding='same',
                       data_format="channels_first",
                       kernel_regularizer=l2(self._l2_coef))(x)
        value = BatchNormalization()(value)
        value = Activation('relu')(value)
        value = Flatten()(value)
        value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value)
        value = Activation('relu')(value)
        value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value)
        self._value = Activation('tanh')(value)
        # Define Network
        self._model = Model(inputs=init_x, outputs=[self._policy, self._value])
        # Define the Loss Function
        opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True)
        losses_type = ['categorical_crossentropy', 'mean_squared_error']
        self._model.compile(optimizer=opt, loss=losses_type)

    def _residual_block(self, x):
        x_shortcut = x
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format="channels_first",
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format="channels_first",
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = add([x, x_shortcut])  # Skip Connection
        x = Activation('relu')(x)
        return x

    def predict(self, board, color, random_flip=False):
        if random_flip:
            b_t, method_index = input_transform(board)
            tensor_t = board2tensor(b_t, color, reshape_flag=True)
            prob_tensor_t, value_tensor = self._model.predict_on_batch(
                tensor_t)
            policy = output_decode(prob_tensor_t, method_index, board.shape[0])
            value = value_tensor[0][0]
            return policy, value
        else:
            tensor = board2tensor(board, color)
            policy, value_tensor = self._model.predict_on_batch(tensor)
            value = value_tensor[0][0]
            return policy, value

    def train(self, board_list, color_list, pi_list, z_list):
        # Regularize Data
        tensor_list = np.array([
            board2tensor(board_list[i], color_list[i], reshape_flag=False)
            for i in range(len(board_list))
        ])
        pi_list = np.array(pi_list)
        z_list = np.array(z_list)
        # Training
        self._model.fit(tensor_list, [pi_list, z_list],
                        epochs=20,
                        batch_size=len(color_list),
                        verbose=1)
        # Calculate Loss Explicitly
        loss = self._model.evaluate(tensor_list, [pi_list, z_list],
                                    batch_size=len(board_list),
                                    verbose=0)
        loss = loss[0]
        return loss

    def get_para(self):
        net_para = self._model.get_weights()
        return net_para

    def save_model(self):
        """ save model para to file """
        self._model.save_weights(self._net_para_file)

    def load_model(self):
        self._model.load_weights(self._net_para_file)
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        if model_file:
            print("[Notice] load model from file")
            self.model = load_model(model_file)
        else:
            print("[Notice] create model")
        self._loss_train_op()

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)
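        # With these two heads, Keras sums the categorical cross-entropy on the
        # policy output (the -pi^T * log(p) term), the mean squared error on the
        # value output (the (z - v)^2 term), and the l2 kernel_regularizer
        # penalties attached to every layer (the c||theta||^2 term), so
        # compiling with just these two losses realises the loss in the
        # docstring above.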

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model to file """
        print("save model file")
        self.model.save(model_file)
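
# A small illustrative helper, not from the original source: given an already
# constructed instance of the convolutional policy-value net above, run one
# training update on random stand-in data. The 8x8 board size, batch size and
# learning rate are assumptions chosen purely for demonstration.
def demo_train_step(net, board_width=8, board_height=8, n_samples=16):
    """Push one batch of randomly generated self-play-shaped data through net.train_step."""
    states = np.random.rand(n_samples, 4, board_width, board_height)   # 4 feature planes per position
    mcts_probs = np.random.rand(n_samples, board_width * board_height)
    mcts_probs /= mcts_probs.sum(axis=1, keepdims=True)                # normalise to move distributions
    winners = np.random.choice([1.0, -1.0], size=n_samples)            # game outcomes in [-1, 1]
    loss, entropy = net.train_step(states, mcts_probs, winners, learning_rate=2e-3)
    return loss, entropy
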
class PolicyValueNet():
    """policy-value network """
    def __init__(self, model_file=None):

        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)
            plot_model(self.model, to_file='model.png')

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((13,))

        # fully-connected layers
        network = Dense(64, activation='relu', kernel_regularizer=l2(self.l2_const))(network)
        network = Dense(64, activation='relu', kernel_regularizer=l2(self.l2_const))(network)
        network = Dense(32, activation='relu', kernel_regularizer=l2(self.l2_const))(network)
        network = Dense(32, activation='relu', kernel_regularizer=l2(self.l2_const))(network)

        # action policy layer
        self.policy_net = Dense(6, activation='softmax', kernel_regularizer=l2(self.l2_const))(network)
        # state value layer
        self.value_net = Dense(1, activation='tanh', kernel_regularizer=l2(self.l2_const))(network)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        current_state = board.current_state()
        act_probs, value = self.policy_value(np.expand_dims(current_state, 0))
        # score all 6 actions; unlike the network above, filtering to
        # board.availables is left to the caller
        actret = [(i, act_probs[0][i]) for i in range(6)]
        return actret, value[0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
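
# A minimal round-trip sketch for the pickle-based persistence used above; the
# file name and the dummy state are illustrative, not part of the original
# source. save_model() dumps the weight arrays with pickle, and passing
# model_file to __init__ restores them via set_weights().
if __name__ == '__main__':
    net = PolicyValueNet()                               # fresh, randomly initialised net
    net.save_model('policy_13dim.model')                 # pickle the weight arrays

    restored = PolicyValueNet(model_file='policy_13dim.model')
    dummy_state = np.zeros((1, 13), dtype=np.float32)    # one 13-feature state
    probs, value = restored.policy_value(dummy_state)    # probs: (1, 6), value: (1, 1)
    print(probs.shape, value.shape)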