class FinancialTimeSeriesAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1') lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer1') lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss1') lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss2') lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss3') lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss4') lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss5') h1 = lstm_layer_1(financial_time_series_input) h21 = lstm_layer_21(h1) h22 = lstm_layer_22(h1) h23 = lstm_layer_23(h1) h24 = lstm_layer_24(h1) h25 = lstm_layer_25(h1) time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21) # custom 1 time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22) # custom 2 time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23) # mse time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24) # logloss time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25) # cross self.model = Model(input=financial_time_series_input, output=[time_series_predictions1, time_series_predictions2, time_series_predictions3, time_series_predictions4, time_series_predictions5], name="multi-task deep rnn for financial time series forecasting") plot(self.model, to_file='model.png') def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, arg_weight=1.): optimizer = Adam(lr=lr) loss = [custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy'] self.model.compile(optimizer=optimizer, loss=loss) def fit_model(self, X, y, y_label, epoch=300): early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0) self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): 
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
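# Hedged usage sketch (not part of the original source): drives the multi-task model
# above with random arrays just to illustrate the expected input/target shapes. It
# assumes the module-level globals referenced in build()/compile_model()
# (nb_hidden_units, dropout, l2_norm_alpha, nb_labels, custom_objective1,
# custom_objective2) are defined, and that pydot is installed for plot().
nb_time_step, dim_data, nb_samples = 20, 8, 64      # illustrative sizes only
ts_model = FinancialTimeSeriesAnalysisModel(nb_time_step, dim_data, batch_size=4)
ts_model.compile_model(lr=0.0001)
X = np.random.randn(nb_samples, nb_time_step, dim_data)
y = np.random.randn(nb_samples, nb_time_step, 1)
y_label = np.eye(nb_labels)[np.random.randint(nb_labels, size=(nb_samples, nb_time_step))]
ts_model.fit_model(X, y, y_label, epoch=2)
ts_model.model_eval(X[:1], y[:1])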
class PolicyNet(): """policy network """ def __init__(self, board_width, board_height, model_file=None, pretrained_file=None): self.board_width = board_width self.board_height = board_height self.l2_const = 1e-4 # coef of l2 penalty self.build_net() self._loss_train_op(0.001) if model_file: self.model.load_weights(model_file) if pretrained_file: self.model.load_weights(pretrained_file, by_name=True) def build_net(self): """create the policy value network """ in_x = network = Input((2, self.board_width, self.board_height)) # conv layers network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2( self.l2_const))(policy_net) self.model = Model(in_x, self.policy_net) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ legal_positions = board.availables current_state = board.current_state() act_probs = self.policy_value( current_state.reshape( (-1, 2, self.board_width, self.board_height))) act_probs = list( zip(legal_positions, act_probs.flatten()[legal_positions])) return act_probs def _loss_train_op(self, initial_learning_rate): """ Three loss terms: loss = (z - v)^2 + pi^T * log(p) + c||theta||^2 """ # get the train op # opt = Adam() self.session = K.get_session() global_step = tf.Variable(0, trainable=False) lr = tf.train.exponential_decay(initial_learning_rate, global_step, 10000, 0.95, True) opt = tf.train.AdamOptimizer(learning_rate=lr) one_hot_move_ph = tf.placeholder( tf.float32, (None, self.board_width * self.board_height), "moves") reward_ph = tf.placeholder(tf.float32, (None, ), "rewards") def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def loss_op(): objective = tf.log(tf.nn.softmax(self.model.output[0], axis=-1)) * one_hot_move_ph objective = tf.reduce_sum(objective, axis=-1, keepdims=False) objective = objective * reward_ph return -1 * objective self.loss_op = loss_op() self.minimize_op = opt.minimize(self.loss_op, global_step=global_step) def train_step(states, reward, moves): np_state_input = np.array(states) np_reward = np.array(reward) np_moves = np.eye(self.board_height * self.board_width)[np.array(moves)] # K.set_value(self.model.optimizer.lr, learning_rate) # loss = self.model.train_on_batch(np_state_input, [np_winner]) feed_dict = { self.model.input: np_state_input, one_hot_move_ph: np_moves, reward_ph: np_reward } _, loss, new_probs = self.session.run( [self.minimize_op, self.loss_op, self.model.output], feed_dict) entropy = self_entropy(new_probs) return loss, entropy self.train_step = train_step def get_policy_param(self): 
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_path):
        """ save model params to file """
        # net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
        self.model.save_weights(model_path)

    def load_model(self, model_path):
        self.model.load_weights(model_path)
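# Hedged usage sketch (not part of the original source): one REINFORCE-style update
# on random data, matching the shapes train_step() expects. The extra initializer run
# covers the optimizer/global-step variables created in _loss_train_op(); assumes
# TensorFlow 1.x and the same imports as the class above.
width, height = 8, 8
policy = PolicyNet(width, height)
policy.session.run(tf.global_variables_initializer())
states = np.random.rand(16, 2, width, height).astype('float32')
moves = np.random.randint(width * height, size=16)
rewards = np.random.rand(16).astype('float32')
loss, entropy = policy.train_step(states, rewards, moves)
print("loss=%s entropy=%s" % (np.mean(loss), entropy))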
class AdditionNPIModel(NPIStep): model = None f_enc = None def __init__(self, system: RuntimeSystem, model_path: str = None, program_set: AdditionProgramSet = None): self.system = system self.model_path = model_path self.program_set = program_set self.batch_size = 1 self.build() self.weight_loaded = False self.load_weights() def build(self): enc_size = self.size_of_env_observation() argument_size = IntegerArguments.size_of_arguments input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc') input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg') input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1, batch_input_shape=(self.batch_size, 1)) f_enc = Sequential(name='f_enc') f_enc.add(Merge([input_enc, input_arg], mode='concat')) f_enc.add(MaxoutDense(128, nb_feature=4)) self.f_enc = f_enc program_embedding = Sequential(name='program_embedding') program_embedding.add(input_prg) f_enc_convert = Sequential(name='f_enc_convert') f_enc_convert.add(f_enc) f_enc_convert.add(RepeatVector(1)) f_lstm = Sequential(name='f_lstm') f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat')) f_lstm.add( LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001))) f_lstm.add(Activation('relu', name='relu_lstm_1')) f_lstm.add(RepeatVector(1)) f_lstm.add( LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001))) f_lstm.add(Activation('relu', name='relu_lstm_2')) # plot(f_lstm, to_file='f_lstm.png', show_shapes=True) f_end = Sequential(name='f_end') f_end.add(f_lstm) f_end.add(Dense(1, W_regularizer=l2(0.001))) f_end.add(Activation('sigmoid', name='sigmoid_end')) f_prog = Sequential(name='f_prog') f_prog.add(f_lstm) f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu")) f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001))) f_prog.add(Activation('softmax', name='softmax_prog')) # plot(f_prog, to_file='f_prog.png', show_shapes=True) f_args = [] for ai in range(1, IntegerArguments.max_arg_num + 1): f_arg = Sequential(name='f_arg%s' % ai) f_arg.add(f_lstm) f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001))) f_arg.add(Activation('softmax', name='softmax_arg%s' % ai)) f_args.append(f_arg) # plot(f_arg, to_file='f_arg.png', show_shapes=True) self.model = Model([input_enc.input, input_arg.input, input_prg.input], [f_end.output, f_prog.output] + [fa.output for fa in f_args], name="npi") self.compile_model() plot(self.model, to_file='model.png', show_shapes=True) def reset(self): super(AdditionNPIModel, self).reset() for l in self.model.layers: if type(l) is LSTM: l.reset_states() def compile_model(self, lr=0.0001, arg_weight=1.): arg_num = IntegerArguments.max_arg_num optimizer = Adam(lr=lr) loss = ['binary_crossentropy', 'categorical_crossentropy' ] + ['categorical_crossentropy'] * arg_num self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num) def fit(self, steps_list, epoch=3000): # 过滤一些问题 def filter_question(condition_func): sub_steps_list = [] for steps_dict in steps_list: question = steps_dict['q'] if condition_func(question['in1'], question['in2']): sub_steps_list.append(steps_dict) return sub_steps_list if not self.weight_loaded: self.train_f_enc( filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), epoch=100) self.f_enc.trainable = False self.update_learning_rate(0.0001) q_type = "training questions of a<100 and b<100" print(q_type) pr = 0.8 all_ok = self.fit_to_subset( 
filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) while True: if self.test_and_learn([10, 100, 1000]): break q_type = "training questions of ALL" print(q_type) q_num = 100 skip_correct = False pr = 1.0 questions = filter_question(lambda a, b: True) np.random.shuffle(questions) questions = questions[:q_num] all_ok = self.fit_to_subset(questions, pass_rate=pr, skip_correct=skip_correct) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False): for i in range(10): all_ok = self.do_learn(steps_list, 100, pass_rate=pass_rate, skip_correct=skip_correct) if all_ok: return True return False def test_and_learn(self, num_questions): for num in num_questions: print("test all type of %d questions" % num) cc, wc, wrong_questions = self.test_to_subset( create_random_questions(num)) acc_rate = cc / (cc + wc) print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc)) if wc > 0: self.fit_to_subset(wrong_questions, pass_rate=1.0, skip_correct=False) return False return True def test_to_subset(self, questions): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) teacher = AdditionTeacher(self.program_set) npi_runner = TerminalNPIRunner(None, self) teacher_runner = TerminalNPIRunner(None, teacher) correct_count = wrong_count = 0 wrong_steps_list = [] for idx, question in enumerate(questions): question = copy(question) if self.question_test(addition_env, npi_runner, question): correct_count += 1 else: self.question_test(addition_env, teacher_runner, question) wrong_steps_list.append({ "q": question, "steps": teacher_runner.step_list }) wrong_count += 1 return correct_count, wrong_count, wrong_steps_list @staticmethod def dict_to_str(d): return str(tuple([(k, d[k]) for k in sorted(d)])) def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) last_weights = None correct_count = Counter() no_change_count = 0 last_loss = 1000 for ep in range(1, epoch + 1): correct_new = wrong_new = 0 losses = [] ok_rate = [] np.random.shuffle(steps_list) for idx, steps_dict in enumerate(steps_list): question = copy(steps_dict['q']) question_key = self.dict_to_str(question) if self.question_test(addition_env, npi_runner, question): if correct_count[question_key] == 0: correct_new += 1 correct_count[question_key] += 1 print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) ok_rate.append(1) cc = correct_count[question_key] if skip_correct or int(math.sqrt(cc))**2 != cc: continue else: ok_rate.append(0) if correct_count[question_key] > 0: print( "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) correct_count[question_key] = 0 wrong_new += 1 steps = steps_dict['steps'] xs = [] ys = [] ws = [] for step in steps: xs.append(self.convert_input(step.input)) y, w = self.convert_output(step.output) ys.append(y) ws.append(w) self.reset() for i, (x, y, w) in enumerate(zip(xs, ys, ws)): loss = self.model.train_on_batch(x, y, sample_weight=w) if not np.isfinite(loss): print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)])) self.print_weights(last_weights, detail=True) raise RuntimeError("Loss is not finite!") losses.append(loss) last_weights = self.model.get_weights() if losses: cur_loss = np.average(losses) print( "ep=%2d: 
ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % (ep, np.average(ok_rate) * 100, correct_new, wrong_new, cur_loss, len(steps_list))) # self.print_weights() if correct_new + wrong_new == 0: no_change_count += 1 else: no_change_count = 0 if math.fabs(1 - cur_loss / last_loss) < 0.001 and no_change_count > 5: print( "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:" ) return False last_loss = cur_loss print("=" * 80) self.save() if np.average(ok_rate) >= pass_rate: return True return False def update_learning_rate(self, learning_rate, arg_weight=1.): print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight)) self.compile_model(learning_rate, arg_weight=arg_weight) def train_f_enc(self, steps_list, epoch=50): print("training f_enc") f_add0 = Sequential(name='f_add0') f_add0.add(self.f_enc) f_add0.add(Dense(FIELD_DEPTH)) f_add0.add(Activation('softmax', name='softmax_add0')) f_add1 = Sequential(name='f_add1') f_add1.add(self.f_enc) f_add1.add(Dense(FIELD_DEPTH)) f_add1.add(Activation('softmax', name='softmax_add1')) env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model") env_model.compile(optimizer='adam', loss=['categorical_crossentropy'] * 2) for ep in range(epoch): losses = [] for idx, steps_dict in enumerate(steps_list): prev = None for step in steps_dict['steps']: x = self.convert_input(step.input)[:2] env_values = step.input.env.reshape((4, -1)) in1 = np.clip(env_values[0].argmax() - 1, 0, 9) in2 = np.clip(env_values[1].argmax() - 1, 0, 9) carry = np.clip(env_values[2].argmax() - 1, 0, 9) y_num = in1 + in2 + carry now = (in1, in2, carry) if prev == now: continue prev = now y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH) y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH) y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]] loss = env_model.train_on_batch(x, y) losses.append(loss) print("ep %3d: loss=%s" % (ep, np.average(losses))) if np.average(losses) < 1e-06: break def question_test(self, addition_env, npi_runner, question): addition_env.reset() self.reset() try: run_npi(addition_env, npi_runner, self.program_set.ADD, question) if question['correct']: return True except StopIteration: pass return False def convert_input(self, p_in: StepInput): x_pg = np.array((p_in.program.program_id, )) x = [ xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg) ] return x def convert_output(self, p_out: StepOutput): y = [np.array((p_out.r, ))] weights = [[1.]] if p_out.program: arg_values = p_out.arguments.values arg_num = len(p_out.program.args or []) y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)] weights += [[1.]] else: arg_values = IntegerArguments().values arg_num = 0 y += [np.zeros((PROGRAM_VEC_SIZE, ))] weights += [[1e-10]] for v in arg_values: # split by each args y += [v] weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num) weights = [np.array(w) for w in weights] return [yy.reshape((self.batch_size, -1)) for yy in y], weights def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput: x = self.convert_input(StepInput(env_observation, pg, arguments)) results = self.model.predict( x, batch_size=1) # if batch_size==1, returns single row r, pg_one_hot, arg_values = results[0], results[1], results[2:] program = self.program_set.get(pg_one_hot.argmax()) ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values))) return ret def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): 
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
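# `to_one_hot_array` is called by train_f_enc() above but is not defined in this
# excerpt. A minimal sketch consistent with how it is used there
# (to_one_hot_array(index, FIELD_DEPTH) -> one-hot vector of length FIELD_DEPTH):
def to_one_hot_array(index, depth):
    vec = np.zeros((depth,), dtype=np.float32)
    vec[index] = 1.0
    return vec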
class FinancialTimeSeriesAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps financial_time_series_input = Input(shape=(nb_time_step, dim_data)) lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, inner_activation='sigmoid', W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True) lstm_layer_2 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, inner_activation='sigmoid', W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True) h1 = lstm_layer_1(financial_time_series_input) h2 = lstm_layer_2(h1) time_series_predictions = TimeDistributedDense(1)(h2) self.model = Model( financial_time_series_input, time_series_predictions, name="deep rnn for financial time series forecasting") def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, arg_weight=1.): optimizer = Adam(lr=lr) loss = 'mse' self.model.compile(optimizer=optimizer, loss=loss) def fit_model(self, X, y, X_val=None, y_val=None, epoch=3): early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0) if X_val is None: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) else: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val), shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X, y): y_hat = self.model.predict(X, batch_size=1) count_true = 0 count_all = y.shape[1] for i in range(y.shape[1]): count_true = count_true + 1 if y[0, i, 0] * y_hat[ 0, i, 0] > 0 else count_true print(y[0, i, 0], y_hat[0, i, 0]) print(count_all, count_true)
class PolicyValueNet():
    """policy-value network"""

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()
        if model_file:
            if platform.python_version().split('.')[0] == '3':  # python3
                net_params = pickle.load(open(model_file, 'rb'), encoding='iso-8859-1')
            else:
                net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """create the policy-value network"""
        # input layer
        in_x = network = Input((4, self.board_width, self.board_height))
        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first",
                            activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height, activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first",
                           activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net)
        # build the network model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return the move policy and the value prediction
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """predict (action, probability) pairs for all legal positions and the value of the board state"""
        # all legal positions on the board
        legal_positions = board.availables
        # board state from the current player's perspective
        current_state = board.current_state()
        # predict the move policy and the state value with the model
        act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, probability)] pairs and the value of the position for the current player
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """Initialize the loss.
        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        i.e. value loss + policy loss + L2 penalty
        """
        # define the optimizer and the loss functions
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """run one training step and report its metrics"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union],
                                       batch_size=len(state_input), verbose=0)
            # predict
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy
        self.train_step = train_step

    def get_policy_param(self):
        """get model parameters"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """save model parameters to file"""
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
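# Hedged usage sketch (not part of the original source): the class above persists
# its parameters as a pickled list of weight arrays, so a save/restore round trip
# looks like this (the file name is illustrative).
pv_net = PolicyValueNet(8, 8)
pv_net.save_model('current_policy.model')                      # pickles model.get_weights()
restored_net = PolicyValueNet(8, 8, model_file='current_policy.model')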
class PolicyValueNet(): """policy-value network """ def __init__(self, board_width, board_height, model_file=None): self.board_width = board_width self.board_height = board_height self.l2_const = 1e-4 # coef of l2 penalty self.create_policy_value_net() self._loss_train_op() if model_file: net_params = pickle.load(open(model_file, 'rb')) self.model.set_weights(net_params) def create_policy_value_net(self): """create the policy value network """ in_x = network = Input((4, self.board_width, self.board_height)) # conv layers network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width*self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) self.model = Model(in_x, [self.policy_net, self.value_net]) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_value_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ legal_positions = board.availables current_state = board.current_state() act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height)) act_probs = zip(legal_positions, act_probs.flatten()[legal_positions]) return act_probs, value[0][0] def _loss_train_op(self): """ Three loss terms: loss = (z - v)^2 + pi^T * log(p) + c||theta||^2 """ # get the train op opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): net_params = self.model.get_weights() return net_params def save_model(self, model_file): """ save model params to file """ net_params = self.get_policy_param() pickle.dump(net_params, 
open(model_file, 'wb'), protocol=2)
class FinancialNewsAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps news_input = Input(shape=(nb_time_step, dim_data), name='x1') lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', name='h1') bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat', name='h1') all_news_rep = bi_lstm(news_input) news_predictions = Dense(1, activation='linear')(all_news_rep) self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis") def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, loss_weights=0.1): optimizer = Adam(lr=lr) loss = 'mse' # loss = custom_objective self.model.compile(optimizer=optimizer, loss=loss) #metrics=['mse']) plot(self.model, to_file='model.png') def fit_model(self, X, y, X_val=None, y_val=None, epoch=500): early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0) if X_val is None: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) else: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val), shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X, y): y_hat = self.model.predict(X, batch_size=1) count_true = 0 count_all = y.shape[0] for i in range(y.shape[0]): count_true = count_true + 1 if y[i,0]*y_hat[i,0]>0 else count_true print y[i,0],y_hat[i,0] print count_all,count_true
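# Hedged usage sketch (not part of the original source): training the news model on
# random sentence embeddings. Shapes are illustrative; assumes the module-level
# nb_hidden_units / dropout / l2_norm_alpha globals used in build() are defined and
# that pydot is installed for plot().
nb_time_step, dim_data = 30, 100
fa_model = FinancialNewsAnalysisModel(nb_time_step, dim_data, batch_size=8)
fa_model.compile_model(lr=0.0001)
X_news = np.random.randn(256, nb_time_step, dim_data)
y_ret = np.random.randn(256, 1)
fa_model.fit_model(X_news, y_ret, epoch=2)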
class PolicyValueNet(): """policy-value network """ def __init__(self, board_width, board_height, model_file=None): self.board_width = board_width self.board_height = board_height self.l2_const = 1e-4 # coef of l2 penalty if model_file: # net_params = pickle.load(open(model_file, 'rb')) # self.model.set_weights(net_params) self.model = load_model(model_file) else: # self.create_policy_value_net() self.create_policy_value_resnet() self._loss_train_op() def create_policy_value_resnet(self): def _conv_bn_relu(filters=128, kernel_size=(3, 3)): def f(input): conv = Conv2D(kernel_size=kernel_size, filters=filters, padding="same", data_format="channels_first", kernel_regularizer=l2(self.l2_const))(input) norm = BatchNormalization(axis=1)(conv) return Activation("relu")(norm) return f def _conv_bn(filters=128, kernel_size=(3, 3)): def f(input): conv = Conv2D(kernel_size=kernel_size, filters=filters, padding="same", data_format="channels_first", kernel_regularizer=l2(self.l2_const))(input) norm = BatchNormalization(axis=1)(conv) return norm return f def _basic_block(nb_filters): def f(input): conv1 = _conv_bn_relu(nb_filters, (3, 3))(input) conv2 = _conv_bn(nb_filters, (3, 3))(conv1) shortcut = keras.layers.add([conv1, conv2]) return Activation("relu")(shortcut) return f in_x = network = Input((4, self.board_width, self.board_height)) network = _basic_block(64)(network) network = _basic_block(128)(network) ''' layer1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(layer1) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) ''' # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2( self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) self.model = Model(in_x, [self.policy_net, self.value_net]) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def create_policy_value_net(self): """create the policy value network """ in_x = network = Input((4, self.board_width, self.board_height)) # conv layers ''' network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) ''' layer1 = Conv2D(filters=64, kernel_size=(3, 3), 
padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(layer1) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2( self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) self.model = Model(in_x, [self.policy_net, self.value_net]) ''' def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value ''' def policy_value_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ legal_positions = board.availables current_state = board.current_state() act_probs, value = self.policy_value( current_state.reshape(-1, 4, self.board_width, self.board_height)) act_probs = zip(legal_positions, act_probs.flatten()[legal_positions]) return act_probs, value[0][0] def _loss_train_op(self): """ Three loss terms: loss = (z - v)^2 + pi^T * log(p) + c||theta||^2 """ # get the train op opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): net_params = self.model.get_weights() return net_params def save_model(self, model_file): """ save model params to file """ # net_params = self.get_policy_param() # pickle.dump(net_params, open(model_file, 'wb'), protocol=2) # self.model.save_weights(model_file) self.model.save(model_file) @staticmethod def _shortcut(self, input, residual): stride_width = input._keras_shape[2] / residual._keras_shape[2] stride_height = input._keras_shape[3] / residual._keras_shape[3] equal_channels = residual._keras_shape[1] == input._keras_shape[1] shortcut = input if stride_width > 1 or stride_height > 1 or not equal_channels: shortcut = Conv2D(nb_filter=residual._keras_shape[1], nb_row=1, nb_col=1, subsample=(stride_width, stride_height), init="he_normal", 
                              border_mode="valid")(input)
        return merge([shortcut, residual], mode="sum")

    def _residual_block(self, block_function, nb_filters, repetations, is_first_layer=False):
        def f(input):
            for i in range(repetations):
                init_subsample = (1, 1)
                if i == 0 and not is_first_layer:
                    init_subsample = (2, 2)
                input = block_function(nb_filters=nb_filters, init_subsample=init_subsample)(input)
            return input
        return f

    def resnet(self):
        from keras.layers.convolutional import MaxPooling2D, AveragePooling2D
        input = Input(shape=(3, 224, 224))
        conv1 = self._conv_bn_relu(nb_filter=64, nb_row=7, nb_col=7, subsample=(2, 2))(input)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), border_mode="same")(conv1)
        # Build residual blocks..
        block_fn = self._basic_block
        block1 = self._residual_block(block_fn, nb_filters=64, repetations=3, is_first_layer=True)(pool1)
        block2 = self._residual_block(block_fn, nb_filters=128, repetations=4)(block1)
        block3 = self._residual_block(block_fn, nb_filters=256, repetations=6)(block2)
        block4 = self._residual_block(block_fn, nb_filters=512, repetations=3)(block3)
        # Classifier block
        pool2 = AveragePooling2D(pool_size=(7, 7), strides=(1, 1), border_mode="same")(block4)
        flatten1 = Flatten()(pool2)
        dense = Dense(output_dim=1000, init="he_normal", activation="softmax")(flatten1)
        model = Model(input=input, output=dense)
        return model
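# The _shortcut helper above is written against the Keras 1 API (merge, nb_filter,
# subsample, border_mode) while the rest of this class uses Keras 2 calls. A hedged
# Keras 2 sketch of the same projection shortcut (module-level helper, not the
# author's code) could look like:
def shortcut_k2(input_tensor, residual, l2_const=1e-4):
    stride_w = int(input_tensor._keras_shape[2] // residual._keras_shape[2])
    stride_h = int(input_tensor._keras_shape[3] // residual._keras_shape[3])
    equal_channels = residual._keras_shape[1] == input_tensor._keras_shape[1]
    shortcut = input_tensor
    if stride_w > 1 or stride_h > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual._keras_shape[1], kernel_size=(1, 1),
                          strides=(stride_w, stride_h), padding="valid",
                          kernel_initializer="he_normal", data_format="channels_first",
                          kernel_regularizer=l2(l2_const))(input_tensor)
    return keras.layers.add([shortcut, residual])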
class Network: def __init__(self, conf): # All hyperparameters used in the model self._board_size = conf['board_size'] # the size of the playing board self._lr = conf['learning_rate'] # learning rate of SGD (2e-3) self._momentum = conf['momentum'] # nesterov momentum (1e-1) self._l2_coef = conf['l2'] # coefficient of L2 penalty (1e-4) self._mini_batch_size = conf['mini_batch_size'] # the size of batch when training the network self._fit_epochs = conf['fit_epochs'] # the number of iteration # Define Network self._build_network() # The location of the file which stores the parameters of the network self._net_para_file = conf['net_para_file'] self._fit_history_file = conf['fit_history_file'] # Whether we use previous model or not self._use_previous_model = conf['use_previous_model'] if self._use_previous_model: if os.path.exists(self._net_para_file): self._model.load_weights(self._net_para_file) else: print('> error: [use_previous_model] = True, ' + self._net_para_file + ' not found') @log def _build_network(self): # Input_Layer init_x = Input((3, self._board_size, self._board_size)) # the input is a tensor with the shape 3*(15*15) x = init_x # First Convolutional Layer with 32 filters x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = Activation('relu')(x) # Two Residual Blocks x = self._residual_block(x) x = self._residual_block(x) x = self._residual_block(x) # Policy Head for generating prior probability vector for each action policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same', data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) policy = BatchNormalization()(policy) policy = Activation('relu')(policy) policy = Flatten()(policy) policy = Dense(self._board_size*self._board_size, kernel_regularizer=l2(self._l2_coef))(policy) self._policy = Activation('softmax')(policy) # Value Head for generating value of each action value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) value = BatchNormalization()(value) value = Activation('relu')(value) value = Flatten()(value) value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value) value = Activation('relu')(value) value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value) self._value = Activation('tanh')(value) # Define Network self._model = Model(inputs=init_x, outputs=[self._policy, self._value]) # Define the Loss Function opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True) # stochastic gradient descend with momentum losses_type = ['categorical_crossentropy', 'mean_squared_error'] # cross-entrophy and MSE are weighted equally self._model.compile(optimizer=opt, loss=losses_type) def _residual_block(self, x): x_shortcut = x x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = add([x, x_shortcut]) # Skip Connection x = Activation('relu')(x) return x def predict(self, board, color, last_move): if sum(sum(board)) == 0 and color == WHITE: print('error: network.predict') if sum(sum(board)) == 1 and color == BLACK: print('error: network.predict') tensor = 
board2tensor(board, color, last_move) policy, value_tensor = self._model.predict_on_batch(tensor) value = value_tensor[0][0] return policy, value def train(self, board_list, color_list, last_move_list, pi_list, z_list): size = len(color_list) for i in range(size): if sum(sum(board_list[i])) == 0 and color_list[i] == WHITE: print('error: network.train') if sum(sum(board_list[i])) == 1 and color_list[i] == BLACK: print('error: network.train') # Data Augmentation through symmetric and self-rotation transformation board_aug = [] color_aug = [] last_move_aug = [] pi_aug = [] z_aug = [] for i in range(len(board_list)): new_board, new_color, new_last_move, new_pi, new_z = \ data_augmentation(board_list[i], color_list[i], last_move_list[i], pi_list[i], z_list[i]) board_aug.extend(new_board) color_aug.extend(new_color) last_move_aug.extend(new_last_move) pi_aug.extend(new_pi) z_aug.extend(new_z) board_list.extend(board_aug) color_list.extend(color_aug) last_move_list.extend(last_move_aug) pi_list.extend(pi_aug) z_list.extend(z_aug) # Regularize Data board_list = np.array([board2tensor(board_list[i], color_list[i], last_move_list[i], reshape_flag=False) for i in range(len(board_list))]) pi_list = np.array(pi_list) z_list = np.array(z_list) # Training hist = self._model.fit(board_list, [pi_list, z_list], epochs=self._fit_epochs, batch_size=self._mini_batch_size, verbose=1) hist_path = self._fit_history_file + '_' + str(self._fit_epochs) + '_' + str(self._mini_batch_size) + '.txt' with open(hist_path, 'a') as f: f.write(str(hist.history)) return hist.history['loss'][0] # only sample loss of first epoch def get_para(self): net_para = self._model.get_weights() return net_para def save_model(self): """ save model para to file """ self._model.save_weights(self._net_para_file) def load_model(self): if os.path.exists(self._net_para_file): self._model.load_weights(self._net_para_file) else: print('> error: ' + self._net_para_file + ' not found')
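# Hedged configuration sketch (not part of the original source): the keys mirror
# exactly what Network.__init__ reads from `conf`; the hyperparameter values reuse
# the defaults noted in the inline comments above (2e-3, 1e-1, 1e-4, board size 15),
# while the batch size, epoch count, and file names are placeholders.
conf = {
    'board_size': 15,
    'learning_rate': 2e-3,
    'momentum': 1e-1,
    'l2': 1e-4,
    'mini_batch_size': 512,
    'fit_epochs': 1,
    'net_para_file': 'net_para.h5',
    'fit_history_file': 'fit_history',
    'use_previous_model': False,
}
gomoku_net = Network(conf)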
class PolicyValueNet(object): """ AlphaGoZero-like Policy Value Net. """ def __init__(self, size, saved_weights=None): """ Initialize Attributes. """ self.size = size # board edge size self.l2_const = 1e-4 # coef of l2 penalty self.build_network() # build neural network if saved_weights: self.model.set_weights(pickle.load(open(saved_weights, 'rb'))) def build_network(self): """ Build the Policy Value Neural Net using Keras. """ inputs = Input(shape=(4, self.size, self.size)) # 3 common conv layers c_conv1 = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(inputs) c_conv2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(c_conv1) c_conv3 = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(c_conv2) # policy head p_conv = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(c_conv3) p_flat = Flatten()(p_conv) self.policy_net = Dense(self.size * self.size, activation="softmax", kernel_regularizer=l2(self.l2_const))(p_flat) # value head v_conv = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(c_conv3) v_flat = Flatten()(v_conv) v_dense = Dense(64, kernel_regularizer=l2(self.l2_const))(v_flat) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(v_dense) # connect and build the model self.model = Model(inputs, [self.policy_net, self.value_net]) losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=Adam(), loss=losses) def get_state(self, go): """ Convert the go board data to a state of 4 boards. The 4 boards are: the agent's pieces, the opponent's pieces, difference from previous board, move first or not. Params: go: a GO object. Returns: a (4, 5, 5) numpy array. """ piece_type = 1 if go.X_move else 2 cur_board = np.array(go.board) state = np.zeros((4, self.size, self.size)) if go.previous_board: pre_board = np.array(go.previous_board) state[0] = (cur_board == piece_type).astype(float) state[1] = (cur_board == 3 - piece_type).astype(float) state[2] = (cur_board != pre_board).astype(float) if piece_type == 1: state[3][:, :] = 1.0 return state[:, ::-1, :] def policy(self, go): """ Policy function for current go board. Params: go: a go object. Returns: (move, prob) tuples and corresponding values. """ piece_type = 1 if go.X_move else 2 candidates = [] for i in range(go.size**2): row, col = i // go.size, i % go.size if go.valid_place_check(row, col, piece_type): candidates.append(i) cur_state = self.get_state(go) # expand dimension to predict move_probs, value = self.model.predict_on_batch( np.array(cur_state.reshape(-1, 4, self.size, self.size))) move_probs = zip(candidates, move_probs.flatten()[candidates]) return move_probs, value[0][0] def get_entropy(self, probs): """ Return entropy according to move probabilities. """ return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_core(self, states, mcts_probs, winners, lr): """ Training core function, performs one step of training. Params: states: list or numpy array, training data. mcts_probs: list or numpy array, training labels. winners: list or numpy array, training labels. lr: float, learning rate. 
        Returns:
            tuple of floats, loss and entropy.
        """
        states = np.array(states)
        mcts_probs = np.array(mcts_probs)
        winners = np.array(winners)
        loss = self.model.evaluate(states, [mcts_probs, winners],
                                   batch_size=states.shape[0], verbose=0)
        move_probs, _ = self.model.predict_on_batch(states)
        entropy = self.get_entropy(move_probs)
        K.set_value(self.model.optimizer.lr, lr)
        self.model.fit(states, [mcts_probs, winners],
                       batch_size=states.shape[0], verbose=0)
        return loss[0], entropy

    def get_weights(self):
        """ Return model weights. """
        return self.model.get_weights()

    def save_weights(self, data_path='best_model.model'):
        """ Save model weights. """
        pickle.dump(self.get_weights(), open(data_path, 'wb'), protocol=2)
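# Hedged usage sketch (not part of the original source): one training step on random
# data, matching the shapes train_core() expects (NCHW states with 4 planes, flattened
# move distributions that sum to 1, scalar winners in [-1, 1]).
size = 5
az_net = PolicyValueNet(size)
states = np.random.rand(32, 4, size, size)
mcts_probs = np.random.dirichlet(np.ones(size * size), size=32)
winners = np.random.choice([1.0, -1.0], size=32)
loss, entropy = az_net.train_core(states, mcts_probs, winners, lr=2e-3)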
class FinancialNewsAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps news_input = Input(shape=(nb_time_step, dim_data)) lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh') bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat') all_news_rep = bi_lstm(news_input) news_predictions = Dense(1, activation='linear')(all_news_rep) self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis") def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, loss_weights=0.1): optimizer = Adam(lr=lr) loss = 'mse' # loss = custom_objective self.model.compile(optimizer=optimizer, loss=loss) #metrics=['mse']) plot(self.model, to_file='model.png') def fit_model(self, X, y, X_val=None, y_val=None, epoch=500): early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0) if X_val is None: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) else: self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val), shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X, y): y_hat = self.model.predict(X, batch_size=1) count_true = 0 count_all = y.shape[0] for i in range(y.shape[0]): count_true = count_true + 1 if y[i,0]*y_hat[i,0]>0 else count_true print y[i,0],y_hat[i,0] print count_all,count_true
class CombinedAnalysisModel(object): model = None def __init__(self, dim_input_x1, time_step_x1, dim_input_x2, time_step_x2, batch_size=1, model_path=None, fa_model_path=None, ta_model_path=None): self.model_path = model_path self.fa_model_path = fa_model_path self.ta_model_path = ta_model_path self.batch_size = batch_size self.dim_input_x1 = dim_input_x1 self.time_step_x1 = time_step_x1 self.dim_input_x2 = dim_input_x2 self.time_step_x2 = time_step_x2 self.build() self.weight_loaded = False self.load_weights() def build(self): news_input = Input(shape=(self.time_step_x1, self.dim_input_x1), name='x1') financial_time_series_input = Input(shape=(self.time_step_x2, self.dim_input_x2), name='x2') lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', name='h1', trainable=False) bi_lstm = Bidirectional(lstm, input_shape=(self.time_step_x1, self.dim_input_x1), merge_mode='concat', name='h1', trainable=False) h1 = bi_lstm(news_input) lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer1', trainable=False) lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=False, name='lstm_layer2_loss3', trainable=False) h2_layer_1 = lstm_layer_1(financial_time_series_input) h2_layer_2 = lstm_layer_23(h2_layer_1) h_3 = Merge(mode='concat', name='h3')([h1, h2_layer_2]) h_4 = Dense(nb_hidden_units, name='h4')(h_3) prediction = Dense(1, name='y3')(h_4) self.model = Model(input=[news_input, financial_time_series_input], output=prediction, name='combined model for financial analysis') plot(self.model, to_file='model.png') def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, loss_weights=0.1): optimizer = Adam(lr=lr) loss = 'mse' # loss = custom_objective self.model.compile(optimizer=optimizer, loss=loss) def fit_model(self, X1, X2, y, X1_val=None, X2_val=None, y_val=None, epoch=50): early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0) if X1_val is None: self.model.fit([X1, X2], y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) else: self.model.fit([X1, X2], y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=([X1_val, X2_val], y_val), shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if self.model_path is not None and os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True if self.ta_model_path is not None and os.path.exists( self.ta_model_path): self.model.load_weights(self.ta_model_path, by_name=True) if self.fa_model_path is not None and os.path.exists( self.fa_model_path): self.model.load_weights(self.fa_model_path, by_name=True) def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X1, X2, y): y_hat = self.model.predict([X1, X2], batch_size=1) count_true = 0 count_all = y.shape[0] for i in range(y.shape[0]): count_true 
= count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
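# Hedged usage sketch (not part of the original source): fitting the combined model
# on random news (x1) and time-series (x2) inputs. Shapes are illustrative; assumes
# the module-level nb_hidden_units / dropout / l2_norm_alpha globals, pydot for
# plot(), and no pre-trained FA/TA weight files.
ca_model = CombinedAnalysisModel(dim_input_x1=100, time_step_x1=30,
                                 dim_input_x2=8, time_step_x2=20, batch_size=8)
ca_model.compile_model(lr=0.0001)
X1 = np.random.randn(64, 30, 100)
X2 = np.random.randn(64, 20, 8)
y = np.random.randn(64, 1)
ca_model.fit_model(X1, X2, y, epoch=1)
ca_model.model_eval(X1, X2, y)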
class FinancialTimeSeriesAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1') lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer1') lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss1') lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss2') lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss3') lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss4') lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss5') h1 = lstm_layer_1(financial_time_series_input) h21 = lstm_layer_21(h1) h22 = lstm_layer_22(h1) h23 = lstm_layer_23(h1) h24 = lstm_layer_24(h1) h25 = lstm_layer_25(h1) time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21) # custom 1 time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22) # custom 2 time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23) # mse time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24) # logloss time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25) # cross self.model = Model( input=financial_time_series_input, output=[ time_series_predictions1, time_series_predictions2, time_series_predictions3, time_series_predictions4, time_series_predictions5 ], name="multi-task deep rnn for financial time series forecasting") plot(self.model, to_file='model.png') def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_status() def compile_model(self, lr=0.0001, arg_weight=1.): optimizer = Adam(lr=lr) loss = [ custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy' ] self.model.compile(optimizer=optimizer, loss=loss) def fit_model(self, X, y, y_label, epoch=300): early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0) self.model.fit(X, [y] * 3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.3, shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): 
self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X, y): y_hat = self.model.predict(X, batch_size=1)[0] count_true = 0 count_all = y.shape[1] for i in range(y.shape[1]): count_true = count_true + 1 if y[0, i, 0] * y_hat[ 0, i, 0] > 0 else count_true print(y[0, i, 0], y_hat[0, i, 0]) print(count_all, count_true)
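# Usage sketch: a minimal, illustrative driver for FinancialTimeSeriesAnalysisModel.
# It assumes the module-level hyperparameters the class references (nb_hidden_units,
# dropout, l2_norm_alpha, nb_labels) are defined and the Keras imports used above are
# available; the shapes, file name, and synthetic data below are assumptions only.
import numpy as np

nb_time_step, dim_data, n_samples = 50, 8, 16
X = np.random.randn(n_samples, nb_time_step, dim_data)
y = np.random.randn(n_samples, nb_time_step, 1)                      # per-step regression target
labels = np.random.randint(0, nb_labels, size=(n_samples, nb_time_step))
y_label = np.eye(nb_labels)[labels]                                  # one-hot labels, shape (n, T, nb_labels)

fts_model = FinancialTimeSeriesAnalysisModel(nb_time_step, dim_data,
                                             batch_size=1, model_path='fts_weights.h5')
fts_model.compile_model(lr=0.0001)
fts_model.fit_model(X, y, y_label, epoch=10)
fts_model.model_eval(X, y)   # prints per-step sign agreement between target and prediction
fts_model.save()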
class AdditionNPIModel(NPIStep): model = None f_enc = None def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None): self.system = system self.model_path = model_path self.program_set = program_set self.batch_size = 1 self.build() self.weight_loaded = False self.load_weights() def build(self): enc_size = self.size_of_env_observation() argument_size = IntegerArguments.size_of_arguments input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc') input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg') input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1, batch_input_shape=(self.batch_size, 1)) f_enc = Sequential(name='f_enc') f_enc.add(Merge([input_enc, input_arg], mode='concat')) f_enc.add(Dense(256)) f_enc.add(Dense(32)) f_enc.add(Activation('relu', name='relu_enc')) self.f_enc = f_enc program_embedding = Sequential(name='program_embedding') program_embedding.add(input_prg) f_enc_convert = Sequential(name='f_enc_convert') f_enc_convert.add(f_enc) f_enc_convert.add(RepeatVector(1)) f_lstm = Sequential(name='f_lstm') f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat')) # f_lstm.add(Activation('relu', name='relu_lstm_0')) f_lstm.add(LSTM(256, return_sequences=False, stateful=True)) f_lstm.add(Activation('relu', name='relu_lstm_1')) f_lstm.add(RepeatVector(1)) f_lstm.add(LSTM(256, return_sequences=False, stateful=True)) f_lstm.add(Activation('relu', name='relu_lstm_2')) # plot(f_lstm, to_file='f_lstm.png', show_shapes=True) f_end = Sequential(name='f_end') f_end.add(f_lstm) f_end.add(Dense(10)) f_end.add(Dense(1)) f_end.add(Activation('hard_sigmoid', name='hard_sigmoid_end')) # plot(f_end, to_file='f_end.png', show_shapes=True) f_prog = Sequential(name='f_prog') f_prog.add(f_lstm) f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE)) f_prog.add(Dense(PROGRAM_VEC_SIZE)) f_prog.add(Activation('softmax', name='softmax_prog')) # plot(f_prog, to_file='f_prog.png', show_shapes=True) f_args = [] for ai in range(1, IntegerArguments.max_arg_num+1): f_arg = Sequential(name='f_arg%s' % ai) f_arg.add(f_lstm) f_arg.add(Dense(32)) f_arg.add(Dense(IntegerArguments.depth)) f_arg.add(Activation('softmax', name='softmax_arg%s' % ai)) f_args.append(f_arg) # plot(f_arg, to_file='f_arg.png', show_shapes=True) self.model = Model([input_enc.input, input_arg.input, input_prg.input], [f_end.output, f_prog.output] + [fa.output for fa in f_args], name="npi") self.compile_model() plot(self.model, to_file='model.png', show_shapes=True) def reset(self): super(AdditionNPIModel, self).reset() for l in self.model.layers: if type(l) is LSTM: l.reset_states() def compile_model(self, lr=0.0001, arg_weight=1.): arg_num = IntegerArguments.max_arg_num optimizer = Adam(lr=lr) loss = ['binary_crossentropy', 'categorical_crossentropy'] + ['categorical_crossentropy'] * arg_num self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num) def fit(self, steps_list, epoch=3000): """ :param int epoch: :param typing.List[typing.Dict[q=dict, steps=typing.List[StepInOut]]] steps_list: :return: """ def filter_question(condition_func): sub_steps_list = [] for steps_dict in steps_list: question = steps_dict['q'] if condition_func(question['in1'], question['in2']): sub_steps_list.append(steps_dict) return sub_steps_list # self.print_weights() if not self.weight_loaded: self.train_f_enc(filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), 
epoch=100) self.f_enc.trainable = False q_type = "training questions of a+b < 10" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a+b < 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<10 and b< 10 and 10 <= a+b" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a<10 and b<10 and a + b >= 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<10 and b<10" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 10 and b < 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<100 and b<100" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) while True: print("test all type of questions") cc, wc = self.test_to_subset(create_questions(1000)) print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc)) if wc == 0: break q_type = "training questions of ALL" print(q_type) pr = 1.0 self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr) all_ok = self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr, skip_correct=True) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) def fit_to_subset(self, steps_list, epoch=3000, pass_rate=1.0, skip_correct=False): learning_rate = 0.0001 for i in range(30): all_ok = self.do_learn(steps_list, 30, learning_rate=learning_rate, pass_rate=pass_rate, arg_weight=1., skip_correct=skip_correct) if all_ok: return True learning_rate *= 0.95 return False def test_to_subset(self, questions): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) correct_count = wrong_count = 0 for idx, question in enumerate(questions): question = copy(question) if self.question_test(addition_env, npi_runner, question): correct_count += 1 else: wrong_count += 1 return correct_count, wrong_count @staticmethod def dict_to_str(d): return str(tuple([(k, d[k]) for k in sorted(d)])) def do_learn(self, steps_list, epoch, learning_rate=None, pass_rate=1.0, arg_weight=1., skip_correct=False): if learning_rate is not None: self.update_learning_rate(learning_rate, arg_weight) addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) last_weights = None correct_count = Counter() no_change_count = 0 last_loss = 1000 for ep in range(1, epoch+1): correct_new = wrong_new = 0 losses = [] ok_rate = [] np.random.shuffle(steps_list) for idx, steps_dict in enumerate(steps_list): question = copy(steps_dict['q']) question_key = self.dict_to_str(question) if self.question_test(addition_env, npi_runner, question): if correct_count[question_key] == 0: correct_new += 1 correct_count[question_key] += 1 print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) ok_rate.append(1) if skip_correct or int(math.sqrt(correct_count[question_key])) ** 2 != correct_count[question_key]: continue else: ok_rate.append(0) if correct_count[question_key] > 0: print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) correct_count[question_key] = 0 wrong_new += 1 steps = steps_dict['steps'] xs = [] ys = [] ws = [] for 
step in steps: xs.append(self.convert_input(step.input)) y, w = self.convert_output(step.output) ys.append(y) ws.append(w) self.reset() for i, (x, y, w) in enumerate(zip(xs, ys, ws)): loss = self.model.train_on_batch(x, y, sample_weight=w) if not np.isfinite(loss): print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)])) self.print_weights(last_weights, detail=True) raise RuntimeError("Loss is not finite!") losses.append(loss) last_weights = self.model.get_weights() if losses: cur_loss = np.average(losses) print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list))) # self.print_weights() if correct_new + wrong_new == 0: no_change_count += 1 else: no_change_count = 0 if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5: print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:") return False last_loss = cur_loss print("=" * 80) self.save() if np.average(ok_rate) >= pass_rate: return True return False def update_learning_rate(self, learning_rate, arg_weight=1.): print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight)) self.compile_model(learning_rate, arg_weight=arg_weight) def train_f_enc(self, steps_list, epoch=50): print("training f_enc") f_add0 = Sequential(name='f_add0') f_add0.add(self.f_enc) f_add0.add(Dense(FIELD_DEPTH)) f_add0.add(Activation('softmax', name='softmax_add0')) f_add1 = Sequential(name='f_add1') f_add1.add(self.f_enc) f_add1.add(Dense(FIELD_DEPTH)) f_add1.add(Activation('softmax', name='softmax_add1')) env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model") env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2) for ep in range(epoch): losses = [] for idx, steps_dict in enumerate(steps_list): prev = None for step in steps_dict['steps']: x = self.convert_input(step.input)[:2] env_values = step.input.env.reshape((4, -1)) in1 = np.clip(env_values[0].argmax() - 1, 0, 9) in2 = np.clip(env_values[1].argmax() - 1, 0, 9) carry = np.clip(env_values[2].argmax() - 1, 0, 9) y_num = in1 + in2 + carry now = (in1, in2, carry) if prev == now: continue prev = now y0 = to_one_hot_array((y_num % 10)+1, FIELD_DEPTH) y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH) y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]] loss = env_model.train_on_batch(x, y) losses.append(loss) print("ep %3d: loss=%s" % (ep, np.average(losses))) def question_test(self, addition_env, npi_runner, question): addition_env.reset() self.reset() try: run_npi(addition_env, npi_runner, self.program_set.ADD, question) if question['correct']: return True except StopIteration: pass return False def convert_input(self, p_in: StepInput): x_pg = np.array((p_in.program.program_id,)) x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)] return x def convert_output(self, p_out: StepOutput): y = [np.array((p_out.r,))] weights = [[1.]] if p_out.program: arg_values = p_out.arguments.values arg_num = len(p_out.program.args or []) y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)] weights += [[1.]] else: arg_values = IntegerArguments().values arg_num = 0 y += [np.zeros((PROGRAM_VEC_SIZE, ))] weights += [[1e-10]] for v in arg_values: # split by each args y += [v] weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num) weights = [np.array(w) for w in weights] return [yy.reshape((self.batch_size, -1)) for yy in y], weights def step(self, env_observation: np.ndarray, pg: Program, arguments: 
IntegerArguments) -> StepOutput: x = self.convert_input(StepInput(env_observation, pg, arguments)) results = self.model.predict(x, batch_size=1) # if batch_size==1, returns single row r, pg_one_hot, arg_values = results[0], results[1], results[2:] program = self.program_set.get(pg_one_hot.argmax()) ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values))) return ret def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) @staticmethod def size_of_env_observation(): return FIELD_ROW * FIELD_DEPTH
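# Usage sketch: an illustrative driver for AdditionNPIModel, assuming the project-specific
# pieces it depends on (RuntimeSystem, AdditionProgramSet, create_questions, the NPI runner
# utilities) are importable as in the surrounding code. create_teaching_steps is a
# hypothetical helper standing in for whatever routine builds the
# [{'q': ..., 'steps': [...]}] list that fit() documents in its docstring.
system = RuntimeSystem()
program_set = AdditionProgramSet()
npi_model = AdditionNPIModel(system, model_path='npi_addition_weights.h5',
                             program_set=program_set)
questions = create_questions(100)
steps_list = create_teaching_steps(questions)   # hypothetical: produces q/steps dicts
npi_model.fit(steps_list, epoch=1000)
correct, wrong = npi_model.test_to_subset(create_questions(200))
print("accuracy: %.3f" % (correct / (correct + wrong)))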
class PolicyValueNet(): """Policy-value network""" #def __init__(self, board_width, board_height, model_file=None): def __init__(self, policy_infer_size, model_file=None): #self.board_width = board_width #self.board_height = board_height self.policy_infer_size = policy_infer_size self.l2_const = 1e-4 # coef of l2 penalty self.create_policy_value_net() self._loss_train_op() self.load_model_done = True if model_file and os.path.exists(model_file): self.load_model_done = False self.load_model(model_file) def load_model(self, model_file): """Reload the model (only used to load a new model during self-play)""" try: #net_params = pickle.load(open(model_file, 'rb'), encoding='bytes') #iso-8859-1') net_params = utils.pickle_load(model_file) self.model.set_weights(net_params) self.load_model_done = True except: logging.error("load_model fail! {}\t{}".format( model_file, utils.get_trace())) self.load_model_done = False if os.path.exists( model_file ) and self.load_model_done is False: # abort if the model file exists but loading failed exit(-1) return self.load_model_done def create_policy_value_net(self): """Create the policy-value network""" # input layer #in_x = network = Input((4, self.board_width, self.board_height)) in_x = network = Input((4, 1, self.policy_infer_size)) # conv layers network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) # infer self.board_width * self.board_height action_probs #self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net) self.policy_net = Dense(self.policy_infer_size, activation="softmax", kernel_regularizer=l2( self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) # infer one current state score self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) # build the network model self.model = Model(in_x, [self.policy_net, self.value_net]) # return the move policy and value predictions def policy_value(state_input): state_input_union = np.array(state_input) #print(state_input_union) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_value_fn(self, board): """Use the model to predict the probability and value of every action id on the board""" # all currently legal action_ids on the board legal_positions = board.availables #print(legal_positions) # action history from the current player's perspective current_actions = board.current_actions() #print(current_actions) # predict the move policy and value with the model #print(self.policy_infer_size) #act_probs, value = self.policy_value(current_actions.reshape(-1, 4, self.board_width, self.board_height)) act_probs, value = self.policy_value( current_actions.reshape(-1, 4, 1, self.policy_infer_size)) act_probs = zip(legal_positions, act_probs.flatten()[legal_positions]) # return [(action, probability)] pairs and the value of the current player's follow-up play return act_probs, value[0][0] def _loss_train_op(self): """Initialize the loss:
three loss terms, loss = (z - v)^2 + pi^T * log(p) + c||theta||^2, i.e. loss = value loss + policy loss + L2 penalty """ # define the optimizer and loss functions opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): """Run one training step and report its loss and entropy""" state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) # evaluate loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) # predict action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): """Get the model parameters""" net_params = self.model.get_weights() return net_params def save_model(self, model_file): """Save the model parameters to file""" net_params = self.get_policy_param() #pickle.dump(net_params, open(model_file, 'wb'), protocol=4) utils.pickle_dump(net_params, model_file)
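# Usage sketch: a minimal training-step call for the PolicyValueNet above. The batch
# size, policy_infer_size, and file name are illustrative assumptions; in the real
# pipeline the states come from board.current_actions() reshaped to
# (-1, 4, 1, policy_infer_size) and the targets from MCTS visit counts and game outcomes.
import numpy as np

policy_infer_size = 64
pv_net = PolicyValueNet(policy_infer_size)

batch = 8
state_batch = np.random.rand(batch, 4, 1, policy_infer_size)
mcts_probs = np.random.rand(batch, policy_infer_size)
mcts_probs /= mcts_probs.sum(axis=1, keepdims=True)      # normalized visit-count targets
winners = np.random.choice([1.0, -1.0], size=batch)      # outcomes from the current player's view

loss, entropy = pv_net.train_step(state_batch, mcts_probs, winners, learning_rate=2e-3)
pv_net.save_model('policy_value_net.pkl')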
class Network(): def __init__(self, conf): # Some Hyperparameters self._board_size = conf['board_size'] # the size of the playing board self._lr = conf['learning_rate'] # learning rate of SGD (2e-3) self._momentum = conf['momentum'] # nesterov momentum (1e-1) self._l2_coef = conf['l2'] # coefficient of L2 penalty (1e-4) # Define Network self._build_network() # File Location self._net_para_file = conf['net_para_file'] # Whether to reuse a previously trained model self._use_previous_model = conf['use_previous_model'] if self._use_previous_model: self._model.load_weights(self._net_para_file) # load_weights applies the saved weights in place (it returns None) def _build_network(self): # Input_Layer init_x = Input((3, self._board_size, self._board_size)) x = init_x # Convolutional Layer x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = Activation('relu')(x) # Residual Layer x = self._residual_block(x) x = self._residual_block(x) x = self._residual_block(x) # Policy Head policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same', data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x) policy = BatchNormalization()(policy) policy = Activation('relu')(policy) policy = Flatten()(policy) policy = Dense(self._board_size * self._board_size, kernel_regularizer=l2(self._l2_coef))(policy) self._policy = Activation('softmax')(policy) # Value Head value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) value = BatchNormalization()(value) value = Activation('relu')(value) value = Flatten()(value) value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value) value = Activation('relu')(value) value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value) self._value = Activation('tanh')(value) # Define Network self._model = Model(inputs=init_x, outputs=[self._policy, self._value]) # Define the Loss Function opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True) losses_type = ['categorical_crossentropy', 'mean_squared_error'] self._model.compile(optimizer=opt, loss=losses_type) def _residual_block(self, x): x_shortcut = x x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x) x = BatchNormalization()(x) x = add([x, x_shortcut]) # Skip Connection x = Activation('relu')(x) return x def predict(self, board, color, random_flip=False): if random_flip: b_t, method_index = input_transform(board) tensor_t = board2tensor(b_t, color, reshape_flag=True) prob_tensor_t, value_tensor = self._model.predict_on_batch( tensor_t) policy = output_decode(prob_tensor_t, method_index, board.shape[0]) value = value_tensor[0][0] return policy, value else: tensor = board2tensor(board, color) policy, value_tensor = self._model.predict_on_batch(tensor) value = value_tensor[0][0] return policy, value def train(self, board_list, color_list, pi_list, z_list): # Prepare the training data as arrays tensor_list = np.array([ board2tensor(board_list[i], color_list[i], reshape_flag=False) for i in range(len(board_list)) ]) pi_list = np.array(pi_list) z_list = np.array(z_list) # Training self._model.fit(tensor_list, [pi_list, z_list], epochs=20, 
batch_size=len(color_list), verbose=1) # Calculate Loss Explicitly loss = self._model.evaluate(tensor_list, [pi_list, z_list], batch_size=len(board_list), verbose=0) loss = loss[0] return loss def get_para(self): net_para = self._model.get_weights() return net_para def save_model(self): """ save model para to file """ self._model.save_weights(self._net_para_file) def load_model(self): self._model.load_weights(self._net_para_file)
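# Usage sketch: an illustrative configuration and call sequence for the Network class,
# assuming board2tensor, input_transform, and output_decode are available in the
# surrounding project. The config keys mirror those read in __init__; the concrete
# values and file name are assumptions.
import numpy as np

conf = {
    'board_size': 15,
    'learning_rate': 2e-3,
    'momentum': 1e-1,
    'l2': 1e-4,
    'net_para_file': 'renju_net_weights.h5',
    'use_previous_model': False,
}
net = Network(conf)
board = np.zeros((conf['board_size'], conf['board_size']), dtype=int)
policy, value = net.predict(board, color=1)   # raw policy vector and scalar value estimate
net.save_model()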
class PolicyValueNet(): """policy-value network """ def __init__(self, board_width, board_height, model_file=None): self.board_width = board_width self.board_height = board_height self.l2_const = 1e-4 # coef of l2 penalty self.create_policy_value_net() if model_file: print("[Notice] load model from file") self.model = load_model(model_file) else: print("[Notice] create model") self.create_policy_value_net() self._loss_train_op() def create_policy_value_net(self): """create the policy value network """ in_x = network = Input((4, self.board_width, self.board_height)) # conv layers network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2( self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) self.model = Model(in_x, [self.policy_net, self.value_net]) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_value_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ legal_positions = board.availables current_state = board.current_state() act_probs, value = self.policy_value( current_state.reshape(-1, 4, self.board_width, self.board_height)) act_probs = zip(legal_positions, act_probs.flatten()[legal_positions]) return act_probs, value[0][0] def _loss_train_op(self): """ Three loss terms: loss = (z - v)^2 + pi^T * log(p) + c||theta||^2 """ # get the train op opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): net_params = self.model.get_weights() return net_params def save_model(self, model_file): """ save model to file """ print("save 
model file") self.model.save(model_file)
class PolicyValueNet(): """policy-value network """ def __init__(self, model_file=None): self.l2_const = 1e-4 # coef of l2 penaltyd self.create_policy_value_net() self._loss_train_op() if model_file: net_params = pickle.load(open(model_file, 'rb')) self.model.set_weights(net_params) plot_model(self.model, to_file='model.png') def create_policy_value_net(self): """create the policy value network """ in_x = network = Input((13,)) # conv layers network = Dense(64, activation='relu', kernel_regularizer=l2(self.l2_const))(network) network = Dense(64, activation='relu', kernel_regularizer=l2(self.l2_const))(network) network = Dense(32, activation='relu', kernel_regularizer=l2(self.l2_const))(network) network = Dense(32, activation='relu', kernel_regularizer=l2(self.l2_const))(network) self.policy_net = Dense(6, activation='softmax', kernel_regularizer=l2(self.l2_const))(network) # state value layers self.value_net = Dense(1, activation='tanh', kernel_regularizer=l2(self.l2_const))(network) self.model = Model(in_x, [self.policy_net, self.value_net]) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_value_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ #legal_positions = board.availables #print(board.current_state()) current_state = board.current_state() act_probs, value = self.policy_value( np.expand_dims(current_state ,0)) #print(act_probs[0]) #act_probs = zip(legal_positions, act_probs[0][legal_positions]) actret = [(i, act_probs[0][i]) for i in range(6)] #print(board.current_state(), actret) return actret, value[0] def _loss_train_op(self): """ Three loss terms: loss = (z - v)^2 + pi^T * log(p) + c||theta||^2 """ # get the train op opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) #print(mcts_probs_union) loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): net_params = self.model.get_weights() return net_params def save_model(self, model_file): """ save model params to file """ net_params = self.get_policy_param() pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
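# Usage sketch: a minimal driver for the 13-feature / 6-action PolicyValueNet above.
# Shapes are inferred from the Input((13,)) and Dense(6, activation='softmax') layers;
# the synthetic data and file name are illustrative assumptions.
import numpy as np

pv_net = PolicyValueNet()

states = np.random.rand(32, 13)                      # 13 hand-crafted state features per sample
mcts_probs = np.random.rand(32, 6)                   # targets over the 6 discrete actions
mcts_probs /= mcts_probs.sum(axis=1, keepdims=True)
winners = np.random.choice([1.0, -1.0], size=32)

loss, entropy = pv_net.train_step(states, mcts_probs, winners, learning_rate=1e-3)
pv_net.save_model('pv_net_params.pkl')               # pickles the weight list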