def test_sparse_placeholder_predict():
    """Predict through a two-input model whose second input is sparse."""
    # Two 6x3 CSR matrices: one fed to the dense input, one to the sparse input.
    feeds = []
    for _ in range(2):
        feeds.append(sparse.random(6, 3, density=0.25).tocsr())
    dense_in = Input(shape=(3,))
    sparse_in = Input(shape=(3,), sparse=True)
    drop_out = Dropout(0.5, name='dropout')(dense_in)
    dense_out = Dense(4, name='dense_1')(sparse_in)
    model = Model([dense_in, sparse_in], [drop_out, dense_out])
    model.compile('rmsprop', 'mse')
    model.predict(feeds, batch_size=2)
def test_sparse_placeholder_fit():
    """Fit/evaluate a two-input model where one placeholder input is sparse."""
    # Inputs are 6x3 CSR matrices; targets are 6x3 and 6x4 CSR matrices.
    feeds = []
    for _ in range(2):
        feeds.append(sparse.random(6, 3, density=0.25).tocsr())
    targets = [sparse.random(6, width, density=0.25).tocsr() for width in (3, 4)]
    dense_in = Input(shape=(3,))
    sparse_in = Input(shape=(3,), sparse=True)
    drop_out = Dropout(0.5, name='dropout')(dense_in)
    dense_out = Dense(4, name='dense_1')(sparse_in)
    model = Model([dense_in, sparse_in], [drop_out, dense_out])
    # Predict is exercised before compile on purpose (it needs no loss).
    model.predict(feeds, batch_size=2)
    model.compile('rmsprop', 'mse')
    model.fit(feeds, targets, epochs=1, batch_size=2, validation_split=0.5)
    model.evaluate(feeds, targets, batch_size=2)
def DNN_auto(x_train):
    """Train a dense autoencoder on ``x_train`` and return the encoding.

    The encoder compresses 673-dim inputs through 450 -> 250 -> 128 units;
    the decoder mirrors it back to 673 with a tanh output.

    Fix: Keras 2 removed the Keras-1 ``Model(input=..., output=...)`` keyword
    spelling; use ``inputs=``/``outputs=``.  Also dropped a stray debug
    ``print()``.

    :param x_train: training matrix with 673 features per row.
    :return: (encoder_output tensor, encoded representation of ``x_train``).
    """
    encoding_dim = 128  # 128 original
    input_img = Input(shape=(673, ))
    # Encoder layers.
    encoded = Dense(450, activation='relu')(input_img)  # 450 - output (input layer)
    encoded = Dense(250, activation='relu')(encoded)  # hidden layer
    encoder_output = Dense(encoding_dim)(encoded)  # 128 - output (encoding layer)
    # Decoder layers (mirror of the encoder).
    decoded = Dense(250, activation='relu')(encoder_output)
    decoded = Dense(450, activation='relu')(decoded)
    decoded = Dense(673, activation='tanh')(decoded)
    autoencoder = Model(inputs=input_img, outputs=decoded)
    encoder = Model(inputs=input_img, outputs=encoder_output)
    autoencoder.compile(optimizer='adam', loss='mse')
    # Second x_train is given instead of train labels: for an autoencoder
    # the target equals the input (i/p == o/p).  batch_size=100 original.
    autoencoder.fit(x_train, x_train, epochs=20, batch_size=100, shuffle=True)
    encoded_imgs = encoder.predict(x_train)
    return encoder_output, encoded_imgs
def looking_at_covoluations(model, X_test):
    """Plot two feature maps (channels 14 and 17) of the model's first layer."""
    first = model.layers[0]
    probe = Model(inputs=first.input, outputs=first.output)
    acts = probe.predict(X_test)
    fig, axs = plt.subplots(2, 1)
    # Show the same sample's activations for the two chosen channels.
    for ax, channel in zip(axs, (14, 17)):
        ax.matshow(acts[0, :, :, channel], cmap='viridis')
    plt.show()
class ResNet50Data:
    """Extract headless-ResNet50 features for image paths in batched .npy files."""

    def convert(self, paths, data_filename='resnet-data-{}.npy'):
        """Run every image through ResNet50 and save features batch by batch.

        Fixes two defects in the original: each batch now covers only its own
        slice of ``paths`` (the old code sliced ``paths[:index]`` and so
        re-processed the cumulative prefix, duplicating rows across files),
        and path lists of <= 4096 entries no longer hit a NameError from an
        undefined loop index in the tail handling.

        :param paths: iterable of image file paths.
        :param data_filename: template for output files; formatted with the
            (exclusive) end index of each saved batch.
        """
        print('Loading ResNet50')
        self.resnet = ResNet50(weights='imagenet', include_top=False)
        # Discard last pooling & activation layer.
        self.resnet = Model(self.resnet.inputs, self.resnet.layers[-2].output)
        paths = list(paths)
        print('Loading images from {0} paths'.format(len(paths)))
        batch = 2 ** 12  # 4096 images per saved file
        for start in range(0, len(paths), batch):
            chunk = paths[start:start + batch]
            print('Loading images')
            img_arrays = [
                img_to_array(load_img(path, target_size=(224, 224)))
                for path in chunk
            ]
            images = preprocess_input(np.array(img_arrays))
            print('Processing images through ResNet50')
            # File name keeps the original convention: end index of the batch,
            # with the final (possibly short) batch named after len(paths).
            np.save(data_filename.format(min(start + batch, len(paths))),
                    self.resnet.predict(images))
        return

    def load(self, data_filename='resnet-data-{}.npy'):
        """Yield feature rows from every saved batch file in the cwd.

        chain is a concatenating generator: ['ABC', 'DEF'] -> 'ABCDEF'.
        """
        return chain.from_iterable(
            np.load(fn) for fn in os.listdir('.')
            if fnmatch(fn, data_filename.replace('{}', '*')))
def get_interlayer_output(model, input_x, intermediate_layer_name='intermediate_dense'):
    '''
    Return the activations of a named intermediate layer for ``input_x``.

    @param model: the model best has been trained
    '''
    tap = model.get_layer(intermediate_layer_name).output
    probe = Model(inputs=model.input, outputs=tap)
    return probe.predict(input_x)
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model (the tensor-fed input supplies its own data).
    """
    import tensorflow as tf
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    # Input `a` is fed by a TF variable; only `b` is a regular placeholder.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    model = Model([a, b], [a_2, b_2])
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    # test train_on_batch: only placeholder data is passed, as array or dict.
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})
    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)
    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2
    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])
    # test train_on_batch: None/[]/{} all mean "no placeholder data needed".
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)
    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)
    # test predict: steps=3 over a 10-row variable yields 30 rows.
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])
    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)
    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)
    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
class AIPlayer(Player):
    """AlphaZero-style Othello/Reversi player.

    Couples a residual policy/value network with Monte-Carlo tree search.
    Fix applied: ``np.int`` (removed in NumPy 1.24) replaced with the builtin
    ``int`` as the board-state dtype; behavior is otherwise unchanged.
    """

    def __init__(self, buffer_size, sim_count, train=True, model="", tau=1, compile=False):
        """Create a player; load ``model`` from disk if given, else build one."""
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()  # (state, move-probs) pairs pending a game result
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count  # MCTS simulations per move
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau  # exploration temperature for move sampling

    @staticmethod
    def create_if_nonexistant(config):
        # Bootstrap an initial model file if none exists yet.
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move)
            ai.save(config.data.model_location + "model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train

    @staticmethod
    def clear():
        K.clear_session()

    def load(self, file, compile=False):
        """Load a saved network, replacing any existing one."""
        try:
            del self.network
        except Exception:
            pass
        self.network = load_model(
            file,
            custom_objects={
                "objective_function_for_policy": AIPlayer.objective_function_for_policy,
                "objective_function_for_value": AIPlayer.objective_function_for_value},
            compile=compile)

    def save(self, file):
        self.network.save(file)

    def create_network(self):
        """Build the residual tower with policy (65-way) and value (tanh) heads."""
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)
        res_out = x
        # Policy head: 64 board squares + 1 pass move.
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8 * 8 + 1, activation="softmax", name="policy_out")(x)
        # Value head: scalar in [-1, 1].
        x = Conv2D(filters=1, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out = Dense(1, activation="tanh", name="value_out")(x)
        self.network = Model(x_in, [policy_out, value_out], name="reversi_model")
        self.compile()

    def _build_residual_block(self, x):
        # Two conv/BN layers with an identity skip connection.
        in_x = x
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    def compile(self):
        losses = [AIPlayer.objective_function_for_policy,
                  AIPlayer.objective_function_for_value]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                             loss=losses)

    def update_lr(self, lr):
        K.set_value(self.network.optimizer.lr, lr)

    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)

    def update_buffer(self, winner):
        """Flush pending (state, policy) pairs into the replay buffer with the game result."""
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))

    def train_batches(self, batch_size, batches=-1, verbose=2):
        """Fit one epoch on the whole buffer (batches == -1) or on a sample."""
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size * batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer],
                                   batch_size=batch_size, epochs=1,
                                   verbose=verbose)
        return history

    def preprocess_input(self, board, side):
        """Encode the board as 3 planes: own stones, opponent stones, side-to-move."""
        # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
        state = np.zeros((3, 8, 8), dtype=int)
        for i in range(8):
            for j in range(8):
                if board[i, j] == 1:
                    state[0, i, j] = 1
                elif board[i, j] == -1:
                    state[1, i, j] = 1
                if side == 1:
                    state[2, i, j] = 1
        return state

    def evaluate(self, game, side):
        """Return the network's scalar value estimate for the position."""
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis, :])
        return pred[1][0]

    def pick_move(self, game, side):
        """Sample a move from the tempered MCTS visit distribution."""
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1, -1))  # pass move
        monte_prob = self.monte_carlo(game, side)
        if self.train:
            # Store the normalized visit counts as the policy training target.
            self.temp_state.append((self.preprocess_input(game.board, side),
                                    np.divide(monte_prob, np.sum(monte_prob))))
        monte_prob = np.float_power(monte_prob, 1 / self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))
        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]

    def monte_carlo(self, game, side):
        """Run ``sim_count`` MCTS simulations; return a 65-entry visit-count vector."""
        N = defaultdict(lambda: 0)  # visit counts
        W = defaultdict(lambda: 0)  # total value
        Q = defaultdict(lambda: 0)  # mean value
        P = defaultdict(lambda: 0)  # network priors
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            policy = np.zeros((65))
            policy[64] = 1  # forced pass
            return policy
        elif len(possible_moves) == 1:
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy
        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis, :])
        policy = pred[0][0]
        # Renormalize priors over the legal moves only.
        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]
        for move in possible_moves:
            P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)] / total
        for i in range(self.sim_count):
            # print("Sim #%d" % i)
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1, -1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid, mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move
                # print(best_move)
                if N[(sid, Othello.move_id(best_move))] == 0:
                    # Leaf: expand with network priors, back up the value estimate.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break
                    current_input = self.preprocess_input(clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis, :])
                    policy = pred[0][0]
                    value = pred[1][0]
                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1, -1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]
                    for move in possible_moves:
                        P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)] / total
                    for node in visited:
                        N[node] += 1
                        W[node] += value * side
                        Q[node] = W[node] / N[node]
                    # print()
                    break
                else:
                    # Already-expanded node: descend further.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break
        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid, mid)]
        return policy
# One discriminator-training step of a multi-resolution GAN.
# NOTE(review): this fragment references names defined elsewhere in the file
# (batch_size, output_generator, gan_core, x_full/x_medium/x_low, ...).
# Generate fake/true labels (plus dummies) per resolution: f=full, m=medium, l=low.
fake_labels_f, true_labels_f, dummy_f = generate_label_data(
    batch_size, pred_size_f, feat_size_f)
fake_labels_m, true_labels_m, dummy_m = generate_label_data(
    batch_size, pred_size_m, feat_size_m)
fake_labels_l, true_labels_l, dummy_l = generate_label_data(
    batch_size, pred_size_l, feat_size_l)
# GAN network data
x_gan, y_gan, x_and_y_gan = next(output_generator)
# ----------------------
# Train the discriminators
# ----------------------
# Prepare data for the full-resolution discriminator
y_gen_full, _, _, _, _, _, _ = gan_core.predict(x_full)
x_and_y_gen_full = concatenateNumba(x_full, y_gen_full)
# Prepare data for the medium-resolution discriminator
y_gen_medium, _, _, _, _, _, _ = gan_core.predict(x_medium)
x_and_y_gen_medium = concatenateNumba(x_medium, y_gen_medium)
# Prepare data for the low-resolution discriminator
y_gen_low, _, _, _, _, _, _ = gan_core.predict(x_low)
x_and_y_gen_low = concatenateNumba(x_low, y_gen_low)
# Train the discriminators on generated (fake) and real pairs.
d_loss_fake_full = discriminator_full_multi.train_on_batch(
    x_and_y_gen_full, [fake_labels_f, dummy_f])
d_loss_real_full = discriminator_full_multi.train_on_batch(
    x_and_y_full, [true_labels_f, dummy_f])
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model (the tensor-fed input supplies its own data).
    """
    import tensorflow as tf
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    # Input `a` is fed by a TF variable; only `b` is a regular placeholder.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3, ), name='input_b')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    model = Model([a, b], [a_2, b_2])
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    # test train_on_batch: only placeholder data is passed, as array or dict.
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})
    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)
    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2
    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])
    # test train_on_batch: None/[]/{} all mean "no placeholder data needed".
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)
    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)
    # test predict: steps=3 over a 10-row variable yields 30 rows.
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()
    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])
    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)
    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)
    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
# Relation-classification CNN: two 1-D conv layers over embedded word /
# position features, then dense + softmax over 2 classes.
# NOTE(review): this fragment references names defined elsewhere in the file
# (dropouted, word, distance_e1, distance_e2, lines, rep, epoch_size, ...).
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(dropouted)
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(cnn)
flattened = Flatten()(cnn)
dense = Dense(100, activation='tanh')(flattened)
predict = Dense(2, activation='softmax')(dense)
# NOTE(review): Model(input=..., output=...) is the legacy Keras 1 keyword
# spelling; Keras 2 renamed these to inputs=/outputs= — confirm the pinned
# Keras version before changing.
model = Model(input=[word, distance_e1, distance_e2], output=predict)
# opt = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
# opt = Adagrad(lr=0.01, epsilon=1e-06)
# opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06)
# opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=opt)
train_instances = [line.strip() for line in lines]
label_array_t, word_array_t, dis_e1_array_t, dis_e2_array_t = rep.represent_instances(
    train_instances)
model.fit([word_array_t, dis_e1_array_t, dis_e2_array_t], label_array_t,
          batch_size=128, epochs=epoch_size)
model.save(output_file)
# Predict back on the training data and report training accuracy.
label_array_ans = model.predict([word_array_t, dis_e1_array_t, dis_e2_array_t],
                                batch_size=128)
print(label_array_ans)
print("训练完成!!")
eval_mulclass(label_array_t, label_array_ans)
class AdditionNPIModel(NPIStep):
    """Neural Programmer-Interpreter model for multi-digit addition.

    Built with Keras-1 era APIs (Sequential + Merge, stateful LSTMs with
    batch size 1).  The shared ``f_lstm`` core feeds four heads: end-of-
    program probability, next-program softmax, and one softmax per argument.
    """
    model = None
    f_enc = None

    def __init__(self, system: RuntimeSystem, model_path: str = None,
                 program_set: AdditionProgramSet = None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1  # stateful LSTMs require a fixed batch size
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        """Assemble encoder, program embedding, LSTM core, and output heads."""
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size),
                               name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size),
                               name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE,
                              output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1,
                              batch_input_shape=(self.batch_size, 1))
        # f_enc: fuse environment observation and arguments into one vector.
        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(MaxoutDense(128, nb_feature=4))
        self.f_enc = f_enc
        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)
        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))
        # f_lstm: shared recurrent core over (encoding, program embedding).
        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        f_lstm.add(
            LSTM(256, return_sequences=False, stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(
            LSTM(256, return_sequences=False, stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)
        # f_end: probability that the current program should terminate.
        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(1, W_regularizer=l2(0.001)))
        f_end.add(Activation('sigmoid', name='sigmoid_end'))
        # f_prog: distribution over the next program to call.
        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu"))
        f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001)))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)
        # One softmax head per integer argument slot.
        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num + 1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001)))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
            # plot(f_arg, to_file='f_arg.png', show_shapes=True)
        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] +
                           [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        """Reset stateful LSTM states between program executions."""
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'
                ] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        """Curriculum training: pretrain the encoder, then grow question difficulty."""
        # Filter out some questions
        def filter_question(condition_func):
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        if not self.weight_loaded:
            # Pretrain the encoder on 2-digit problems, then freeze it.
            self.train_f_enc(
                filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100),
                epoch=100)
            self.f_enc.trainable = False
        self.update_learning_rate(0.0001)
        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(
            filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))
        while True:
            if self.test_and_learn([10, 100, 1000]):
                break
            q_type = "training questions of ALL"
            print(q_type)
            q_num = 100
            skip_correct = False
            pr = 1.0
            questions = filter_question(lambda a, b: True)
            np.random.shuffle(questions)
            questions = questions[:q_num]
            all_ok = self.fit_to_subset(questions, pass_rate=pr,
                                        skip_correct=skip_correct)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False):
        # Up to 10 rounds of 100 epochs each; stop early once pass_rate is met.
        for i in range(10):
            all_ok = self.do_learn(steps_list, 100, pass_rate=pass_rate,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
        return False

    def test_and_learn(self, num_questions):
        """Test on random questions; retrain on failures. True if all sets pass."""
        for num in num_questions:
            print("test all type of %d questions" % num)
            cc, wc, wrong_questions = self.test_to_subset(
                create_random_questions(num))
            acc_rate = cc / (cc + wc)
            print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc))
            if wc > 0:
                self.fit_to_subset(wrong_questions, pass_rate=1.0,
                                   skip_correct=False)
                return False
        return True

    def test_to_subset(self, questions):
        """Return (correct, wrong, teacher-traces-for-wrong) over ``questions``."""
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        teacher = AdditionTeacher(self.program_set)
        npi_runner = TerminalNPIRunner(None, self)
        teacher_runner = TerminalNPIRunner(None, teacher)
        correct_count = wrong_count = 0
        wrong_steps_list = []
        for idx, question in enumerate(questions):
            question = copy(question)
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                # Record the teacher's correct trace for later supervised replay.
                self.question_test(addition_env, teacher_runner, question)
                wrong_steps_list.append({
                    "q": question,
                    "steps": teacher_runner.step_list
                })
                wrong_count += 1
        return correct_count, wrong_count, wrong_steps_list

    @staticmethod
    def dict_to_str(d):
        # Canonical, order-independent string key for a question dict.
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
        """One training round over teacher traces; True once ok_rate >= pass_rate."""
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch + 1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                          (ep, idx, self.dict_to_str(question),
                           correct_count[question_key]))
                    ok_rate.append(1)
                    # Re-train already-correct questions only at square-number
                    # streaks (1, 4, 9, ...) to avoid overfitting them.
                    cc = correct_count[question_key]
                    if skip_correct or int(math.sqrt(cc))**2 != cc:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print(
                            "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0"
                            % (ep, idx, self.dict_to_str(question),
                               correct_count[question_key]))
                        correct_count[question_key] = 0
                    wrong_new += 1
                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)
                self.reset()
                # Replay the trace step by step (stateful LSTM carries context).
                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" %
                              ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print(
                    "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)"
                    % (ep, np.average(ok_rate) * 100, correct_new, wrong_new,
                       cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0
                # Plateau detection: loss stagnant and no accuracy movement.
                if math.fabs(1 - cur_loss / last_loss) < 0.001 and no_change_count > 5:
                    print(
                        "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:"
                    )
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        """Pretrain the encoder to predict the two digits of in1+in2+carry."""
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))
        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))
        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output],
                          name="env_model")
        env_model.compile(optimizer='adam',
                          loss=['categorical_crossentropy'] * 2)
        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    # Decode current digits; argmax-1 maps one-hot back to 0..9.
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:  # skip consecutive duplicate states
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))
            if np.average(losses) < 1e-06:
                break

    def question_test(self, addition_env, npi_runner, question):
        """Run the NPI on one question; True only if it terminates correctly."""
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        """Pack (env, arguments, program-id) into batch-shaped model inputs."""
        x_pg = np.array((p_in.program.program_id, ))
        x = [
            xx.reshape((self.batch_size, -1))
            for xx in (p_in.env, p_in.arguments.values, x_pg)
        ]
        return x

    def convert_output(self, p_out: StepOutput):
        """Build (targets, sample_weights) for all heads from a teacher step.

        Heads without a meaningful target get a near-zero weight (1e-10)
        instead of being dropped, keeping the output structure fixed.
        """
        y = [np.array((p_out.r, ))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]
        for v in arg_values:  # split by each args
            y += [v]
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program,
             arguments: IntegerArguments) -> StepOutput:
        """Predict one NPI step: termination prob, next program, arguments."""
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(
            x, batch_size=1)  # if batch_size==1, returns single row
        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program,
                         IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
def test_model_methods():
    """End-to-end exercise of the functional-Model training API.

    Builds a two-input/two-output model and walks through train_on_batch,
    fit, evaluate, predict (list and dict input forms), sample weights,
    metrics bookkeeping, callbacks/initial_epoch, generators/Sequences,
    and a long list of expected-error cases for compile/fit arguments.
    """
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    model = Model([a, b], [a_2, b_2])
    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np,
                                'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np,
                                'input_b': input_b_np},
                               {'dense_1': output_a_np,
                                'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np,
                     'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np,
                     'input_b': input_b_np},
                    {'dense_1': output_a_np,
                     'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np,
                     'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np,
                     'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np,
                     'input_b': input_b_np},
                    {'dense_1': output_a_np,
                     'dropout': output_b_np},
                    epochs=1, batch_size=4,
                    validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     {'dense_1': output_a_np,
                                      'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np,
                               'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np,
                               'input_b': input_b_np},
                              {'dense_1': output_a_np,
                               'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np,
                                  'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    # Expected length: total loss + 2 per-output losses + 2 per-output accs.
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    # (metric attached to only one output: total + 2 losses + 1 acc = 4)
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np], ))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse', loss_weights={
        'dense_1': 0.2,
        'dropout': 0.8
    })
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse', sample_weight_mode={
        'dense_1': None,
        'dropout': 'temporal'
    })
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator will throw an exception if steps is unspecified for regular generator
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.fit_generator(generator=gen_data(),
                                  epochs=5,
                                  initial_epoch=0,
                                  validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer, loss, metrics=[],
                                sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
class BasicModel(object):
    """Small classifier builder: Dense MLP or (bi)directional RNN head,
    optionally wrapped in a VAT (virtual adversarial training) loss.
    """

    def __init__(self, multi_class=False):
        # When True, final layer is Dense(5, softmax); otherwise Dense(1, sigmoid).
        self.multi_class = multi_class

    def build(self, input_shape, nn_type='Dense', bidirectional=True, vat=True):
        """
        Build the Keras model and store it on self.model.
        :param input_shape: shape=(number of input rows, 1)
        :param nn_type: select 'Dense' or 'RNN' or 'GRU' or 'LSTM'
        :param bidirectional: use-flag for Bidirectional RNN
               (NOTE(review): currently forwarded to core_data_flow but not
               consulted there — the RNN branches always wrap in Bidirectional)
        :param vat: use-flag for VAT loss (wraps the graph in VATModel)
        :return: self
        """
        input_layer = Input(input_shape)
        output_layer = self.core_data_flow(input_layer, nn_type, bidirectional)
        if vat:
            self.model = VATModel(input_layer, output_layer).setup_vat_loss()
        else:
            self.model = Model(input_layer, output_layer)
        return self

    def core_data_flow(self, input_layer, nn_type, bidirectional):
        """
        Build the network body for the requested architecture.
        :param input_layer: required for Model()
        :return: output layer tensor
        """
        if nn_type == 'Dense':
            x = Dense(64, activation='relu')(input_layer)
            x = Dropout(0.5)(x)
            x = Dense(64, activation='relu')(x)
            x = Dropout(0.5)(x)
            x = Flatten()(x)
            if self.multi_class:
                x = Dense(5, activation='softmax')(x)
            else:
                x = Dense(1, activation='sigmoid')(x)
        else:
            # RNN family: project, normalize, then a bidirectional recurrent layer.
            x = Dense(160)(input_layer)
            x = BatchNormalization()(x)
            x = LeakyReLU()(x)
            if nn_type == 'RNN':
                x = Bidirectional(SimpleRNN(256))(x)
            elif nn_type == 'GRU':
                x = Bidirectional(GRU(256))(x)
            elif nn_type == 'LSTM':
                x = Bidirectional(LSTM(256))(x)
            x = BatchNormalization()(x)
            x = LeakyReLU()(x)
            if self.multi_class:
                x = Dense(5, activation='softmax')(x)
            else:
                x = Dense(1, activation='sigmoid')(x)
        return x

    def train(self, X_train, X_test, y_train, y_test, batch_size=128, epochs=100, early_stop=True):
        """
        Compile and fit the model.
        :param X_train, X_test, y_train, y_test: X is the feature vector, y is the label
        :param batch_size: samples per gradient update
        :param epochs: number of iterations
        :param early_stop: use-flag for EarlyStopping on val_loss
        :return: history data
        """
        if self.multi_class:
            self.model.compile(loss='categorical_crossentropy',
                               optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                               metrics=['accuracy'])
        else:
            self.model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
        np.random.seed(1337)  # for reproducibility
        if early_stop:
            early_stopping = EarlyStopping(monitor='val_loss', mode='auto', patience=5)
            return self.model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                                  validation_data=(X_test, y_test), callbacks=[early_stopping])
        else:
            return self.model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                                  validation_data=(X_test, y_test))

    def predict(self, X):
        """Return raw model predictions for X."""
        return self.model.predict(X)

    def evaluate(self, X, y):
        """Score binary predictions against y.

        NOTE(review): uses predict_proba(X)[:, 1], which assumes the underlying
        model exposes predict_proba with a two-column output — verify for the
        VAT/functional model variants.
        """
        return self._score(y, self.model.predict_proba(X)[:, 1])

    def _score(self, true_label, predicted_prob):
        """
        Calculate performance scores for binary classification.
        :param true_label: the ground-truth labels
        :param predicted_prob: the predicted probabilities (threshold 0.5)
        :return: a dict of scores (AUC, Accuracy, Confusion Matrix, TPR, FPR)
        """
        score_dict = dict()
        score_dict['AUC'] = metrics.roc_auc_score(true_label, predicted_prob)
        predicted_label = [0 if prob < 0.5 else 1 for prob in predicted_prob]
        score_dict['Accuracy'] = metrics.accuracy_score(
            true_label, predicted_label)
        cm = metrics.confusion_matrix(true_label, predicted_label)
        score_dict['Confusion Matrix'] = cm
        # TPR = TP / (FN + TP); FPR = FP / (TN + FP), read off the confusion matrix.
        score_dict['TPR'] = cm[1, 1] / float(cm[1, 0] + cm[1, 1])
        score_dict['FPR'] = cm[0, 1] / float(cm[0, 0] + cm[0, 1])
        return score_dict
class FinancialNewsAnalysisModel(object):
    """Bidirectional-LSTM regressor over financial-news feature sequences.

    Input: (nb_time_step, dim_data) sequences; output: a single linear value.
    Weights are loaded from model_path when it exists (Keras 1.x API).
    """
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        """
        :param nb_time_step: number of timesteps per input sequence
        :param dim_data: feature dimension of each timestep
        :param batch_size: batch size used by fit_model
        :param model_path: optional weights checkpoint to load/save
        """
        # BUG FIX: model_path was assigned twice; the duplicate is removed.
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        """Assemble the news_input -> BiLSTM -> Dense(1) graph on self.model."""
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data), name='x1')
        # nb_hidden_units, dropout, l2_norm_alpha are module-level config values.
        # NOTE(review): both the LSTM and the Bidirectional wrapper are named
        # 'h1' — duplicate layer names may collide; confirm intent.
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha),
                    activation='tanh', name='h1')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data),
                                merge_mode='concat', name='h1')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions,
                           name="deep rnn for financial news analysis")

    def reset(self):
        """Clear recurrent state of every LSTM layer.

        BUG FIX: was l.reset_status(), which does not exist on Keras LSTM
        layers (AttributeError); the API method is reset_states().
        """
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        """Compile with Adam/MSE and dump the graph to model.png."""
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)
        # metrics=['mse'])
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        """Fit with early stopping; uses a 20% split when no validation set given."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_split=0.2, shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_data=(X_val, y_val), shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        """Persist weights to self.model_path."""
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        """Load weights if the checkpoint exists; record success in weight_loaded."""
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print per-array weight summaries (full values when detail=True).

        FIX: explicit `is None` check so an intentionally passed empty list is
        not silently replaced by the model's weights (the old `or` fallback).
        """
        weights = self.model.get_weights() if weights is None else weights
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        """Print sign-agreement between predictions and targets.

        BUG FIX: the two Python 2 `print` statements were SyntaxErrors under
        Python 3 (the rest of the file uses print()); converted to print() with
        identical space-separated output.
        """
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            # Count rows where prediction and target share the same sign.
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
# return mean_square_loss # return mean_squared_error(y_true, y_pred) # Compiling the model using 'adam' optimizer and MSE as loss function # sgd = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False) # model.compile(optimizer=sgd, loss=my_loss_function, metrics=['mse', 'mae', 'mape'], loss_weights=[1.0, 1.0, 1.0]) model.compile(optimizer='adam', loss=my_loss_function, metrics=['mse', 'mae', 'mape'], loss_weights=[1.0, 1.0, 1.0]) # model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse', 'mae', 'mape'], loss_weights=[1.0, 1.0, 1.0]) #muti_outputs shape= tasks x train_samples callbacks = [] model.fit(x=dat_train, y=[label_train_1, label_train_2, label_train_3], epochs=5000, batch_size=32) pred1, pred2, pred3 = model.predict(dat_test) # inv_y = scaler.inverse_transform(inv_y) plot_x = dat_test[:, 0] plot_y = pred1.flatten() - label_test_1 # plt.scatter(x=plot_x, y=plot_y) final_data = np.array([ grid_cells_test, label_test_1, pred1.flatten(), grid_cells_id_test ]).transpose() ## sort by first column
class AdditionNPIModel(NPIStep):
    """Neural Programmer-Interpreter model for multi-digit addition.

    Built on the Keras 1.x Sequential/Merge API with stateful LSTMs
    (batch size fixed to 1); trained by curriculum over question subsets.
    """
    model = None   # full NPI network (set in build)
    f_enc = None   # shared environment/argument encoder (set in build)

    def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1  # stateful LSTMs require a fixed batch size
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        """Assemble encoder, program embedding, core LSTM, and output heads."""
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1,
                              batch_input_shape=(self.batch_size, 1))

        # f_enc: fuse environment observation and arguments into a 32-dim code.
        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(Dense(256))
        f_enc.add(Dense(32))
        f_enc.add(Activation('relu', name='relu_enc'))
        self.f_enc = f_enc

        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)

        # Add a time axis so the encoder output can merge with the embedding.
        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))

        # f_lstm: two stacked stateful LSTMs over the fused representation.
        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        # f_lstm.add(Activation('relu', name='relu_lstm_0'))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)

        # f_end: scalar "stop execution" probability head.
        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(10))
        f_end.add(Dense(1))
        f_end.add(Activation('hard_sigmoid', name='hard_sigmoid_end'))
        # plot(f_end, to_file='f_end.png', show_shapes=True)

        # f_prog: next-program distribution head.
        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE))
        f_prog.add(Dense(PROGRAM_VEC_SIZE))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)

        # One softmax head per possible program argument.
        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num+1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(32))
            f_arg.add(Dense(IntegerArguments.depth))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
            # plot(f_arg, to_file='f_arg.png', show_shapes=True)

        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] + [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        """Reset base state and clear every stateful LSTM's recurrent state."""
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        """Compile with per-head losses: end flag (binary), program and args (categorical)."""
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer, loss=loss,
                           loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        """Curriculum training: easy question subsets first, then everything.

        :param int epoch:
        :param typing.List[typing.Dict[q=dict, steps=typing.List[StepInOut]]] steps_list:
        :return:
        """
        def filter_question(condition_func):
            # Keep only the questions whose (in1, in2) satisfy condition_func.
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        # self.print_weights()
        if not self.weight_loaded:
            # Pre-train the shared encoder on two-digit questions, then freeze it.
            self.train_f_enc(filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), epoch=100)
        self.f_enc.trainable = False

        q_type = "training questions of a+b < 10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a+b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b< 10 and 10 <= a+b"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a<10 and b<10 and a + b >= 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b<10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 10 and b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        # Keep retraining on everything until a 1000-question sample is all correct.
        while True:
            print("test all type of questions")
            cc, wc = self.test_to_subset(create_questions(1000))
            print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc))
            if wc == 0:
                break

            q_type = "training questions of ALL"
            print(q_type)
            pr = 1.0
            self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr)
            all_ok = self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr, skip_correct=True)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, epoch=3000, pass_rate=1.0, skip_correct=False):
        """Repeatedly run do_learn with a decaying learning rate; True on success."""
        learning_rate = 0.0001
        for i in range(30):
            all_ok = self.do_learn(steps_list, 30, learning_rate=learning_rate,
                                   pass_rate=pass_rate, arg_weight=1.,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
            learning_rate *= 0.95
        return False

    def test_to_subset(self, questions):
        """Run each question through the NPI; return (correct_count, wrong_count)."""
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        correct_count = wrong_count = 0
        for idx, question in enumerate(questions):
            question = copy(question)  # question_test mutates the dict
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                wrong_count += 1
        return correct_count, wrong_count

    @staticmethod
    def dict_to_str(d):
        # Deterministic key for a question dict (sorted key/value tuples).
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, learning_rate=None, pass_rate=1.0, arg_weight=1., skip_correct=False):
        """One training phase over steps_list; True once ok_rate >= pass_rate.

        Recompiles the model when a learning_rate is given, tracks per-question
        correctness, and aborts with RuntimeError on a non-finite loss.
        """
        if learning_rate is not None:
            self.update_learning_rate(learning_rate, arg_weight)
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch+1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                    ok_rate.append(1)
                    # Re-train an already-correct question only at perfect-square
                    # counts (1, 4, 9, ...) to thin out redundant updates.
                    if skip_correct or int(math.sqrt(correct_count[question_key])) ** 2 != correct_count[question_key]:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                        correct_count[question_key] = 0
                    wrong_new += 1
                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()  # clear LSTM state before replaying the trace

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" %
                      (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0
                # Give up the phase when the loss has plateaued and nothing changes.
                if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:
                    print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:")
                    return False
                last_loss = cur_loss
            print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        """Recompile the model with a new learning rate / argument loss weight."""
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        """Pre-train the shared encoder on single-digit addition with carry.

        Builds a throwaway two-headed model (ones digit, carry digit) on top of
        f_enc and trains it on digit triples extracted from the traces.
        """
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model")
        env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    # env is flattened to 4 rows: in1 / in2 / carry / output;
                    # argmax()-1 decodes the one-hot digit (index 0 = blank).
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:  # skip consecutive duplicates
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num % 10)+1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))

    def question_test(self, addition_env, npi_runner, question):
        """Run one question through the NPI; True iff answered correctly.

        StopIteration from run_npi (step budget exhausted) counts as failure.
        """
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        """Convert a StepInput into the three batch-shaped arrays the model expects."""
        x_pg = np.array((p_in.program.program_id,))
        x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)]
        return x

    def convert_output(self, p_out: StepOutput):
        """Convert a StepOutput into (targets, sample_weights) for train_on_batch.

        Outputs are [r, program_one_hot, arg_1, ..., arg_N]; heads with no
        supervision get a near-zero weight (1e-10) so the loss stays finite.
        """
        y = [np.array((p_out.r,))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]

        for v in arg_values:  # split by each args
            y += [v]
        # One weight per argument head: real args weighted 1, padding args ~0.
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
        """Run one inference step: (env, program, args) -> StepOutput."""
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(x, batch_size=1)  # if batch_size==1, returns single row
        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        """Persist model weights to self.model_path."""
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        """Load weights if the checkpoint exists; record success in weight_loaded."""
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print per-array weight summaries (full values when detail=True)."""
        # NOTE(review): `or` falls back to model weights when an empty list is
        # passed explicitly — presumably unintended; confirm before changing.
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        # Flattened size of the environment grid fed to the encoder.
        return FIELD_ROW * FIELD_DEPTH
class FinancialNewsAnalysisModel(object):
    """Bidirectional-LSTM regressor mapping a sequence of news feature vectors
    to one scalar prediction (Keras 1.x API: output_dim/nb_epoch/etc.)."""

    model = None  # replaced by build()

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        """
        nb_time_step -- timesteps per input sequence
        dim_data     -- feature dimension of each timestep
        batch_size   -- batch size used by fit_model()
        model_path   -- optional weight-checkpoint path; loaded when it exists
        """
        self.model_path = model_path  # (duplicate assignment removed)
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        """Assemble the Bi-LSTM -> Dense(1) graph into self.model."""
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data))
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha),
                    activation='tanh')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data),
                                merge_mode='concat')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions,
                           name="deep rnn for financial news analysis")

    def reset(self):
        # Clear recurrent state of all LSTM layers (stateful use).
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)  # metrics=['mse'])
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        """Train with early stopping; uses a 20% split when no val set given."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_split=0.2, shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_data=(X_val, y_val), shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        # Best-effort: silently skip when no checkpoint exists yet.
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print summary stats for each weight tensor; full values if detail."""
        # `is None` (not truthiness) so an explicit empty list is respected.
        weights = weights if weights is not None else self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        """Print per-sample sign agreement between predictions and targets."""
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            # A hit = prediction and target share the same sign.
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])  # fixed: was a Python-2 print statement
        print(count_all, count_true)
class QNetwork:
    """AlphaZero-style residual CNN used as a DQN Q-network over a 4x5x5 board,
    with 100 move logits (no 'pass' output)."""

    def __init__(self, config: Config) -> None:
        self.config = config
        # SHA-256 digest of the last loaded/saved weight file, for change detection.
        self.digest = None

    def build(self) -> None:
        """Build conv stem -> residual tower -> policy head and compile with MSE."""
        mc = self.config.model
        in_x = x = Input((4, 5, 5))
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)  # axis=1: channels_first
        x = Activation("relu")(x)
        for _ in range(mc.res_layer_num):
            x = self._build_residual_block(x)
        res_out = x
        # for policy output
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        # no output for 'pass'
        out = Dense(100, kernel_regularizer=l2(mc.l2_reg), activation="softmax",
                    name="out")(x)
        # x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg),
        #           activation="relu")(x)
        # value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg),
        #                   activation="tanh", name="value_out")(x)
        self.model = Model(in_x, out, name="slipe_model")
        self.model.compile(loss='mse', optimizer=Adam(lr=mc.learning_rate))
        self.model.summary()

    def _build_residual_block(self, x):
        """One conv-BN-relu-conv-BN block with an identity skip connection."""
        mc = self.config.model
        in_x = x
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size,
                   padding="same", data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    # Learn weights from replayed experience.
    def replay(self, memory: Memory, batch_size: int, gamma: float,
               targetQN: 'QNetwork') -> None:
        """One DQN training step on a minibatch sampled from replay memory.

        NOTE(review): ``predict(next_state_b)`` is called without adding a batch
        axis — presumably states are already batched or Keras broadcasts; confirm.
        """
        inputs = np.zeros((batch_size, 4, 5, 5))
        targets = np.zeros((batch_size, 100))
        mini_batch = memory.sample(batch_size)
        for i, (state_b, action_b, reward_b, next_state_b) in enumerate(mini_batch):
            inputs[i] = state_b  # shape=(4, 5, 5)
            target = reward_b  # type: int
            # if not (next_state_b == 0).all():
            # Value computation (action selection and value estimation use separate
            # Q-networks so DDQN is also supported).
            retmainQs = self.model.predict(next_state_b)
            next_action = np.argmax(retmainQs)  # pick the action with the highest predicted reward
            target = reward_b + gamma * \
                targetQN.model.predict(next_state_b)[0][next_action]
            # Q-network output used as the training target row.
            # NOTE(review): [0][0] is a scalar broadcast over all 100 entries —
            # looks like [0] (the full row) was intended; confirm upstream.
            targets[i] = self.model.predict(state_b)[0][0]
            # Teaching signal; action_b: int <= 100
            targets[i, action_b] = target
        # epochs = number of passes over the training data; verbose=0 disables output.
        self.model.fit(inputs, targets, epochs=1, verbose=0)

    @staticmethod
    def fetch_digest(weight_path: str):
        """SHA-256 hex digest of the weight file, or None when it is absent."""
        if os.path.exists(weight_path):
            m = hashlib.sha256()
            with open(weight_path, "rb") as f:
                m.update(f.read())
            return m.hexdigest()

    def load(self, config_path: str, weight_path: str) -> bool:
        """Restore architecture from JSON config and weights from disk.

        Returns True on success, False when the files are missing.
        """
        if os.path.exists(weight_path):  # os.path.exists(config_path) and
            logger.debug(f"loading model from {config_path}")
            with open(config_path, "rt") as f:
                self.model = Model.from_config(json.load(f))
            self.model.load_weights(weight_path)
            self.model.compile(
                loss='mse',
                optimizer=Adam(lr=self.config.model.learning_rate))
            self.model.summary()
            self.digest = self.fetch_digest(weight_path)
            logger.debug(f"loaded model digest = {self.digest}")
            return True
        else:
            logger.debug(
                f"model files does not exist at {config_path} and {weight_path}"
            )
            return False

    def save(self, config_path: str, weight_path: str) -> None:
        """Persist architecture (JSON config) and weights, then refresh digest."""
        logger.debug(f"save model to {config_path}")
        with open(config_path, "wt") as f:
            json.dump(self.model.get_config(), f)
        self.model.save_weights(weight_path)
        self.digest = self.fetch_digest(weight_path)
        logger.debug(f"saved model digest {self.digest}")
class CombinedAnalysisModel(object):
    """Joint model combining a (frozen) news Bi-LSTM branch and a (frozen)
    time-series LSTM branch, merged and topped with two Dense layers.

    Branch weights are loaded by name from the stand-alone FA/TA models.
    """

    model = None  # replaced by build()

    def __init__(self, dim_input_x1, time_step_x1, dim_input_x2, time_step_x2,
                 batch_size=1, model_path=None, fa_model_path=None, ta_model_path=None):
        """
        dim_input_x1/time_step_x1 -- news-branch feature dim / timesteps
        dim_input_x2/time_step_x2 -- time-series-branch feature dim / timesteps
        model_path                -- combined-model checkpoint (optional)
        fa_model_path/ta_model_path -- per-branch checkpoints loaded by layer name
        """
        self.model_path = model_path
        self.fa_model_path = fa_model_path
        self.ta_model_path = ta_model_path
        self.batch_size = batch_size
        self.dim_input_x1 = dim_input_x1
        self.time_step_x1 = time_step_x1
        self.dim_input_x2 = dim_input_x2
        self.time_step_x2 = time_step_x2
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        """Assemble the two frozen branches, merge them, and add the head."""
        news_input = Input(shape=(self.time_step_x1, self.dim_input_x1), name='x1')
        financial_time_series_input = Input(shape=(self.time_step_x2, self.dim_input_x2),
                                            name='x2')
        # News branch (trainable=False: weights come from the FA model).
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha),
                    activation='tanh', name='h1', trainable=False)
        bi_lstm = Bidirectional(lstm, input_shape=(self.time_step_x1, self.dim_input_x1),
                                merge_mode='concat', name='h1', trainable=False)
        h1 = bi_lstm(news_input)
        # Time-series branch (trainable=False: weights come from the TA model).
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                            dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1', trainable=False)
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=False, name='lstm_layer2_loss3',
                             trainable=False)
        h2_layer_1 = lstm_layer_1(financial_time_series_input)
        h2_layer_2 = lstm_layer_23(h2_layer_1)
        # Merge both branch representations and predict one scalar.
        h_3 = Merge(mode='concat', name='h3')([h1, h2_layer_2])
        h_4 = Dense(nb_hidden_units, name='h4')(h_3)
        prediction = Dense(1, name='y3')(h_4)
        self.model = Model(input=[news_input, financial_time_series_input],
                           output=prediction,
                           name='combined model for financial analysis')
        plot(self.model, to_file='model.png')

    def reset(self):
        # Clear recurrent state of all LSTM layers (stateful use).
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X1, X2, y, X1_val=None, X2_val=None, y_val=None, epoch=50):
        """Train with early stopping; uses a 20% split when no val set given."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        if X1_val is None:
            self.model.fit([X1, X2], y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_split=0.2, shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit([X1, X2], y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_data=([X1_val, X2_val], y_val), shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        """Load combined weights if present, then branch weights by layer name."""
        if self.model_path is not None and os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True
        if self.ta_model_path is not None and os.path.exists(self.ta_model_path):
            self.model.load_weights(self.ta_model_path, by_name=True)
        if self.fa_model_path is not None and os.path.exists(self.fa_model_path):
            self.model.load_weights(self.fa_model_path, by_name=True)

    def print_weights(self, weights=None, detail=False):
        """Print summary stats for each weight tensor; full values if detail."""
        # `is None` (not truthiness) so an explicit empty list is respected.
        weights = weights if weights is not None else self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X1, X2, y):
        """Print per-sample sign agreement between predictions and targets."""
        y_hat = self.model.predict([X1, X2], batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            # A hit = prediction and target share the same sign.
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])  # fixed: was a Python-2 print statement
        print(count_all, count_true)
class FinancialTimeSeriesAnalysisModel(object):
    """Multi-task LSTM forecaster: one shared LSTM feeds five task-specific
    LSTM heads (two custom losses, MSE, logloss, cross-entropy)."""

    model = None  # replaced by build()

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        """
        nb_time_step -- timesteps per input sequence
        dim_data     -- feature dimension of each timestep
        model_path   -- optional weight-checkpoint path; loaded when it exists
        """
        self.model_path = model_path  # (duplicate assignment removed)
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        """Assemble the shared-trunk / five-head graph into self.model."""
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                            dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss3')
        lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss4')
        lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'),
                                                   name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'),
                                                   name="p5")(h25)  # cross
        self.model = Model(
            input=financial_time_series_input,
            output=[
                time_series_predictions1, time_series_predictions2,
                time_series_predictions3, time_series_predictions4,
                time_series_predictions5
            ],
            name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        # Clear recurrent state of all LSTM layers (stateful use).
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        # One loss per head, in head order p1..p5.
        loss = [
            custom_objective1, custom_objective2, 'mse', 'binary_crossentropy',
            'categorical_crossentropy'
        ]
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        """Train all heads: y for the three regression heads, y > 0 for the
        logloss head, y_label for the classification head."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        self.model.fit(X, [y] * 3 + [y > 0] + [y_label], batch_size=self.batch_size,
                       nb_epoch=epoch, validation_split=0.3, shuffle=True,
                       callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        # Best-effort: silently skip when no checkpoint exists yet.
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print summary stats for each weight tensor; full values if detail."""
        # `is None` (not truthiness) so an explicit empty list is respected.
        weights = weights if weights is not None else self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        """Print per-timestep sign agreement using head p1's predictions."""
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
class FinancialTimeSeriesAnalysisModel(object):
    """Two-layer LSTM sequence regressor: per-timestep scalar predictions
    via TimeDistributedDense (Keras 1.x API)."""

    model = None  # replaced by build()

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        """
        nb_time_step -- timesteps per input sequence
        dim_data     -- feature dimension of each timestep
        model_path   -- optional weight-checkpoint path; loaded when it exists
        """
        self.model_path = model_path  # (duplicate assignment removed)
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        """Assemble LSTM -> LSTM -> TimeDistributedDense(1) into self.model."""
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data))
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                            dropout_W=dropout, inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True)
        lstm_layer_2 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                            dropout_W=dropout, inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True)
        h1 = lstm_layer_1(financial_time_series_input)
        h2 = lstm_layer_2(h1)
        time_series_predictions = TimeDistributedDense(1)(h2)
        self.model = Model(
            financial_time_series_input, time_series_predictions,
            name="deep rnn for financial time series forecasting")

    def reset(self):
        # Clear recurrent state of all LSTM layers (stateful use).
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=3):
        """Train with early stopping; uses a 20% split when no val set given."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_split=0.2, shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_data=(X_val, y_val), shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        # Best-effort: silently skip when no checkpoint exists yet.
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print summary stats for each weight tensor; full values if detail."""
        # `is None` (not truthiness) so an explicit empty list is respected.
        weights = weights if weights is not None else self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        """Print per-timestep sign agreement between predictions and targets."""
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
class TL(Model):
    """
    Triplet-Loss trained Neural Network.

    Wraps a *base* embedding model in a three-way Siamese network
    (anchor / positive / negative) trained with the triplet loss.
    https://arxiv.org/abs/1503.03832
    """

    def __init__(self, base=None, siamese=None):
        super(TL, self).__init__()

        # Store the base model.
        assert base is not None  # fixed: identity check instead of `!= None`
        self.base = base

        # For loading: both sub-models already exist, nothing to assemble.
        if base is not None and siamese is not None:
            self.base = base
            self.siamese = siamese
            self.latent_dim = self.base.outputs[0].shape[1]
            return

        # The base model must map to a single flat latent vector.
        assert len(self.base.outputs) == 1
        assert len(self.base.outputs[0].shape) == 2
        self.latent_dim = self.base.outputs[0].shape[1]

        # Input shape without the batch axis.
        input_shape = self.base.inputs[0].shape.as_list()[1:]

        # Anchor / positive / negative branches all share the same base.
        input_anchor = layers.Input(shape=input_shape)
        output_anchor = self.base(input_anchor)

        input_positive = layers.Input(shape=input_shape)
        output_positive = self.base(input_positive)

        input_negative = layers.Input(shape=input_shape)
        output_negative = self.base(input_negative)

        # Concatenate the three embeddings into one dummy output the loss can slice.
        output = layers.concatenate(
            [output_anchor, output_positive, output_negative])

        self.siamese = Model([input_anchor, input_positive, input_negative],
                             output, name="triplet_model")

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                triplet_loss="mse",
                **kwargs):
        """
        Compiles the TL.

        Additionally to the default functionality of *compile*, it adds the
        triplet-loss, selected via the *triplet_loss* parameter. The loss is

        >>> vae_loss = max(0.0, pos_dist - neg_dist + alpha)

        See the literature for details.

        Additional args:
            triplet_loss (string): The base-loss for the triplet-loss. Values
                are either *euclidean* for euclidean norm or *cosine* for
                cosine similarity.
        """
        assert loss is None, "Not expected to provide an explicit loss for TL. Use 'triplet_loss'"
        self.triplet_loss = triplet_loss

        def triplet_loss_function(y_true, y_pred, alpha=0.4):
            # y_pred packs [anchor | positive | negative] along the feature axis.
            anchor = y_pred[:, 0:self.latent_dim]
            positive = y_pred[:, self.latent_dim:self.latent_dim * 2]
            negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3]
            if triplet_loss == "euclidean":
                pos_dist = euclidean_loss(positive, anchor)
                neg_dist = euclidean_loss(negative, anchor)
            elif triplet_loss == "cosine":
                pos_dist = cosine_loss(positive, anchor)
                neg_dist = cosine_loss(negative, anchor)
            else:
                raise Exception("Unexpected: " + triplet_loss)
            basic_loss = pos_dist - neg_dist + alpha
            loss = K.maximum(basic_loss, 0.0)
            return loss

        loss = triplet_loss_function
        self.siamese.compile(optimizer, loss, metrics, loss_weights,
                             sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            minibatch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        This is basically the same as in vanilla Keras.

        Additional args:
            minibatch_size (int): The model internally does some sampling.
                The *minibatch_size* specifies how many candidates to use in
                order to create a triplet for training.
        """
        assert minibatch_size is not None, "ERROR! Must provide 'minibatch_size'."
        assert steps_per_epoch is not None, "ERROR! Must provide 'steps_per_epoch'."
        assert validation_steps is not None, "ERROR! Must provide 'validation_steps'."

        # Target is ignored by the triplet loss; only its shape matters.
        y_dummy = np.zeros((batch_size, self.latent_dim * 3))

        # Template generator yielding (triplet inputs, dummy targets) batches.
        # NOTE(review): the `model`/`sampling` parameters are unused by the body
        # and the call sites pass (batch_size, self.siamese) into them; kept
        # as-is for behavioral compatibility.
        def triplet_loss_generator(x_generator, y_generator, model, sampling):
            # Index the samples by class for fast positive/negative sampling.
            classes = sorted(list(set(y_generator)))
            class_indices = {}
            for c in classes:
                class_indices[c] = []
            for index, c in enumerate(y_generator):
                class_indices[c].append(index)
            # For each class, the set of all other classes.
            class_complements = {}
            for c in classes:
                class_complements[c] = [c2 for c2 in classes if c2 != c]

            # Generator loop.
            while True:
                x_input_anchors = []
                x_input_positives = []
                x_input_negatives = []
                # Generate a whole batch.
                for _ in range(batch_size):
                    anchor_class = random.choice(classes)
                    anchor_index = random.choice(class_indices[anchor_class])
                    anchor_input = x_generator[anchor_index]
                    anchor_latent = self.base.predict(
                        np.expand_dims(anchor_input, axis=0))[0]

                    # Hard-positive mining: farthest positive candidate.
                    positive_candidates = []
                    while len(positive_candidates) < minibatch_size:
                        positive_class = anchor_class
                        positive_index = random.choice(
                            class_indices[positive_class])
                        positive_input = x_generator[positive_index]
                        assert positive_class == y_generator[positive_index]
                        positive_candidates.append(positive_input)
                    positive_candidates = np.array(positive_candidates)
                    positive_latents = self.base.predict(positive_candidates)
                    positive_extremum = compute_latent_extremum(
                        anchor_latent, positive_latents, "argmax",
                        self.triplet_loss)
                    positive_input = positive_candidates[positive_extremum]

                    # Hard-negative mining: closest negative candidate.
                    negative_candidates = []
                    while len(negative_candidates) < minibatch_size:
                        negative_class = random.choice(
                            class_complements[anchor_class])
                        negative_index = random.choice(
                            class_indices[negative_class])
                        negative_input = x_generator[negative_index]
                        assert negative_class == y_generator[negative_index]
                        negative_candidates.append(negative_input)
                    negative_candidates = np.array(negative_candidates)
                    negative_latents = self.base.predict(negative_candidates)
                    negative_extremum = compute_latent_extremum(
                        anchor_latent, negative_latents, "argmin",
                        self.triplet_loss)
                    negative_input = negative_candidates[negative_extremum]

                    # Done.
                    x_input_anchors.append(anchor_input)
                    x_input_positives.append(positive_input)
                    x_input_negatives.append(negative_input)
                x_input_anchors = np.array(x_input_anchors)
                x_input_positives = np.array(x_input_positives)
                x_input_negatives = np.array(x_input_negatives)
                x_input = [
                    x_input_anchors, x_input_positives, x_input_negatives
                ]
                yield x_input, y_dummy

        # Create the generators.
        training_generator = triplet_loss_generator(x, y, batch_size,
                                                    self.siamese)
        if validation_data is not None:
            validation_generator = triplet_loss_generator(
                validation_data[0], validation_data[1], batch_size,
                self.siamese)
        else:
            validation_generator = None

        # Create the history.
        history_keys = ["loss", "val_loss"]
        history = {}
        for history_key in history_keys:
            history[history_key] = []

        # Training the model.
        for epoch in range(epochs):
            print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...")
            # Generating data for training.
            training_input, training_output = next(training_generator)
            if validation_generator is not None:
                validation_input, validation_output = next(
                    validation_generator)
                model_history = self.siamese.fit(
                    training_input, training_output,
                    validation_data=(validation_input, validation_output),
                    epochs=1,
                    steps_per_epoch=steps_per_epoch,
                    verbose=0,
                    validation_steps=validation_steps)
            else:
                # Fixed: previously referenced undefined validation_input
                # (NameError) when no validation data was supplied.
                model_history = self.siamese.fit(
                    training_input, training_output,
                    epochs=1,
                    steps_per_epoch=steps_per_epoch,
                    verbose=0)
            # Update the history (val_loss only exists when validating).
            for history_key in history_keys:
                if history_key not in model_history.history:
                    continue
                history_value = model_history.history[history_key]
                history[history_key].append(history_value)
                print(history_key, history_value)
        return history

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """ Coming soon... """
        print("TODO: implement fit_generator!")
        # Not implemented yet (unreachable delegation to siamese removed).
        raise Exception("Not implemented!")

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """ Evaluates the model. Same as vanilla Keras. """
        # Fixed: `steps` is now forwarded (previously hard-coded to None).
        return self.siamese.evaluate(x, y, batch_size, verbose, sample_weight,
                                     steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """ Does a prediction. Same as vanilla Keras. """
        return self.siamese.predict(x, batch_size, verbose, steps)

    def summary(self):
        """ Provides a summary. """
        print("Basemodel:")
        self.base.summary()
        print("Siamese model:")
        self.siamese.summary()

    def save(self, path):
        """
        Saves the TL.

        This includes the whole Siamese Net plus the base-model.

        This code

        >>> tl.save("myae.h5")

        will create the files *tl.h5*, and *tl-base.h5*.
        """
        self.siamese.save(path)
        self.base.save(append_to_filepath(path, "-base"))
inner_r = matrix_inner[:, self.output_dim:2 * self.output_dim] z = self.inner_activation(x_z + inner_z) r = self.inner_activation(x_r + inner_r) x_h = xx_ inner_h = r * self.ln( K.dot(h_tm1 * B_U[0], self.U[:, 2 * self.output_dim:]), 'preactx') hh = self.activation(x_h + inner_h) h = z * h_tm1 + (1 - z) * hh return h, [h] if __name__ == '__main__': from keras.layers import Input from keras.engine.training import Model np.random.seed(42) input = Input(batch_shape=(5, 6, 7), dtype='float32', name='input') rnn = GRULN(10) output = rnn(input) model = Model(input=input, output=output) model.compile(loss='mse', optimizer='sgd') data = np.ones((5, 6, 7), dtype='float32') probs = model.predict(data, batch_size=5) print probs.shape, probs.mean() # (5, 10) 0.0689924 print rnn.trainable_weights
class FinancialTimeSeriesAnalysisModel(object):
    """Multi-task LSTM forecaster (patience=10 / 20% validation variant):
    one shared LSTM trunk feeding five task-specific LSTM heads."""

    model = None  # replaced by build()

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        """
        nb_time_step -- timesteps per input sequence
        dim_data     -- feature dimension of each timestep
        model_path   -- optional weight-checkpoint path; loaded when it exists
        """
        self.model_path = model_path  # (duplicate assignment removed)
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        """Assemble the shared-trunk / five-head graph into self.model."""
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                            dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss3')
        lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss4')
        lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout,
                             dropout_W=dropout, W_regularizer=l2(l2_norm_alpha),
                             b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'),
                                                   name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'),
                                                   name="p5")(h25)  # cross
        self.model = Model(input=financial_time_series_input,
                           output=[time_series_predictions1, time_series_predictions2,
                                   time_series_predictions3, time_series_predictions4,
                                   time_series_predictions5],
                           name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        # Clear recurrent state of all LSTM layers (stateful use).
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        # One loss per head, in head order p1..p5.
        loss = [custom_objective1, custom_objective2, 'mse',
                'binary_crossentropy', 'categorical_crossentropy']
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        """Train all heads: y for the three regression heads, y > 0 for the
        logloss head, y_label for the classification head."""
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0)
        self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size,
                       nb_epoch=epoch, validation_split=0.2, shuffle=True,
                       callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        # Best-effort: silently skip when no checkpoint exists yet.
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        """Print summary stats for each weight tensor; full values if detail."""
        # `is None` (not truthiness) so an explicit empty list is respected.
        weights = weights if weights is not None else self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        """Print per-timestep sign agreement using head p1's predictions."""
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0,i,0]*y_hat[0,i,0]>0 else count_true
            print(y[0,i,0],y_hat[0,i,0])
        print(count_all,count_true)
def test_model_methods():
    """Exercise the functional-API training methods of a two-input,
    two-output model: *_on_batch, fit, validation handling, metrics,
    generators/Sequences, and the error paths of compile/fit."""
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch with list and dict inputs/targets
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate on fresh data
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight (None means "no weighting" for the first output)
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric: total loss + 2 losses + 2 metrics = 5 scalars
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work: metric attached to a single named output
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well: metric list per named output
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # enable verbose for evaluate_generator
    out = model.evaluate_generator(gen_data(4), steps=3, verbose=1)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1],
                                                  sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    # valid compile variants
    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse',
                  loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None,
                                                  np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse',
                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3,
                              epochs=5, initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5, initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator will throw an exception
    # if steps is unspecified for regular generator
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.fit_generator(generator=gen_data(), epochs=5,
                                  initial_epoch=0, validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # Check if generator is only accessed an expected number of times
    gen_counters = [0, 0]

    def gen_data(i):
        while True:
            gen_counters[i] += 1
            yield ([np.random.random((1, 3)), np.random.random((1, 3))],
                   [np.random.random((1, 4)), np.random.random((1, 3))])

    out = model.fit_generator(generator=gen_data(0), epochs=3,
                              steps_per_epoch=2,
                              validation_data=gen_data(1),
                              validation_steps=1, max_queue_size=2,
                              workers=2)
    # Need range check here as filling of the queue
    # depends on sleep in the enqueuers
    assert 6 <= gen_counters[0] <= 8
    # 12 = (epoch * workers * validation steps * max_queue_size)
    assert 3 <= gen_counters[1] <= 12

    gen_counters = [0]
    out = model.fit_generator(generator=RandomSequence(3), epochs=3,
                              validation_data=gen_data(0),
                              validation_steps=1, max_queue_size=2,
                              workers=2)
    # 12 = (epoch * workers * validation steps * max_queue_size)
    # Need range check here as filling of the queue
    # depends on sleep in the enqueuers
    assert 3 <= gen_counters[0] <= 12

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer, loss, metrics=[],
                                sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
def test_pandas_dataframe():
    """Models should accept pandas DataFrames wherever numpy arrays are
    accepted: fit, predict, *_on_batch and evaluate, in positional, list
    and name-keyed dict forms, for single- and multi-input models."""
    input_a = Input(shape=(3,), name='input_a')
    input_b = Input(shape=(3,), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    # Fixed layer-name typo: 'desne_2' -> 'dense_2' (name is not
    # referenced anywhere else in this test, so the rename is safe).
    y = Dense(3, name='dense_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))
    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    # fit
    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({'input_a': input_a_df, 'input_b': input_b_df},
                [output_a_df, output_b_df])

    # predict
    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    # predict_on_batch
    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    # evaluate
    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df},
                     [output_a_df, output_b_df])

    # train_on_batch
    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                           [output_a_df, output_b_df])

    # test_on_batch
    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                          [output_a_df, output_b_df])
def test_model_methods():
    """Same training-method coverage as the generator-focused variant,
    augmented with pandas DataFrame inputs/targets alongside numpy."""
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    input_a_df = pd.DataFrame(input_a_np)
    input_b_df = pd.DataFrame(input_b_np)

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    output_a_df = pd.DataFrame(output_a_np)
    output_b_df = pd.DataFrame(output_b_np)

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.train_on_batch([input_a_df, input_b_df],
                               [output_a_df, output_b_df])

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)
    out = model.fit([input_a_df, input_b_df],
                    [output_a_df, output_b_df], epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.test_on_batch([input_a_df, input_b_df],
                              [output_a_df, output_b_df])

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
    out = model.predict_on_batch([input_a_df, input_b_df])

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    out = model.predict([input_a_df, input_b_df], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1],
                                                  sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse',
                  loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None,
                                                  np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse',
                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=12,
                              epochs=5, initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=12, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
def test_model_with_external_loss():
    """Training with `loss=None`: regularization-only losses, losses added
    via `add_loss`, and (TensorFlow only) fully tensor-fed models that take
    no external input or target data."""
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit: batch_size is meaningless without external data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
def pred_probas_for_classifier(model: KerasModel, test_dataset): probs = model.predict(test_dataset).reshape(-1) print(probs[:3]) return probs
def test_pandas_dataframe():
    """Models should accept pandas DataFrames wherever numpy arrays are
    accepted: fit, predict, *_on_batch and evaluate, in positional, list
    and name-keyed dict forms, for single- and multi-input models."""
    input_a = Input(shape=(3,), name='input_a')
    input_b = Input(shape=(3,), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    # Fixed layer-name typo: 'desne_2' -> 'dense_2' (name is not
    # referenced anywhere else in this test, so the rename is safe).
    y = Dense(3, name='dense_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))
    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    # fit
    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({'input_a': input_a_df, 'input_b': input_b_df},
                [output_a_df, output_b_df])

    # predict
    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    # predict_on_batch
    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    # evaluate
    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df},
                     [output_a_df, output_b_df])

    # train_on_batch
    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                           [output_a_df, output_b_df])

    # test_on_batch
    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                          [output_a_df, output_b_df])
def test_model_methods():
    """Exercise the core Model training/evaluation API on a two-input,
    two-output functional model: *_on_batch, fit (with validation splits
    and explicit validation data), predict, evaluate, sample weights and
    several metric configurations.
    """
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np,
                                'dropout': output_b_np})

    # test fit (nb_epoch was deprecated in Keras 2; use epochs, matching
    # the other tests in this file)
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     {'dense_1': output_a_np,
                                      'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np,
                               'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np,
                                  'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric: total loss + 2 per-output losses + 2 accuracies
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work: metric on a single named output
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well: metric list on a single named output
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test with a custom metric function (def instead of assigned lambda,
    # per PEP 8)
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
class AIPlayer(Player):
    """Reversi/Othello player backed by a policy+value network and MCTS.

    The network maps a (3, 8, 8) board encoding to a 65-way move policy
    (64 squares + pass) and a scalar value; `monte_carlo` runs a PUCT-style
    tree search guided by those outputs.
    """

    def __init__(self, buffer_size, sim_count, train=True, model="",
                 tau=1, compile=False):
        # NOTE: `compile` mirrors keras load_model's flag; kept as-is for
        # backward compatibility even though it shadows the builtin.
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau

    @staticmethod
    def create_if_nonexistant(config):
        # Bootstrap an initial model file on disk if none exists yet.
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size,
                          config.game.simulation_num_per_move)
            ai.save(config.data.model_location + "model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train

    @staticmethod
    def clear():
        K.clear_session()

    def load(self, file, compile=False):
        """Load a saved network, replacing any currently held one."""
        # Only an absent attribute is expected here; a broad `except
        # Exception: pass` would hide real errors.
        try:
            del self.network
        except AttributeError:
            pass
        self.network = load_model(
            file,
            custom_objects={
                "objective_function_for_policy":
                    AIPlayer.objective_function_for_policy,
                "objective_function_for_value":
                    AIPlayer.objective_function_for_value
            },
            compile=compile)

    def save(self, file):
        self.network.save(file)

    def create_network(self):
        """Build the residual policy/value network and compile it."""
        # Conv stem over the (3, 8, 8) channels-first board encoding.
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)
        res_out = x
        # Policy head: softmax over 64 squares + 1 pass move.
        x = Conv2D(filters=2, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8 * 8 + 1, activation="softmax",
                           name="policy_out")(x)
        # Value head: scalar tanh output.
        x = Conv2D(filters=1, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out = Dense(1, activation="tanh", name="value_out")(x)
        self.network = Model(x_in, [policy_out, value_out],
                             name="reversi_model")
        self.compile()

    def _build_residual_block(self, x):
        """Two conv+BN layers with a skip connection (pre-activation add)."""
        in_x = x
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    def compile(self):
        losses = [
            AIPlayer.objective_function_for_policy,
            AIPlayer.objective_function_for_value
        ]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                             loss=losses)

    def update_lr(self, lr):
        K.set_value(self.network.optimizer.lr, lr)

    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # Cross-entropy against the (soft) target policy distribution.
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)

    def update_buffer(self, winner):
        """Flush pending (state, policy) pairs into the replay buffer,
        labelling each with the final game winner."""
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))

    def train_batches(self, batch_size, batches=-1, verbose=2):
        """Fit the network on replay data.

        batches == -1 trains on the full buffer; otherwise draws
        batch_size * batches samples (re-sampling until enough are drawn).
        """
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size * batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer],
                                   batch_size=batch_size, epochs=1,
                                   verbose=verbose)
        return history

    def preprocess_input(self, board, side):
        """Encode a board into the network's (3, 8, 8) input planes.

        Plane 0: +1 stones; plane 1: -1 stones; plane 2: all ones iff
        side == 1 (side-to-move indicator).
        """
        # np.int was removed from NumPy (1.24+); the builtin int dtype is
        # the direct equivalent.
        state = np.zeros((3, 8, 8), dtype=int)
        for i in range(8):
            for j in range(8):
                if board[i, j] == 1:
                    state[0, i, j] = 1
                elif board[i, j] == -1:
                    state[1, i, j] = 1
                if side == 1:
                    state[2, i, j] = 1
        return state

    def evaluate(self, game, side):
        """Return the network's value-head output for the current position."""
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis, :])
        return pred[1][0]

    def pick_move(self, game, side):
        """Sample a move from the temperature-scaled MCTS visit counts."""
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1, -1))
        monte_prob = self.monte_carlo(game, side)

        if self.train:
            # Record the normalized visit distribution as the training target.
            self.temp_state.append((self.preprocess_input(game.board, side),
                                    np.divide(monte_prob,
                                              np.sum(monte_prob))))

        # Temperature scaling, then renormalize to a distribution.
        monte_prob = np.float_power(monte_prob, 1 / self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))

        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        # Numerical slack: fall back to the last legal move.
        return possible_moves[-1]

    def monte_carlo(self, game, side):
        """Run self.sim_count PUCT-guided simulations; return the 65-way
        visit-count vector for the root position."""
        N = defaultdict(lambda: 0)  # visit counts per (state, move) edge
        W = defaultdict(lambda: 0)  # accumulated value per edge
        Q = defaultdict(lambda: 0)  # mean value per edge
        P = defaultdict(lambda: 0)  # network prior per edge

        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            # Forced pass: all mass on the pass slot.
            policy = np.zeros((65))
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            # Only one legal move: no search needed.
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy

        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis, :])
        policy = pred[0][0]

        # Renormalize the prior over legal moves only.
        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]
        for move in possible_moves:
            P[(sid, Othello.move_id(move))] = \
                policy[Othello.move_id(move)] / total

        for i in range(self.sim_count):
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1, -1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                # Select the edge maximizing Q + P / (N + 1).
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid, mid)] + \
                        P[(sid, mid)] / (N[(sid, mid)] + 1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move

                if N[(sid, Othello.move_id(best_move))] == 0:
                    # Unvisited edge: play it, expand with a network
                    # evaluation, back up, and end this simulation.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

                    current_input = self.preprocess_input(
                        clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis, :])
                    policy = pred[0][0]
                    value = pred[1][0]

                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1, -1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]
                    for move in possible_moves:
                        P[(sid, Othello.move_id(move))] = \
                            policy[Othello.move_id(move)] / total

                    for node in visited:
                        N[node] += 1
                        W[node] += value * side
                        Q[node] = W[node] / N[node]
                    break
                else:
                    # Known edge: keep descending the tree.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

        # Root policy = raw visit counts for legal moves.
        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid, mid)]
        return policy
class AE(Model):
    """
    Autoencoder.

    This is a simple autoencoder consisting of an encoder and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = Autoencoder(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)
    """

    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading: all three parts were deserialized already.
        if encoder is not None and decoder is not None \
                and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions: single-tensor interface, matching latent shape.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[1:], \
            str(encoder.outputs[0].shape) + " " + \
            str(decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE: chain decoder after encoder.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the model.

        This is the same as compilation in Keras.
        """
        assert "reconstruction_loss" not in kwargs, \
            "Not expected to use reconstruction_loss in AE."
        # NOTE(review): target_tensors is accepted but not forwarded to the
        # inner model; preserved as-is to avoid changing behavior.
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        Trains the autoencoder.
        """
        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """
        Trains the autoencoder with a generator.
        """
        return self.autoencoder.fit_generator(
            generator,
            steps_per_epoch,
            epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """
        Evaluates the autoencoder.
        """
        # Bug fix: previously forwarded the literal steps=None, silently
        # ignoring the caller's steps argument.
        return self.autoencoder.evaluate(x, y, batch_size, verbose,
                                         sample_weight, steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as
        :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`
        """
        return self.predict_reconstruct_from_samples(x, batch_size, verbose,
                                                     steps)

    def predict_reconstruct_from_samples(self,
                                         x,
                                         batch_size=None,
                                         verbose=0,
                                         steps=None):
        """
        Reconstructs samples.

        Samples are firstly mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction
        space via the decoder.
        """
        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self,
                                          x,
                                          batch_size=None,
                                          verbose=0,
                                          steps=None):
        """
        Embeds samples into latent space using the encoder.
        """
        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self,
                                        x,
                                        batch_size=None,
                                        verbose=0,
                                        steps=None):
        """
        Maps latent vectors to reconstruction space using the decoder.
        """
        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """
        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the
        decoder. The encoder and decoder use the path plus a respective
        annotation. This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and
        *myae-decoder.h5*.
        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
def test_model_methods():
    """Exercise the Model training/evaluation API on a two-input,
    two-output functional model, including non-zero initial_epoch for
    both fit and fit_generator and a dict-returning custom metric.
    """
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # test fit (nb_epoch was deprecated in Keras 2; use epochs)
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np,
                                                    output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    # Keras 2 fit_generator counts steps (batches), not samples:
    # samples_per_epoch=10 at batch size 4 ~ 3 steps per epoch.
    out = model.fit_generator(gen_data(4),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function (def instead of assigned lambda,
    # per PEP 8)
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {'mse_squared': K.pow(m, 2), 'mse_cubed': K.pow(m, 3)}

    model.compile(optimizer,
                  loss,
                  metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss, per output: loss + 3 metrics
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
class VAE(AE):
    """
    Variational Autoencoder. This consists of an encoder and a decoder
    plus an interpolateable latent space.
    """

    def __init__(self,
                 encoder=None,
                 decoder=None,
                 autoencoder=None,
                 latent_dim=None):
        super(VAE, self).__init__(encoder=None, decoder=None)

        # Encoder and decoder must be provided.
        assert (encoder is not None and decoder is not None)

        # From loading: all three parts were deserialized already.
        if encoder is not None and decoder is not None \
                and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            self.latent_dim = decoder.inputs[0].shape.as_list()[-1]
            return

        # Set the latent dimensions.
        self.latent_dim = latent_dim
        assert self.latent_dim is not None

        # Encoder: append the reparameterization (z_mean, z_log_var, z)
        # on top of the provided encoder body.
        encoder_input = encoder.inputs[0]
        encoder_output = encoder.outputs[0]
        z_mean = layers.Dense(self.latent_dim, name='z_mean')(encoder_output)
        z_log_var = layers.Dense(self.latent_dim,
                                 name='z_log_var')(encoder_output)
        z = layers.Lambda(sampling,
                          output_shape=(self.latent_dim, ),
                          name='z')([z_mean, z_log_var])
        self.encoder = Model(encoder_input, [z_mean, z_log_var, z],
                             name='encoder')

        # Decoder.
        self.decoder = decoder

        # Creating the VAE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs)[2])  # This is z.
        self.autoencoder = Model(inputs, outputs, name="vae")

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the VAE.

        Additionally to the default functionality of *compile*, it adds
        the VAE-loss. This loss takes the provided loss and interprets it
        as a reconstruction-loss.

        The VAE loss is similar to

        >>> vae_loss = mean(r_loss + kl_loss)

        See the literature for details.
        """
        self.loss = loss

        # Inputs.
        inputs = self.encoder.inputs[0]
        inputs_dim = int(np.prod(inputs.shape.as_list()[1:]))

        # Outputs.
        z_mean = self.encoder.outputs[0]
        z_log_var = self.encoder.outputs[1]
        outputs = self.decoder(self.encoder(inputs)[2])  # This is z.

        # Define the loss.
        def vae_loss(loss_inputs, loss_outputs):
            # Flatten all to accept different dimensions.
            loss_inputs = K.flatten(loss_inputs)
            loss_outputs = K.flatten(loss_outputs)

            # Reconstruction loss. Bug fix: the loss function must be
            # *applied* to (inputs, outputs); previously only the function
            # object was retrieved and then scaled, never evaluated.
            if isinstance(self.loss, str):
                r_loss = losses.get(self.loss)(loss_inputs, loss_outputs)
            else:
                r_loss = self.loss(loss_inputs, loss_outputs)
            r_loss *= inputs_dim

            # kl loss.
            kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
            kl_loss = K.sum(kl_loss, axis=-1)
            kl_loss *= -0.5

            # VAE loss.
            vae_loss = K.mean(r_loss + kl_loss)
            vae_loss /= inputs_dim
            return vae_loss

        # Compile model.
        loss = vae_loss
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def predict_embed_samples_into_latent(self,
                                          x,
                                          batch_size=None,
                                          verbose=0,
                                          steps=None):
        # Index 2 selects the sampled z (not z_mean / z_log_var).
        return self.encoder.predict(x, batch_size, verbose, steps)[2]
def test_model_with_external_loss():
    """Test models whose loss comes from regularizers or add_loss rather
    than a compiled target loss, including tensor-fed inputs where fit /
    evaluate / predict run with no external data at all.
    """
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1', kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch: targets are None since loss is None.
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss added via add_loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        # Input fed directly from a TF variable: no data is passed to
        # fit/evaluate/predict, so steps must be given instead of batch_size.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit: batch_size is invalid without external data.
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data: validation_steps requires
        # steps_per_epoch here.
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        # 3 steps over the 10-row variable => 30 rows of 4 features.
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        # One array per output, each 3 steps x 10 rows.
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
class PolicyValueNet():
    """Policy-value network for an n x n board (AlphaZero-style).

    Wraps a Keras model with two heads: a softmax policy over the n*n
    squares and a scalar tanh value. Prediction is serialized with a lock
    so it can be called from multiple threads.
    """

    def __init__(self, n=15, filename=None):
        self.n = n
        self.l2_const = 1e-4  # L2 weight-regularization coefficient
        # Serializes single-board predictions across threads.
        self.pvnet_fn_lock = Lock()
        if filename is not None and os.path.exists(filename):
            self.model = load_model(filename)
        else:
            self.build_model()
        # Build the predict function eagerly so later threaded calls are safe.
        self.model._make_predict_function()
        self.graph = tf.get_default_graph()
        print(self.model.summary())

    def build_model(self):
        """Build the conv/residual tower with policy and value heads."""
        print("build_model")
        x = net = Input((self.n, self.n, 4))
        net = conv_block(net, (3, 3), 128, self.l2_const)
        for i in range(block_sz):
            net = residual_block(net, (3, 3), 128, self.l2_const)

        # Policy head: softmax over all n*n squares.
        policy_net = Conv2D(filters=2,
                            kernel_size=(1, 1),
                            kernel_regularizer=l2(self.l2_const))(net)
        policy_net = BatchNormalization()(policy_net)
        policy_net = Activation('relu')(policy_net)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.n * self.n,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)

        # Value head: scalar evaluation squashed by tanh.
        value_net = Conv2D(filters=1,
                           kernel_size=(1, 1),
                           kernel_regularizer=l2(self.l2_const))(net)
        value_net = BatchNormalization()(value_net)
        value_net = Activation('relu')(value_net)
        value_net = Flatten()(value_net)
        value_net = Dense(256,
                          activation='relu',
                          kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(x, [self.policy_net, self.value_net])
        print(self.model.summary())

    def get_train_fn(self):
        """Compile the model and return a closure that fits one batch set."""
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=Adam(lr=0.002), loss=losses)
        batch_size = config.pvn_config['batch_size']
        epochs = config.pvn_config['epochs']

        def train_fn(board, policy, value):
            with self.graph.as_default():
                history = self.model.fit(
                    np.asarray(board),
                    [np.asarray(policy), np.asarray(value)],
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=0)
            print("train history:", history.history)

        return train_fn

    def get_pvnet_fn(self, single=True):
        """Return a prediction closure.

        single=True -> one board at a time (lock-protected);
        single=False -> a batched variant over a list of boards.
        """

        def pvnet_fn(board):
            nparr_board = board.get_board()
            # Use the lock as a context manager so it is always released,
            # even if predict raises (the previous explicit
            # acquire()/release() pair leaked the lock on exception).
            with self.pvnet_fn_lock:
                with self.graph.as_default():
                    probs, value = self.model.predict(
                        nparr_board.reshape(1, self.n, self.n, 4))
            policy_move = board.get_available().nonzero()[0]
            policy_probs = probs[0][policy_move]
            return (policy_move, policy_probs), value[0][0]

        def pvnet_fn_m(boards):
            nparr_boards = np.asarray(
                [b.get_board().reshape(self.n, self.n, 4) for b in boards])
            with self.graph.as_default():
                probs, value = self.model.predict(nparr_boards)
            policy_moves = [b.get_available().nonzero()[0] for b in boards]
            policy_probs = [p[policy_moves[i]] for i, p in enumerate(probs)]
            return zip(policy_moves, policy_probs, value.ravel())

        return pvnet_fn if single else pvnet_fn_m

    def save_model(self, model_file):
        """Save the model, overwriting any existing file at model_file."""
        if os.path.exists(model_file):
            os.remove(model_file)
        self.model.save(model_file)
def test_model_methods():
    """Exercise the multi-input / multi-output Model training API end to end.

    Builds a 2-input, 2-output functional model and walks through
    train_on_batch / fit / evaluate / predict with list-, dict-, and
    sample_weight-style arguments, metric configurations, non-zero
    initial_epoch, generators, and custom metric functions.
    (Uses legacy Keras 1.x argument names: nb_epoch, samples_per_epoch.)
    """
    # Two 3-dim inputs; 'dense_1' maps to 4 dims, 'dropout' keeps 3 dims.
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    model = Model([a, b], [a_2, b_2])
    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    # 10 samples; output shapes match the two heads (4-dim and 3-dim).
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    # test train_on_batch: inputs/targets as lists, then dicts keyed by
    # input layer name, then dicts for both inputs and outputs.
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})
    # test fit with the same three argument styles
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)
    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    # test validation data (alone, and combined with validation_split —
    # explicit validation_data takes precedence over the split)
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     {'dense_1': output_a_np, 'dropout': output_b_np}))
    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})
    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
    # predict, evaluate (fresh random data)
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    # with sample_weight: None means "unweighted" for the first output
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)
    # test accuracy metric: 'acc' on both outputs gives
    # total loss + 2 per-output losses + 2 accuracies = 5 values
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5
    # this should also work: metric restricted to one output by dict key
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4
    # and this as well: the metric value may be given as a list
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4
    # test starting from non-zero initial epoch: epochs 2..4 of 5 run
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        # Records each epoch index the callback sees.
        trained_epochs.append(epoch)
    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]
    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        # Infinite generator of ([in_a, in_b], [out_a, out_b]) batches.
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])
    out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]
    # test with a custom metric function (and one returning a dict of
    # named metric tensors)
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {
            'mse_squared': K.pow(m, 2),
            'mse_cubed': K.pow(m, 3)
        }
    model.compile(optimizer, loss, metrics=[mse, mse_powers],
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss, per layer: loss + 3 metrics
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len
    # final smoke pass over fit / evaluate / predict with fresh data
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
# Train with early stopping and best-weights checkpointing.
# save_best_only=True keeps only the epoch with the best validation score.
check_point = ModelCheckpoint('model.hdf5', verbose=True, save_best_only=True)
early_stop = EarlyStopping(patience=5, verbose=True)
# NOTE(review): X_train/y_train/X_valid/y_valid come from earlier in the
# file; labels are cast to int here — presumably binary 0/1, verify upstream.
model.fit(X_train, y_train.astype(int),
          validation_data=(X_valid, y_valid.astype(int)),
          epochs=5,
          verbose=True,
          callbacks=[early_stop, check_point])

# In[ ]:

from sklearn.metrics import accuracy_score

# distribution of confidence that will be used as submission
# Restore the best checkpointed weights, then map the predicted
# probability in [0, 1] to a confidence in [-1, 1].
model.load_weights('model.hdf5')
confidence_valid = model.predict(X_valid)[:, 0] * 2 - 1
# Thresholding confidence at 0 recovers the hard class prediction.
print(accuracy_score(confidence_valid > 0, y_valid))
plt.hist(confidence_valid, bins='auto')
plt.title("predicted confidence")
plt.show()

# In[ ]:

# calculation of actual metric that is used to calculate final score
r_valid = r_valid.clip(-1, 1)  # get rid of outliers. Where do they come from??
# Per-sample contribution: confidence * return * universe flag
# (r_valid/u_valid/d_valid are defined earlier in the file — presumably
# returns, universe mask, and day index; confirm against the data prep).
x_t_i = confidence_valid * r_valid * u_valid
data = {'day': d_valid, 'x_t_i': x_t_i}
df = pd.DataFrame(data)
# Daily totals; the score below is their mean scaled by their volatility.
x_t = df.groupby('day').sum().values.flatten()
mean = np.mean(x_t)
std = np.std(x_t)