def test_warnings():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    with pytest.warns(Warning) as w:
        out = model.fit_generator(gen_data(4), steps_per_epoch=10,
                                  use_multiprocessing=True, workers=2)
    warning_raised = any(['Sequence' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when using generator with processes.'

    with pytest.warns(None) as w:
        out = model.fit_generator(RandomSequence(3), steps_per_epoch=4,
                                  use_multiprocessing=True, workers=2)
    assert all(['Sequence' not in str(w_.message) for w_ in w]), \
        'A warning was raised for Sequence.'
def test_model_multiple_calls():
    x1 = Input(shape=(20,))

    y1 = sequential([
        Dense(10),
        Dense(1),
    ])(x1)
    m1 = Model(x1, y1)

    x2 = Input(shape=(25,))
    y2 = sequential([
        Dense(20),
        m1
    ])(x2)
    m2 = Model(x2, y2)
    m2.compile('adam', 'mse')

    x3 = Input(shape=(20,))
    y3 = sequential([
        Dense(25),
        m2
    ])(x3)
    m3 = Model(x3, y3)
    m3.compile('adam', 'mse')
    m3.train_on_batch(np.zeros((32, 20)), np.zeros((32, 1)))
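# The `sequential` helper used above (and in several snippets below) is not a
# Keras built-in. A minimal sketch, assuming it simply composes a list of layers
# (or callable models) into one callable that applies them in order:
def sequential(layers):
    def call(x):
        for layer in layers:
            x = layer(x)
        return x
    return call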
def test_model_custom_target_tensors():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    y = K.placeholder([10, 4], name='y')
    y1 = K.placeholder([10, 3], name='y1')
    y2 = K.placeholder([7, 5], name='y2')
    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    # test list of target tensors
    with pytest.raises(ValueError):
        model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                      sample_weight_mode=None, target_tensors=[y, y1, y2])
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None, target_tensors=[y, y1])

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               {y: np.random.random((10, 4)),
                                y1: np.random.random((10, 3))})

    # test dictionary of target_tensors
    with pytest.raises(ValueError):
        model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                      sample_weight_mode=None,
                      target_tensors={'does_not_exist': y2})
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None,
                  target_tensors={'dense_1': y, 'dropout': y1})
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               {y: np.random.random((10, 4)),
                                y1: np.random.random((10, 3))})

    if K.backend() == 'tensorflow':
        import tensorflow as tf
        # test with custom TF placeholder as target
        pl_target_a = tf.placeholder('float32', shape=(None, 4))
        model.compile(optimizer='rmsprop', loss='mse',
                      target_tensors={'dense_1': pl_target_a})
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])
def test_sparse_input_validation_split():
    test_input = sparse.random(6, 3, density=0.25).tocsr()
    in1 = Input(shape=(3,), sparse=True)
    out1 = Dense(4)(in1)
    test_output = np.random.random((6, 4))
    model = Model(in1, out1)
    model.compile('rmsprop', 'mse')
    model.fit(test_input, test_output,
              epochs=1, batch_size=2, validation_split=0.2)
def m():
    x = Input(shape=(input_size + output_size, nb_chars))
    m_realness = sequential([
        LSTM(14),
        Dense(1, activation='sigmoid'),
    ])(x)
    m = Model([x], [m_realness])
    m.compile(Adam(), 'mse')
    return m
def decoder_dummy(label_sizes, nb_filter=16, data_shape=(1, 64, 64),
                  nb_bits=12, optimizer='adam'):
    input = Input(shape=data_shape)
    x = input
    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())
    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights={k: decoder_loss_weights(k)
                                for k in losses.keys()})
    return model
def test_sparse_placeholder_fit():
    test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)]
    test_outputs = [sparse.random(6, i, density=0.25).tocsr()
                    for i in range(3, 5)]
    in1 = Input(shape=(3,))
    in2 = Input(shape=(3,), sparse=True)
    out1 = Dropout(0.5, name='dropout')(in1)
    out2 = Dense(4, name='dense_1')(in2)
    model = Model([in1, in2], [out1, out2])
    model.predict(test_inputs, batch_size=2)
    model.compile('rmsprop', 'mse')
    model.fit(test_inputs, test_outputs,
              epochs=1, batch_size=2, validation_split=0.5)
    model.evaluate(test_inputs, test_outputs, batch_size=2)
def decoder_baseline(label_sizes, nb_bits=12, data_shape=(1, 64, 64),
                     depth=1, nb_filter=16, optimizer='adam'):
    n = nb_filter
    input = Input(shape=data_shape)
    x = sequential([
        conv2d_block(n, depth=depth, pooling='max'),      # 32x32
        conv2d_block(2 * n, depth=depth, pooling='max'),  # 16x16
        conv2d_block(4 * n, depth=depth, pooling='max'),  # 8x8
        conv2d_block(8 * n, depth=depth, pooling='max'),  # 4x4
    ])(input)
    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())
    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()))
    return model
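# `conv2d_block` is assumed rather than defined in these snippets. A minimal
# sketch matching the comments above (a stack of `depth` conv layers, then
# optional 2x2 max pooling); the exact layers are an assumption, not the
# original implementation:
def conv2d_block(nb_filter, depth=1, pooling=None):
    layers = []
    for _ in range(depth):
        layers.append(Convolution2D(nb_filter, 3, 3, border_mode='same'))
        layers.append(ELU())
    if pooling == 'max':
        layers.append(MaxPooling2D((2, 2)))
    return sequential(layers)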
def test_render_gan_builder_generator_extended():
    labels_shape = (27,)
    z_dim_offset = 50
    builder = RenderGAN(lambda x: tag3d_network_dense(x, nb_units=4),
                        generator_units=4, discriminator_units=4,
                        z_dim_offset=z_dim_offset,
                        labels_shape=labels_shape)
    bs = 19
    z, z_offset, labels = data(builder, bs)
    real = np.zeros((bs,) + builder.data_shape)

    labels_input = Input(shape=labels_shape)
    z = Input(shape=(z_dim_offset,))
    fake = builder.generator_given_z_and_labels([z, labels_input])
    m = Model([z, labels_input], [fake])
    m.compile('adam', 'mse')
    m.train_on_batch([z_offset, labels], real)
def test_model_with_partial_loss():
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dropout': 'mse'}
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])

    # Same without dropout.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dense_2': 'mse'}
    model.compile(optimizer, loss, metrics={'dense_1': 'mae'})

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])
def simple_gan():
    z = Input(batch_shape=simple_gan_z_shape, name='z')
    generator = sequential([
        Dense(4 * simple_gan_nb_z, activation='relu', name='g1'),
        Dense(4 * simple_gan_nb_z, activation='relu', name='g2'),
        Dense(simple_gan_nb_out, name='g_loss'),
    ])(z)

    d_input = Input(batch_shape=simple_gan_real_shape, name='data')
    discriminator = sequential([
        Dense(400, input_dim=2, name='d1'),
        LeakyReLU(0.3),
        Dense(400, name='d2'),
        LeakyReLU(0.3),
        Dense(1, activation='sigmoid', name='d_loss'),
    ])(d_input)

    g = Model(z, generator)
    g.compile(Adam(lr=0.0002, beta_1=0.5), {'g_loss': 'binary_crossentropy'})
    d = Model(d_input, discriminator)
    d.compile(Adam(lr=0.0002, beta_1=0.5), {'d_loss': 'binary_crossentropy'})
    return GAN(g, d)
def test_trainable_weights_count_consistency():
    """Tests the trainable weights consistency check of Model.

    This verifies that a warning is shown if model.trainable is modified
    and the model is summarized/run without a new call to .compile().

    Reproduces issue #8121.
    """
    a = Input(shape=(3,), name='input_a')
    model1 = Model(inputs=a, outputs=Dense(1)(a))

    model1.trainable = False
    b = Input(shape=(3,), name='input_b')
    y = model1(b)
    model2 = Model(inputs=b, outputs=Dense(1)(y))

    model2.compile(optimizer='adam', loss='mse')

    model1.trainable = True

    # Should warn on .summary()
    with pytest.warns(UserWarning) as w:
        model2.summary()
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, \
        'No warning raised when trainable is modified without .compile.'

    # And on .fit()
    with pytest.warns(UserWarning) as w:
        model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1)))
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, \
        'No warning raised when trainable is modified without .compile.'

    # And shouldn't warn if we recompile
    model2.compile(optimizer='adam', loss='mse')
    with pytest.warns(None) as w:
        model2.summary()
    assert len(w) == 0, \
        'Warning raised even when .compile() is called after modifying .trainable'
def train_f_enc(self, steps_list, epoch=50):
    print("training f_enc")
    f_swap0 = Sequential(name='f_swap0')
    f_swap0.add(self.f_enc)
    f_swap0.add(Dense(FIELD_DEPTH))
    f_swap0.add(Activation('softmax', name='softmax_swap0'))

    f_swap1 = Sequential(name='f_swap1')
    f_swap1.add(self.f_enc)
    f_swap1.add(Dense(FIELD_DEPTH))
    f_swap1.add(Activation('softmax', name='softmax_swap1'))

    env_model = Model(self.f_enc.inputs, [f_swap0.output, f_swap1.output],
                      name="env_model")
    env_model.compile(optimizer='adam', loss=['categorical_crossentropy'] * 2)

    for ep in range(epoch):
        losses = []
        for idx, steps_dict in enumerate(steps_list):
            prev = None
            for step in steps_dict['steps']:
                x = self.convert_input(step.input)[:2]
                env_values = step.input.env.reshape((3, -1))
                p1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                p2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                p3 = np.clip(env_values[2].argmax() - 1, 0, 9)
                now = (p1, p2, p3)
                if prev == now:
                    continue
                prev = now
                y0 = to_one_hot_array(min(p1, p2), FIELD_DEPTH)
                y1 = to_one_hot_array(max(p1, p2), FIELD_DEPTH)
                y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                loss = env_model.train_on_batch(x, y)
                losses.append(loss)
        print("ep %3d: loss=%s" % (ep, np.average(losses)))
        if np.average(losses) < 1e-06:
            break
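# `to_one_hot_array` is called above (and in the addition variant below) but not
# defined in these snippets. A minimal sketch, assuming it maps an integer class
# index to a one-hot numpy vector of length `depth`:
def to_one_hot_array(index, depth):
    v = np.zeros(depth, dtype=np.float32)
    v[index] = 1.0
    return v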
def decoder_resnet(label_sizes, nb_filter=16, data_shape=(1, 64, 64),
                   nb_bits=12, resnet_depth=(3, 4, 6, 3), optimizer='adam'):
    def _bn_relu_conv(nb_filter, nb_row=3, nb_col=3, subsample=1):
        return sequential([
            BatchNormalization(mode=0, axis=1),
            ELU(),
            Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col,
                          subsample=(subsample, subsample),
                          init="he_normal", border_mode="same")
        ])

    def f(nb_filter, subsample=1):
        return sequential([
            _bn_relu_conv(nb_filter, subsample=subsample),
            _bn_relu_conv(nb_filter),
        ])

    input = Input(shape=data_shape)
    filters_by_depth = [nb_filter * 2**i for i in range(len(resnet_depth))]
    print("filters_by_depth", filters_by_depth)

    x = _bn_relu_conv(nb_filter, 3, 3, subsample=2)(input)
    for i, (n, d) in enumerate(zip(filters_by_depth, resnet_depth)):
        for di in range(d):
            if di == 0 and i != 0:
                shortcut = _bn_relu_conv(n, 1, 1, subsample=2)
                subsample = 2
            else:
                shortcut = lambda x: x
                subsample = 1
            x = merge([shortcut(x), f(n, subsample)(x)], mode='sum')

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())
    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights={k: decoder_loss_weights(k)
                                for k in losses.keys()})
    return model
def train_f_enc(self, steps_list, epoch=50):
    print("training f_enc")
    f_add0 = Sequential(name='f_add0')
    f_add0.add(self.f_enc)
    f_add0.add(Dense(FIELD_DEPTH))
    f_add0.add(Activation('softmax', name='softmax_add0'))

    f_add1 = Sequential(name='f_add1')
    f_add1.add(self.f_enc)
    f_add1.add(Dense(FIELD_DEPTH))
    f_add1.add(Activation('softmax', name='softmax_add1'))

    env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output],
                      name="env_model")
    env_model.compile(optimizer='adam', loss=['categorical_crossentropy'] * 2)

    for ep in range(epoch):
        losses = []
        for idx, steps_dict in enumerate(steps_list):
            prev = None
            for step in steps_dict['steps']:
                x = self.convert_input(step.input)[:2]
                env_values = step.input.env.reshape((4, -1))
                in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                y_num = in1 + in2 + carry
                now = (in1, in2, carry)
                if prev == now:
                    continue
                prev = now
                y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH)
                y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH)
                y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                loss = env_model.train_on_batch(x, y)
                losses.append(loss)
        print("ep %3d: loss=%s" % (ep, np.average(losses)))
        if np.average(losses) < 1e-06:
            break
def create_squeeze_net():
    inp = Input(shape=getInputDim())
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inp)
    x = MaxPooling2D((3, 3), strides=2)(x)
    x = create_fire_mod(x, 64, 128)
    x = MaxPooling2D((3, 3), strides=2)(x)
    x = create_fire_mod(x, 64, 128)
    x = MaxPooling2D((3, 3), strides=2)(x)
    x = Conv2D(32, (1, 1), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inp, x)
    model.compile(loss='binary_crossentropy', optimizer='adagrad',
                  metrics=['binary_accuracy'])
    return model
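# `create_fire_mod` is not shown. A plausible SqueezeNet-style fire module as a
# sketch; the meaning of the two integer arguments (squeeze and expand filter
# counts) and the use of `concatenate` from keras.layers are assumptions:
def create_fire_mod(x, squeeze_filters, expand_filters):
    s = Conv2D(squeeze_filters, (1, 1), activation='relu')(x)   # squeeze 1x1
    e1 = Conv2D(expand_filters, (1, 1), activation='relu')(s)   # expand 1x1
    e3 = Conv2D(expand_filters, (3, 3), padding='same',
                activation='relu')(s)                           # expand 3x3
    return concatenate([e1, e3], axis=-1)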
def create_model():
    tokens = get_tokens()
    num_tokens = len(tokens) + 1
    input_data = Input(name='speech_data_input', shape=(500, 13))
    layer_dense_1 = Dense(256, activation="relu", use_bias=True,
                          kernel_initializer='he_normal')(input_data)
    layer_dropout_1 = Dropout(0.4)(layer_dense_1)
    layer_dense_2 = Dense(512, activation="relu", use_bias=True,
                          kernel_initializer='he_normal')(layer_dropout_1)
    layer_gru1 = GRU(512, return_sequences=True,
                     kernel_initializer='he_normal', dropout=0.4)(layer_dense_2)
    layer_gru2 = GRU(512, return_sequences=True, go_backwards=True,
                     kernel_initializer='he_normal', dropout=0.4)(layer_gru1)
    layer_dense_3 = Dense(256, activation="relu", use_bias=True,
                          kernel_initializer='he_normal')(layer_gru2)
    layer_dropout_2 = Dropout(0.4)(layer_dense_3)
    layer_dense_4 = Dense(num_tokens, activation="relu", use_bias=True,
                          kernel_initializer='he_normal')(layer_dropout_2)
    output = Activation('softmax', name='Activation0')(layer_dense_4)

    # CTC
    labels = Input(name='speech_labels', shape=[70], dtype='int64')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda, output_shape=(1,), name='ctc')(
        [labels, output, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    adad = Adadelta(lr=0.01, rho=0.95, epsilon=K.epsilon())
    model.compile(loss={'ctc': lambda y_true, output: output}, optimizer=adad)
    print("model compiled successfully!")
    return model
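# `ctc_lambda` is referenced in the Lambda layer above but not defined here. A
# minimal sketch of the usual Keras CTC wrapper, assuming the argument order
# used above ([labels, output, input_length, label_length]):
def ctc_lambda(args):
    labels, y_pred, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)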
def get_model_with_classification_head(self, base_model):
    base_model.summary()
    x = base_model.output
    x = GlobalAveragePooling2D(name='flatten_1')(x)
    # x = Flatten(name='flatten_1')(x)
    x = Dense(1024, activation='relu', name='fc6_1')(x)
    x = Dense(1024, activation='relu', name='fc7_2')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(self.nb_classes, activation='softmax', name='fc8_3')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer="rmsprop", loss=LOSS, metrics=['accuracy'])
    model.summary()
    return model
class DefogGAN():
    def __init__(self):
        self.project_dir = os.path.dirname(__file__)
        self.data_path = os.path.join(self.project_dir, "./data/starCraft")
        self.result_work_path = os.path.join(self.project_dir, './result/DefogGAN')
        self.making_ing_path = os.path.join(self.project_dir, './result/DefogGAN')
        self.createFolder(self.result_work_path)

        is_multi_gpu = True
        self.is_validation_check = True
        self.save_weights = True
        # self.num_of_replay = 3500
        self.n_epochs = 1000
        self.batch_size = 512
        self.save_interval = 1000
        self.num_of_making_img = 5

        optimizer = Adam(0.0001, beta_1=0.5, beta_2=0.9)

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        self.generator = self.build_generator()

        fog_img = Input(shape=(82, 32, 32))
        gen_missing = self.generator(fog_img)

        self.discriminator.trainable = False
        valid = self.discriminator(gen_missing)

        self.combined = Model(fog_img, [gen_missing, valid])
        if is_multi_gpu:
            self.combined = multi_gpu_model(self.combined,
                                            gpus=self.get_count_of_gpu())

        weight = K.variable(np.array([0.75, 0.1875, 0.0468, 0.012, 0.003, 0.0007]))
        self.combined.compile(loss=[self.weighted_pyramidal_loss(weights=weight),
                                    'binary_crossentropy'],
                              loss_weights=[0.999, 0.001],
                              optimizer=optimizer)

    def accumulate_resolution(self, x, threshold=0.5):
        new = np.zeros((x.shape[0], 32, 32))
        new_enemy = np.zeros((x.shape[0], 32, 32))
        new_self = np.zeros((x.shape[0], 32, 32))
        for n in range(x.shape[0]):
            for w in range(x.shape[2]):
                for h in range(x.shape[3]):
                    is_enemy = False
                    is_self = False
                    for u in range(x.shape[1]):
                        if u < 32:  # enemy
                            if not int(x[n][u][w][h] + threshold) == 0:
                                is_enemy = True
                                new_enemy[n][w][h] -= int(x[n][u][w][h] + threshold)
                        else:  # self
                            if not int(x[n][u][w][h] + threshold) == 0:
                                is_self = True
                                new_self[n][w][h] += int(x[n][u][w][h] + threshold)
                    if is_enemy and is_self:
                        new[n][w][h] = 0
                    elif not is_enemy and not is_self:
                        new[n][w][h] = -30
                    elif is_enemy and not is_self:
                        new[n][w][h] = new_enemy[n][w][h]
                    elif not is_enemy and is_self:
                        new[n][w][h] = new_self[n][w][h]
        return new

    def make_pickle(self):
        for i in ['train', 'validation', 'test']:
            for j in ['x', 'y']:
                f = open('{}/{}_{}_data_set.csv'.format(self.data_path, j, i),
                         'r', encoding='utf-8')
                read_csv_file = csv.reader(f)
                is_first = True
                for line in read_csv_file:
                    if is_first:
                        tensor = np.zeros((int(line[0]), int(line[1]),
                                           int(line[2]), int(line[3])))
                        is_first = False
                    else:
                        tensor[int(line[0])][int(line[1])][int(line[2])][int(line[3])] = float(line[4])
                f.close()
                with open('{}/{}_{}_dataset.pkl'.format(self.data_path, j, i), 'wb') as f:
                    pickle.dump(tensor, f, pickle.HIGHEST_PROTOCOL)

    def get_pickle_data(self):
        with open('{}/x_train_dataset.pkl'.format(self.data_path), 'rb') as f:
            x_train = pickle.load(f)
        with open('{}/x_validation_dataset.pkl'.format(self.data_path), 'rb') as f:
            x_validation = pickle.load(f)
        with open('{}/x_test_dataset.pkl'.format(self.data_path), 'rb') as f:
            x_test = pickle.load(f)
        with open('{}/y_train_dataset.pkl'.format(self.data_path), 'rb') as f:
            y_train = pickle.load(f)
        with open('{}/y_validation_dataset.pkl'.format(self.data_path), 'rb') as f:
            y_validation = pickle.load(f)
        with open('{}/y_test_dataset.pkl'.format(self.data_path), 'rb') as f:
            y_test = pickle.load(f)
        return x_train, x_validation, x_test, y_train, y_validation, y_test

    def get_sample_data(self):
        f = open('{}/sample_data.csv'.format(self.data_path), 'r', encoding='utf-8')
        read_csv_file = csv.reader(f)
        tensor_fog = np.zeros((self.num_of_replay, 82, 32, 32))
        tensor_real = np.zeros((self.num_of_replay, 66, 32, 32))
        for line in read_csv_file:
            if len(line) == 1:
                num_of_replay = int(line[0])
            if len(line) == 5:
                tensor_fog[num_of_replay][int(line[0])][int(line[1])][int(line[2])] = int(line[3])
                if int(line[0]) < 66:
                    tensor_real[num_of_replay][int(line[0])][int(line[1])][int(line[2])] = int(line[4])
        f.close()

        # shuffle tensor index
        train_set_index = random.sample(range(0, self.num_of_replay),
                                        int(self.num_of_replay * self.train_rate))
        temp_set = [i for i in range(0, self.num_of_replay)
                    if i not in train_set_index]
        validation_set_index = temp_set[:int(self.num_of_replay * self.validation_rate)]
        test_set_index = temp_set[int(self.num_of_replay * self.test_rate):]
        return (tensor_fog[train_set_index], tensor_fog[validation_set_index],
                tensor_fog[test_set_index], tensor_real[train_set_index],
                tensor_real[validation_set_index], tensor_real[test_set_index])

    def check_pickle(self):
        for i in ['train', 'validation', 'test']:
            for j in ['x', 'y']:
                fname = '{}/{}_{}_dataset.pkl'.format(self.data_path, j, i)
                if not os.path.isfile(fname):
                    return False
        return True

    def train_defogGAN(self):
        min_loss = 99999999999
        valid = np.ones((self.batch_size, 1))
        fake = np.zeros((self.batch_size, 1))

        # x_train, x_validation, x_test, y_train, y_validation, y_test = self.get_sample_data()
        is_pickle = self.check_pickle()
        if not is_pickle:
            self.make_pickle()
        x_train, x_validation, x_test, y_train, y_validation, y_test = self.get_pickle_data()

        best_img = None
        last_epoch_img = None
        for epoch in range(self.n_epochs + 1):
            n_batches = int(x_train.shape[0] / self.batch_size)
            for i in range(math.ceil(n_batches)):
                start_batch = i * self.batch_size
                end_batch = (1 + i) * self.batch_size
                end_batch = x_train.shape[0] if end_batch > x_train.shape[0] \
                    else (1 + i) * self.batch_size

                images_train_x = x_train[start_batch:end_batch, :, :, :]
                images_train_y = y_train[start_batch:end_batch, :, :, :]

                gen_missing = self.generator.predict(images_train_x)
                d_loss_real = self.discriminator.train_on_batch(images_train_y, valid)
                d_loss_fake = self.discriminator.train_on_batch(gen_missing, fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                # Train Generator
                g_loss = self.combined.train_on_batch(images_train_x,
                                                      [images_train_y, valid])

            if self.is_validation_check:
                validation_recon_img = self.generator.predict(x_validation)
                validation_loss = self.get_MSE_Value(validation_recon_img, y_validation)
            else:
                validation_loss = 0

            print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f, validation_mse : %f]"
                  % (epoch, d_loss[0], 100 * d_loss[1], g_loss[0], g_loss[1],
                     validation_loss))

            if epoch % self.save_interval == 0:
                last_epoch_img = self.generator.predict(x_test)
                if self.save_weights:
                    self.createFolder('{}/interval_model/'.format(self.result_work_path))
                    self.generator.save_weights(
                        '{}/interval_model/model_weight_{}.h5'.format(
                            self.result_work_path, epoch))

            if min_loss > validation_loss and self.is_validation_check:
                min_loss = validation_loss
                best_img = self.generator.predict(x_test)
                if self.save_weights:
                    if os.path.exists('{}/best_model'.format(self.result_work_path)):
                        shutil.rmtree('{}/best_model'.format(self.result_work_path))
                    self.createFolder('{}/best_model/'.format(self.result_work_path))
                    self.generator.save_weights(
                        '{}/best_model/model_weight_{}.h5'.format(
                            self.result_work_path, epoch))

        self.make_test(x_test, last_epoch_img, best_img, y_test)

    def make_test(self, x_test, last_epoch_img, best_img, y_test):
        n = self.num_of_making_img
        x_test = x_test[:n]
        x_test = x_test[:, :66, :, :]
        last_epoch_img = last_epoch_img[:n]
        best_img = best_img[:n]
        y_test = y_test[:n]

        # model, index of replay, x, y
        result = np.zeros((0, n, 32, 32))
        result = np.concatenate(
            (result, self.accumulate_resolution(x_test).reshape(1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(last_epoch_img).reshape(1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(best_img).reshape(1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(y_test).reshape(1, n, 32, 32)),
            axis=0)
        print(result.shape)
        self.make_img(self.making_ing_path, result)
        print('Successfully made image')

    def make_img(self, path, map):
        print(map.shape)  # (7, 30, 32, 32)
        plt_threshold = -20
        model_names = ['fog_exposed', 'last_epoch', 'best_epoch', 'Ground_truth']
        fig, axn = plt.subplots(map.shape[1], map.shape[0],
                                sharex=True, sharey=True,
                                figsize=(map.shape[0] * 1.3, map.shape[1] * 1.3))
        cbar_ax = fig.add_axes([.91, .3, .03, .4])
        fig.suptitle("GAN's compare [enemy(red): positive num, "
                     "allies(green): negative num, both(yellow): 0]",
                     fontsize=16)
        for i, ax in enumerate(axn.flat):
            model_index = i % map.shape[0]
            unit_index = int(i / map.shape[0])
            if i < map.shape[0]:
                ax.set_title(model_names[model_index], fontsize=10)
            matrix = map[model_index][unit_index]
            if plt_threshold == 0:
                sns.heatmap(matrix, ax=ax, cbar=i == 0, annot=True, fmt='.1f',
                            cmap=plt.cm.YlGnBu, cbar_ax=None if i else cbar_ax)
            else:
                cbar_kws = {'ticks': [-3, -2, -1, 0, 1, 2, 3], 'drawedges': True}
                sns.heatmap(matrix, mask=(matrix < plt_threshold), ax=ax,
                            cbar=i == 0, annot=False, square=True, fmt='.1f',
                            xticklabels=False, yticklabels=False,
                            vmin=-3.5, vmax=3.5, cbar_kws=cbar_kws,
                            cmap=plt.get_cmap('RdYlGn', 7),
                            cbar_ax=None if i else cbar_ax)

        count = 0
        for ax in axn.flat:
            model_index = count % map.shape[0]
            unit_index = int(count / map.shape[0])
            if model_index == 0:
                ax.set(ylabel='replay {}'.format(unit_index))
            ax.axhline(y=0, color='k', linewidth=1)
            ax.axhline(y=32, color='k', linewidth=2)
            ax.axvline(x=0, color='k', linewidth=1)
            ax.axvline(x=32, color='k', linewidth=2)
            count += 1

        plt.subplots_adjust(hspace=0.03, wspace=0.03)
        plt.savefig(path)
        plt.close('all')

    def build_generator(self):
        input_channel = 82
        output_channel = 66
        input_shape = (input_channel, 32, 32)
        img_input = Input(shape=input_shape, name='input')
        depth = input_channel

        # Encoder: conv blocks, each followed by default-configured batch norm.
        c1 = Conv2D(depth * 2, (3, 3), strides=(1, 1), activation='relu',
                    input_shape=input_shape, padding='same',
                    data_format='channels_first')(img_input)
        b1 = BatchNormalization(axis=-1)(c1)
        act1 = Activation('relu')(b1)

        c2 = Conv2D(depth * 2, (3, 3), strides=(2, 2), activation='relu',
                    padding='same', data_format='channels_first')(act1)
        b2 = BatchNormalization(axis=-1)(c2)
        act2 = Activation('relu')(b2)

        c3 = Conv2D(depth * 4, (3, 3), strides=(1, 1), activation='relu',
                    padding='same', data_format='channels_first')(act2)
        b3 = BatchNormalization(axis=-1)(c3)
        act3 = Activation('relu')(b3)

        c4 = Conv2D(depth * 4, (3, 3), strides=(2, 2), activation='relu',
                    padding='same', data_format='channels_first')(act3)
        b4 = BatchNormalization(axis=-1)(c4)
        act4 = Activation('relu')(b4)

        c5 = Conv2D(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                    padding='same', data_format='channels_first')(act4)
        b5 = BatchNormalization(axis=-1)(c5)
        act5 = Activation('relu')(b5)

        c6 = Conv2D(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                    padding='same', data_format='channels_first')(act5)
        b6 = BatchNormalization(axis=-1)(c6)
        act6 = Activation('relu')(b6)

        c7 = Conv2D(depth * 8, (3, 3), strides=(2, 2), activation='relu',
                    padding='same', data_format='channels_first')(act6)
        b7 = BatchNormalization(axis=-1)(c7)
        act7 = Activation('relu')(b7)

        # Decoder: transposed convs with additive skip connections to the encoder.
        ct1 = Conv2DTranspose(depth * 8, (3, 3), strides=(2, 2), activation='relu',
                              padding='same', data_format='channels_first')(act7)
        act8 = Activation('relu')(ct1)
        act8_output = Lambda(lambda x: x, name='act8_output')(act8)
        act8_output = keras.layers.Add()([act6, act8_output])

        ct2 = Conv2DTranspose(depth * 8, (3, 3), strides=(1, 1), activation='relu',
                              padding='same', data_format='channels_first')(act8_output)
        act9 = Activation('relu')(ct2)
        act9_output = Lambda(lambda x: x, name='act9_output')(act9)
        act9_output = keras.layers.Add()([act5, act9_output])

        ct3 = Conv2DTranspose(depth * 4, (3, 3), strides=(1, 1), activation='relu',
                              padding='same', data_format='channels_first')(act9_output)
        act10 = Activation('relu')(ct3)
        act10_output = Lambda(lambda x: x, name='act10_output')(act10)
        act10_output = keras.layers.Add()([act4, act10_output])

        ct4 = Conv2DTranspose(depth * 4, (3, 3), strides=(2, 2), activation='relu',
                              padding='same', data_format='channels_first')(act10_output)
        act11 = Activation('relu')(ct4)
        act11_output = Lambda(lambda x: x, name='act11_output')(act11)
        act11_output = keras.layers.Add()([act3, act11_output])

        ct5 = Conv2DTranspose(depth * 2, (3, 3), strides=(1, 1), activation='relu',
                              padding='same', data_format='channels_first')(act11_output)
        act12 = Activation('relu')(ct5)
        act12_output = Lambda(lambda x: x, name='act12_output')(act12)
        act12_output = keras.layers.Add()([act2, act12_output])

        ct6 = Conv2DTranspose(depth * 2, (3, 3), strides=(2, 2), activation='relu',
                              padding='same', data_format='channels_first')(act12_output)
        act13 = Activation('relu')(ct6)
        act13_output = Lambda(lambda x: x, name='act13_output')(act13)
        act13_output = keras.layers.Add()([act1, act13_output])

        ct7 = Conv2DTranspose(depth, (3, 3), strides=(1, 1), activation='relu',
                              padding='same', data_format='channels_first')(act13_output)
        act14 = Activation('relu')(ct7)
        act14_output = Lambda(lambda x: x, name='output')(act14)
        act14_output = keras.layers.Add()([img_input, act14_output])

        ct8 = Conv2DTranspose(output_channel, (3, 3), strides=(1, 1),
                              activation='relu', padding='same',
                              data_format='channels_first')(act14_output)
        act15 = Activation('relu')(ct8)
        img_output = act15

        model = Model(inputs=[img_input], outputs=[img_output])
        model.summary()
        return model

    def build_discriminator(self):
        D = Sequential()
        discriminator_input_channel = 66
        depth = discriminator_input_channel
        dropout = 0.4
        input_shape = (discriminator_input_channel, 32, 32)

        D.add(Conv2D(depth * 1, (3, 3), strides=(2, 2), input_shape=input_shape,
                     padding='same', data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))

        D.add(Conv2D(depth * 2, (3, 3), strides=(2, 2), padding='same',
                     data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))

        D.add(Conv2D(depth * 4, (3, 3), strides=(2, 2), padding='same',
                     data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))

        D.add(Flatten())
        D.add(Dense(1))
        D.add(Activation('sigmoid'))
        D.summary()
        return D

    def weighted_pyramidal_loss(self, weights):
        def pyramidal_loss(y_true, y_pred):
            # multi-scale MSE: compare average-pooled versions of truth and prediction
            yt_2 = keras.layers.AveragePooling2D((2, 2))(y_true) * 2
            yt_4 = keras.layers.AveragePooling2D((4, 4))(y_true) * 4
            yt_8 = keras.layers.AveragePooling2D((8, 8))(y_true) * 8
            yt_16 = keras.layers.AveragePooling2D((16, 16))(y_true) * 16
            yt_32 = keras.layers.AveragePooling2D((32, 32))(y_true) * 32

            yp_2 = keras.layers.AveragePooling2D((2, 2))(y_pred) * 2
            yp_4 = keras.layers.AveragePooling2D((4, 4))(y_pred) * 4
            yp_8 = keras.layers.AveragePooling2D((8, 8))(y_pred) * 8
            yp_16 = keras.layers.AveragePooling2D((16, 16))(y_pred) * 16
            yp_32 = keras.layers.AveragePooling2D((32, 32))(y_pred) * 32

            loss_0 = keras.losses.mean_squared_error(y_true, y_pred)
            loss_2 = keras.losses.mean_squared_error(yt_2, yp_2)
            loss_4 = keras.losses.mean_squared_error(yt_4, yp_4)
            loss_8 = keras.losses.mean_squared_error(yt_8, yp_8)
            loss_16 = keras.losses.mean_squared_error(yt_16, yp_16)
            loss_32 = keras.losses.mean_squared_error(yt_32, yp_32)

            loss_0 = tf.reduce_mean(loss_0, axis=[1, 2])
            loss_2 = tf.reduce_mean(loss_2, axis=[1, 2])
            loss_4 = tf.reduce_mean(loss_4, axis=[1, 2])
            loss_8 = tf.reduce_mean(loss_8, axis=[1, 2])
            loss_16 = tf.reduce_mean(loss_16, axis=[1, 2])
            loss_32 = tf.reduce_mean(loss_32, axis=[1, 2])

            loss = weights[0] * loss_0 + \
                weights[1] * loss_2 + \
                weights[2] * loss_4 + \
                weights[3] * loss_8 + \
                weights[4] * loss_16 + \
                weights[5] * loss_32
            return loss
        return pyramidal_loss

    def get_count_of_gpu(self):
        device_list = device_lib.list_local_devices()
        gpu_count = 0
        for d in device_list:
            if d.device_type == 'GPU':
                gpu_count += 1
        return int(gpu_count)

    def createFolder(self, directory):
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)
        except OSError:
            print('Error: Creating directory. ' + directory)

    def get_MSE_Value(self, x, y):
        MSE_GAN = 0
        MSE_DIV = x.shape[0] * x.shape[1]
        for n in range(x.shape[0]):
            for c in range(x.shape[1]):
                MSE_GAN += mean_squared_error(x[n][c], y[n][c])
        MSE = MSE_GAN / MSE_DIV
        return MSE
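# A minimal usage sketch for the class above, assuming the pickled data sets
# consumed by get_pickle_data() are already present under data_path and that at
# least one GPU is available for multi_gpu_model:
if __name__ == '__main__':
    defog_gan = DefogGAN()
    defog_gan.train_defogGAN()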
class PolicyValueNet():
    """Policy-value network."""

    # def __init__(self, board_width, board_height, model_file=None):
    def __init__(self, policy_infer_size, model_file=None):
        # self.board_width = board_width
        # self.board_height = board_height
        self.policy_infer_size = policy_infer_size
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        self.load_model_done = True
        if model_file and os.path.exists(model_file):
            self.load_model_done = False
            self.load_model(model_file)

    def load_model(self, model_file):
        """Reload the model (only used to load a new model during selfplay)."""
        try:
            # net_params = pickle.load(open(model_file, 'rb'), encoding='bytes')
            net_params = utils.pickle_load(model_file)
            self.model.set_weights(net_params)
            self.load_model_done = True
        except:
            logging.error("load_model fail! {}\t{}".format(model_file,
                                                           utils.get_trace()))
            self.load_model_done = False
        if os.path.exists(model_file) and self.load_model_done is False:
            # the file exists but loading failed: stop running
            exit(-1)
        return self.load_model_done

    def create_policy_value_net(self):
        """Create the policy-value network."""
        # input layer
        # in_x = network = Input((4, self.board_width, self.board_height))
        in_x = network = Input((4, 1, self.policy_infer_size))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)

        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        # infer action_probs
        # self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        self.policy_net = Dense(self.policy_infer_size, activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        # infer one current state score
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        # build the model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return move policy and value probabilities
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """Use the model to predict value probabilities for every action id on the board."""
        # all movable action_ids on the board
        legal_positions = board.availables
        # action sequence from the current player's perspective
        current_actions = board.current_actions()
        # predict move policy and value probabilities with the model
        # act_probs, value = self.policy_value(current_actions.reshape(-1, 4, self.board_width, self.board_height))
        act_probs, value = self.policy_value(
            current_actions.reshape(-1, 4, 1, self.policy_infer_size))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, probability)] and the value of the current player's
        # follow-up move
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """Initialize the loss.

        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value loss + policy loss + penalty term
        """
        # define the optimizer and loss functions
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """Report intermediate results during training."""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input), verbose=0)
            # predict
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """Get the model parameters."""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """Save the model parameters to a file."""
        net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=4)
        utils.pickle_dump(net_params, model_file)
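# A minimal usage sketch, assuming encoded states shaped
# (-1, 4, 1, policy_infer_size) as expected by create_policy_value_net():
net = PolicyValueNet(policy_infer_size=64)
states = np.zeros((1, 4, 1, 64), dtype=np.float32)
act_probs, value = net.policy_value(states)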
# Compile the GAN core network
gan_core = Model(inputs=gan_x,
                 outputs=[gan_output,
                          features_full, features_medium, features_low,
                          pred_full, pred_medium, pred_low])
gan_core.name = "gan_core"
optimizer = Adam(learning_rate, 0.5, decay=decay_rate)
loss_gan = ['mae', 'mae', 'mae', 'mae', 'mse', 'mse', 'mse']
loss_weights_gan = [1, 3.33, 3.33, 3.33, 0.33, 0.33, 0.33]
# gan_core = multi_gpu_model(gan_core_org)
gan_core.compile(optimizer=optimizer,
                 loss_weights=loss_weights_gan,
                 loss=loss_gan)

# --------------------------------
# Compile the discriminators
# --------------------------------
discriminator_full.model.trainable = True
discriminator_medium.model.trainable = True
discriminator_low.model.trainable = True


def zero_loss(y_true, y_pred):
    return K.zeros_like(y_true)
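# `zero_loss` always returns an all-zero tensor, so any output compiled with it
# contributes nothing to the gradients. A hedged, hypothetical example of wiring
# it up (not from the original code):
# feature_extractor.compile(optimizer=optimizer, loss=zero_loss)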
def test_pandas_dataframe():
    input_a = Input(shape=(3,), name='input_a')
    input_b = Input(shape=(3,), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    y = Dense(3, name='dense_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))

    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({'input_a': input_a_df, 'input_b': input_b_df},
                [output_a_df, output_b_df])

    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df},
                     [output_a_df, output_b_df])

    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                           [output_a_df, output_b_df])

    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                          [output_a_df, output_b_df])
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data))
        lstm = LSTM(output_dim=nb_hidden_units,
                    dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha),
                    b_regularizer=l2(l2_norm_alpha),
                    activation='tanh')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data),
                                merge_mode='concat')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions,
                           name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'  # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)  # metrics=['mse']
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=100,
                                       verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_split=0.2, shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch,
                           validation_data=(X_val, y_val), shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w),
                                                 np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
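# A minimal usage sketch with random data, assuming the module-level globals the
# class relies on (nb_hidden_units, dropout, l2_norm_alpha, plot) are defined
# elsewhere:
m = FinancialNewsAnalysisModel(nb_time_step=10, dim_data=50)
m.compile_model()
X = np.random.random((32, 10, 50))
y = np.random.random((32, 1))
m.fit_model(X, y, epoch=1)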
class RenderGAN:
    def __init__(self, tag3d_network,
                 z_dim_offset=50, z_dim_labels=50, z_dim_bits=12,
                 discriminator_units=32, discriminator_depth=2,
                 generator_units=32, generator_depth=2,
                 data_shape=(1, 64, 64),
                 labels_shape=(24,),
                 nb_bits=12,
                 generator_optimizer=Adam(lr=0.0004, beta_1=0.5),
                 discriminator_optimizer=Adam(lr=0.0004, beta_1=0.5)):
        self.tag3d_network = tag3d_network
        self.z_dim_offset = z_dim_offset
        self.z_dim_labels = z_dim_labels
        self.z_dim_bits = z_dim_bits
        self.z_dim = z_dim_offset + z_dim_labels + z_dim_bits
        self.discriminator_units = discriminator_units
        self.discriminator_depth = discriminator_depth
        self.generator_units = generator_units
        self.generator_depth = generator_depth
        self.preprocess_units = self.generator_units // 2
        self.data_shape = data_shape
        self.labels_shape = labels_shape
        self.nb_bits = nb_bits
        self.generator_optimizer = generator_optimizer
        self.discriminator_optimizer = discriminator_optimizer
        self._build()

    def _build_discriminator(self):
        x = Input(shape=self.data_shape, name='data')
        d = render_gan_discriminator(x, n=self.discriminator_units,
                                     conv_repeat=self.discriminator_depth,
                                     dense=[512])
        self.discriminator = Model([x], [d])

    def _build_generator_given_z_offset_and_labels(self):
        labels = Input(shape=self.labels_shape, name='input_labels')
        z_offset = Input(shape=(self.z_dim_offset,), name='input_z_offset')

        outputs = OrderedDict()
        labels_without_bits = Subtensor(self.nb_bits, self.labels_shape[0],
                                        axis=1)(labels)

        raw_tag3d, tag3d_depth_map = self.tag3d_network(labels)
        tag3d = ScaleUnitIntervalTo(-1, 1)(raw_tag3d)
        outputs['tag3d'] = tag3d
        outputs['tag3d_depth_map'] = tag3d_depth_map

        segmentation = Segmentation(threshold=-0.08, smooth_threshold=0.2,
                                    sigma=1.5, name='segmentation')
        tag3d_downsampled = PyramidReduce()(tag3d)
        tag3d_segmented = segmentation(raw_tag3d)
        outputs['tag3d_segmented'] = tag3d_segmented
        tag3d_segmented_blur = GaussianBlur(sigma=0.66)(tag3d_segmented)

        out_offset_front = get_offset_front(
            [z_offset, ZeroGradient()(labels_without_bits)],
            self.generator_units)

        light_depth_map = get_preprocess(tag3d_depth_map, self.preprocess_units,
                                         nb_conv_layers=2)
        light_outs = get_lighting_generator([out_offset_front, light_depth_map],
                                            self.generator_units)

        offset_depth_map = get_preprocess(tag3d_depth_map, self.preprocess_units,
                                          nb_conv_layers=2)
        offset_middle_light = get_preprocess(concat(light_outs),
                                             self.preprocess_units,
                                             resize=['down', 'down'])
        offset_middle_tag3d = get_preprocess(tag3d_downsampled,
                                             self.preprocess_units // 2,
                                             resize=['down', ''],
                                             nb_conv_layers=2)
        out_offset_middle = get_offset_middle(
            [out_offset_front, offset_depth_map,
             offset_middle_light, offset_middle_tag3d],
            self.generator_units)

        offset_back_tag3d_downsampled = get_preprocess(
            tag3d_downsampled, self.preprocess_units // 2, nb_conv_layers=2)
        offset_back_feature_map, out_offset_back = get_offset_back(
            [out_offset_middle, offset_back_tag3d_downsampled],
            self.generator_units)

        blur_factor = get_blur_factor(out_offset_middle, min=0.25, max=1.)
        outputs['blur_factor'] = blur_factor

        tag3d_blur = BlendingBlur(sigma=2.0)([tag3d, blur_factor])
        outputs['tag3d_blur'] = tag3d_blur
        outputs['light_black'] = light_outs[0]
        outputs['light_white'] = light_outs[1]
        outputs['light_shift'] = light_outs[2]

        tag3d_lighten = AddLighting(scale_factor=0.90, shift_factor=0.90)(
            [tag3d_blur] + light_outs)
        tag3d_lighten = InBounds(clip=True, weight=15)(tag3d_lighten)
        outputs['tag3d_lighten'] = tag3d_lighten

        outputs['background_offset'] = out_offset_back
        blending = Background(name='blending')(
            [out_offset_back, tag3d_lighten, tag3d_segmented_blur])
        outputs['fake_without_noise'] = blending

        details = get_details(
            [blending, tag3d_segmented_blur, tag3d, out_offset_back,
             offset_back_feature_map] + light_outs,
            self.generator_units)
        outputs['details_offset'] = details
        details_high_pass = HighPass(3.5, nb_steps=3)(details)
        outputs['details_high_pass'] = details_high_pass
        fake = InBounds(-2.0, 2.0)(
            merge([details_high_pass, blending], mode='sum'))
        outputs['fake'] = fake

        for name in outputs.keys():
            outputs[name] = name_tensor(outputs[name], name)

        self.generator_given_z_and_labels = Model([z_offset, labels], [fake])
        self.sample_generator_given_z_and_labels_output_names = list(outputs.keys())
        self.sample_generator_given_z_and_labels = Model([z_offset, labels],
                                                         list(outputs.values()))

    @property
    def pos_z_bits(self):
        return (0, self.z_dim_bits)

    @property
    def pos_z_labels(self):
        return (self.z_dim_bits, self.z_dim_bits + self.z_dim_labels)

    @property
    def pos_z_offset(self):
        return (self.z_dim_labels + self.z_dim_bits,
                self.z_dim_labels + self.z_dim_bits + self.z_dim_offset)

    def _build_generator_given_z(self):
        z = Input(shape=(self.z_dim,), name='z')

        z_bits = Subtensor(*self.pos_z_bits, axis=1)(z)
        z_labels = Subtensor(*self.pos_z_labels, axis=1)(z)
        z_offset = Subtensor(*self.pos_z_offset, axis=1)(z)
        bits = ThresholdBits()(z_bits)
        nb_labels_without_bits = self.labels_shape[0] - self.nb_bits
        generated_labels = get_label_generator(
            z_labels, self.generator_units,
            nb_output_units=nb_labels_without_bits)

        labels_normed = NormSinCosAngle(0)(generated_labels)
        labels = concat([bits, labels_normed], name='labels')
        fake = self.generator_given_z_and_labels([z_offset, labels])
        self.generator_given_z = Model([z], [fake])

        sample_tensors = self.sample_generator_given_z_and_labels(
            [z_offset, labels])
        sample_tensors = [
            name_tensor(t, n)
            for t, n in zip(sample_tensors,
                            self.sample_generator_given_z_and_labels.output_names)]
        self.sample_generator_given_z_output_names = \
            ['labels'] + self.sample_generator_given_z_and_labels_output_names
        self.sample_generator_given_z = Model([z], [labels] + sample_tensors)

    def _build_gan(self):
        self.generator_given_z.compile(self.generator_optimizer,
                                       'binary_crossentropy')
        self.discriminator.compile(self.discriminator_optimizer,
                                   'binary_crossentropy')
        self.gan = GAN(self.generator_given_z, self.discriminator)

    def _build(self):
        self._build_discriminator()
        self._build_generator_given_z_offset_and_labels()
        self._build_generator_given_z()
        self._build_gan()

    def save_weights(self, fname_format, overwrite=False, attrs={}):
        def save(name):
            model = self.__dict__[name]
            fname = fname_format.format(name=name)
            os.makedirs(os.path.dirname(fname), exist_ok=True)
            save_model(model, fname, overwrite=overwrite, attrs=attrs)

        save("sample_generator_given_z")
        save("sample_generator_given_z_and_labels")
        save("generator_given_z_and_labels")
        save("generator_given_z")
        save("discriminator")
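# A minimal construction sketch for RenderGAN, mirroring the builder test
# earlier in this file (assumption: tag3d_network_dense is importable there):
builder = RenderGAN(lambda x: tag3d_network_dense(x, nb_units=4),
                    generator_units=4, discriminator_units=4,
                    z_dim_offset=50, labels_shape=(27,))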
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
def decoder_stochastic_wrn(label_sizes, nb_bits=12, data_shape=(1, 64, 64), activation=lambda: Activation('relu'), normalization=lambda: BatchNormalization(axis=1, mode=0), weight_init='he_normal', weight_decay=0.0001, dropout_probability=0., wrn_depth=58, wrn_k=2, death_rate=0.5, optimizer='adam'): def norm_act_block(): def f(inputs): x = normalization()(inputs) x = activation()(x) return x return f def conv2(nb_filter, nb_row, nb_col, subsample=(1, 1), bias=True): return Convolution2D(nb_filter, nb_row, nb_col, init=weight_init, border_mode='same', subsample=subsample, bias=bias, W_regularizer=l2(weight_decay)) def dropout(p=None): if p is None: p = dropout_probability def f(inputs): if p > 0.: inputs = Dropout(p)(inputs) return inputs return f def equal_gate_shape(input_shapes): assert(input_shapes[0] == input_shapes[1]) return input_shapes[1] def residual_block(nb_filter, stochastic=False, stochastic_layers=None): def f(inputs): x = norm_act_block()(inputs) x = conv2(nb_filter, 3, 3)(x) x = dropout()(x) x = norm_act_block()(x) x = conv2(nb_filter, 3, 3)(x) if inputs._keras_shape != x._keras_shape: inputs = conv2(nb_filter, 1, 1, bias=False)(inputs) if not stochastic: return merge((inputs, x), mode='sum') scale = ScaleInTestPhase(death_rate) x = scale(x) out = merge([inputs, x], mode="sum", output_shape=x._keras_shape[1:]) rs = RandomSwitch(death_rate) stochastic_layers.append((rs.death_rate, scale.death_rate)) return rs([out, inputs]) return f def residual_reduction_block(nb_filter): def f(inputs): x = norm_act_block()(inputs) x = conv2(nb_filter, 3, 3, subsample=(2, 2))(x) x = dropout()(x) x = norm_act_block()(x) x = conv2(nb_filter, 3, 3)(x) inputs_bottleneck = conv2(nb_filter, 1, 1, subsample=(2, 2), bias=False)(inputs) s = merge((inputs_bottleneck, x), mode='sum') return s return f def skip_connection(inputs, residual, stochastic=False, stochastic_layers=None): inputs_filters = inputs._keras_shape[1] residual_filters = residual._keras_shape[1] subsample = np.array(inputs._keras_shape[2:]) // np.array(residual._keras_shape[2:]) inputs = dropout()(inputs) if (inputs_filters != residual_filters) or np.any(subsample > 1): skip = conv2(residual_filters, 1, 1, subsample=subsample, bias=False)(inputs) else: skip = inputs if not stochastic: return merge((skip, residual), mode='sum') else: scale = ScaleInTestPhase(death_rate) skip = scale(skip) out = merge([skip, residual], mode="sum") rs = RandomSwitch(death_rate) stochastic_layers.append((rs.death_rate, scale.death_rate)) return rs([out, residual]) n = (wrn_depth - 4) // 6 stochastic_layers = [] input = Input(shape=data_shape) m_stem = conv2(16, 3, 3, subsample=(2, 2))(input) m_stem = norm_act_block()(m_stem) m_b1 = residual_block(nb_filter=16 * wrn_k, stochastic_layers=stochastic_layers)(m_stem) for _ in range(n - 1): m_b1 = residual_block(nb_filter=16 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b1) m_b1 = skip_connection(input, m_b1, stochastic=True, stochastic_layers=stochastic_layers) m_b2 = residual_reduction_block(nb_filter=32 * wrn_k)(m_b1) for _ in range(n - 1): m_b2 = residual_block(nb_filter=32 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b2) m_b2 = skip_connection(m_b1, m_b2, stochastic=True, stochastic_layers=stochastic_layers) m_b3 = residual_reduction_block(nb_filter=64 * wrn_k)(m_b2) for _ in range(n - 1): m_b3 = residual_block(nb_filter=64 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b3) m_b3 = skip_connection(m_b2, m_b3, stochastic=True, 
stochastic_layers=stochastic_layers) m_b3 = skip_connection(input, m_b3, stochastic=True, stochastic_layers=stochastic_layers) x = norm_act_block()(m_b3) x = AveragePooling2D(pool_size=(8, 8))(x) x = dropout()(x) for i, (tb, ts) in enumerate(stochastic_layers, start=0): K.set_value(tb, i / len(stochastic_layers) * death_rate) K.set_value(ts, i / len(stochastic_layers) * death_rate) outputs, losses = decoder_end_block(x, label_sizes, nb_bits, activation, weight_decay) model = Model(input, list(outputs.values())) model.compile(optimizer, loss=list(losses.values()), loss_weights={k: decoder_loss_weights(k) for k in losses.keys()}) return model
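# The K.set_value loop above implements a linear stochastic-depth schedule: the
# i-th of L stochastic layers receives death rate (i / L) * death_rate, so shallow
# layers are almost never dropped while the deepest layers are dropped most often.
# A standalone sketch of the same schedule (an illustration, not part of the model):
def linear_death_rates(num_layers, max_death_rate=0.5):
    # Death rate grows linearly with depth, starting at 0 for the first layer.
    return [i / num_layers * max_death_rate for i in range(num_layers)]

# Example: linear_death_rates(4, 0.5) -> [0.0, 0.125, 0.25, 0.375]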
class SeqGAN: # # Initialization # ---------------------------------------------------------------------------- def __init__(self, g, d, m, g_optimizer, d_optimizer): # Model of generator self.g = g # Model of discriminator self.d = d # Model of the mediator, trained below to predict the discriminator's realness score self.m = m self.z, self.seq_input = self.g.inputs self.fake_prob, = self.g.outputs self.history = None with trainable(m, False): # m_input = merge([self.seq_input, self.fake_prob], mode='concat', concat_axis=1) m_input = concatenate([self.seq_input, self.fake_prob], axis=1) self.m_realness = self.m(m_input) self.model_fit_g = Model(inputs=[self.z, self.seq_input], outputs=[self.m_realness]) self.model_fit_g.compile(optimizer=g_optimizer, loss=K.binary_crossentropy) self.d.compile(optimizer=d_optimizer, loss=K.binary_crossentropy) # # Return the shape of the input noise variables z # (batch_size: the number of data read per batch) # ---------------------------------------------------------------------------- def z_shape(self, batch_size=64): # layer, _, _ = self.z._keras_history # _keras_history is a tuple: the first element is the previous layer, the second is node_index, the third is tensor_index # (a layer may have several output tensors; with a single output tensor, tensor_index = 0) layer, node_index, tensor_index = self.z._keras_history return (batch_size, ) + layer.output_shape[1:] # the first dimension is the data size # # Sample the input noise variables z from a uniform distribution # (batch_size: the number of data read per batch) # ---------------------------------------------------------------------------- def sample_z(self, batch_size=64): shape = self.z_shape(batch_size) return np.random.uniform(-1, 1, shape) # # Generate fake samples from the input noise variables z and the input sequence seq_input # ---------------------------------------------------------------------------- def generate(self, z, seq_input, batch_size=32): return self.g.predict([z, seq_input], batch_size=batch_size) # # Training on a single batch # ---------------------------------------------------------------------------- def train_on_batch(self, seq_input, real, d_target=None): nb_real = len(real) nb_fake = len(seq_input) if d_target is None: d_target = np.concatenate( [np.zeros((nb_fake, 1)), np.ones((nb_real, 1))]) fake_prob = self.generate(self.sample_z(nb_fake), seq_input) fake = np.concatenate([seq_input, prob_to_sentence(fake_prob)], axis=1) fake_and_real = np.concatenate([fake, real], axis=0) d_loss = self.d.train_on_batch(x=fake_and_real, y=d_target) d_realness = self.d.predict(fake) m_loss = self.m.train_on_batch(x=np.concatenate([seq_input, fake_prob], axis=1), y=d_realness) g_loss = self.model_fit_g.train_on_batch( x=[self.sample_z(nb_fake), seq_input], y=np.ones((nb_fake, 1))) return g_loss, d_loss, m_loss # # Training loop over a generator # ---------------------------------------------------------------------------- def fit_generator(self, generator, nb_epoch, nb_batches_per_epoch, callbacks=[], batch_size=None, verbose=False): if batch_size is None: batch_size = 2 * len(next(generator)[0]) out_labels = ['g', 'd', 'm'] self.history = cbks.History() callbacks = [cbks.BaseLogger()] + callbacks + [self.history] if verbose: callbacks += [cbks.ProgbarLogger()] callbacks = cbks.CallbackList(callbacks) callbacks.set_model(self) callbacks.set_params({ 'nb_epoch': nb_epoch, 'nb_sample': nb_batches_per_epoch * batch_size, 'verbose': verbose, 'metrics': out_labels, }) callbacks.on_train_begin() for e in range(nb_epoch): callbacks.on_epoch_begin(e) for batch_index, (seq_input, real) in enumerate(generator): callbacks.on_batch_begin(batch_index) batch_logs = dict()
batch_logs['batch'] = batch_index batch_logs['size'] = len(real) + len(seq_input) outs = self.train_on_batch(seq_input, real) for l, o in zip(out_labels, outs): batch_logs[l] = o callbacks.on_batch_end(batch_index, batch_logs) if batch_index + 1 == nb_batches_per_epoch: break callbacks.on_epoch_end(e) callbacks.on_train_end()
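# A hypothetical input pipeline for SeqGAN.fit_generator (a sketch; array shapes
# are assumptions): the generator must yield (seq_input, real) pairs, where
# seq_input feeds the generator's sequence input and real holds genuine samples
# for the discriminator.
import numpy as np

def seqgan_batches(seq_inputs, real_samples, batch_size=32):
    # Endless generator of aligned random mini-batches.
    while True:
        idx = np.random.randint(0, len(seq_inputs), size=batch_size)
        yield seq_inputs[idx], real_samples[idx]

# seq_gan.fit_generator(seqgan_batches(seq_inputs, real_samples),
#                       nb_epoch=10, nb_batches_per_epoch=100)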
def test_model_methods(): a = Input(shape=(3,), name='input_a') b = Input(shape=(3,), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) # training/testing doesn't work before compiling. with pytest.raises(RuntimeError): model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) # test train_on_batch out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}) # test fit out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}, epochs=1, batch_size=4) # test validation_split out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) # test validation data out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np])) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5, validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np])) out = model.fit({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}, epochs=1, batch_size=4, validation_split=0.5, validation_data=( {'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np})) # test_on_batch out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}) # predict_on_batch out = model.predict_on_batch([input_a_np, input_b_np]) out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np}) # predict, evaluate input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # with sample_weight input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) sample_weight = [None, np.random.random((10,))] out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) out = model.test_on_batch([input_a_np, input_b_np], 
[output_a_np, output_b_np], sample_weight=sample_weight) # test accuracy metric model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 # this should also work model.compile(optimizer, loss, metrics={'dense_1': 'acc'}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # and this as well model.compile(optimizer, loss, metrics={'dense_1': ['acc']}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # test starting from non-zero initial epoch trained_epochs = [] trained_batches = [] # define tracer callback def on_epoch_begin(epoch, logs): trained_epochs.append(epoch) def on_batch_begin(batch, logs): trained_batches.append(batch) tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin, on_batch_begin=on_batch_begin) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=5, batch_size=4, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test starting from non-zero initial epoch for generator too trained_epochs = [] def gen_data(batch_sz): while True: yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))], [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))]) out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test with a custom metric function def mse(y_true, y_pred): return K.mean(K.pow(y_true - y_pred, 2)) model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out_len = 1 + 2 * (1 + 1) # total loss + 2 outputs * (loss + metric) assert len(out) == out_len out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == out_len input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # enable verbose for evaluate_generator out = model.evaluate_generator(gen_data(4), steps=3, verbose=1) # empty batch with pytest.raises(ValueError): def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.evaluate_generator(gen_data(), steps=1) # x is not a list of numpy arrays. with pytest.raises(ValueError): out = model.predict([None]) # x does not match _feed_input_names. with pytest.raises(ValueError): out = model.predict([input_a_np, None, input_b_np]) with pytest.raises(ValueError): out = model.predict([None, input_a_np, input_b_np]) # all input/output/weight arrays should have the same number of samples. 
with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np[:2]], [output_a_np, output_b_np], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np[:2]], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[sample_weight[1], sample_weight[1][:2]]) # `sample_weight` is neither a dict nor a list. with pytest.raises(TypeError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=tuple(sample_weight)) # `validation_data` is neither a tuple nor a triple. with pytest.raises(ValueError): out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np],)) # `loss` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss=['mse', 'mae', 'mape']) # `loss_weights` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5}) # `loss_weights` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights=[0.5]) # `loss_weights` is invalid type. with pytest.raises(TypeError): model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5)) # `sample_weight_mode` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'}) # `sample_weight_mode` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode=['temporal']) # `sample_weight_mode` matches output_names partially. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'}) # `loss` does not exist. with pytest.raises(ValueError): model.compile(optimizer, loss=[]) model.compile(optimizer, loss=['mse', 'mae']) model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8}) model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8]) # the rank of weight arrays should be 1. with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[None, np.random.random((10, 20, 30))]) model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'}) model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal']) # the rank of output arrays should be at least 3D. 
with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) trained_epochs = [] trained_batches = [] out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3, epochs=5, initial_epoch=0, validation_data=RandomSequence(4), validation_steps=3, callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(3)) * 5 # steps_per_epoch will be equal to len of sequence if it's unspecified trained_epochs = [] trained_batches = [] out = model.fit_generator(generator=RandomSequence(3), epochs=5, initial_epoch=0, validation_data=RandomSequence(4), callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(12)) * 5 # fit_generator will throw an exception if steps is unspecified for regular generator with pytest.raises(ValueError): def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.fit_generator(generator=gen_data(), epochs=5, initial_epoch=0, validation_data=gen_data(), callbacks=[tracker_cb]) # Check if generator is only accessed an expected number of times gen_counters = [0, 0] def gen_data(i): while True: gen_counters[i] += 1 yield ([np.random.random((1, 3)), np.random.random((1, 3))], [np.random.random((1, 4)), np.random.random((1, 3))]) out = model.fit_generator(generator=gen_data(0), epochs=3, steps_per_epoch=2, validation_data=gen_data(1), validation_steps=1, max_queue_size=2, workers=2) # Need range check here as filling of the queue depends on sleep in the enqueuers assert 6 <= gen_counters[0] <= 8 # 12 = (epoch * workers * validation steps * max_queue_size) assert 3 <= gen_counters[1] <= 12 gen_counters = [0] out = model.fit_generator(generator=RandomSequence(3), epochs=3, validation_data=gen_data(0), validation_steps=1, max_queue_size=2, workers=2) # 12 = (epoch * workers * validation steps * max_queue_size) # Need range check here as filling of the queue depends on sleep in the enqueuers assert 3 <= gen_counters[0] <= 12 # predict_generator output shape behavior should be consistent def expected_shape(batch_size, n_batches): return (batch_size * n_batches, 4), (batch_size * n_batches, 3) # Multiple outputs and one step. batch_size = 5 sequence_length = 1 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator(RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Multiple outputs and multiple steps. batch_size = 5 sequence_length = 2 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator(RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Create a model with a single output. single_output_model = Model([a, b], a_2) single_output_model.compile(optimizer, loss, metrics=[], sample_weight_mode=None) # Single output and one step. batch_size = 5 sequence_length = 1 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator(RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0 # Single output and multiple steps. 
batch_size = 5 sequence_length = 2 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator(RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0
class AdditionNPIModel(NPIStep): model = None f_enc = None def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None): self.system = system self.model_path = model_path self.program_set = program_set self.batch_size = 1 self.build() self.weight_loaded = False self.load_weights() def build(self): enc_size = self.size_of_env_observation() argument_size = IntegerArguments.size_of_arguments input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc') input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg') input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1, batch_input_shape=(self.batch_size, 1)) f_enc = Sequential(name='f_enc') f_enc.add(Merge([input_enc, input_arg], mode='concat')) f_enc.add(Dense(256)) f_enc.add(Dense(32)) f_enc.add(Activation('relu', name='relu_enc')) self.f_enc = f_enc program_embedding = Sequential(name='program_embedding') program_embedding.add(input_prg) f_enc_convert = Sequential(name='f_enc_convert') f_enc_convert.add(f_enc) f_enc_convert.add(RepeatVector(1)) f_lstm = Sequential(name='f_lstm') f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat')) # f_lstm.add(Activation('relu', name='relu_lstm_0')) f_lstm.add(LSTM(256, return_sequences=False, stateful=True)) f_lstm.add(Activation('relu', name='relu_lstm_1')) f_lstm.add(RepeatVector(1)) f_lstm.add(LSTM(256, return_sequences=False, stateful=True)) f_lstm.add(Activation('relu', name='relu_lstm_2')) # plot(f_lstm, to_file='f_lstm.png', show_shapes=True) f_end = Sequential(name='f_end') f_end.add(f_lstm) f_end.add(Dense(10)) f_end.add(Dense(1)) f_end.add(Activation('hard_sigmoid', name='hard_sigmoid_end')) # plot(f_end, to_file='f_end.png', show_shapes=True) f_prog = Sequential(name='f_prog') f_prog.add(f_lstm) f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE)) f_prog.add(Dense(PROGRAM_VEC_SIZE)) f_prog.add(Activation('softmax', name='softmax_prog')) # plot(f_prog, to_file='f_prog.png', show_shapes=True) f_args = [] for ai in range(1, IntegerArguments.max_arg_num+1): f_arg = Sequential(name='f_arg%s' % ai) f_arg.add(f_lstm) f_arg.add(Dense(32)) f_arg.add(Dense(IntegerArguments.depth)) f_arg.add(Activation('softmax', name='softmax_arg%s' % ai)) f_args.append(f_arg) # plot(f_arg, to_file='f_arg.png', show_shapes=True) self.model = Model([input_enc.input, input_arg.input, input_prg.input], [f_end.output, f_prog.output] + [fa.output for fa in f_args], name="npi") self.compile_model() plot(self.model, to_file='model.png', show_shapes=True) def reset(self): super(AdditionNPIModel, self).reset() for l in self.model.layers: if type(l) is LSTM: l.reset_states() def compile_model(self, lr=0.0001, arg_weight=1.): arg_num = IntegerArguments.max_arg_num optimizer = Adam(lr=lr) loss = ['binary_crossentropy', 'categorical_crossentropy'] + ['categorical_crossentropy'] * arg_num self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num) def fit(self, steps_list, epoch=3000): """ :param int epoch: :param typing.List[typing.Dict[q=dict, steps=typing.List[StepInOut]]] steps_list: :return: """ def filter_question(condition_func): sub_steps_list = [] for steps_dict in steps_list: question = steps_dict['q'] if condition_func(question['in1'], question['in2']): sub_steps_list.append(steps_dict) return sub_steps_list # self.print_weights() if not self.weight_loaded: self.train_f_enc(filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), 
epoch=100) self.f_enc.trainable = False q_type = "training questions of a+b < 10" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a+b < 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<10 and b< 10 and 10 <= a+b" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a<10 and b<10 and a + b >= 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<10 and b<10" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 10 and b < 10), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<100 and b<100" print(q_type) pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), epoch=epoch, pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) while True: print("test all type of questions") cc, wc = self.test_to_subset(create_questions(1000)) print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc)) if wc == 0: break q_type = "training questions of ALL" print(q_type) pr = 1.0 self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr) all_ok = self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr, skip_correct=True) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) def fit_to_subset(self, steps_list, epoch=3000, pass_rate=1.0, skip_correct=False): learning_rate = 0.0001 for i in range(30): all_ok = self.do_learn(steps_list, 30, learning_rate=learning_rate, pass_rate=pass_rate, arg_weight=1., skip_correct=skip_correct) if all_ok: return True learning_rate *= 0.95 return False def test_to_subset(self, questions): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) correct_count = wrong_count = 0 for idx, question in enumerate(questions): question = copy(question) if self.question_test(addition_env, npi_runner, question): correct_count += 1 else: wrong_count += 1 return correct_count, wrong_count @staticmethod def dict_to_str(d): return str(tuple([(k, d[k]) for k in sorted(d)])) def do_learn(self, steps_list, epoch, learning_rate=None, pass_rate=1.0, arg_weight=1., skip_correct=False): if learning_rate is not None: self.update_learning_rate(learning_rate, arg_weight) addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) last_weights = None correct_count = Counter() no_change_count = 0 last_loss = 1000 for ep in range(1, epoch+1): correct_new = wrong_new = 0 losses = [] ok_rate = [] np.random.shuffle(steps_list) for idx, steps_dict in enumerate(steps_list): question = copy(steps_dict['q']) question_key = self.dict_to_str(question) if self.question_test(addition_env, npi_runner, question): if correct_count[question_key] == 0: correct_new += 1 correct_count[question_key] += 1 print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) ok_rate.append(1) if skip_correct or int(math.sqrt(correct_count[question_key])) ** 2 != correct_count[question_key]: continue else: ok_rate.append(0) if correct_count[question_key] > 0: print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) correct_count[question_key] = 0 wrong_new += 1 steps = steps_dict['steps'] xs = [] ys = [] ws = [] for 
step in steps: xs.append(self.convert_input(step.input)) y, w = self.convert_output(step.output) ys.append(y) ws.append(w) self.reset() for i, (x, y, w) in enumerate(zip(xs, ys, ws)): loss = self.model.train_on_batch(x, y, sample_weight=w) if not np.isfinite(loss): print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)])) self.print_weights(last_weights, detail=True) raise RuntimeError("Loss is not finite!") losses.append(loss) last_weights = self.model.get_weights() if losses: cur_loss = np.average(losses) print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list))) # self.print_weights() if correct_new + wrong_new == 0: no_change_count += 1 else: no_change_count = 0 if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5: print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:") return False last_loss = cur_loss print("=" * 80) self.save() if np.average(ok_rate) >= pass_rate: return True return False def update_learning_rate(self, learning_rate, arg_weight=1.): print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight)) self.compile_model(learning_rate, arg_weight=arg_weight) def train_f_enc(self, steps_list, epoch=50): print("training f_enc") f_add0 = Sequential(name='f_add0') f_add0.add(self.f_enc) f_add0.add(Dense(FIELD_DEPTH)) f_add0.add(Activation('softmax', name='softmax_add0')) f_add1 = Sequential(name='f_add1') f_add1.add(self.f_enc) f_add1.add(Dense(FIELD_DEPTH)) f_add1.add(Activation('softmax', name='softmax_add1')) env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model") env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2) for ep in range(epoch): losses = [] for idx, steps_dict in enumerate(steps_list): prev = None for step in steps_dict['steps']: x = self.convert_input(step.input)[:2] env_values = step.input.env.reshape((4, -1)) in1 = np.clip(env_values[0].argmax() - 1, 0, 9) in2 = np.clip(env_values[1].argmax() - 1, 0, 9) carry = np.clip(env_values[2].argmax() - 1, 0, 9) y_num = in1 + in2 + carry now = (in1, in2, carry) if prev == now: continue prev = now y0 = to_one_hot_array((y_num % 10)+1, FIELD_DEPTH) y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH) y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]] loss = env_model.train_on_batch(x, y) losses.append(loss) print("ep %3d: loss=%s" % (ep, np.average(losses))) def question_test(self, addition_env, npi_runner, question): addition_env.reset() self.reset() try: run_npi(addition_env, npi_runner, self.program_set.ADD, question) if question['correct']: return True except StopIteration: pass return False def convert_input(self, p_in: StepInput): x_pg = np.array((p_in.program.program_id,)) x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)] return x def convert_output(self, p_out: StepOutput): y = [np.array((p_out.r,))] weights = [[1.]] if p_out.program: arg_values = p_out.arguments.values arg_num = len(p_out.program.args or []) y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)] weights += [[1.]] else: arg_values = IntegerArguments().values arg_num = 0 y += [np.zeros((PROGRAM_VEC_SIZE, ))] weights += [[1e-10]] for v in arg_values: # split by each args y += [v] weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num) weights = [np.array(w) for w in weights] return [yy.reshape((self.batch_size, -1)) for yy in y], weights def step(self, env_observation: np.ndarray, pg: Program, arguments: 
IntegerArguments) -> StepOutput: x = self.convert_input(StepInput(env_observation, pg, arguments)) results = self.model.predict(x, batch_size=1) # if batch_size==1, returns single row r, pg_one_hot, arg_values = results[0], results[1], results[2:] program = self.program_set.get(pg_one_hot.argmax()) ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values))) return ret def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) @staticmethod def size_of_env_observation(): return FIELD_ROW * FIELD_DEPTH
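# A hypothetical training entry point (a sketch: RuntimeSystem, AdditionProgramSet,
# and create_questions come from the surrounding NPI codebase; build_steps_list is
# an invented placeholder for whatever produces the {'q': ..., 'steps': ...} dicts
# that fit() expects):
system = RuntimeSystem()
program_set = AdditionProgramSet()
npi_model = AdditionNPIModel(system, model_path='addition_npi.h5', program_set=program_set)
steps_list = build_steps_list(create_questions(100))  # hypothetical helper
npi_model.fit(steps_list, epoch=3000)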
class PolicyValueNet(): """policy-value network """ def __init__(self, board_width, board_height, model_file=None): self.board_width = board_width self.board_height = board_height self.l2_const = 1e-4 # coef of l2 penalty self.create_policy_value_net() if model_file: print("[Notice] load model from file") self.model = load_model(model_file) else: print("[Notice] create model") self._loss_train_op() def create_policy_value_net(self): """create the policy value network """ in_x = network = Input((4, self.board_width, self.board_height)) # conv layers network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) # action policy layers policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) policy_net = Flatten()(policy_net) self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net) # state value layers value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network) value_net = Flatten()(value_net) value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net) self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net) self.model = Model(in_x, [self.policy_net, self.value_net]) def policy_value(state_input): state_input_union = np.array(state_input) results = self.model.predict_on_batch(state_input_union) return results self.policy_value = policy_value def policy_value_fn(self, board): """ input: board output: a list of (action, probability) tuples for each available action and the score of the board state """ legal_positions = board.availables current_state = board.current_state() act_probs, value = self.policy_value( current_state.reshape(-1, 4, self.board_width, self.board_height)) act_probs = zip(legal_positions, act_probs.flatten()[legal_positions]) return act_probs, value[0][0] def _loss_train_op(self): """ Three loss terms: loss = (z - v)^2 - pi^T * log(p) + c||theta||^2 """ # get the train op opt = Adam() losses = ['categorical_crossentropy', 'mean_squared_error'] self.model.compile(optimizer=opt, loss=losses) def self_entropy(probs): return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1)) def train_step(state_input, mcts_probs, winner, learning_rate): state_input_union = np.array(state_input) mcts_probs_union = np.array(mcts_probs) winner_union = np.array(winner) loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) action_probs, _ = self.model.predict_on_batch(state_input_union) entropy = self_entropy(action_probs) K.set_value(self.model.optimizer.lr, learning_rate) self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0) return loss[0], entropy self.train_step = train_step def get_policy_param(self): net_params = self.model.get_weights() return net_params def save_model(self, model_file): """ save model to file """ print("save model file") self.model.save(model_file)
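# Minimal usage sketch (shapes follow create_policy_value_net: states are
# (batch, 4, board_width, board_height); the numbers below are placeholders):
import numpy as np

net = PolicyValueNet(board_width=8, board_height=8)
states = np.random.random((16, 4, 8, 8))
probs, values = net.policy_value(states)  # (16, 64) move probabilities, (16, 1) values
loss, entropy = net.train_step(states,
                               np.random.random((16, 64)),  # MCTS visit distributions
                               np.random.random((16, 1)),   # game outcomes z
                               learning_rate=2e-3)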
def test_pandas_dataframe(): input_a = Input(shape=(3, ), name='input_a') input_b = Input(shape=(3, ), name='input_b') x = Dense(4, name='dense_1')(input_a) y = Dense(3, name='dense_2')(input_b) model_1 = Model(inputs=input_a, outputs=x) model_2 = Model(inputs=[input_a, input_b], outputs=[x, y]) optimizer = 'rmsprop' loss = 'mse' model_1.compile(optimizer=optimizer, loss=loss) model_2.compile(optimizer=optimizer, loss=loss) input_a_df = pd.DataFrame(np.random.random((10, 3))) input_b_df = pd.DataFrame(np.random.random((10, 3))) output_a_df = pd.DataFrame(np.random.random((10, 4))) output_b_df = pd.DataFrame(np.random.random((10, 3))) model_1.fit(input_a_df, output_a_df) model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.fit([input_a_df], [output_a_df]) model_1.fit({'input_a': input_a_df}, output_a_df) model_2.fit({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.predict(input_a_df) model_2.predict([input_a_df, input_b_df]) model_1.predict([input_a_df]) model_1.predict({'input_a': input_a_df}) model_2.predict({'input_a': input_a_df, 'input_b': input_b_df}) model_1.predict_on_batch(input_a_df) model_2.predict_on_batch([input_a_df, input_b_df]) model_1.predict_on_batch([input_a_df]) model_1.predict_on_batch({'input_a': input_a_df}) model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df}) model_1.evaluate(input_a_df, output_a_df) model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.evaluate([input_a_df], [output_a_df]) model_1.evaluate({'input_a': input_a_df}, output_a_df) model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.train_on_batch(input_a_df, output_a_df) model_2.train_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.train_on_batch([input_a_df], [output_a_df]) model_1.train_on_batch({'input_a': input_a_df}, output_a_df) model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df]) model_1.test_on_batch(input_a_df, output_a_df) model_2.test_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df]) model_1.test_on_batch([input_a_df], [output_a_df]) model_1.test_on_batch({'input_a': input_a_df}, output_a_df) model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, [output_a_df, output_b_df])
class VAE(AE): """ Variational Autoencoder. This consists of an encoder and a decoder plus an interpolatable latent space. """ def __init__(self, encoder=None, decoder=None, autoencoder=None, latent_dim=None): super(VAE, self).__init__(encoder=None, decoder=None) # Encoder and decoder must be provided. assert (encoder != None and decoder != None) # From loading. if encoder != None and decoder != None and autoencoder != None: self.encoder = encoder self.decoder = decoder self.autoencoder = autoencoder self.latent_dim = decoder.inputs[0].shape.as_list()[-1] return # Set the latent dimensions. self.latent_dim = latent_dim assert self.latent_dim != None # Encoder. encoder_input = encoder.inputs[0] encoder_output = encoder.outputs[0] z_mean = layers.Dense(self.latent_dim, name='z_mean')(encoder_output) z_log_var = layers.Dense(self.latent_dim, name='z_log_var')(encoder_output) z = layers.Lambda(sampling, output_shape=(self.latent_dim, ), name='z')([z_mean, z_log_var]) self.encoder = Model(encoder_input, [z_mean, z_log_var, z], name='encoder') # Decoder. self.decoder = decoder # Creating the VAE. inputs = self.encoder.inputs[0] outputs = self.decoder(self.encoder(inputs)[2]) # This is z. self.autoencoder = Model(inputs, outputs, name="vae") def compile(self, optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None, **kwargs): """ Compiles the VAE. In addition to the default functionality of *compile*, it adds the VAE-loss. This loss takes the provided loss and interprets it as a reconstruction-loss. The VAE loss is similar to >>> vae_loss = mean(r_loss + kl_loss) See the literature for details. """ self.loss = loss # Inputs. inputs = self.encoder.inputs[0] inputs_dim = int(np.prod(inputs.shape.as_list()[1:])) # Outputs. z_mean = self.encoder.outputs[0] z_log_var = self.encoder.outputs[1] outputs = self.decoder(self.encoder(inputs)[2]) # This is z. # Define the loss. def vae_loss(loss_inputs, loss_outputs): # Flatten all to accept different dimensions. loss_inputs = K.flatten(loss_inputs) loss_outputs = K.flatten(loss_outputs) # Reconstruction loss: resolve the loss function, then apply it. if isinstance(self.loss, str): r_loss_fn = losses.get(self.loss) else: r_loss_fn = self.loss r_loss = r_loss_fn(loss_inputs, loss_outputs) r_loss *= inputs_dim # kl loss. kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 # VAE loss. vae_loss = K.mean(r_loss + kl_loss) vae_loss /= inputs_dim return vae_loss # Compile model. loss = vae_loss self.autoencoder.compile(optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, **kwargs) def predict_embed_samples_into_latent(self, x, batch_size=None, verbose=0, steps=None): return self.encoder.predict(x, batch_size, verbose, steps)[2]
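# The Lambda layer above references a `sampling` function that is defined elsewhere
# in this codebase. A minimal sketch of the usual reparameterization trick it
# presumably implements (z = mu + sigma * epsilon, with epsilon ~ N(0, I)):
from keras import backend as K

def sampling(args):
    # Draw z from N(z_mean, exp(z_log_var)) in a differentiable way.
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon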
def test_model_with_input_feed_tensor(): """We test building a model with a TF variable as input. We should be able to call fit, evaluate, predict, by only passing them data for the placeholder inputs in the model. """ import tensorflow as tf input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) b = Input(shape=(3, ), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) model.summary() optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] model.compile(optimizer, loss, metrics=['mean_squared_error'], loss_weights=loss_weights, sample_weight_mode=None) # test train_on_batch out = model.train_on_batch(input_b_np, [output_a_np, output_b_np]) out = model.train_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.test_on_batch({'input_b': input_b_np}, [output_a_np, output_b_np]) out = model.predict_on_batch({'input_b': input_b_np}) # test fit out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np], epochs=1, batch_size=10) out = model.fit(input_b_np, [output_a_np, output_b_np], epochs=1, batch_size=10) # test evaluate out = model.evaluate({'input_b': input_b_np}, [output_a_np, output_b_np], batch_size=10) out = model.evaluate(input_b_np, [output_a_np, output_b_np], batch_size=10) # test predict out = model.predict({'input_b': input_b_np}, batch_size=10) out = model.predict(input_b_np, batch_size=10) assert len(out) == 2 # Now test a model with a single input # i.e. we don't pass any data to fit the model. a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) a_2 = Dropout(0.5, name='dropout')(a_2) model = Model(a, a_2) model.summary() optimizer = 'rmsprop' loss = 'mse' model.compile(optimizer, loss, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, output_a_np) out = model.train_on_batch(None, output_a_np) out = model.test_on_batch(None, output_a_np) out = model.predict_on_batch(None) out = model.train_on_batch([], output_a_np) out = model.train_on_batch({}, output_a_np) # test fit out = model.fit(None, output_a_np, epochs=1, batch_size=10) out = model.fit(None, output_a_np, epochs=1, batch_size=10) # test evaluate out = model.evaluate(None, output_a_np, batch_size=10) out = model.evaluate(None, output_a_np, batch_size=10) # test predict out = model.predict(None, steps=3) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4) # Same, without learning phase # i.e. we don't pass any data to fit the model. 
a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) a_2 = Dense(4, name='dense_1')(a) model = Model(a, a_2) model.summary() optimizer = 'rmsprop' loss = 'mse' model.compile(optimizer, loss, metrics=['mean_squared_error']) # test train_on_batch out = model.train_on_batch(None, output_a_np) out = model.train_on_batch(None, output_a_np) out = model.test_on_batch(None, output_a_np) out = model.predict_on_batch(None) out = model.train_on_batch([], output_a_np) out = model.train_on_batch({}, output_a_np) # test fit out = model.fit(None, output_a_np, epochs=1, batch_size=10) out = model.fit(None, output_a_np, epochs=1, batch_size=10) # test evaluate out = model.evaluate(None, output_a_np, batch_size=10) out = model.evaluate(None, output_a_np, batch_size=10) # test predict out = model.predict(None, steps=3) out = model.predict(None, steps=3) assert out.shape == (10 * 3, 4)
class TL(Model): """ Triplet-Loss trained Neural Network. https://arxiv.org/abs/1503.03832 """ def __init__(self, base=None, siamese=None): super(TL, self).__init__() # Store the base model. assert (base != None) self.base = base # For loading. if base != None and siamese != None: self.base = base self.siamese = siamese self.latent_dim = self.base.outputs[0].shape[1] return # Get the latent dimension. assert len(self.base.outputs) == 1 assert len(self.base.outputs[0].shape) == 2 self.latent_dim = self.base.outputs[0].shape[1] # Get the input shape. input_shape = self.base.inputs[0].shape.as_list()[1:] # Create the anchor. input_anchor = layers.Input(shape=input_shape) output_anchor = input_anchor output_anchor = self.base(output_anchor) # Create the positive. input_positive = layers.Input(shape=input_shape) output_positive = input_positive output_positive = self.base(output_positive) # Create the negative. input_negative = layers.Input(shape=input_shape) output_negative = input_negative output_negative = self.base(output_negative) # Create a dummy output. output = layers.concatenate( [output_anchor, output_positive, output_negative]) # Create the model. self.siamese = Model([input_anchor, input_positive, input_negative], output, name="triplet_model") def compile(self, optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None, triplet_loss="euclidean", **kwargs): """ Compiles the TL. In addition to the default functionality of *compile*, it adds the triplet-loss. In order to do so you have to provide it via the parameter *triplet_loss*. The triplet loss is similar to >>> triplet_loss = max(0.0, pos_dist - neg_dist + alpha) See the literature for details. Additional args: triplet_loss (string): The base-loss for the triplet-loss. Values are either *euclidean* for euclidean norm or *cosine* for cosine similarity. """ assert loss == None, "Not expected to provide an explicit loss for TL. Use 'triplet_loss'" self.triplet_loss = triplet_loss def triplet_loss_function(y_true, y_pred, alpha=0.4): anchor = y_pred[:, 0:self.latent_dim] positive = y_pred[:, self.latent_dim:self.latent_dim * 2] negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3] if triplet_loss == "euclidean": pos_dist = euclidean_loss(positive, anchor) neg_dist = euclidean_loss(negative, anchor) elif triplet_loss == "cosine": pos_dist = cosine_loss(positive, anchor) neg_dist = cosine_loss(negative, anchor) else: raise Exception("Unexpected: " + triplet_loss) basic_loss = pos_dist - neg_dist + alpha loss = K.maximum(basic_loss, 0.0) return loss loss = triplet_loss_function self.siamese.compile(optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, **kwargs) def fit(self, x=None, y=None, batch_size=None, minibatch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0., validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, **kwargs): """ This is basically the same as in vanilla Keras. Additional args: minibatch_size (int): The model internally does some sampling. The *minibatch_size* specifies how many candidates to use in order to create a triplet for training. """ assert minibatch_size != None, "ERROR! Must provide 'minibatch_size'." assert steps_per_epoch != None, "ERROR! Must provide 'steps_per_epoch'." assert validation_steps != None, "ERROR! Must provide 'validation_steps'." y_dummy = np.zeros((batch_size, self.latent_dim * 3)) # Template generator.
def triplet_loss_generator(x_generator, y_generator, model, sampling): # Get the classes. classes = sorted(list(set(y_generator))) # Sort by classes for easy indexing. class_indices = {} for c in classes: class_indices[c] = [] for index, c in enumerate(y_generator): class_indices[c].append(index) # Compute the complements. class_complements = {} for c in classes: class_complements[c] = [c2 for c2 in classes if c2 != c] # Generator loop. while True: x_input_anchors = [] x_input_positives = [] x_input_negatives = [] # Generate a whole batch. for _ in range(batch_size): anchor_class = random.choice(classes) anchor_index = random.choice(class_indices[anchor_class]) anchor_input = x_generator[anchor_index] #print("anchor_class", anchor_class) anchor_latent = self.base.predict( np.expand_dims(anchor_input, axis=0))[0] # Generate some positive candidates. positive_candidates = [] while len(positive_candidates) < minibatch_size: positive_class = anchor_class positive_index = random.choice( class_indices[positive_class]) positive_input = x_generator[positive_index] assert positive_class == y_generator[positive_index] #print("positive_class", positive_class) positive_candidates.append(positive_input) # Find the farthest positive candidate. positive_candidates = np.array(positive_candidates) positive_latents = self.base.predict(positive_candidates) positive_extremum = compute_latent_extremum( anchor_latent, positive_latents, "argmax", self.triplet_loss) positive_input = positive_candidates[positive_extremum] # Generate some negative candidates. negative_candidates = [] while len(negative_candidates) < minibatch_size: negative_class = random.choice( class_complements[anchor_class]) negative_index = random.choice( class_indices[negative_class]) negative_input = x_generator[negative_index] assert negative_class == y_generator[negative_index] #print("negative_class", negative_class) negative_candidates.append(negative_input) # Find the closest negative candidate. negative_candidates = np.array(negative_candidates) negative_latents = self.base.predict(negative_candidates) negative_extremum = compute_latent_extremum( anchor_latent, negative_latents, "argmin", self.triplet_loss) negative_input = negative_candidates[negative_extremum] # Done. x_input_anchors.append(anchor_input) x_input_positives.append(positive_input) x_input_negatives.append(negative_input) x_input_anchors = np.array(x_input_anchors) x_input_positives = np.array(x_input_positives) x_input_negatives = np.array(x_input_negatives) x_input = [ x_input_anchors, x_input_positives, x_input_negatives ] yield x_input, y_dummy # Create the generators. training_generator = triplet_loss_generator(x, y, batch_size, self.siamese) if validation_data != None: validation_generator = triplet_loss_generator( validation_data[0], validation_data[1], batch_size, self.siamese) else: validation_generator = None # Create the history. history_keys = ["loss", "val_loss"] history = {} for history_key in history_keys: history[history_key] = [] # Training the model for epoch in range(epochs): print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...") # Generating data for training. training_input, training_output = next(training_generator) if validation_generator != None: validation_input, validation_output = next( validation_generator) model_history = self.siamese.fit( training_input, training_output, validation_data=(validation_input, validation_output), epochs=1, steps_per_epoch=steps_per_epoch, verbose=0, validation_steps=validation_steps) # Update the history. 
for history_key in history_keys: history_value = model_history.history[history_key] history[history_key].append(history_value) print(history_key, history_value) return history def fit_generator(self, generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0): """ Not implemented yet. """ print("TODO: implement fit_generator!") raise Exception("Not implemented!") return self.siamese.fit_generator(generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch) def evaluate(self, x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None): """ Evaluates the model. Same as vanilla Keras. """ return self.siamese.evaluate(x, y, batch_size, verbose, sample_weight, steps=steps) def predict(self, x, batch_size=None, verbose=0, steps=None): """ Does a prediction. Same as vanilla Keras. """ return self.siamese.predict(x, batch_size, verbose, steps) def summary(self): """ Provides a summary. """ print("Base model:") self.base.summary() print("Siamese model:") self.siamese.summary() def save(self, path): """ Saves the TL. This includes the whole Siamese Net plus the base-model. This code >>> tl.save("mytl.h5") will create the files *mytl.h5* and *mytl-base.h5*. """ self.siamese.save(path) self.base.save(append_to_filepath(path, "-base"))
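# euclidean_loss and cosine_loss are referenced by TL.compile but defined elsewhere
# in this codebase. A minimal sketch of plausible definitions (assumptions, matching
# the docstring's "euclidean norm" and "cosine similarity" descriptions):
from keras import backend as K

def euclidean_loss(a, b):
    # Squared euclidean distance per sample.
    return K.sum(K.square(a - b), axis=-1)

def cosine_loss(a, b):
    # 1 - cosine similarity, so smaller means more similar.
    a = K.l2_normalize(a, axis=-1)
    b = K.l2_normalize(b, axis=-1)
    return 1.0 - K.sum(a * b, axis=-1)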
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(dropouted) cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(cnn) flattened = Flatten()(cnn) dense = Dense(100, activation='tanh')(flattened) predict = Dense(2, activation='softmax')(dense) model = Model(inputs=[word, distance_e1, distance_e2], outputs=predict) # opt = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06) # opt = Adagrad(lr=0.01, epsilon=1e-06) # opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06) # opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08) opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=opt) train_instances = [line.strip() for line in lines] label_array_t, word_array_t, dis_e1_array_t, dis_e2_array_t = rep.represent_instances( train_instances) model.fit([word_array_t, dis_e1_array_t, dis_e2_array_t], label_array_t, batch_size=128, epochs=epoch_size) model.save(output_file) label_array_ans = model.predict([word_array_t, dis_e1_array_t, dis_e2_array_t], batch_size=128) print(label_array_ans) print("Training finished!!") eval_mulclass(label_array_t, label_array_ans)
class GAN(Model): """ Generative Adversarial Network (GAN). """ def __init__(self, generator, discriminator): super(GAN, self).__init__() assert generator != None assert discriminator != None assert discriminator.optimizer != None, "Discriminator must be compiled!" self.generator = generator self.discriminator = discriminator # Create the GAN. z_shape = generator.inputs[0].shape[1:] gan_input = layers.Input(shape=z_shape) gan_output = gan_input gan_output = self.generator(gan_output) self.discriminator.trainable = False gan_output = self.discriminator(gan_output) self.gan = Model(gan_input, gan_output) def compile(self, optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None, **kwargs): """ Compiles the model. Same as vanilla Keras. """ self.gan.compile(optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, **kwargs) def fit( self, x=None, y=None, batch_size=None, epochs=1, sample_interval=None, verbose=1, callbacks=None, validation_split=0., validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, **kwargs): """ Trains the GAN. This is almost the same as in vanilla Keras. Additional args: sample_interval (int): If set, plots generated image samples every *sample_interval* epochs. """ # Adversarial ground truths valid = np.ones((batch_size, 1)) fake = np.zeros((batch_size, 1)) for epoch in range(epochs): # Select a random batch of images idx = np.random.randint(0, x.shape[0], batch_size) imgs = x[idx] # Create some noise. noise = np.random.normal(0, 1, (batch_size, 100)) # Generate a batch of new images. gen_imgs = self.generator.predict(noise) # Train the discriminator d_loss_real = self.discriminator.train_on_batch(imgs, valid) d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) # Create some noise. noise = np.random.normal(0, 1, (batch_size, 100)) # Train the generator (to have the discriminator label samples as valid). g_loss = self.gan.train_on_batch(noise, valid) if type(g_loss) == list: g_loss = g_loss[0] # Plot the progress. print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss), end="\r") # If at save interval => save generated image samples if sample_interval != None and epoch % sample_interval == 0: self.sample_images(epoch) def sample_images(self, epoch): """ Samples images. """ r, c = 5, 5 noise = np.random.normal(0, 1, (r * c, 100)) gen_imgs = self.generator.predict(noise) # Rescale images 0 - 1 gen_imgs = 0.5 * gen_imgs + 0.5 fig, axs = plt.subplots(r, c) cnt = 0 for i in range(r): for j in range(c): axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') axs[i, j].axis('off') cnt += 1 #fig.savefig("images/%d.png" % epoch) plt.show() plt.close() def summary(self): """ Provides a summary. """ print("Generator:") self.generator.summary() print("Discriminator:") self.discriminator.summary() print("GAN:") self.gan.summary() def save(self, path): """ Saves the GAN. This includes the whole GAN plus the generator and the discriminator. The generator and discriminator use the path plus a respective annotation. This code >>> gan.save("mygan.h5") will create the files *mygan.h5*, *mygan-generator.h5*, and *mygan-discriminator.h5*. """ self.gan.save(path) self.generator.save(append_to_filepath(path, "-generator")) self.discriminator.save(append_to_filepath(path, "-discriminator"))
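# Minimal usage sketch (the tiny models and random data are stand-ins; note that
# GAN.fit hard-codes a 100-dimensional noise vector, and the progress printout
# reads d_loss[1], so the discriminator must be compiled with an accuracy metric):
import numpy as np
from keras import layers, models

generator = models.Sequential([
    layers.Dense(28 * 28, activation='tanh', input_shape=(100,)),
    layers.Reshape((28, 28, 1)),
])
discriminator = models.Sequential([
    layers.Flatten(input_shape=(28, 28, 1)),
    layers.Dense(1, activation='sigmoid'),
])
discriminator.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

gan = GAN(generator, discriminator)
gan.compile('adam', 'binary_crossentropy')
gan.fit(np.random.normal(size=(256, 28, 28, 1)), batch_size=32, epochs=100)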
class AIPlayer(Player): def __init__(self, buffer_size, sim_count, train=True, model="", tau = 1, compile=False): self.buffer = ReplayBuffer(buffer_size) self.temp_state = deque() self.train = train self.loss = 0 self.acc = 0 self.batch_count = 0 self.sim_count = sim_count if model != "": self.load(model, compile) else: self.create_network() self.tau = tau @staticmethod def create_if_nonexistant(config): models = glob.glob(config.data.model_location + "*.h5") if len(models) == 0: ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move) ai.save(config.data.model_location+"model_0.h5") del ai def set_training(self, train): self.train = train @staticmethod def clear(): K.clear_session() def load(self, file, compile=False): try: del self.network except Exception: pass self.network = load_model(file, custom_objects={"objective_function_for_policy":AIPlayer.objective_function_for_policy, "objective_function_for_value":AIPlayer.objective_function_for_value}, compile=compile) def save(self, file): self.network.save(file) def create_network(self): x_in = Input((3, 8, 8)) x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x_in) x = BatchNormalization(axis=1)(x) x = Activation("relu")(x) for _ in range(10): x = self._build_residual_block(x) res_out = x x = Conv2D(filters=2, kernel_size=1, data_format="channels_first")(res_out) x = BatchNormalization(axis=1)(x) x = Activation("relu")(x) x = Flatten()(x) policy_out = Dense(8*8+1, activation="softmax", name="policy_out")(x) x = Conv2D(filters=1, kernel_size=1, data_format="channels_first")(res_out) x = BatchNormalization(axis=1)(x) x = Activation("relu")(x) x = Flatten()(x) x = Dense(64, activation="relu")(x) value_out = Dense(1, activation="tanh", name="value_out")(x) self.network = Model(x_in, [policy_out, value_out], name="reversi_model") self.compile() def _build_residual_block(self, x): in_x = x x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x) x = BatchNormalization(axis=1)(x) x = Activation("relu")(x) x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x) x = BatchNormalization(axis=1)(x) x = Add()([in_x, x]) x = Activation("relu")(x) return x def compile(self): losses = [AIPlayer.objective_function_for_policy, AIPlayer.objective_function_for_value] self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9), loss=losses) def update_lr(self, lr): K.set_value(self.network.optimizer.lr, lr) @staticmethod def objective_function_for_policy(y_true, y_pred): # can use categorical_crossentropy?? 
return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1) @staticmethod def objective_function_for_value(y_true, y_pred): return mean_squared_error(y_true, y_pred) def update_buffer(self, winner): if self.train: while len(self.temp_state) > 0: t = self.temp_state.pop() self.buffer.add((t[0], t[1], winner)) def train_batches(self, batch_size, batches=-1, verbose=2): if batches == -1: s_buffer = np.array([_[0] for _ in self.buffer.buffer]) p_buffer = np.array([_[1] for _ in self.buffer.buffer]) v_buffer = np.array([_[2] for _ in self.buffer.buffer]) else: sample_size = batch_size*batches sample = [] while sample_size > 0: sample += self.buffer.sample(sample_size) sample_size -= self.buffer.size() s_buffer = np.array([_[0] for _ in sample]) p_buffer = np.array([_[1] for _ in sample]) v_buffer = np.array([_[2] for _ in sample]) history = self.network.fit(s_buffer, [p_buffer, v_buffer], batch_size=batch_size, epochs=1, verbose=verbose) return history def preprocess_input(self, board, side): state = np.zeros((3, 8, 8), dtype=np.int) for i in range(8): for j in range(8): if board[i,j] == 1: state[0,i,j] = 1 elif board[i,j] == -1: state[1,i,j] = 1 if side == 1: state[2,i,j] = 1 return state def evaluate(self, game, side): current_input = self.preprocess_input(game.board, side) pred = self.network.predict(current_input[np.newaxis,:]) return pred[1][0] def pick_move(self, game, side): possible_moves = game.possible_moves(side) if len(possible_moves) == 0: possible_moves.append((-1,-1)) monte_prob = self.monte_carlo(game, side) if self.train: self.temp_state.append((self.preprocess_input(game.board, side), np.divide(monte_prob, np.sum(monte_prob)))) monte_prob = np.float_power(monte_prob, 1/self.tau) monte_prob = np.divide(monte_prob, np.sum(monte_prob)) r = random() for i, move in enumerate(possible_moves): r -= monte_prob[Othello.move_id(move)] if r <= 0: return move return possible_moves[-1] def monte_carlo(self, game, side): N = defaultdict(lambda: 0) W = defaultdict(lambda: 0) Q = defaultdict(lambda: 0) P = defaultdict(lambda: 0) possible_moves = game.possible_moves(side) if len(possible_moves) == 0: policy = np.zeros((65)) policy[64] = 1 return policy elif len(possible_moves) == 1: policy = np.zeros((65)) policy[Othello.move_id(possible_moves[0])] = 1 return policy current_input = self.preprocess_input(game.board, side) sid = Othello.state_id(game.board) pred = self.network.predict(current_input[np.newaxis,:]) policy = pred[0][0] total = 1e-10 for i, move in enumerate(possible_moves): total += policy[Othello.move_id(move)] for move in possible_moves: P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total for i in range(self.sim_count): #print("Sim #%d"% i) clone = deepcopy(game) current_side = side visited = deque() while True: possible_moves = clone.possible_moves(current_side) if len(possible_moves) == 0: possible_moves.append((-1,-1)) best_move = None best_move_value = -2 sid = Othello.state_id(clone.board) for move in possible_moves: mid = Othello.move_id(move) qu_val = Q[(sid, mid)] + P[(sid, mid)]/(N[(sid, mid)]+1) if qu_val > best_move_value: best_move_value = qu_val best_move = move #print(best_move) if N[(sid, Othello.move_id(best_move))] == 0: visited.append((sid, Othello.move_id(best_move))) clone.play_move(best_move[0], best_move[1], current_side) current_side *= -1 if clone.game_over(): for node in visited: N[node] += 1 W[node] += clone.get_winner()*side Q[node] = W[node]/N[node] break current_input = self.preprocess_input(clone.board, current_side) sid = 
Othello.state_id(clone.board) pred = self.network.predict(current_input[np.newaxis,:]) policy = pred[0][0] value = pred[1][0] possible_moves = clone.possible_moves(current_side) if len(possible_moves) == 0: possible_moves.append((-1,-1)) total = 1e-10 for i, move in enumerate(possible_moves): total += policy[Othello.move_id(move)] for move in possible_moves: P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total for node in visited: N[node] += 1 W[node] += value*side Q[node] = W[node]/N[node] #print() break else: visited.append((sid, Othello.move_id(best_move))) clone.play_move(best_move[0], best_move[1], current_side) current_side *= -1 if clone.game_over(): for node in visited: N[node] += 1 W[node] += clone.get_winner()*side Q[node] = W[node]/N[node] break policy = np.zeros((65)) possible_moves = game.possible_moves(side) sid = Othello.state_id(game.board) for move in possible_moves: mid = Othello.move_id(move) policy[mid] = N[(sid,mid)] return policy
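# --- Hedged note (editor's addition) on the tree policy in monte_carlo() ---
# Moves are ranked with a PUCT-like score, Q + P / (N + 1): an unvisited move
# (N == 0) is ordered mainly by the network prior P, while a well-explored
# move converges to its empirical value Q. A tiny standalone check:
from collections import defaultdict

def selection_score(Q, P, N, sid, mid):
    return Q[(sid, mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)

Q, P, N = defaultdict(float), defaultdict(float), defaultdict(int)
P[('s0', 3)], P[('s0', 7)] = 0.6, 0.4
N[('s0', 3)] = 5  # move 3 explored five times, move 7 never
assert selection_score(Q, P, N, 's0', 7) > selection_score(Q, P, N, 's0', 3)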
class AdditionNPIModel(NPIStep): model = None f_enc = None def __init__(self, system: RuntimeSystem, model_path: str = None, program_set: AdditionProgramSet = None): self.system = system self.model_path = model_path self.program_set = program_set self.batch_size = 1 self.build() self.weight_loaded = False self.load_weights() def build(self): enc_size = self.size_of_env_observation() argument_size = IntegerArguments.size_of_arguments input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc') input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg') input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1, batch_input_shape=(self.batch_size, 1)) f_enc = Sequential(name='f_enc') f_enc.add(Merge([input_enc, input_arg], mode='concat')) f_enc.add(MaxoutDense(128, nb_feature=4)) self.f_enc = f_enc program_embedding = Sequential(name='program_embedding') program_embedding.add(input_prg) f_enc_convert = Sequential(name='f_enc_convert') f_enc_convert.add(f_enc) f_enc_convert.add(RepeatVector(1)) f_lstm = Sequential(name='f_lstm') f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat')) f_lstm.add( LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001))) f_lstm.add(Activation('relu', name='relu_lstm_1')) f_lstm.add(RepeatVector(1)) f_lstm.add( LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001))) f_lstm.add(Activation('relu', name='relu_lstm_2')) # plot(f_lstm, to_file='f_lstm.png', show_shapes=True) f_end = Sequential(name='f_end') f_end.add(f_lstm) f_end.add(Dense(1, W_regularizer=l2(0.001))) f_end.add(Activation('sigmoid', name='sigmoid_end')) f_prog = Sequential(name='f_prog') f_prog.add(f_lstm) f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu")) f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001))) f_prog.add(Activation('softmax', name='softmax_prog')) # plot(f_prog, to_file='f_prog.png', show_shapes=True) f_args = [] for ai in range(1, IntegerArguments.max_arg_num + 1): f_arg = Sequential(name='f_arg%s' % ai) f_arg.add(f_lstm) f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001))) f_arg.add(Activation('softmax', name='softmax_arg%s' % ai)) f_args.append(f_arg) # plot(f_arg, to_file='f_arg.png', show_shapes=True) self.model = Model([input_enc.input, input_arg.input, input_prg.input], [f_end.output, f_prog.output] + [fa.output for fa in f_args], name="npi") self.compile_model() plot(self.model, to_file='model.png', show_shapes=True) def reset(self): super(AdditionNPIModel, self).reset() for l in self.model.layers: if type(l) is LSTM: l.reset_states() def compile_model(self, lr=0.0001, arg_weight=1.): arg_num = IntegerArguments.max_arg_num optimizer = Adam(lr=lr) loss = ['binary_crossentropy', 'categorical_crossentropy' ] + ['categorical_crossentropy'] * arg_num self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num) def fit(self, steps_list, epoch=3000): # Filter out some of the questions def filter_question(condition_func): sub_steps_list = [] for steps_dict in steps_list: question = steps_dict['q'] if condition_func(question['in1'], question['in2']): sub_steps_list.append(steps_dict) return sub_steps_list if not self.weight_loaded: self.train_f_enc( filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), epoch=100) self.f_enc.trainable = False self.update_learning_rate(0.0001) q_type = "training questions of a<100 and b<100" print(q_type) pr = 0.8 all_ok = self.fit_to_subset( 
filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) while True: if self.test_and_learn([10, 100, 1000]): break q_type = "training questions of ALL" print(q_type) q_num = 100 skip_correct = False pr = 1.0 questions = filter_question(lambda a, b: True) np.random.shuffle(questions) questions = questions[:q_num] all_ok = self.fit_to_subset(questions, pass_rate=pr, skip_correct=skip_correct) print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False): for i in range(10): all_ok = self.do_learn(steps_list, 100, pass_rate=pass_rate, skip_correct=skip_correct) if all_ok: return True return False def test_and_learn(self, num_questions): for num in num_questions: print("test all type of %d questions" % num) cc, wc, wrong_questions = self.test_to_subset( create_random_questions(num)) acc_rate = cc / (cc + wc) print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc)) if wc > 0: self.fit_to_subset(wrong_questions, pass_rate=1.0, skip_correct=False) return False return True def test_to_subset(self, questions): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) teacher = AdditionTeacher(self.program_set) npi_runner = TerminalNPIRunner(None, self) teacher_runner = TerminalNPIRunner(None, teacher) correct_count = wrong_count = 0 wrong_steps_list = [] for idx, question in enumerate(questions): question = copy(question) if self.question_test(addition_env, npi_runner, question): correct_count += 1 else: self.question_test(addition_env, teacher_runner, question) wrong_steps_list.append({ "q": question, "steps": teacher_runner.step_list }) wrong_count += 1 return correct_count, wrong_count, wrong_steps_list @staticmethod def dict_to_str(d): return str(tuple([(k, d[k]) for k in sorted(d)])) def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) npi_runner = TerminalNPIRunner(None, self) last_weights = None correct_count = Counter() no_change_count = 0 last_loss = 1000 for ep in range(1, epoch + 1): correct_new = wrong_new = 0 losses = [] ok_rate = [] np.random.shuffle(steps_list) for idx, steps_dict in enumerate(steps_list): question = copy(steps_dict['q']) question_key = self.dict_to_str(question) if self.question_test(addition_env, npi_runner, question): if correct_count[question_key] == 0: correct_new += 1 correct_count[question_key] += 1 print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) ok_rate.append(1) cc = correct_count[question_key] if skip_correct or int(math.sqrt(cc))**2 != cc: continue else: ok_rate.append(0) if correct_count[question_key] > 0: print( "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) correct_count[question_key] = 0 wrong_new += 1 steps = steps_dict['steps'] xs = [] ys = [] ws = [] for step in steps: xs.append(self.convert_input(step.input)) y, w = self.convert_output(step.output) ys.append(y) ws.append(w) self.reset() for i, (x, y, w) in enumerate(zip(xs, ys, ws)): loss = self.model.train_on_batch(x, y, sample_weight=w) if not np.isfinite(loss): print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)])) self.print_weights(last_weights, detail=True) raise RuntimeError("Loss is not finite!") losses.append(loss) last_weights = self.model.get_weights() if losses: cur_loss = np.average(losses) print( "ep=%2d: 
ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % (ep, np.average(ok_rate) * 100, correct_new, wrong_new, cur_loss, len(steps_list))) # self.print_weights() if correct_new + wrong_new == 0: no_change_count += 1 else: no_change_count = 0 if math.fabs(1 - cur_loss / last_loss) < 0.001 and no_change_count > 5: print( "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:" ) return False last_loss = cur_loss print("=" * 80) self.save() if np.average(ok_rate) >= pass_rate: return True return False def update_learning_rate(self, learning_rate, arg_weight=1.): print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight)) self.compile_model(learning_rate, arg_weight=arg_weight) def train_f_enc(self, steps_list, epoch=50): print("training f_enc") f_add0 = Sequential(name='f_add0') f_add0.add(self.f_enc) f_add0.add(Dense(FIELD_DEPTH)) f_add0.add(Activation('softmax', name='softmax_add0')) f_add1 = Sequential(name='f_add1') f_add1.add(self.f_enc) f_add1.add(Dense(FIELD_DEPTH)) f_add1.add(Activation('softmax', name='softmax_add1')) env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model") env_model.compile(optimizer='adam', loss=['categorical_crossentropy'] * 2) for ep in range(epoch): losses = [] for idx, steps_dict in enumerate(steps_list): prev = None for step in steps_dict['steps']: x = self.convert_input(step.input)[:2] env_values = step.input.env.reshape((4, -1)) in1 = np.clip(env_values[0].argmax() - 1, 0, 9) in2 = np.clip(env_values[1].argmax() - 1, 0, 9) carry = np.clip(env_values[2].argmax() - 1, 0, 9) y_num = in1 + in2 + carry now = (in1, in2, carry) if prev == now: continue prev = now y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH) y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH) y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]] loss = env_model.train_on_batch(x, y) losses.append(loss) print("ep %3d: loss=%s" % (ep, np.average(losses))) if np.average(losses) < 1e-06: break def question_test(self, addition_env, npi_runner, question): addition_env.reset() self.reset() try: run_npi(addition_env, npi_runner, self.program_set.ADD, question) if question['correct']: return True except StopIteration: pass return False def convert_input(self, p_in: StepInput): x_pg = np.array((p_in.program.program_id, )) x = [ xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg) ] return x def convert_output(self, p_out: StepOutput): y = [np.array((p_out.r, ))] weights = [[1.]] if p_out.program: arg_values = p_out.arguments.values arg_num = len(p_out.program.args or []) y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)] weights += [[1.]] else: arg_values = IntegerArguments().values arg_num = 0 y += [np.zeros((PROGRAM_VEC_SIZE, ))] weights += [[1e-10]] for v in arg_values: # split by each args y += [v] weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num) weights = [np.array(w) for w in weights] return [yy.reshape((self.batch_size, -1)) for yy in y], weights def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput: x = self.convert_input(StepInput(env_observation, pg, arguments)) results = self.model.predict( x, batch_size=1) # if batch_size==1, returns single row r, pg_one_hot, arg_values = results[0], results[1], results[2:] program = self.program_set.get(pg_one_hot.argmax()) ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values))) return ret def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): 
if os.path.exists(self.model_path): self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) @staticmethod def size_of_env_observation(): return FIELD_ROW * FIELD_DEPTH
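# --- Hedged usage sketch (editor's addition) ---
# How AdditionNPIModel is typically driven; RuntimeSystem, AdditionProgramSet
# and the steps_list of {'q': question, 'steps': [...]} training traces come
# from the surrounding NPI project and are assumed here, not defined.
system = RuntimeSystem()
program_set = AdditionProgramSet()
npi_model = AdditionNPIModel(system, model_path='addition_npi_weights.h5',
                             program_set=program_set)
# fit() first pre-trains f_enc on two-digit questions, freezes it, then
# alternates fit_to_subset() and test_and_learn() until all tests pass.
npi_model.fit(steps_list)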
class FinancialTimeSeriesAnalysisModel(object): model = None def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None): self.model_path = model_path self.batch_size = batch_size self.size_of_input_data_dim = dim_data self.size_of_input_timesteps = nb_time_step self.build() self.weight_loaded = False if model_path is not None: self.load_weights() def build(self): dim_data = self.size_of_input_data_dim nb_time_step = self.size_of_input_timesteps financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1') lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer1') lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss1') lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss2') lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss3') lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss4') lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout, W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh', return_sequences=True, name='lstm_layer2_loss5') h1 = lstm_layer_1(financial_time_series_input) h21 = lstm_layer_21(h1) h22 = lstm_layer_22(h1) h23 = lstm_layer_23(h1) h24 = lstm_layer_24(h1) h25 = lstm_layer_25(h1) time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21) # custom 1 time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22) # custom 2 time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23) # mse time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24) # logloss time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25) # cross self.model = Model(input=financial_time_series_input, output=[time_series_predictions1, time_series_predictions2, time_series_predictions3, time_series_predictions4, time_series_predictions5], name="multi-task deep rnn for financial time series forecasting") plot(self.model, to_file='model.png') def reset(self): for l in self.model.layers: if type(l) is LSTM: l.reset_states() def compile_model(self, lr=0.0001, arg_weight=1.): optimizer = Adam(lr=lr) loss = [custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy'] self.model.compile(optimizer=optimizer, loss=loss) def fit_model(self, X, y, y_label, epoch=300): early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0) self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2, shuffle=True, callbacks=[early_stopping]) def save(self): self.model.save_weights(self.model_path, overwrite=True) def load_weights(self): if os.path.exists(self.model_path): 
self.model.load_weights(self.model_path) self.weight_loaded = True def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) if detail: for w in weights: print("%s: %s" % (w.shape, w)) def model_eval(self, X, y): y_hat = self.model.predict(X, batch_size=1)[0] count_true = 0 count_all = y.shape[1] for i in range(y.shape[1]): count_true = count_true + 1 if y[0,i,0]*y_hat[0,i,0]>0 else count_true print(y[0,i,0],y_hat[0,i,0]) print(count_all,count_true)
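# --- Hedged usage sketch (editor's addition) ---
# Driving the multi-task model above; names and shapes are assumptions. X is
# (n_samples, nb_time_step, dim_data); y holds next-step returns of shape
# (n_samples, nb_time_step, 1); y_label is one-hot with nb_labels classes.
model = FinancialTimeSeriesAnalysisModel(nb_time_step=50, dim_data=4,
                                         model_path='fts_weights.h5')
model.compile_model(lr=0.0001)
model.fit_model(X_train, y_train, y_label_train, epoch=300)
model.save()
# model_eval() reports how often the sign of the first prediction head
# matches the sign of the realized return.
model.model_eval(X_test, y_test)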
x = Flatten()(x) x = Dense(512, activation=None)(x) x = BatchNormalization()(x) x = advanced_activations.LeakyReLU(alpha=0.1)(x) logits = Dense(num_classes, activation=None)(x) output = Activation('softmax')(logits) opt = keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08) model = Model(input_layer, output) model.summary() plot_model(model, show_shapes=True, to_file='teacher_model.png') model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) print('Using real-time data augmentation.') datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening zca_epsilon=1e-06, # epsilon for ZCA whitening rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180) # randomly shift images horizontally (fraction of total width) width_shift_range=0.1, # randomly shift images vertically (fraction of total height)
def test_model_methods(): a = Input(shape=(3, ), name='input_a') b = Input(shape=(3, ), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) # training/testing doesn't work before compiling. with pytest.raises(RuntimeError): model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) # test train_on_batch out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.train_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np]) out = model.train_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }) # test fit out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], epochs=1, batch_size=4) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }, epochs=1, batch_size=4) # test validation_split out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5) # test validation data out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np])) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], epochs=1, batch_size=4, validation_split=0.5, validation_data=({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np])) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }, epochs=1, batch_size=4, validation_split=0.5, validation_data=({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np })) # test_on_batch out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.test_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np]) out = model.test_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }) # predict_on_batch out = model.predict_on_batch([input_a_np, input_b_np]) out = model.predict_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }) # predict, evaluate input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # with sample_weight input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) sample_weight = [None, np.random.random((10, ))] out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) out = 
model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) # test accuracy metric model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 # this should also work model.compile(optimizer, loss, metrics={'dense_1': 'acc'}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # and this as well model.compile(optimizer, loss, metrics={'dense_1': ['acc']}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # test starting from non-zero initial epoch trained_epochs = [] trained_batches = [] # define tracer callback def on_epoch_begin(epoch, logs): trained_epochs.append(epoch) def on_batch_begin(batch, logs): trained_batches.append(batch) tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin, on_batch_begin=on_batch_begin) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=5, batch_size=4, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test starting from non-zero initial epoch for generator too trained_epochs = [] def gen_data(batch_sz): while True: yield ([ np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3)) ], [ np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3)) ]) out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test with a custom metric function def mse(y_true, y_pred): return K.mean(K.pow(y_true - y_pred, 2)) model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out_len = 1 + 2 * (1 + 1) # total loss + 2 outputs * (loss + metric) assert len(out) == out_len out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == out_len input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # empty batch with pytest.raises(ValueError): def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.evaluate_generator(gen_data(), steps=1) # x is not a list of numpy arrays. with pytest.raises(ValueError): out = model.predict([None]) # x does not match _feed_input_names. with pytest.raises(ValueError): out = model.predict([input_a_np, None, input_b_np]) with pytest.raises(ValueError): out = model.predict([None, input_a_np, input_b_np]) # all input/output/weight arrays should have the same number of samples. 
with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np[:2]], [output_a_np, output_b_np], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np[:2]], sample_weight=sample_weight) with pytest.raises(ValueError): out = model.train_on_batch( [input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[sample_weight[1], sample_weight[1][:2]]) # `sample_weight` is neither a dict nor a list. with pytest.raises(TypeError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=tuple(sample_weight)) # `validation_data` is neither a tuple nor a triple. with pytest.raises(ValueError): out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], epochs=1, batch_size=4, validation_data=([input_a_np, input_b_np], )) # `loss` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss=['mse', 'mae', 'mape']) # `loss_weights` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5}) # `loss_weights` does not match outputs. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', loss_weights=[0.5]) # `loss_weights` is invalid type. with pytest.raises(TypeError): model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5)) # `sample_weight_mode` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'}) # `sample_weight_mode` does not match output_names. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode=['temporal']) # `sample_weight_mode` matches output_names partially. with pytest.raises(ValueError): model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'}) # `loss` does not exist. with pytest.raises(ValueError): model.compile(optimizer, loss=[]) model.compile(optimizer, loss=['mse', 'mae']) model.compile(optimizer, loss='mse', loss_weights={ 'dense_1': 0.2, 'dropout': 0.8 }) model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8]) # the rank of weight arrays should be 1. with pytest.raises(ValueError): out = model.train_on_batch( [input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=[None, np.random.random((10, 20, 30))]) model.compile(optimizer, loss='mse', sample_weight_mode={ 'dense_1': None, 'dropout': 'temporal' }) model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal']) # the rank of output arrays should be at least 3D. 
with pytest.raises(ValueError): out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) trained_epochs = [] trained_batches = [] out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3, epochs=5, initial_epoch=0, validation_data=RandomSequence(4), validation_steps=3, callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(3)) * 5 # steps_per_epoch will be equal to len of sequence if it's unspecified trained_epochs = [] trained_batches = [] out = model.fit_generator(generator=RandomSequence(3), epochs=5, initial_epoch=0, validation_data=RandomSequence(4), callbacks=[tracker_cb]) assert trained_epochs == [0, 1, 2, 3, 4] assert trained_batches == list(range(12)) * 5 # fit_generator will throw an exception if steps is unspecified for regular generator with pytest.raises(ValueError): def gen_data(): while True: yield (np.asarray([]), np.asarray([])) out = model.fit_generator(generator=gen_data(), epochs=5, initial_epoch=0, validation_data=gen_data(), callbacks=[tracker_cb]) # predict_generator output shape behavior should be consistent def expected_shape(batch_size, n_batches): return (batch_size * n_batches, 4), (batch_size * n_batches, 3) # Multiple outputs and one step. batch_size = 5 sequence_length = 1 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Multiple outputs and multiple steps. batch_size = 5 sequence_length = 2 shape_0, shape_1 = expected_shape(batch_size, sequence_length) out = model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 # Create a model with a single output. single_output_model = Model([a, b], a_2) single_output_model.compile(optimizer, loss, metrics=[], sample_weight_mode=None) # Single output and one step. batch_size = 5 sequence_length = 1 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0 # Single output and multiple steps. batch_size = 5 sequence_length = 2 shape_0, _ = expected_shape(batch_size, sequence_length) out = single_output_model.predict_generator( RandomSequence(batch_size, sequence_length=sequence_length)) assert np.shape(out) == shape_0
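# --- Hedged sketch (editor's addition) ---
# The tests above use a RandomSequence helper defined elsewhere in the suite.
# A minimal keras.utils.Sequence consistent with how it is used here (two
# 3-dim inputs, 4-dim and 3-dim targets, default length 12 so that
# trained_batches == list(range(12)) * 5 holds) might look like this; the
# real helper may differ.
import numpy as np
from keras.utils import Sequence

class RandomSequence(Sequence):
    def __init__(self, batch_size, sequence_length=12):
        self.batch_size = batch_size
        self.sequence_length = sequence_length

    def __len__(self):
        # Batches per epoch; fit_generator falls back to this when
        # steps_per_epoch is left unspecified.
        return self.sequence_length

    def __getitem__(self, idx):
        b = self.batch_size
        return ([np.random.random((b, 3)), np.random.random((b, 3))],
                [np.random.random((b, 4)), np.random.random((b, 3))])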
def test_model_methods(): a = Input(shape=(3, ), name='input_a') b = Input(shape=(3, ), name='input_b') a_2 = Dense(4, name='dense_1')(a) dp = Dropout(0.5, name='dropout') b_2 = dp(b) model = Model([a, b], [a_2, b_2]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, sample_weight_mode=None) input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) # test train_on_batch out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.train_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np]) out = model.train_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }) # test fit out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], nb_epoch=1, batch_size=4) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], nb_epoch=1, batch_size=4) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }, nb_epoch=1, batch_size=4) # test validation_split out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], nb_epoch=1, batch_size=4, validation_split=0.5) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], nb_epoch=1, batch_size=4, validation_split=0.5) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }, nb_epoch=1, batch_size=4, validation_split=0.5) # test validation data out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], nb_epoch=1, batch_size=4, validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np])) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np], nb_epoch=1, batch_size=4, validation_split=0.5, validation_data=({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np])) out = model.fit({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }, nb_epoch=1, batch_size=4, validation_split=0.5, validation_data=({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np })) # test_on_batch out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out = model.test_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, [output_a_np, output_b_np]) out = model.test_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }, { 'dense_1': output_a_np, 'dropout': output_b_np }) # predict_on_batch out = model.predict_on_batch([input_a_np, input_b_np]) out = model.predict_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np }) # predict, evaluate input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4) # with sample_weight input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) sample_weight = [None, np.random.random((10, ))] out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) out 
= model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np], sample_weight=sample_weight) # test accuracy metric model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 5 # this should also work model.compile(optimizer, loss, metrics={'dense_1': 'acc'}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # and this as well model.compile(optimizer, loss, metrics={'dense_1': ['acc']}, sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == 4 # test starting from non-zero initial epoch trained_epochs = [] def on_epoch_begin(epoch, logs): trained_epochs.append(epoch) tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], nb_epoch=5, batch_size=4, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test starting from non-zero initial epoch for generator too trained_epochs = [] def gen_data(batch_sz): while True: yield ([ np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3)) ], [ np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3)) ]) out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5, initial_epoch=2, callbacks=[tracker_cb]) assert trained_epochs == [2, 3, 4] # test with a custom metric function mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2)) def mse_powers(y_true, y_pred): m = mse(y_true, y_pred) return {'mse_squared': K.pow(m, 2), 'mse_cubed': K.pow(m, 3)} model.compile(optimizer, loss, metrics=[mse, mse_powers], sample_weight_mode=None) out = model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) out_len = 1 + 2 * 4 # total loss, per layer: loss + 3 metrics assert len(out) == out_len out = model.test_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np]) assert len(out) == out_len input_a_np = np.random.random((10, 3)) input_b_np = np.random.random((10, 3)) output_a_np = np.random.random((10, 4)) output_b_np = np.random.random((10, 3)) out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, nb_epoch=1) out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4) out = model.predict([input_a_np, input_b_np], batch_size=4)
def decoder_stochastic_wrn( label_sizes, nb_bits=12, data_shape=(1, 64, 64), activation=lambda: Activation('relu'), normalization=lambda: BatchNormalization(axis=1, mode=0), weight_init='he_normal', weight_decay=0.0001, dropout_probability=0., wrn_depth=58, wrn_k=2, death_rate=0.5, optimizer='adam'): def norm_act_block(): def f(inputs): x = normalization()(inputs) x = activation()(x) return x return f def conv2(nb_filter, nb_row, nb_col, subsample=(1, 1), bias=True): return Convolution2D(nb_filter, nb_row, nb_col, init=weight_init, border_mode='same', subsample=subsample, bias=bias, W_regularizer=l2(weight_decay)) def dropout(p=None): if p is None: p = dropout_probability def f(inputs): if p > 0.: inputs = Dropout(p)(inputs) return inputs return f def equal_gate_shape(input_shapes): assert (input_shapes[0] == input_shapes[1]) return input_shapes[1] def residual_block(nb_filter, stochastic=False, stochastic_layers=None): def f(inputs): x = norm_act_block()(inputs) x = conv2(nb_filter, 3, 3)(x) x = dropout()(x) x = norm_act_block()(x) x = conv2(nb_filter, 3, 3)(x) if inputs._keras_shape != x._keras_shape: inputs = conv2(nb_filter, 1, 1, bias=False)(inputs) if not stochastic: return merge((inputs, x), mode='sum') scale = ScaleInTestPhase(death_rate) x = scale(x) out = merge([inputs, x], mode="sum", output_shape=x._keras_shape[1:]) rs = RandomSwitch(death_rate) stochastic_layers.append((rs.death_rate, scale.death_rate)) return rs([out, inputs]) return f def residual_reduction_block(nb_filter): def f(inputs): x = norm_act_block()(inputs) x = conv2(nb_filter, 3, 3, subsample=(2, 2))(x) x = dropout()(x) x = norm_act_block()(x) x = conv2(nb_filter, 3, 3)(x) inputs_bottleneck = conv2(nb_filter, 1, 1, subsample=(2, 2), bias=False)(inputs) s = merge((inputs_bottleneck, x), mode='sum') return s return f def skip_connection(inputs, residual, stochastic=False, stochastic_layers=None): inputs_filters = inputs._keras_shape[1] residual_filters = residual._keras_shape[1] subsample = np.array(inputs._keras_shape[2:]) // np.array( residual._keras_shape[2:]) inputs = dropout()(inputs) if (inputs_filters != residual_filters) or np.any(subsample > 1): skip = conv2(residual_filters, 1, 1, subsample=subsample, bias=False)(inputs) else: skip = inputs if not stochastic: return merge((skip, residual), mode='sum') else: scale = ScaleInTestPhase(death_rate) skip = scale(skip) out = merge([skip, residual], mode="sum") rs = RandomSwitch(death_rate) stochastic_layers.append((rs.death_rate, scale.death_rate)) return rs([out, residual]) n = (wrn_depth - 4) // 6 stochastic_layers = [] input = Input(shape=data_shape) m_stem = conv2(16, 3, 3, subsample=(2, 2))(input) m_stem = norm_act_block()(m_stem) m_b1 = residual_block(nb_filter=16 * wrn_k, stochastic_layers=stochastic_layers)(m_stem) for _ in range(n - 1): m_b1 = residual_block(nb_filter=16 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b1) m_b1 = skip_connection(input, m_b1, stochastic=True, stochastic_layers=stochastic_layers) m_b2 = residual_reduction_block(nb_filter=32 * wrn_k)(m_b1) for _ in range(n - 1): m_b2 = residual_block(nb_filter=32 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b2) m_b2 = skip_connection(m_b1, m_b2, stochastic=True, stochastic_layers=stochastic_layers) m_b3 = residual_reduction_block(nb_filter=64 * wrn_k)(m_b2) for _ in range(n - 1): m_b3 = residual_block(nb_filter=64 * wrn_k, stochastic=True, stochastic_layers=stochastic_layers)(m_b3) m_b3 = skip_connection(m_b2, m_b3, stochastic=True, 
stochastic_layers=stochastic_layers) m_b3 = skip_connection(input, m_b3, stochastic=True, stochastic_layers=stochastic_layers) x = norm_act_block()(m_b3) x = AveragePooling2D(pool_size=(8, 8))(x) x = dropout()(x) for i, (tb, ts) in enumerate(stochastic_layers, start=0): K.set_value(tb, i / len(stochastic_layers) * death_rate) K.set_value(ts, i / len(stochastic_layers) * death_rate) outputs, losses = decoder_end_block(x, label_sizes, nb_bits, activation, weight_decay) model = Model(input, list(outputs.values())) model.compile( optimizer, loss=list(losses.values()), loss_weights={k: decoder_loss_weights(k) for k in losses.keys()}) return model
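# --- Hedged note (editor's addition) ---
# The K.set_value loop above implements the linearly increasing death-rate
# schedule from the stochastic-depth literature: block i of the L stochastic
# blocks is dropped with probability (i / L) * death_rate. Standalone ramp:
def death_rate_schedule(n_layers, final_rate=0.5):
    return [i / n_layers * final_rate for i in range(n_layers)]

assert death_rate_schedule(4) == [0.0, 0.125, 0.25, 0.375]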
class SeqGAN: def __init__(self, g, d, m, g_optimizer, d_optimizer): self.g = g self.d = d self.m = m self.z, self.seq_input = self.g.inputs self.fake_prob, = self.g.outputs with trainable(m, False): m_input = merge([self.seq_input, self.fake_prob], mode='concat', concat_axis=1) self.m_realness = self.m(m_input) self.model_fit_g = Model([self.z, self.seq_input], [self.m_realness]) self.model_fit_g.compile(g_optimizer, K.binary_crossentropy) self.d.compile(d_optimizer, loss=K.binary_crossentropy) def z_shape(self, batch_size=64): layer, _, _ = self.z._keras_history return (batch_size,) + layer.output_shape[1:] def sample_z(self, batch_size=64): shape = self.z_shape(batch_size) return np.random.uniform(-1, 1, shape) def generate(self, z, seq_input, batch_size=32): return self.g.predict([z, seq_input], batch_size=batch_size) def train_on_batch(self, seq_input, real, d_target=None): nb_real = len(real) nb_fake = len(seq_input) if d_target is None: d_target = np.concatenate([ np.zeros((nb_fake, 1)), np.ones((nb_real, 1)) ]) fake_prob = self.generate(self.sample_z(nb_fake), seq_input) fake = np.concatenate([seq_input, prob_to_sentence(fake_prob)], axis=1) fake_and_real = np.concatenate([fake, real], axis=0) d_loss = self.d.train_on_batch(fake_and_real, d_target) d_realness = self.d.predict(fake) m_loss = self.m.train_on_batch( np.concatenate([seq_input, fake_prob], axis=1), d_realness) g_loss = self.model_fit_g.train_on_batch([self.sample_z(nb_fake), seq_input], np.ones((nb_fake, 1))) return g_loss, d_loss, m_loss def fit_generator(self, generator, nb_epoch, nb_batches_per_epoch, callbacks=[], batch_size=None, verbose=False): if batch_size is None: batch_size = 2*len(next(generator)[0]) out_labels = ['g', 'd', 'm'] self.history = cbks.History() callbacks = [cbks.BaseLogger()] + callbacks + [self.history] if verbose: callbacks += [cbks.ProgbarLogger()] callbacks = cbks.CallbackList(callbacks) callbacks._set_model(self) callbacks._set_params({ 'nb_epoch': nb_epoch, 'nb_sample': nb_batches_per_epoch*batch_size, 'verbose': verbose, 'metrics': out_labels, }) callbacks.on_train_begin() for e in range(nb_epoch): callbacks.on_epoch_begin(e) for batch_index, (seq_input, real) in enumerate(generator): callbacks.on_batch_begin(batch_index) batch_logs = {} batch_logs['batch'] = batch_index batch_logs['size'] = len(real) + len(seq_input ) outs = self.train_on_batch(seq_input, real) for l, o in zip(out_labels, outs): batch_logs[l] = o callbacks.on_batch_end(batch_index, batch_logs) if batch_index + 1 == nb_batches_per_epoch: break callbacks.on_epoch_end(e) callbacks.on_train_end()
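# --- Hedged usage sketch (editor's addition) ---
# SeqGAN.fit_generator() expects a generator of (seq_input, real) pairs:
# seq_input seeds the generator, real are ground-truth sequences. The corpus
# array, the seed length and the already-built `seqgan` are assumptions.
import numpy as np

def seq_batches(corpus, seed_len, batch_size):
    while True:
        idx = np.random.randint(0, len(corpus), batch_size)
        batch = corpus[idx]
        yield batch[:, :seed_len], batch  # (seed prefix, full real sequence)

seqgan.fit_generator(seq_batches(corpus, seed_len=8, batch_size=32),
                     nb_epoch=10, nb_batches_per_epoch=100, verbose=True)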
def build_CNN_model(inputType, do_training=False, model_inputs=None,
                    loss_func='binary_crossentropy', optimize_proc='adam',
                    is_IntermediateModel=False, load_weight_path=None, **kwargs):
    """
    :param inputType: one of '1hotVector' or 'embeddingMatrix'.
    :param do_training: if True, train the model; requires `model_inputs`.
    :param model_inputs: dict with 'training' and 'dev' (inputs, targets) entries.
    :param loss_func: loss passed to `compile`.
    :param optimize_proc: optimizer name (currently unused; Adam is hardcoded below).
    :param is_IntermediateModel: if True, return the headless feature model.
    :param load_weight_path: optional path to weights to load before training.
    :param kwargs: unused.
    :return: dict with the compiled model and, if trained, its fit history.
    """
    # assert not do_training or model_inputs, "if do_training then must pass in model_inputs dictionary"

    EMBEDDING_TYPE = 'embeddingMatrix'
    ONEHOT_TYPE = '1hotVector'

    defined_input_types = {EMBEDDING_TYPE, ONEHOT_TYPE}
    assert inputType in defined_input_types, "unknown input type {0}".format(inputType)

    if inputType == ONEHOT_TYPE:  # '==', not 'is': identity checks on strings are unreliable
        review_input = Input(shape=(modelParameters.MaxLen_w,), dtype='float32',
                             name="ONEHOT_INPUT")

        layer = Embedding(modelParameters.VocabSize_w + modelParameters.INDEX_FROM,
                          embedding_dims,
                          embeddings_initializer=embedding_init,
                          embeddings_regularizer=embedding_reg,
                          input_length=modelParameters.MaxLen_w,
                          name='1hot_embeddingLayer')(review_input)

        layer = SpatialDropout1D(0.50)(layer)

    elif inputType == EMBEDDING_TYPE:
        review_input = Input(shape=(modelParameters.MaxLen_w, embedding_dims),
                             dtype="float32", name="EMBEDDING_INPUT")
        layer = review_input

    else:
        raise ValueError("Bad inputType arg to build_CNN_model")

    layer = Convolution1D(filters=num_filters1,
                          kernel_size=filter_length1,
                          padding=region,
                          strides=1,
                          activation=conv_activation1,
                          kernel_initializer='glorot_uniform',
                          bias_initializer='zeros',
                          kernel_regularizer=conv_reg1,
                          dilation_rate=1,
                          name='ConvLayer1')(layer)

    layer = SpatialDropout1D(0.50)(layer)

    layer = MaxPooling1D(pool_size=pool_len1)(layer)

    # layer = Convolution1D(filters=num_filters2,
    #                       kernel_size=filter_length2,
    #                       padding=region,
    #                       strides=1,
    #                       activation=conv_activation2,
    #                       kernel_initializer=conv_init2,
    #                       kernel_regularizer=conv_reg2,
    #                       dilation_rate=1,
    #                       name='ConvLayer2')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len2)(layer)

    # layer = Convolution1D(filters=num_filters3,
    #                       kernel_size=filter_length3,
    #                       padding=region,
    #                       activation=conv_activation3,
    #                       kernel_initializer=conv_init3,
    #                       kernel_regularizer=conv_reg3,
    #                       dilation_rate=1,
    #                       name='ConvLayer3')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len3)(layer)
    # layer = GlobalMaxPool1D()(layer)

    # layer = Convolution1D(filters=num_filters4,
    #                       kernel_size=filter_length4,
    #                       padding=region,
    #                       activation=conv_activation4,
    #                       kernel_initializer=conv_init4,
    #                       kernel_regularizer=conv_reg4,
    #                       dilation_rate=1,
    #                       name='ConvLayer4')(layer)
    #
    # layer = leaky_relu(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len4)(layer)
    # layer = GlobalMaxPool1D()(layer)
    #
    # layer = BatchNormalization()(layer)

    layer = Flatten()(layer)

    layer = Dense(dense_dims0, activation=dense_activation0, kernel_regularizer=dense_reg0,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense0')(layer)

    layer = Dropout(0.50)(layer)

    layer = Dense(dense_dims1, activation=dense_activation1, kernel_regularizer=dense_reg1,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense1')(layer)

    layer = Dropout(0.50)(layer)

    # layer = Dense(dense_dims2, activation=dense_activation2, kernel_regularizer=dense_reg2,
    #               kernel_initializer=dense_init2,
    #               name='dense2')(layer)
    #
    # layer = Dropout(0.50)(layer)

    # layer = Dense(dense_dims3, activation=dense_activation3, kernel_regularizer=dense_reg3,
    #               kernel_initializer=dense_init3,
    #               name='dense3_outA')(layer)
    #
    # layer = leaky_relu(layer)

    if is_IntermediateModel:
        return Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    # layer = Dropout(0.5)(layer)

    layer = Dense(dense_dims_final, activation=dense_activation_final,
                  kernel_initializer=dense_init_final,
                  kernel_regularizer=dense_reg0,
                  name='output_Full')(layer)

    CNN_model = Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    CNN_model.compile(optimizer=Adam(lr=0.001, decay=0.0), loss=loss_func,
                      metrics=[binary_accuracy])

    if load_weight_path is not None:
        CNN_model.load_weights(load_weight_path)

    hist = ""
    if do_training:
        weightPath = os.path.join(modelParameters.WEIGHT_PATH, filename)
        configPath = os.path.join(modelParameters.WEIGHT_PATH, filename_config)

        with open(configPath + ".json", 'w') as f:  # 'w', not 'wb': to_json() returns str
            f.write(CNN_model.to_json())

        checkpoint = ModelCheckpoint(weightPath + '_W.{epoch:02d}-{val_loss:.4f}.hdf5',
                                     verbose=1, save_best_only=True,
                                     save_weights_only=False, monitor='val_loss')

        earlyStop = EarlyStopping(patience=3, verbose=1, monitor='val_loss')

        LRadjuster = ReduceLROnPlateau(monitor='val_loss', factor=0.30, patience=0,
                                       verbose=1, cooldown=1, min_lr=0.00001,
                                       epsilon=1e-2)

        call_backs = [checkpoint, earlyStop, LRadjuster]

        CNN_model.summary()

        hist = CNN_model.fit(*model_inputs['training'], batch_size=batch_size,
                             epochs=nb_epoch, verbose=1,
                             validation_data=model_inputs['dev'],
                             callbacks=call_backs)

    return {"model": CNN_model, "hist": hist}
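
# A hedged usage sketch for build_CNN_model (illustrative only): the function
# depends on module-level hyperparameters (num_filters1, dense_dims0, ...) and
# the modelParameters module defined elsewhere in this project, so the calls
# are shown commented out rather than as runnable code.
#
# feature_extractor = build_CNN_model('1hotVector', is_IntermediateModel=True)
#
# model_inputs = {'training': (X_train, y_train), 'dev': (X_dev, y_dev)}
# result = build_CNN_model('1hotVector', do_training=True,
#                          model_inputs=model_inputs)
# trained_model, history = result['model'], result['hist']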
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
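
# A minimal, self-contained sketch of the pattern exercised above, assuming
# the same Keras 2 / TF1 environment as these tests: when a Keras Input wraps
# an existing tensor, no feed data is needed for it, so x can be None and
# predict takes `steps` instead of a batch size.
def demo_input_feed_tensor():
    import numpy as np
    import tensorflow as tf
    from keras.layers import Input, Dense
    from keras.models import Model

    data = np.random.random((10, 3))
    x = Input(tensor=tf.Variable(data, dtype=tf.float32))  # fed by the variable itself
    y = Dense(4)(x)
    model = Model(x, y)
    model.compile('rmsprop', 'mse')
    model.fit(None, np.random.random((10, 4)), epochs=1, batch_size=10)
    return model.predict(None, steps=1)  # shape (10, 4); no x data is passed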
numerical_inputs = Input(shape=(11,), name='num')
numerical_logits = numerical_inputs
numerical_logits = BatchNormalization()(numerical_logits)

numerical_logits = Dense(128, activation='relu')(numerical_logits)
numerical_logits = Dropout(0.5)(numerical_logits)
numerical_logits = BatchNormalization()(numerical_logits)

numerical_logits = Dense(128, activation='relu')(numerical_logits)
numerical_logits = Dense(64, activation='relu')(numerical_logits)

logits = Concatenate()([numerical_logits, categorical_logits])
logits = Dense(64, activation='relu')(logits)
out = Dense(1, activation='sigmoid')(logits)

model = Model(inputs=categorical_inputs + [numerical_inputs], outputs=out)
model.compile(optimizer='adam', loss=binary_crossentropy)


# In[ ]:


def get_input(market_train, indices):
    X_num = market_train.loc[indices, num_cols].values
    X = {'num': X_num}
    for cat in cat_cols:
        X[cat] = market_train.loc[indices, cat].values  # select the single column `cat`, not all of cat_cols
    y = (market_train.loc[indices, 'returnsOpenNextMktres10'] >= 0).values
    r = market_train.loc[indices, 'returnsOpenNextMktres10'].values
    u = market_train.loc[indices, 'universe']
    d = market_train.loc[indices, 'time'].dt.date
    return X, y, r, u, d
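
# Illustrative wiring for get_input (hypothetical names, shown commented out):
# the returned X is a dict keyed by the Input names ('num' plus one key per
# categorical column), which is the form a multi-input Keras model accepts
# directly. market_train, num_cols and cat_cols come from the surrounding
# kernel; train_test_split is sklearn's.
#
# train_indices, val_indices = train_test_split(market_train.index, test_size=0.2)
# X_train, y_train, r_train, u_train, d_train = get_input(market_train, train_indices)
# X_valid, y_valid, r_valid, u_valid, d_valid = get_input(market_train, val_indices)
# model.fit(X_train, y_train.astype(int),
#           validation_data=(X_valid, y_valid.astype(int)),
#           epochs=2, batch_size=32)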
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None, epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
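
# A compact, runnable restatement of the external-loss idea tested above:
# with loss=None at compile time, training is driven entirely by losses
# attached via add_loss, so no targets are passed.
def demo_external_loss():
    import numpy as np
    from keras import backend as K
    from keras.layers import Input, Dense
    from keras.models import Model

    x = Input(shape=(3,))
    h = Dense(4)(x)
    model = Model(x, h)
    model.add_loss(K.mean(K.square(h)))  # activity penalty as the only loss
    model.compile('rmsprop', loss=None)
    return model.train_on_batch(np.random.random((8, 3)), None)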
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
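
# Sketch of how the len(out) assertions above line up with names: the flat
# list returned by train_on_batch/test_on_batch is ordered as
# [total_loss, per-output losses..., per-output metrics...], and
# model.metrics_names labels each slot, which is why metrics=['acc'] on a
# two-output model yields 5 entries.
def demo_metrics_names():
    import numpy as np
    from keras.layers import Input, Dense
    from keras.models import Model

    inp = Input(shape=(3,))
    m = Model(inp, [Dense(4, name='out_a')(inp), Dense(2, name='out_b')(inp)])
    m.compile('rmsprop', 'mse', metrics=['acc'])
    res = m.train_on_batch(np.random.random((8, 3)),
                           [np.random.random((8, 4)), np.random.random((8, 2))])
    return dict(zip(m.metrics_names, res))  # 5 entries, matching len(out) == 5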
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    input_a_df = pd.DataFrame(input_a_np)
    input_b_df = pd.DataFrame(input_b_np)

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    output_a_df = pd.DataFrame(output_a_np)
    output_b_df = pd.DataFrame(output_b_np)

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.train_on_batch([input_a_df, input_b_df],
                               [output_a_df, output_b_df])

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)
    out = model.fit([input_a_df, input_b_df],
                    [output_a_df, output_b_df], epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.test_on_batch([input_a_df, input_b_df],
                              [output_a_df, output_b_df])

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
    out = model.predict_on_batch([input_a_df, input_b_df])

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    out = model.predict([input_a_df, input_b_df], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1],
                                                  sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse',
                  loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None,
                                                  np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse',
                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=12,
                              epochs=5, initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=12, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
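
# The tests above use a RandomSequence helper defined elsewhere in the test
# suite; a plausible minimal sketch of such a keras.utils.Sequence, producing
# random two-input/two-output batches shaped for the model above, could be:
from keras.utils import Sequence  # assumed import for this sketch


class RandomSequenceSketch(Sequence):
    def __init__(self, batch_size, sequence_length=12):
        self.batch_size = batch_size
        self.sequence_length = sequence_length

    def __len__(self):
        # number of batches per epoch
        return self.sequence_length

    def __getitem__(self, idx):
        # each item is a full (inputs, targets) batch
        return ([np.random.random((self.batch_size, 3)),
                 np.random.random((self.batch_size, 3))],
                [np.random.random((self.batch_size, 4)),
                 np.random.random((self.batch_size, 3))])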
class AE(Model):
    """
    Autoencoder. This is a simple autoencoder consisting of an encoder and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = AE(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)
    """

    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading.
        if encoder is not None and decoder is not None and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[1:], \
            str(encoder.outputs[0].shape) + " " + str(decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        """
        Compiles the model.

        This is the same as compilation in Keras.
        """
        assert "reconstruction_loss" not in kwargs, "Not expected to use reconstruction_loss in AE."
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 target_tensors,  # forward instead of silently dropping it
                                 **kwargs)

    def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1,
            callbacks=None, validation_split=0., validation_data=None,
            shuffle=True, class_weight=None, sample_weight=None,
            initial_epoch=0, steps_per_epoch=None, validation_steps=None,
            **kwargs):
        """
        Trains the autoencoder.
        """
        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self, generator, steps_per_epoch=None, epochs=1,
                      verbose=1, callbacks=None, validation_data=None,
                      validation_steps=None, class_weight=None,
                      max_queue_size=10, workers=1, use_multiprocessing=False,
                      shuffle=True, initial_epoch=0):
        """
        Trains the autoencoder with a generator.
        """
        return self.autoencoder.fit_generator(
            generator, steps_per_epoch, epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self, x=None, y=None, batch_size=None, verbose=1,
                 sample_weight=None, steps=None):
        """
        Evaluates the autoencoder.
        """
        return self.autoencoder.evaluate(x, y, batch_size, verbose,
                                         sample_weight,
                                         steps=steps)  # was steps=None, which ignored the argument

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`
        """
        return self.predict_reconstruct_from_samples(x, batch_size, verbose, steps)

    def predict_reconstruct_from_samples(self, x, batch_size=None, verbose=0, steps=None):
        """
        Reconstructs samples.

        Samples are firstly mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction space via the decoder.
        """
        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self, x, batch_size=None, verbose=0, steps=None):
        """
        Embeds samples into latent space using the encoder.
        """
        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self, x, batch_size=None, verbose=0, steps=None):
        """
        Maps latent vectors to reconstruction space using the decoder.
        """
        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """
        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the decoder.
        The encoder and decoder use the path plus a respective annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and *myae-decoder.h5*.
        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np},
                                     {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {
            'mse_squared': K.pow(m, 2),
            'mse_cubed': K.pow(m, 3)
        }

    model.compile(optimizer, loss, metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss + 2 outputs * (loss + 3 metrics)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
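
# Note on API drift visible across the copies of test_model_methods kept in
# this file: the older copies use the Keras 1 names (nb_epoch,
# samples_per_epoch), the newer one uses the Keras 2 names (epochs,
# steps_per_epoch, where steps count batches rather than samples).
# Equivalent calls, side by side:
#
#   model.fit(x, y, nb_epoch=5, batch_size=4)                    # Keras 1
#   model.fit(x, y, epochs=5, batch_size=4)                      # Keras 2
#   model.fit_generator(gen, samples_per_epoch=40, nb_epoch=5)   # Keras 1
#   model.fit_generator(gen, steps_per_epoch=10, epochs=5)       # Keras 2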
class PolicyValueNet():
    """policy-value network """

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            with open(model_file, 'rb') as f:  # context manager instead of a bare open()
                net_params = pickle.load(f)
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)

        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available
        action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        (the policy term is subtracted: categorical cross-entropy implements -pi^T * log(p))
        """
        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        net_params = self.get_policy_param()
        with open(model_file, 'wb') as f:  # context manager instead of a bare open()
            pickle.dump(net_params, f, protocol=2)
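
# Hedged usage sketch for PolicyValueNet on a 6x6 board: the Board object and
# the self-play pipeline live elsewhere in this project, so only the raw
# train_step/save_model plumbing is exercised here with random stand-in data
# (this module's numpy import is assumed).
def demo_policy_value_net():
    net = PolicyValueNet(6, 6)
    states = np.random.random((8, 4, 6, 6))            # batch of board planes
    mcts_probs = np.random.dirichlet(np.ones(36), 8)   # target move distributions
    winners = np.random.choice([-1.0, 1.0], 8).reshape(-1, 1)  # outcomes per sample
    loss, entropy = net.train_step(states, mcts_probs, winners, learning_rate=2e-3)
    net.save_model('current_policy.model')
    return loss, entropy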