Exemple #1
0
def test_warnings():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])

    with pytest.warns(Warning) as w:
        out = model.fit_generator(gen_data(4), steps_per_epoch=10, use_multiprocessing=True, workers=2)
    warning_raised = any(['Sequence' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when using generator with processes.'

    with pytest.warns(None) as w:
        out = model.fit_generator(RandomSequence(3), steps_per_epoch=4, use_multiprocessing=True, workers=2)
    assert all(['Sequence' not in str(w_.message) for w_ in w]), 'A warning was raised for Sequence.'
def test_model_multiple_calls():
    x1 = Input(shape=(20,))

    y1 = sequential([
        Dense(10),
        Dense(1),
    ])(x1)
    m1 = Model(x1, y1)

    x2 = Input(shape=(25,))
    y2 = sequential([
        Dense(20),
        m1
    ])(x2)
    m2 = Model(x2, y2)
    m2.compile('adam', 'mse')

    x3 = Input(shape=(20,))
    y3 = sequential([
        Dense(25),
        m2
    ])(x3)
    m3 = Model(x3, y3)
    m3.compile('adam', 'mse')
    m3.train_on_batch(np.zeros((32, 20)), np.zeros((32, 1)))
Exemple #3
0
def test_model_custom_target_tensors():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    y = K.placeholder([10, 4], name='y')
    y1 = K.placeholder([10, 3], name='y1')
    y2 = K.placeholder([7, 5], name='y2')
    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    # test list of target tensors
    with pytest.raises(ValueError):
        model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                      sample_weight_mode=None, target_tensors=[y, y1, y2])
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None, target_tensors=[y, y1])
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               {y: np.random.random((10, 4)),
                                y1: np.random.random((10, 3))})
    # test dictionary of target_tensors
    with pytest.raises(ValueError):
        model.compile(optimizer, loss,
                      metrics=[],
                      loss_weights=loss_weights,
                      sample_weight_mode=None,
                      target_tensors={'does_not_exist': y2})
    # test dictionary of target_tensors
    model.compile(optimizer, loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None,
                  target_tensors={'dense_1': y, 'dropout': y1})
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               {y: np.random.random((10, 4)),
                                y1: np.random.random((10, 3))})

    if K.backend() == 'tensorflow':
        import tensorflow as tf
        # test with custom TF placeholder as target
        pl_target_a = tf.placeholder('float32', shape=(None, 4))
        model.compile(optimizer='rmsprop', loss='mse',
                      target_tensors={'dense_1': pl_target_a})
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])
Exemple #4
0
def test_sparse_input_validation_split():
    test_input = sparse.random(6, 3, density=0.25).tocsr()
    in1 = Input(shape=(3,), sparse=True)
    out1 = Dense(4)(in1)
    test_output = np.random.random((6, 4))
    model = Model(in1, out1)
    model.compile('rmsprop', 'mse')
    model.fit(test_input, test_output, epochs=1, batch_size=2, validation_split=0.2)
Exemple #5
0
def m():
    x = Input(shape=(input_size + output_size, nb_chars))
    m_realness = sequential([
        LSTM(14),
        Dense(1, activation='sigmoid'),
    ])(x)
    m = Model([x], [m_realness])
    m.compile(Adam(), 'mse')
    return m
Exemple #6
0
def decoder_dummy(label_sizes, nb_filter=16, data_shape=(1, 64, 64), nb_bits=12,
                  optimizer='adam'):

    input = Input(shape=data_shape)
    x = input
    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())

    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights={k: decoder_loss_weights(k) for k in losses.keys()})
    return model
Exemple #7
0
def test_sparse_placeholder_fit():
    test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)]
    test_outputs = [sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)]
    in1 = Input(shape=(3,))
    in2 = Input(shape=(3,), sparse=True)
    out1 = Dropout(0.5, name='dropout')(in1)
    out2 = Dense(4, name='dense_1')(in2)
    model = Model([in1, in2], [out1, out2])
    model.predict(test_inputs, batch_size=2)
    model.compile('rmsprop', 'mse')
    model.fit(test_inputs, test_outputs, epochs=1, batch_size=2, validation_split=0.5)
    model.evaluate(test_inputs, test_outputs, batch_size=2)
Exemple #8
0
def decoder_baseline(label_sizes, nb_bits=12, data_shape=(1, 64, 64),
                     depth=1, nb_filter=16, optimizer='adam'):
    n = nb_filter
    input = Input(shape=data_shape)
    x = sequential([
        conv2d_block(n, depth=depth, pooling='max'),    # 32x32
        conv2d_block(2*n, depth=depth, pooling='max'),  # 16x16
        conv2d_block(4*n, depth=depth, pooling='max'),  # 8x8
        conv2d_block(8*n, depth=depth, pooling='max'),  # 4x4
    ])(input)
    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())
    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),)
    return model
def test_render_gan_builder_generator_extended():
    labels_shape = (27,)
    z_dim_offset = 50
    builder = RenderGAN(lambda x: tag3d_network_dense(x, nb_units=4),
                        generator_units=4, discriminator_units=4,
                        z_dim_offset=z_dim_offset,
                        labels_shape=(27,))
    bs = 19
    z, z_offset, labels = data(builder, bs)
    real = np.zeros((bs,) + builder.data_shape)

    labels_input = Input(shape=labels_shape)
    z = Input(shape=(z_dim_offset,))
    fake = builder.generator_given_z_and_labels([z, labels_input])
    m = Model([z, labels_input], [fake])
    m.compile('adam', 'mse')
    m.train_on_batch([z_offset, labels], real)
def test_model_with_partial_loss():
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dropout': 'mse'}
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])

    # Same without dropout.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dense_2': 'mse'}
    model.compile(optimizer, loss, metrics={'dense_1': 'mae'})

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])
Exemple #11
0
def simple_gan():
    z = Input(batch_shape=simple_gan_z_shape, name='z')
    generator = sequential([
        Dense(4*simple_gan_nb_z, activation='relu', name='g1'),
        Dense(4*simple_gan_nb_z, activation='relu', name='g2'),
        Dense(simple_gan_nb_out, name='g_loss'),
    ])(z)

    d_input = Input(batch_shape=simple_gan_real_shape, name='data')

    discriminator = sequential([
        Dense(400, input_dim=2, name='d1'),
        LeakyReLU(0.3),
        Dense(400, name='d2'),
        LeakyReLU(0.3),
        Dense(1, activation='sigmoid', name='d_loss')
    ])(d_input)
    g = Model(z, generator)
    g.compile(Adam(lr=0.0002, beta_1=0.5), {'g_loss': 'binary_crossentropy'})
    d = Model(d_input, discriminator)
    d.compile(Adam(lr=0.0002, beta_1=0.5), {'d_loss': 'binary_crossentropy'})
    return GAN(g, d)
def test_trainable_weights_count_consistency():
    """Tests the trainable weights consistency check of Model.

    This verifies that a warning is shown if model.trainable is modified
    and the model is summarized/run without a new call to .compile()

    Reproduce issue #8121
    """
    a = Input(shape=(3, ), name='input_a')
    model1 = Model(inputs=a, outputs=Dense(1)(a))

    model1.trainable = False
    b = Input(shape=(3, ), name='input_b')
    y = model1(b)
    model2 = Model(inputs=b, outputs=Dense(1)(y))

    model2.compile(optimizer='adam', loss='mse')

    model1.trainable = True

    # Should warn on .summary()
    with pytest.warns(UserWarning) as w:
        model2.summary()
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And on .fit()
    with pytest.warns(UserWarning) as w:
        model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1)))
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And shouldn't warn if we recompile
    model2.compile(optimizer='adam', loss='mse')
    with pytest.warns(None) as w:
        model2.summary()
    assert len(
        w
    ) == 0, "Warning raised even when .compile() is called after modifying .trainable"
Exemple #13
0
def test_model_with_partial_loss():
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dropout': 'mse'}
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])

    # Same without dropout.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dense_2': 'mse'}
    model.compile(optimizer, loss, metrics={'dense_1': 'mae'})

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])
Exemple #14
0
    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_swap0 = Sequential(name='f_swap0')
        f_swap0.add(self.f_enc)
        f_swap0.add(Dense(FIELD_DEPTH))
        f_swap0.add(Activation('softmax', name='softmax_swap0'))

        f_swap1 = Sequential(name='f_swap1')
        f_swap1.add(self.f_enc)
        f_swap1.add(Dense(FIELD_DEPTH))
        f_swap1.add(Activation('softmax', name='softmax_swap1'))

        env_model = Model(self.f_enc.inputs, [f_swap0.output, f_swap1.output],
                          name="env_model")
        env_model.compile(optimizer='adam',
                          loss=['categorical_crossentropy'] * 2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((3, -1))
                    p1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    p2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    p3 = np.clip(env_values[2].argmax() - 1, 0, 9)
                    now = (p1, p2, p3)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array(min(p1, p2), FIELD_DEPTH)
                    y1 = to_one_hot_array(max(p1, p2), FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))
            if np.average(losses) < 1e-06:
                break
Exemple #15
0
def decoder_resnet(label_sizes, nb_filter=16, data_shape=(1, 64, 64), nb_bits=12,
                   resnet_depth=(3, 4, 6, 3),
                   optimizer='adam'):
    def _bn_relu_conv(nb_filter, nb_row=3, nb_col=3, subsample=1):
        return sequential([
            BatchNormalization(mode=0, axis=1),
            ELU(),
            Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col,
                          subsample=(subsample, subsample), init="he_normal", border_mode="same")
        ])

    def f(nb_filter, subsample=1):
        return sequential([
            _bn_relu_conv(nb_filter, subsample=subsample),
            _bn_relu_conv(nb_filter),
        ])

    input = Input(shape=data_shape)
    fitlers_by_depth = [nb_filter * 2**i for i in range(len(resnet_depth))]
    print("fitlers_by_depth", fitlers_by_depth)
    x = _bn_relu_conv(nb_filter, 3, 3, subsample=2)(input)
    for i, (n, d) in enumerate(zip(fitlers_by_depth, resnet_depth)):
        for di in range(d):
            if di == 0 and i != 0:
                shortcut = _bn_relu_conv(n, 1, 1, subsample=2)
                subsample = 2
            else:
                shortcut = lambda x: x
                subsample = 1
            x = merge([shortcut(x), f(n, subsample)(x)], mode='sum')

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())

    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights={k: decoder_loss_weights(k) for k in losses.keys()})
    return model
    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model")
        env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num %  10)+1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))
            if np.average(losses) < 1e-06:
                break
Exemple #17
0
def create_squeeze_net():
    inp = Input(shape=getInputDim())
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inp)
    x = MaxPooling2D((3, 3), strides=2)(x)
    x = create_fire_mod(x, 64, 128)
    x = MaxPooling2D((3, 3), strides=2)(x)
    x = create_fire_mod(x, 64, 128)
    x = MaxPooling2D((3, 3), strides=2)(x)

    x = Conv2D(32, (1, 1), activation='relu')(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(inp, x)
    model.compile(loss='binary_crossentropy',
                  optimizer='adagrad',
                  metrics=[
                      'binary_accuracy',
                  ])

    return model
def create_model():
    tokens = get_tokens()
    num_tokens = len(tokens) + 1
    input_data = Input(name='speech_data_input', shape=(500, 13))
    layer_dense_1 = Dense(256, activation="relu", use_bias=True, kernel_initializer='he_normal')(input_data)
    layer_dropout_1 = Dropout(0.4)(layer_dense_1)
    layer_dense_2 = Dense(512, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_dropout_1)
    layer_gru1 = GRU(512, return_sequences=True, kernel_initializer='he_normal', dropout=0.4)(layer_dense_2)
    layer_gru2 = GRU(512, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', dropout=0.4)(layer_gru1)
    layer_dense_3 = Dense(256, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_gru2)
    layer_dropout_2 = Dropout(0.4)(layer_dense_3)
    layer_dense_4 = Dense(num_tokens, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_dropout_2)
    output = Activation('softmax', name='Activation0')(layer_dense_4)
    #ctc
    labels = Input(name='speech_labels', shape=[70], dtype='int64')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda, output_shape=(1,), name='ctc')([labels, output, input_length, label_length])
    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
    adad = Adadelta(lr=0.01, rho=0.95, epsilon=K.epsilon())
    model.compile(loss={'ctc': lambda y_true, output: output}, optimizer=adad)
    print("model compiled successful!")
    return model
Exemple #19
0
def test_trainable_weights_count_consistency():
    """Tests the trainable weights consistency check of Model.

    This verifies that a warning is shown if model.trainable is modified
    and the model is summarized/run without a new call to .compile()

    Reproduce issue #8121
    """
    a = Input(shape=(3,), name='input_a')
    model1 = Model(inputs=a, outputs=Dense(1)(a))

    model1.trainable = False
    b = Input(shape=(3,), name='input_b')
    y = model1(b)
    model2 = Model(inputs=b, outputs=Dense(1)(y))

    model2.compile(optimizer='adam', loss='mse')

    model1.trainable = True

    # Should warn on .summary()
    with pytest.warns(UserWarning) as w:
        model2.summary()
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And on .fit()
    with pytest.warns(UserWarning) as w:
        model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1)))
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And shouldn't warn if we recompile
    model2.compile(optimizer='adam', loss='mse')
    with pytest.warns(None) as w:
        model2.summary()
    assert len(w) == 0, "Warning raised even when .compile() is called after modifying .trainable"
Exemple #20
0
def test_warnings():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    with pytest.warns(Warning) as w:
        out = model.fit_generator(gen_data(4),
                                  steps_per_epoch=10,
                                  use_multiprocessing=True,
                                  workers=2)
    warning_raised = any(['Sequence' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when using generator with processes.'

    with pytest.warns(None) as w:
        out = model.fit_generator(RandomSequence(3),
                                  steps_per_epoch=4,
                                  use_multiprocessing=True,
                                  workers=2)
    assert all(['Sequence' not in str(w_.message) for w_ in w]), 'A warning was raised for Sequence.'
Exemple #21
0
def decoder_baseline(label_sizes,
                     nb_bits=12,
                     data_shape=(1, 64, 64),
                     depth=1,
                     nb_filter=16,
                     optimizer='adam'):
    n = nb_filter
    input = Input(shape=data_shape)
    x = sequential([
        conv2d_block(n, depth=depth, pooling='max'),  # 32x32
        conv2d_block(2 * n, depth=depth, pooling='max'),  # 16x16
        conv2d_block(4 * n, depth=depth, pooling='max'),  # 8x8
        conv2d_block(8 * n, depth=depth, pooling='max'),  # 4x4
    ])(input)
    outputs, losses = decoder_end_block(x,
                                        label_sizes,
                                        nb_bits,
                                        activation=lambda: ELU())
    model = Model(input, list(outputs.values()))
    model.compile(
        optimizer,
        loss=list(losses.values()),
    )
    return model
Exemple #22
0
    def get_model_with_classification_head(self, base_model):

        base_model.summary()

        x = base_model.output
        x = GlobalAveragePooling2D(name='flatten_1')(x)
        #x = Flatten(name='flatten_1')(x)
        x = Dense(1024, activation='relu', name='fc6_1')(x)
        x = Dense(1024, activation='relu', name='fc7_2')(x)
        x = Dropout(0.5)(x)
        predictions = Dense(self.nb_classes,
                            activation='softmax',
                            name='fc8_3')(x)

        # this is the model we will train
        model = Model(inputs=base_model.input, outputs=predictions)

        for layer in base_model.layers:
            layer.trainable = False

        model.compile(optimizer="rmsprop", loss=LOSS, metrics=['accuracy'])
        model.summary()

        return model
Exemple #23
0
class DefogGAN():
    def __init__(self):
        self.project_dir = (os.path.dirname(__file__))
        self.data_path = os.path.join(self.project_dir, "./data/starCraft")
        self.result_work_path = os.path.join(self.project_dir,
                                             './result/DefogGAN')
        self.making_ing_path = os.path.join(self.project_dir,
                                            './result/DefogGAN')
        self.createFolder(self.result_work_path)
        is_multi_gpu = True
        self.is_validation_check = True
        self.save_weights = True
        #self.num_of_replay = 3500
        self.n_epochs = 1000
        self.batch_size = 512
        self.save_interval = 1000
        self.num_of_making_img = 5
        optimizer = Adam(0.0001, beta_1=0.5, beta_2=0.9)
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        self.generator = self.build_generator()
        fog_img = Input(shape=(82, 32, 32))
        gen_missing = self.generator(fog_img)
        self.discriminator.trainable = False
        valid = self.discriminator(gen_missing)
        self.combined = Model(fog_img, [gen_missing, valid])
        if is_multi_gpu:
            self.combined = multi_gpu_model(self.combined,
                                            gpus=self.get_count_of_gpu())
        weight = K.variable(
            np.array([0.75, 0.1875, 0.0468, 0.012, 0.003, 0.0007]))
        self.combined.compile(loss=[
            self.weighted_pyramidal_loss(weights=weight), 'binary_crossentropy'
        ],
                              loss_weights=[0.999, 0.001],
                              optimizer=optimizer)

    def accumulate_resolution(self, x, threshhold=0.5):
        new = np.zeros((x.shape[0], 32, 32))
        new_enemy = np.zeros((x.shape[0], 32, 32))
        new_self = np.zeros((x.shape[0], 32, 32))
        for n in range(x.shape[0]):
            for w in range(x.shape[2]):
                for h in range(x.shape[3]):
                    is_enemy = False
                    is_self = False
                    for u in range(x.shape[1]):
                        if u < 32:
                            # enemy
                            if not int(x[n][u][w][h] + threshhold) == 0:
                                is_enemy = True
                                new_enemy[n][w][h] -= int(x[n][u][w][h] +
                                                          threshhold)
                        else:
                            # self
                            if not int(x[n][u][w][h] + threshhold) == 0:
                                is_self = True
                                new_self[n][w][h] += int(x[n][u][w][h] +
                                                         threshhold)
                    if is_enemy and is_self:
                        new[n][w][h] = 0
                    elif not is_enemy and not is_self:
                        new[n][w][h] = -30
                    elif is_enemy and not is_self:
                        new[n][w][h] = new_enemy[n][w][h]
                    elif not is_enemy and is_self:
                        new[n][w][h] = new_self[n][w][h]
        return new

    def make_pickle(self):
        for i in ['train', 'validation', 'test']:
            for j in ['x', 'y']:
                f = open('{}/{}_{}_data_set.csv'.format(self.data_path, j, i),
                         'r',
                         encoding='utf-8')
                read_csv_file = csv.reader(f)
                is_first = True
                for line in read_csv_file:
                    if is_first:
                        tensor = np.zeros((int(line[0]), int(line[1]),
                                           int(line[2]), int(line[3])))
                        is_first = False
                    else:
                        tensor[int(line[0])][int(line[1])][int(line[2])][int(
                            line[3])] = float(line[4])
                        #print(int(line[0]), int(line[1]), int(line[2]), int(line[3]), tensor[int(line[0])][int(line[1])][int(line[2])][int(line[3])])
                f.close()
                with open('{}/{}_{}_dataset.pkl'.format(self.data_path, j, i),
                          'wb') as f:
                    pickle.dump(tensor, f, pickle.HIGHEST_PROTOCOL)

    def get_pickle_data(self):
        with open('{}/x_train_dataset.pkl'.format(self.data_path), 'rb') as f:
            x_train = pickle.load(f)
        with open('{}/x_validation_dataset.pkl'.format(self.data_path),
                  'rb') as f:
            x_validation = pickle.load(f)
        with open('{}/x_test_dataset.pkl'.format(self.data_path), 'rb') as f:
            x_test = pickle.load(f)

        with open('{}/y_train_dataset.pkl'.format(self.data_path), 'rb') as f:
            y_train = pickle.load(f)
        with open('{}/y_validation_dataset.pkl'.format(self.data_path),
                  'rb') as f:
            y_validation = pickle.load(f)
        with open('{}/y_test_dataset.pkl'.format(self.data_path), 'rb') as f:
            y_test = pickle.load(f)
        return x_train, x_validation, x_test, y_train, y_validation, y_test

    def get_sample_data(self):
        f = open('{}/sample_data.csv'.format(self.data_path),
                 'r',
                 encoding='utf-8')
        read_csv_file = csv.reader(f)
        tensor_fog = np.zeros((self.num_of_replay, 82, 32, 32))
        tensor_real = np.zeros((self.num_of_replay, 66, 32, 32))
        for line in read_csv_file:
            if len(line) == 1:
                num_of_replay = int(line[0])
            if len(line) == 5:
                tensor_fog[num_of_replay][int(line[0])][int(line[1])][int(
                    line[2])] = int(line[3])
                if int(line[0]) < 66:
                    tensor_real[num_of_replay][int(line[0])][int(line[1])][int(
                        line[2])] = int(line[4])
        f.close()
        # shuffle tensor index
        train_set_index = random.sample(
            range(0, self.num_of_replay),
            int(self.num_of_replay * self.train_rate))
        temp_set = [
            i for i in range(0, self.num_of_replay) if not i in train_set_index
        ]
        validation_set_index = temp_set[:int(self.num_of_replay *
                                             self.validation_rate)]
        test_set_index = temp_set[int(self.num_of_replay * self.test_rate):]

        return tensor_fog[train_set_index], tensor_fog[
            validation_set_index], tensor_fog[test_set_index], tensor_real[
                train_set_index], tensor_real[
                    validation_set_index], tensor_real[test_set_index]

    def check_pickle(self):
        for i in ['train', 'validation', 'test']:
            for j in ['x', 'y']:
                fname = '{}/{}_{}_dataset.pkl'.format(self.data_path, j, i)
                if not os.path.isfile(fname):
                    return False
        return True

    def train_defogGAN(self):
        min_loss = 99999999999
        valid = np.ones((self.batch_size, 1))
        fake = np.zeros((self.batch_size, 1))
        #x_train, x_validation, x_test, y_train, y_validation, y_test = self.get_sample_data()
        is_pickle = self.check_pickle()
        if not is_pickle:
            self.make_pickle()
        x_train, x_validation, x_test, y_train, y_validation, y_test = self.get_pickle_data(
        )
        best_img = None
        last_epoch_img = None
        for epoch in range(self.n_epochs + 1):
            n_batches = int(x_train.shape[0] / self.batch_size)
            for i in range(math.ceil(n_batches)):
                start_batch = i * self.batch_size
                end_batch = (1 + i) * self.batch_size
                end_batch = x_train.shape[0] if end_batch > x_train.shape[
                    0] else (1 + i) * self.batch_size
                images_train_x = x_train[start_batch:end_batch, :, :, :]
                images_train_y = y_train[start_batch:end_batch, :, :, :]
                gen_missing = self.generator.predict(images_train_x)
                d_loss_real = self.discriminator.train_on_batch(
                    images_train_y, valid)
                d_loss_fake = self.discriminator.train_on_batch(
                    gen_missing, fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
                #  Train Generator
                g_loss = self.combined.train_on_batch(images_train_x,
                                                      [images_train_y, valid])
            if self.is_validation_check:
                validation_recon_img = self.generator.predict(x_validation)
                validation_loss = self.get_MSE_Value(validation_recon_img,
                                                     y_validation)
            else:
                validation_loss = 0
            print(
                "%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f, validation_mse : %f]"
                % (epoch, d_loss[0], 100 * d_loss[1], g_loss[0], g_loss[1],
                   validation_loss))
            if epoch % self.save_interval == 0:
                last_epoch_img = self.generator.predict(x_test)
                if self.save_weights:
                    self.createFolder('{}/interval_model/'.format(
                        self.result_work_path))
                    self.generator.save_weights(
                        '{}/interval_model/model_weight_{}.h5'.format(
                            self.result_work_path, epoch))

            if min_loss > validation_loss and self.is_validation_check:
                min_loss = validation_loss
                best_img = self.generator.predict(x_test)
                if self.save_weights:
                    if os.path.exists('{}/best_model'.format(
                            self.result_work_path)):
                        shutil.rmtree('{}/best_model'.format(
                            self.result_work_path))
                    self.createFolder('{}/best_model/'.format(
                        self.result_work_path))
                    self.generator.save_weights(
                        '{}/best_model/model_weight_{}.h5'.format(
                            self.result_work_path, epoch))
        self.make_test(x_test, last_epoch_img, best_img, y_test)

    def make_test(self, x_test, last_epoch_img, best_img, y_test):
        n = self.num_of_making_img
        x_test = x_test[:n]
        x_test = x_test[:, :66, :, :]
        last_epoch_img = last_epoch_img[:n]
        best_img = best_img[:n]
        y_test = y_test[:n]
        result = np.zeros((0, n, 32, 32))  # model , index of replay , x, y
        result = np.concatenate(
            (result, self.accumulate_resolution(x_test).reshape(1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(last_epoch_img).reshape(
                1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(best_img).reshape(
                1, n, 32, 32)),
            axis=0)
        result = np.concatenate(
            (result, self.accumulate_resolution(y_test).reshape(1, n, 32, 32)),
            axis=0)

        print(result.shape)
        self.make_img(self.making_ing_path, result)
        print('Success make image')

    def make_img(self, path, map):
        print(map.shape)  # (7,30,32,32)
        plt_threshhold = -20
        model_names = [
            'fog_exposed', 'last_epoch', 'best_epoch', 'Ground_truth'
        ]
        fig, axn = plt.subplots(map.shape[1],
                                map.shape[0],
                                sharex=True,
                                sharey=True,
                                figsize=(map.shape[0] * 1.3,
                                         map.shape[1] * 1.3))
        cbar_ax = fig.add_axes([.91, .3, .03, .4])
        fig.suptitle(
            'GAN\'s compare[enemy(red): positive num, allies(green): negative num, both(yellow): 0]',
            fontsize=16)
        for i, ax in enumerate(axn.flat):

            model_index = i % map.shape[0]
            unit_index = int(i / map.shape[0])
            if i < map.shape[0]:
                ax.set_title(model_names[model_index], fontsize=10)
            matrix = map[model_index][unit_index]
            if plt_threshhold == 0:
                sns.heatmap(matrix,
                            ax=ax,
                            cbar=i == 0,
                            annot=True,
                            fmt='.1f',
                            cmap=plt.cm.YlGnBu,
                            cbar_ax=None if i else cbar_ax)
            else:
                cbar_kws = {
                    'ticks': [-3, -2, -1, 0, 1, 2, 3],
                    'drawedges': True
                }
                sns.heatmap(matrix,
                            mask=(matrix < plt_threshhold),
                            ax=ax,
                            cbar=i == 0,
                            annot=False,
                            square=True,
                            fmt='.1f',
                            xticklabels=False,
                            yticklabels=False,
                            vmin=-3.5,
                            vmax=3.5,
                            cbar_kws=cbar_kws,
                            cmap=plt.get_cmap('RdYlGn', 7),
                            cbar_ax=None if i else cbar_ax)
        count = 0
        for ax in axn.flat:
            model_index = count % map.shape[0]
            unit_index = int(count / map.shape[0])
            if model_index == 0:
                ax.set(ylabel='replay {}'.format(unit_index))
            ax.axhline(y=0, color='k', linewidth=1)
            ax.axhline(y=32, color='k', linewidth=2)
            ax.axvline(x=0, color='k', linewidth=1)
            ax.axvline(x=32, color='k', linewidth=2)
            count += 1
        plt.subplots_adjust(hspace=0.03, wspace=0.03)
        plt.savefig(path)
        plt.close('all')

    def build_generator(self):
        input_channel = 82
        output_channel = 66
        input_shape = (input_channel, 32, 32)
        img_input = Input(shape=input_shape, name='input')
        depth = input_channel
        # In: 100
        # Out: dim x dim x depth
        c1 = Conv2D(depth * 2, (3, 3),
                    strides=(1, 1),
                    activation='relu',
                    input_shape=input_shape,
                    padding='same',
                    data_format='channels_first')(img_input)
        b1 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c1)
        act1 = Activation('relu')(b1)
        c2 = Conv2D(depth * 2, (3, 3),
                    strides=(2, 2),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act1)
        b2 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c2)
        act2 = Activation('relu')(b2)
        c3 = Conv2D(depth * 4, (3, 3),
                    strides=(1, 1),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act2)
        b3 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c3)
        act3 = Activation('relu')(b3)
        c4 = Conv2D(depth * 4, (3, 3),
                    strides=(2, 2),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act3)
        b4 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c4)
        act4 = Activation('relu')(b4)
        c5 = Conv2D(depth * 8, (3, 3),
                    strides=(1, 1),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act4)
        b5 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c5)
        act5 = Activation('relu')(b5)
        c6 = Conv2D(depth * 8, (3, 3),
                    strides=(1, 1),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act5)
        b6 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c6)
        act6 = Activation('relu')(b6)
        c7 = Conv2D(depth * 8, (3, 3),
                    strides=(2, 2),
                    activation='relu',
                    padding='same',
                    data_format='channels_first')(act6)
        b7 = BatchNormalization(axis=-1,
                                momentum=0.99,
                                epsilon=0.001,
                                center=True,
                                scale=True,
                                beta_initializer='zeros',
                                gamma_initializer='ones',
                                moving_mean_initializer='zeros',
                                moving_variance_initializer='ones',
                                beta_regularizer=None,
                                gamma_regularizer=None,
                                beta_constraint=None,
                                gamma_constraint=None)(c7)
        act7 = Activation('relu')(b7)
        ct1 = Conv2DTranspose(depth * 8, (3, 3),
                              strides=(2, 2),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act7)
        act8 = Activation('relu')(ct1)
        act8_output = Lambda(lambda x: x, name='act8_output')(act8)
        act8_output = keras.layers.Add()([act6, act8_output])
        ct2 = Conv2DTranspose(depth * 8, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act8_output)
        act9 = Activation('relu')(ct2)
        act9_output = Lambda(lambda x: x, name='act9_output')(act9)
        act9_output = keras.layers.Add()([act5, act9_output])
        ct3 = Conv2DTranspose(depth * 4, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act9_output)
        act10 = Activation('relu')(ct3)
        act10_output = Lambda(lambda x: x, name='act10_output')(act10)
        act10_output = keras.layers.Add()([act4, act10_output])
        ct4 = Conv2DTranspose(depth * 4, (3, 3),
                              strides=(2, 2),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act10_output)
        act11 = Activation('relu')(ct4)
        act11_output = Lambda(lambda x: x, name='act11_output')(act11)
        act11_output = keras.layers.Add()([act3, act11_output])
        ct5 = Conv2DTranspose(depth * 2, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act11_output)
        act12 = Activation('relu')(ct5)
        act12_output = Lambda(lambda x: x, name='act12_output')(act12)
        act12_output = keras.layers.Add()([act2, act12_output])
        ct6 = Conv2DTranspose(depth * 2, (3, 3),
                              strides=(2, 2),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act12_output)
        act13 = Activation('relu')(ct6)
        act13_output = Lambda(lambda x: x, name='act13_output')(act13)
        act13_output = keras.layers.Add()([act1, act13_output])
        ct7 = Conv2DTranspose(depth, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act13_output)
        act14 = Activation('relu')(ct7)
        act14_output = Lambda(lambda x: x, name='output')(act14)
        act14_output = keras.layers.Add()([img_input, act14_output])
        ct8 = Conv2DTranspose(output_channel, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              data_format='channels_first')(act14_output)
        act15 = Activation('relu')(ct8)
        img_output = act15
        model = Model(inputs=[img_input], outputs=[img_output])
        model.summary()
        return model

    def build_discriminator(self):
        D = Sequential()
        discriminator_input_channel = 66
        depth = discriminator_input_channel
        dropout = 0.4
        input_shape = (discriminator_input_channel, 32, 32)
        D.add(
            Conv2D(depth * 1, (3, 3),
                   strides=(2, 2),
                   input_shape=input_shape,
                   padding='same',
                   data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))
        D.add(
            Conv2D(depth * 2, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))
        D.add(
            Conv2D(depth * 4, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   data_format='channels_first'))
        D.add(LeakyReLU(alpha=0.2))
        D.add(Dropout(dropout))
        D.add(Flatten())
        D.add(Dense(1))
        D.add(Activation('sigmoid'))
        D.summary()
        return D

    def weighted_pyramidal_loss(self, weights):
        def pyramidal_loss(y_true, y_pred):
            yt_2 = keras.layers.AveragePooling2D((2, 2))(y_true) * 2
            yt_4 = keras.layers.AveragePooling2D((4, 4))(y_true) * 4
            yt_8 = keras.layers.AveragePooling2D((8, 8))(y_true) * 8
            yt_16 = keras.layers.AveragePooling2D((16, 16))(y_true) * 16
            yt_32 = keras.layers.AveragePooling2D((32, 32))(y_true) * 32

            yp_2 = keras.layers.AveragePooling2D((2, 2))(y_pred) * 2
            yp_4 = keras.layers.AveragePooling2D((4, 4))(y_pred) * 4
            yp_8 = keras.layers.AveragePooling2D((8, 8))(y_pred) * 8
            yp_16 = keras.layers.AveragePooling2D((16, 16))(y_pred) * 16
            yp_32 = keras.layers.AveragePooling2D((32, 32))(y_pred) * 32

            loss_0 = keras.losses.mean_squared_error(y_true, y_pred)
            loss_2 = keras.losses.mean_squared_error(yt_2, yp_2)
            loss_4 = keras.losses.mean_squared_error(yt_4, yp_4)
            loss_8 = keras.losses.mean_squared_error(yt_8, yp_8)
            loss_16 = keras.losses.mean_squared_error(yt_16, yp_16)
            loss_32 = keras.losses.mean_squared_error(yt_32, yp_32)
            loss_0 = tf.reduce_mean(loss_0, axis=[1, 2])
            loss_2 = tf.reduce_mean(loss_2, axis=[1, 2])
            loss_4 = tf.reduce_mean(loss_4, axis=[1, 2])
            loss_8 = tf.reduce_mean(loss_8, axis=[1, 2])
            loss_16 = tf.reduce_mean(loss_16, axis=[1, 2])
            loss_32 = tf.reduce_mean(loss_32, axis=[1, 2])
            loss = weights[0] * loss_0 + \
                   weights[1] * loss_2 + \
                   weights[2] * loss_4 + \
                   weights[3] * loss_8 + \
                   weights[4] * loss_16 + \
                   weights[5] * loss_32
            return loss

        return pyramidal_loss

    def get_count_of_gpu(self):
        device_list = device_lib.list_local_devices()
        gpu_count = 0
        for d in device_list:
            if d.device_type == 'GPU':
                gpu_count += 1
        return int(gpu_count)

    def createFolder(self, directory):
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)
        except OSError:
            print('Error: Creating directory. ' + directory)

    def get_MSE_Value(self, x, y):
        MSE_GAN = 0
        MSE_DIV = x.shape[0] * x.shape[1]
        for n in range(x.shape[0]):
            for c in range(x.shape[1]):
                MSE_GAN += mean_squared_error(x[n][c], y[n][c])
        MSE = MSE_GAN / MSE_DIV
        return MSE
Exemple #24
0
def test_model_custom_target_tensors():
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    y = K.placeholder([10, 4], name='y')
    y1 = K.placeholder([10, 3], name='y1')
    y2 = K.placeholder([7, 5], name='y2')
    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    # test list of target tensors
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss,
                      metrics=[],
                      loss_weights=loss_weights,
                      sample_weight_mode=None,
                      target_tensors=[y, y1, y2])
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None,
                  target_tensors=[y, y1])
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np], {
                                   y: np.random.random((10, 4)),
                                   y1: np.random.random((10, 3))
                               })
    # test dictionary of target_tensors
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss,
                      metrics=[],
                      loss_weights=loss_weights,
                      sample_weight_mode=None,
                      target_tensors={'does_not_exist': y2})
    # test dictionary of target_tensors
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None,
                  target_tensors={
                      'dense_1': y,
                      'dropout': y1
                  })
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np], {
                                   y: np.random.random((10, 4)),
                                   y1: np.random.random((10, 3))
                               })

    if K.backend() == 'tensorflow':
        import tensorflow as tf
        # test with custom TF placeholder as target
        pl_target_a = tf.placeholder('float32', shape=(None, 4))
        model.compile(optimizer='rmsprop',
                      loss='mse',
                      target_tensors={'dense_1': pl_target_a})
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])
class PolicyValueNet():
    """策略价值网络"""

    #def __init__(self, board_width, board_height, model_file=None):
    def __init__(self, policy_infer_size, model_file=None):
        #self.board_width = board_width
        #self.board_height = board_height
        self.policy_infer_size = policy_infer_size
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        self.load_model_done = True
        if model_file and os.path.exists(model_file):
            self.load_model_done = False
            self.load_model(model_file)

    def load_model(self, model_file):
        """重新加载模型(仅用于selfplay时load new model)"""
        try:
            #net_params = pickle.load(open(model_file, 'rb'), encoding='bytes') #iso-8859-1')
            net_params = utils.pickle_load(model_file)
            self.model.set_weights(net_params)
            self.load_model_done = True
        except:
            logging.error("load_model fail! {}\t{}".format(
                model_file, utils.get_trace()))
            self.load_model_done = False
        if os.path.exists(
                model_file
        ) and self.load_model_done is False:  #鏂囦欢瀛樺湪鍗村鍦ㄥけ璐ユ椂缁堟杩愯
            exit(-1)
        return self.load_model_done

    def create_policy_value_net(self):
        """创建policy-value网络"""
        # 输入层
        #in_x = network = Input((4, self.board_width, self.board_height))
        in_x = network = Input((4, 1, self.policy_infer_size))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # 走子策略 action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        # infer self.board_width * self.board_height action_probs
        #self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        self.policy_net = Dense(self.policy_infer_size,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # 盘面价值 state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        # infer one current state score
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        # 创建网络模型
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # 返回走子策略和价值概率
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            #print(state_input_union)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """使用模型预测棋盘所有actionid的价值概率"""
        # 棋盘所有可移动action_ids
        legal_positions = board.availables
        #print(legal_positions)
        # 当前玩家角度的actions过程
        current_actions = board.current_actions()
        #print(current_actions)
        # 使用模型预测走子策略和价值概率
        #print(self.policy_infer_size)
        #act_probs, value = self.policy_value(current_actions.reshape(-1, 4, self.board_width, self.board_height))
        act_probs, value = self.policy_value(
            current_actions.reshape(-1, 4, 1, self.policy_infer_size))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # 返回[(action, 概率)] 以及当前玩家的后续走子value
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """初始化损失
        3个损失函数因子
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value损失函数 + policy损失函数 + 惩罚项
        """
        # 定义优化器和损失函数
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """输出训练过程中的结果"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # 评估
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            # 预测
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """获得模型参数"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """保存模型参数到文件"""
        net_params = self.get_policy_param()
        #pickle.dump(net_params, open(model_file, 'wb'), protocol=4)
        utils.pickle_dump(net_params, model_file)
# GAN网络编译
gan_core = Model(inputs=gan_x,
                 outputs=[
                     gan_output, features_full, features_medium, features_low,
                     pred_full, pred_medium, pred_low
                 ])

gan_core.name = "gan_core"
optimizer = Adam(learning_rate, 0.5, decay=decay_rate)
loss_gan = ['mae', 'mae', 'mae', 'mae', 'mse', 'mse', 'mse']
loss_weights_gan = [1, 3.33, 3.33, 3.33, 0.33, 0.33, 0.33]

# gan_core = multi_gpu_model(gan_core_org)
gan_core.compile(optimizer=optimizer,
                 loss_weights=loss_weights_gan,
                 loss=loss_gan)

# --------------------------------
#  编译判别器Discriminator
# --------------------------------

discriminator_full.model.trainable = True
discriminator_medium.model.trainable = True
discriminator_low.model.trainable = True


def zero_loss(y_true, y_pred):
    return K.zeros_like(y_true)

Exemple #27
0
def test_pandas_dataframe():
    input_a = Input(shape=(3,), name='input_a')
    input_b = Input(shape=(3,), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    y = Dense(3, name='desne_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))

    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    model_1.fit(input_a_df,
                output_a_df)
    model_2.fit([input_a_df, input_b_df],
                [output_a_df, output_b_df])
    model_1.fit([input_a_df],
                [output_a_df])
    model_1.fit({'input_a': input_a_df},
                output_a_df)
    model_2.fit({'input_a': input_a_df, 'input_b': input_b_df},
                [output_a_df, output_b_df])

    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.evaluate(input_a_df,
                     output_a_df)
    model_2.evaluate([input_a_df, input_b_df],
                     [output_a_df, output_b_df])
    model_1.evaluate([input_a_df],
                     [output_a_df])
    model_1.evaluate({'input_a': input_a_df},
                     output_a_df)
    model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df},
                     [output_a_df, output_b_df])

    model_1.train_on_batch(input_a_df,
                           output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df],
                           [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df},
                           output_a_df)
    model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                           [output_a_df, output_b_df])

    model_1.test_on_batch(input_a_df,
                          output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df],
                          [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df},
                          output_a_df)
    model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                          [output_a_df, output_b_df])
Exemple #28
0
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data))
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)
                           #metrics=['mse'])
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                           shuffle=True, callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val),
                           shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i,0]*y_hat[i,0]>0 else count_true
            print y[i,0],y_hat[i,0]
        print count_all,count_true
Exemple #29
0
class RenderGAN:
    def __init__(self,
                 tag3d_network,
                 z_dim_offset=50, z_dim_labels=50, z_dim_bits=12,
                 discriminator_units=32,
                 discriminator_depth=2,
                 generator_units=32,
                 generator_depth=2,
                 data_shape=(1, 64, 64),
                 labels_shape=(24,),
                 nb_bits=12,
                 generator_optimizer=Adam(lr=0.0004, beta_1=0.5),
                 discriminator_optimizer=Adam(lr=0.0004, beta_1=0.5)
                 ):
        self.tag3d_network = tag3d_network
        self.z_dim_offset = z_dim_offset
        self.z_dim_labels = z_dim_labels
        self.z_dim_bits = z_dim_bits
        self.z_dim = z_dim_offset + z_dim_labels + z_dim_bits
        self.discriminator_units = discriminator_units
        self.discriminator_depth = discriminator_depth
        self.generator_units = generator_units
        self.generator_depth = generator_depth
        self.preprocess_units = self.generator_units // 2
        self.data_shape = data_shape
        self.labels_shape = labels_shape
        self.nb_bits = nb_bits
        self.generator_optimizer = generator_optimizer
        self.discriminator_optimizer = discriminator_optimizer

        self._build()

    def _build_discriminator(self):
        x = Input(shape=self.data_shape, name='data')
        d = render_gan_discriminator(x, n=self.discriminator_units,
                                     conv_repeat=self.discriminator_depth, dense=[512])
        self.discriminator = Model([x], [d])

    def _build_generator_given_z_offset_and_labels(self):
        labels = Input(shape=self.labels_shape, name='input_labels')
        z_offset = Input(shape=(self.z_dim_offset,), name='input_z_offset')

        outputs = OrderedDict()
        labels_without_bits = Subtensor(self.nb_bits, self.labels_shape[0], axis=1)(labels)
        raw_tag3d, tag3d_depth_map = self.tag3d_network(labels)

        tag3d = ScaleUnitIntervalTo(-1, 1)(raw_tag3d)
        outputs['tag3d'] = tag3d
        outputs['tag3d_depth_map'] = tag3d_depth_map

        segmentation = Segmentation(threshold=-0.08, smooth_threshold=0.2,
                                    sigma=1.5, name='segmentation')

        tag3d_downsampled = PyramidReduce()(tag3d)
        tag3d_segmented = segmentation(raw_tag3d)
        outputs['tag3d_segmented'] = tag3d_segmented
        tag3d_segmented_blur = GaussianBlur(sigma=0.66)(tag3d_segmented)

        out_offset_front = get_offset_front([z_offset, ZeroGradient()(labels_without_bits)],
                                            self.generator_units)

        light_depth_map = get_preprocess(tag3d_depth_map, self.preprocess_units,
                                         nb_conv_layers=2)
        light_outs = get_lighting_generator([out_offset_front, light_depth_map],
                                            self.generator_units)
        offset_depth_map = get_preprocess(tag3d_depth_map, self.preprocess_units,
                                          nb_conv_layers=2)
        offset_middle_light = get_preprocess(concat(light_outs), self.preprocess_units,
                                             resize=['down', 'down'])

        offset_middle_tag3d = get_preprocess(tag3d_downsampled,
                                             self.preprocess_units // 2,
                                             resize=['down', ''],
                                             nb_conv_layers=2)
        out_offset_middle = get_offset_middle(
            [out_offset_front, offset_depth_map,
             offset_middle_light, offset_middle_tag3d],
            self.generator_units)

        offset_back_tag3d_downsampled = get_preprocess(tag3d_downsampled,
                                                       self.preprocess_units // 2,
                                                       nb_conv_layers=2)

        offset_back_feature_map, out_offset_back = get_offset_back(
            [out_offset_middle, offset_back_tag3d_downsampled], self.generator_units)

        blur_factor = get_blur_factor(out_offset_middle, min=0.25, max=1.)
        outputs['blur_factor'] = blur_factor

        tag3d_blur = BlendingBlur(sigma=2.0)([tag3d, blur_factor])
        outputs['tag3d_blur'] = tag3d_blur
        outputs['light_black'] = light_outs[0]
        outputs['light_white'] = light_outs[1]
        outputs['light_shift'] = light_outs[2]
        tag3d_lighten = AddLighting(
            scale_factor=0.90, shift_factor=0.90)([tag3d_blur] + light_outs)
        tag3d_lighten = InBounds(clip=True, weight=15)(tag3d_lighten)
        outputs['tag3d_lighten'] = tag3d_lighten

        outputs['background_offset'] = out_offset_back
        blending = Background(name='blending')([out_offset_back, tag3d_lighten,
                                                tag3d_segmented_blur])
        outputs['fake_without_noise'] = blending
        details = get_details(
            [blending, tag3d_segmented_blur, tag3d, out_offset_back,
             offset_back_feature_map] + light_outs, self.generator_units)
        outputs['details_offset'] = details
        details_high_pass = HighPass(3.5, nb_steps=3)(details)
        outputs['details_high_pass'] = details_high_pass
        fake = InBounds(-2.0, 2.0)(
            merge([details_high_pass, blending], mode='sum'))
        outputs['fake'] = fake
        for name in outputs.keys():
            outputs[name] = name_tensor(outputs[name], name)

        self.generator_given_z_and_labels = Model([z_offset, labels], [fake])
        self.sample_generator_given_z_and_labels_output_names = list(outputs.keys())
        self.sample_generator_given_z_and_labels = Model([z_offset, labels],
                                                         list(outputs.values()))

    @property
    def pos_z_bits(self):
        return (0, self.z_dim_bits)

    @property
    def pos_z_labels(self):
        return (self.z_dim_bits, self.z_dim_bits + self.z_dim_labels)

    @property
    def pos_z_offset(self):
        return (self.z_dim_labels + self.z_dim_bits,
                self.z_dim_labels + self.z_dim_bits + self.z_dim_offset)

    def _build_generator_given_z(self):
        z = Input(shape=(self.z_dim,), name='z')

        z_bits = Subtensor(*self.pos_z_bits, axis=1)(z)
        z_labels = Subtensor(*self.pos_z_labels, axis=1)(z)
        z_offset = Subtensor(*self.pos_z_offset, axis=1)(z)
        bits = ThresholdBits()(z_bits)
        nb_labels_without_bits = self.labels_shape[0] - self.nb_bits
        generated_labels = get_label_generator(
            z_labels, self.generator_units, nb_output_units=nb_labels_without_bits)

        labels_normed = NormSinCosAngle(0)(generated_labels)
        labels = concat([bits, labels_normed], name='labels')
        fake = self.generator_given_z_and_labels([z_offset, labels])
        self.generator_given_z = Model([z], [fake])

        sample_tensors = self.sample_generator_given_z_and_labels([z_offset, labels])
        sample_tensors = [name_tensor(t, n)
                          for t, n in zip(sample_tensors,
                                          self.sample_generator_given_z_and_labels.output_names)]
        self.sample_generator_given_z_output_names = ['labels'] + \
            self.sample_generator_given_z_and_labels_output_names
        self.sample_generator_given_z = Model([z], [labels] + sample_tensors)

    def _build_gan(self):
        self.generator_given_z.compile(self.generator_optimizer, 'binary_crossentropy')
        self.discriminator.compile(self.discriminator_optimizer, 'binary_crossentropy')
        self.gan = GAN(self.generator_given_z, self.discriminator)

    def _build(self):
        self._build_discriminator()
        self._build_generator_given_z_offset_and_labels()
        self._build_generator_given_z()
        self._build_gan()

    def save_weights(self, fname_format, overwrite=False, attrs={}):
        def save(name):
            model = self.__dict__[name]
            fname = fname_format.format(name=name)
            os.makedirs(os.path.dirname(fname), exist_ok=True)
            save_model(model, fname, overwrite=False, attrs=attrs)

        save("sample_generator_given_z")
        save("sample_generator_given_z_and_labels")
        save("generator_given_z_and_labels")
        save("generator_given_z")
        save("discriminator")
Exemple #30
0
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4,
                name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))
        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
Exemple #31
0
def decoder_stochastic_wrn(label_sizes,
                           nb_bits=12,
                           data_shape=(1, 64, 64),
                           activation=lambda: Activation('relu'),
                           normalization=lambda: BatchNormalization(axis=1, mode=0),
                           weight_init='he_normal',
                           weight_decay=0.0001,
                           dropout_probability=0.,
                           wrn_depth=58,
                           wrn_k=2,
                           death_rate=0.5,
                           optimizer='adam'):

    def norm_act_block():
        def f(inputs):
            x = normalization()(inputs)
            x = activation()(x)
            return x
        return f

    def conv2(nb_filter, nb_row, nb_col, subsample=(1, 1), bias=True):
        return Convolution2D(nb_filter, nb_row, nb_col,
                             init=weight_init,
                             border_mode='same',
                             subsample=subsample,
                             bias=bias,
                             W_regularizer=l2(weight_decay))

    def dropout(p=None):
        if p is None:
            p = dropout_probability

        def f(inputs):
            if p > 0.:
                inputs = Dropout(p)(inputs)
            return inputs
        return f

    def equal_gate_shape(input_shapes):
        assert(input_shapes[0] == input_shapes[1])
        return input_shapes[1]

    def residual_block(nb_filter, stochastic=False, stochastic_layers=None):
        def f(inputs):
            x = norm_act_block()(inputs)
            x = conv2(nb_filter, 3, 3)(x)
            x = dropout()(x)
            x = norm_act_block()(x)
            x = conv2(nb_filter, 3, 3)(x)

            if inputs._keras_shape != x._keras_shape:
                inputs = conv2(nb_filter, 1, 1, bias=False)(inputs)

            if not stochastic:
                return merge((inputs, x), mode='sum')

            scale = ScaleInTestPhase(death_rate)
            x = scale(x)

            out = merge([inputs, x], mode="sum", output_shape=x._keras_shape[1:])
            rs = RandomSwitch(death_rate)
            stochastic_layers.append((rs.death_rate, scale.death_rate))
            return rs([out, inputs])
        return f

    def residual_reduction_block(nb_filter):
        def f(inputs):
            x = norm_act_block()(inputs)
            x = conv2(nb_filter, 3, 3, subsample=(2, 2))(x)
            x = dropout()(x)
            x = norm_act_block()(x)
            x = conv2(nb_filter, 3, 3)(x)

            inputs_bottleneck = conv2(nb_filter, 1, 1, subsample=(2, 2),
                                      bias=False)(inputs)

            s = merge((inputs_bottleneck, x), mode='sum')
            return s
        return f

    def skip_connection(inputs, residual, stochastic=False,
                        stochastic_layers=None):
        inputs_filters = inputs._keras_shape[1]
        residual_filters = residual._keras_shape[1]
        subsample = np.array(inputs._keras_shape[2:]) // np.array(residual._keras_shape[2:])

        inputs = dropout()(inputs)
        if (inputs_filters != residual_filters) or np.any(subsample > 1):
            skip = conv2(residual_filters, 1, 1, subsample=subsample, bias=False)(inputs)
        else:
            skip = inputs

        if not stochastic:
            return merge((skip, residual), mode='sum')
        else:
            scale = ScaleInTestPhase(death_rate)
            skip = scale(skip)

            out = merge([skip, residual], mode="sum")
            rs = RandomSwitch(death_rate)
            stochastic_layers.append((rs.death_rate, scale.death_rate))
            return rs([out, residual])

    n = (wrn_depth - 4) // 6
    stochastic_layers = []

    input = Input(shape=data_shape)

    m_stem = conv2(16, 3, 3, subsample=(2, 2))(input)
    m_stem = norm_act_block()(m_stem)

    m_b1 = residual_block(nb_filter=16 * wrn_k,
                          stochastic_layers=stochastic_layers)(m_stem)
    for _ in range(n - 1):
        m_b1 = residual_block(nb_filter=16 * wrn_k, stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b1)
    m_b1 = skip_connection(input, m_b1, stochastic=True,
                           stochastic_layers=stochastic_layers)

    m_b2 = residual_reduction_block(nb_filter=32 * wrn_k)(m_b1)
    for _ in range(n - 1):
        m_b2 = residual_block(nb_filter=32 * wrn_k, stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b2)
    m_b2 = skip_connection(m_b1, m_b2, stochastic=True,
                           stochastic_layers=stochastic_layers)

    m_b3 = residual_reduction_block(nb_filter=64 * wrn_k)(m_b2)
    for _ in range(n - 1):
        m_b3 = residual_block(nb_filter=64 * wrn_k, stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b3)
    m_b3 = skip_connection(m_b2, m_b3, stochastic=True,
                           stochastic_layers=stochastic_layers)
    m_b3 = skip_connection(input, m_b3, stochastic=True,
                           stochastic_layers=stochastic_layers)

    x = norm_act_block()(m_b3)
    x = AveragePooling2D(pool_size=(8, 8))(x)
    x = dropout()(x)

    for i, (tb, ts) in enumerate(stochastic_layers, start=0):
        K.set_value(tb, i / len(stochastic_layers) * death_rate)
        K.set_value(ts, i / len(stochastic_layers) * death_rate)

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits, activation, weight_decay)

    model = Model(input, list(outputs.values()))
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights={k: decoder_loss_weights(k) for k in losses.keys()})
    return model
Exemple #32
0
class SeqGAN:

    #
    # Initialization
    # ----------------------------------------------------------------------------
    def __init__(self, g, d, m, g_optimizer, d_optimizer):

        # Model of generator
        self.g = g

        # Model of discriminator
        self.d = d

        # Model of ???
        self.m = m

        self.z, self.seq_input = self.g.inputs
        self.fake_prob, = self.g.outputs

        self.history = None

        with trainable(m, False):
            # m_input = merge([self.seq_input, self.fake_prob], mode='concat', concat_axis=1)
            m_input = concatenate([self.seq_input, self.fake_prob], axis=1)
            self.m_realness = self.m(m_input)
            self.model_fit_g = Model(inputs=[self.z, self.seq_input],
                                     outputs=[self.m_realness])
            self.model_fit_g.compile(optimizer=g_optimizer,
                                     loss=K.binary_crossentropy)

        self.d.compile(optimizer=d_optimizer, loss=K.binary_crossentropy)

    #
    # Return the shape of input noise variables z
    # (batch_size: the number of data read per batch)
    # ----------------------------------------------------------------------------
    def z_shape(self, batch_size=64):
        # layer, _, _ = self.z._keras_history
        # _keras_history是一个tuple类型,第一个参数代表previous Layer; 第二个参数node_index; 第三个参数是tensor_index
        # ( 因为可能有多个tensor output,如果是只有一个tensor output的话,tensor_index = 0)
        layer, node_index, tensor_index = self.z._keras_history
        return (batch_size,
                ) + layer.output_shape[1:]  # the first dimension is data size

    #
    # Sample input noise variables z by uniform distributions
    # (batch_size: the number of data read per batch)
    # ----------------------------------------------------------------------------
    def sample_z(self, batch_size=64):
        shape = self.z_shape(batch_size)
        return np.random.uniform(-1, 1, shape)

    #
    # Generate the fake samples with: the input noise variables z & input sequence seq_input
    # ----------------------------------------------------------------------------
    def generate(self, z, seq_input, batch_size=32):
        return self.g.predict([z, seq_input], batch_size=batch_size)

    #
    # Training
    # ----------------------------------------------------------------------------
    def train_on_batch(self, seq_input, real, d_target=None):
        nb_real = len(real)
        nb_fake = len(seq_input)
        if d_target is None:
            d_target = np.concatenate(
                [np.zeros((nb_fake, 1)),
                 np.ones((nb_real, 1))])
        fake_prob = self.generate(self.sample_z(nb_fake), seq_input)
        fake = np.concatenate([seq_input, prob_to_sentence(fake_prob)], axis=1)
        fake_and_real = np.concatenate([fake, real], axis=0)
        d_loss = self.d.train_on_batch(x=fake_and_real, y=d_target)
        d_realness = self.d.predict(fake)
        m_loss = self.m.train_on_batch(x=np.concatenate([seq_input, fake_prob],
                                                        axis=1),
                                       y=d_realness)
        g_loss = self.model_fit_g.train_on_batch(
            x=[self.sample_z(nb_fake), seq_input], y=np.ones((nb_fake, 1)))
        return g_loss, d_loss, m_loss

    #
    # Training
    # ----------------------------------------------------------------------------
    def fit_generator(self,
                      generator,
                      nb_epoch,
                      nb_batches_per_epoch,
                      callbacks=[],
                      batch_size=None,
                      verbose=False):
        if batch_size is None:
            batch_size = 2 * len(next(generator)[0])

        out_labels = ['g', 'd', 'm']

        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + callbacks + [self.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)
        callbacks.set_model(self)
        callbacks.set_params({
            'nb_epoch': nb_epoch,
            'nb_sample': nb_batches_per_epoch * batch_size,
            'verbose': verbose,
            'metrics': out_labels,
        })
        callbacks.on_train_begin()

        for e in range(nb_epoch):
            callbacks.on_epoch_begin(e)
            for batch_index, (seq_input, real) in enumerate(generator):
                callbacks.on_batch_begin(batch_index)
                batch_logs = dict()
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(real) + len(seq_input)
                outs = self.train_on_batch(seq_input, real)

                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if batch_index + 1 == nb_batches_per_epoch:
                    break

            callbacks.on_epoch_end(e)
        callbacks.on_train_end()
Exemple #33
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # enable verbose for evaluate_generator
    out = model.evaluate_generator(gen_data(4), steps=3, verbose=1)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3, epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              validation_steps=3, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3), epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator will throw an exception if steps is unspecified for regular generator
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.fit_generator(generator=gen_data(), epochs=5,
                                  initial_epoch=0, validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # Check if generator is only accessed an expected number of times
    gen_counters = [0, 0]

    def gen_data(i):
        while True:
            gen_counters[i] += 1
            yield ([np.random.random((1, 3)), np.random.random((1, 3))],
                   [np.random.random((1, 4)), np.random.random((1, 3))])
    out = model.fit_generator(generator=gen_data(0), epochs=3,
                              steps_per_epoch=2,
                              validation_data=gen_data(1),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)

    # Need range check here as filling of the queue depends on sleep in the enqueuers
    assert 6 <= gen_counters[0] <= 8
    # 12 = (epoch * workers * validation steps * max_queue_size)
    assert 3 <= gen_counters[1] <= 12

    gen_counters = [0]
    out = model.fit_generator(generator=RandomSequence(3), epochs=3,
                              validation_data=gen_data(0),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)

    # 12 = (epoch * workers * validation steps * max_queue_size)
    # Need range check here as filling of the queue depends on sleep in the enqueuers
    assert 3 <= gen_counters[0] <= 12

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(RandomSequence(batch_size,
                                                 sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(RandomSequence(batch_size,
                                                 sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer, loss, metrics=[], sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(RandomSequence(batch_size,
                                                sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(RandomSequence(batch_size,
                                                sequence_length=sequence_length))
    assert np.shape(out) == shape_0
Exemple #34
0
class AdditionNPIModel(NPIStep):
    model = None
    f_enc = None

    def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE, input_length=1,
                              batch_input_shape=(self.batch_size, 1))

        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(Dense(256))
        f_enc.add(Dense(32))
        f_enc.add(Activation('relu', name='relu_enc'))
        self.f_enc = f_enc

        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)

        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))

        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        # f_lstm.add(Activation('relu', name='relu_lstm_0'))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(LSTM(256, return_sequences=False, stateful=True))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)

        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(10))
        f_end.add(Dense(1))
        f_end.add(Activation('hard_sigmoid', name='hard_sigmoid_end'))
        # plot(f_end, to_file='f_end.png', show_shapes=True)

        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE))
        f_prog.add(Dense(PROGRAM_VEC_SIZE))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)

        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num+1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(32))
            f_arg.add(Dense(IntegerArguments.depth))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
        # plot(f_arg, to_file='f_arg.png', show_shapes=True)

        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] + [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer, loss=loss, loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        """

        :param int epoch:
        :param typing.List[typing.Dict[q=dict, steps=typing.List[StepInOut]]] steps_list:
        :return:
        """

        def filter_question(condition_func):
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        # self.print_weights()
        if not self.weight_loaded:
            self.train_f_enc(filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100), epoch=100)
        self.f_enc.trainable = False

        q_type = "training questions of a+b < 10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a+b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b< 10 and 10 <= a+b"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a<10 and b<10 and a + b >= 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<10 and b<10"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 10 and b < 10), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), epoch=epoch, pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        while True:
            print("test all type of questions")
            cc, wc = self.test_to_subset(create_questions(1000))
            print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc))
            if wc == 0:
                break

            q_type = "training questions of ALL"
            print(q_type)
            pr = 1.0
            self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr)
            all_ok = self.fit_to_subset(filter_question(lambda a, b: True), epoch=epoch, pass_rate=pr, skip_correct=True)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, epoch=3000, pass_rate=1.0, skip_correct=False):
        learning_rate = 0.0001
        for i in range(30):
            all_ok = self.do_learn(steps_list, 30, learning_rate=learning_rate, pass_rate=pass_rate, arg_weight=1.,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
            learning_rate *= 0.95
        return False

    def test_to_subset(self, questions):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        correct_count = wrong_count = 0
        for idx, question in enumerate(questions):
            question = copy(question)
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                wrong_count += 1
        return correct_count, wrong_count

    @staticmethod
    def dict_to_str(d):
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, learning_rate=None, pass_rate=1.0, arg_weight=1., skip_correct=False):
        if learning_rate is not None:
            self.update_learning_rate(learning_rate, arg_weight)
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch+1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                    ok_rate.append(1)
                    if skip_correct or int(math.sqrt(correct_count[question_key])) ** 2 != correct_count[question_key]:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key]))
                        correct_count[question_key] = 0
                        wrong_new += 1

                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                    last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" %
                      (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0

                if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:
                    print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:")
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model")
        env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num %  10)+1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10)+1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))

    def question_test(self, addition_env, npi_runner, question):
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        x_pg = np.array((p_in.program.program_id,))
        x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)]
        return x

    def convert_output(self, p_out: StepOutput):
        y = [np.array((p_out.r,))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]

        for v in arg_values:  # split by each args
            y += [v]
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(x, batch_size=1)  # if batch_size==1, returns single row

        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program, IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()

        if model_file:
            print("[Notice] load model from file")
            self.model = load_model(model_file)
        else:
            print("[Notice] create model")
            self.create_policy_value_net()
        self._loss_train_op()

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model to file """
        print("save model file")
        self.model.save(model_file)
Exemple #36
0
def test_pandas_dataframe():
    input_a = Input(shape=(3, ), name='input_a')
    input_b = Input(shape=(3, ), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    y = Dense(3, name='desne_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))

    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
Exemple #37
0
class VAE(AE):
    """
    Variational Autoencoder. This consists of an encoder and a decoder plus an interpolateable latent space.
    """
    def __init__(self,
                 encoder=None,
                 decoder=None,
                 autoencoder=None,
                 latent_dim=None):
        super(VAE, self).__init__(encoder=None, decoder=None)

        # Encoder and decoder must be provided.
        assert (encoder != None and decoder != None)

        # From loading.
        if encoder != None and decoder != None and autoencoder != None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            self.latent_dim = decoder.inputs[0].shape.as_list()[-1]
            return

        # Set the latent dimensions.
        self.latent_dim = latent_dim
        assert self.latent_dim != None

        # Encoder.
        encoder_input = encoder.inputs[0]
        encoder_output = encoder.outputs[0]
        z_mean = layers.Dense(self.latent_dim, name='z_mean')(encoder_output)
        z_log_var = layers.Dense(self.latent_dim,
                                 name='z_log_var')(encoder_output)
        z = layers.Lambda(sampling, output_shape=(self.latent_dim, ),
                          name='z')([z_mean, z_log_var])
        self.encoder = Model(encoder_input, [z_mean, z_log_var, z],
                             name='encoder')

        # Decoder.
        self.decoder = decoder

        # Creating the VAE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs)[2])  # This is z.
        self.autoencoder = Model(inputs, outputs, name="vae")

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the VAE.

        Additionally to the default functionality of *compile*, it adds the VAE-loss.
        This loss takes the provided loss and interprets it as a reconstruction-loss.

        The VAE loss is similar to

        >>> vae_loss = mean(r_loss + kl_loss)

        See the literature for details.

        """

        self.loss = loss

        # Inputs.
        inputs = self.encoder.inputs[0]
        inputs_dim = int(np.prod(inputs.shape.as_list()[1:]))

        # Outputs.
        z_mean = self.encoder.outputs[0]
        z_log_var = self.encoder.outputs[1]
        outputs = self.decoder(self.encoder(inputs)[2])  # This is z.

        # Define the loss.
        def vae_loss(loss_inputs, loss_outputs):

            # Flatten all to accept different dimensions.
            loss_inputs = K.flatten(loss_inputs)
            loss_outputs = K.flatten(loss_outputs)

            # Reconstruction loss.
            if isinstance(self.loss, str):
                r_loss = losses.get(self.loss)
            else:
                r_loss = self.loss
            r_loss *= inputs_dim

            # kl loss.
            kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
            kl_loss = K.sum(kl_loss, axis=-1)
            kl_loss *= -0.5

            # VAE loss.
            vae_loss = K.mean(r_loss + kl_loss)
            vae_loss /= inputs_dim
            return vae_loss

        # Compile model.
        loss = vae_loss
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def predict_embed_samples_into_latent(self,
                                          x,
                                          batch_size=None,
                                          verbose=0,
                                          steps=None):

        return self.encoder.predict(x, batch_size, verbose, steps)[2]
Exemple #38
0
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)
    out = model.fit(input_b_np, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np}, [output_a_np, output_b_np],
                         batch_size=10)
    out = model.evaluate(input_b_np, [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
Exemple #39
0
class TL(Model):
    """
    Triplet-Loss trained Neural Network.

    https://arxiv.org/abs/1503.03832
    """
    def __init__(self, base=None, siamese=None):
        super(TL, self).__init__()

        # Store the base model.
        assert (base != None)
        self.base = base

        # For loading.
        if base != None and siamese != None:
            self.base = base
            self.siamese = siamese
            self.latent_dim = self.base.outputs[0].shape[1]
            return

        # Get the latent dimension.
        assert len(self.base.outputs) == 1
        assert len(self.base.outputs[0].shape) == 2
        self.latent_dim = self.base.outputs[0].shape[1]

        # Get the input shape.
        input_shape = self.base.inputs[0].shape.as_list()[1:]

        # Create the anchor.
        input_anchor = layers.Input(shape=input_shape)
        output_anchor = input_anchor
        output_anchor = self.base(output_anchor)

        # Create the positive.
        input_positive = layers.Input(shape=input_shape)
        output_positive = input_positive
        output_positive = self.base(output_positive)

        # Create the negative.
        input_negative = layers.Input(shape=input_shape)
        output_negative = input_negative
        output_negative = self.base(output_negative)

        # Create a dummy output.
        output = layers.concatenate(
            [output_anchor, output_positive, output_negative])

        # Create the model.
        self.siamese = Model([input_anchor, input_positive, input_negative],
                             output,
                             name="triplet_model")

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                triplet_loss="mse",
                **kwargs):
        """
        Compiles the TL.

        Additionally to the default functionality of *compile*, it adds the triplet-loss.
        In order to do so you have to provide it via the parameter *triplet_loss*.

        The VAE loss is similar to

        >>> vae_loss = max(0.0, pos_dist - neg_dist + alpha)

        See the literature for details.

        Additional args:
            triplet_loss (string): The base-loss for the triplet-loss. Values are either *euclidean* for euclidean norm or *cosine* for cosine similarity.

        """
        assert loss == None, "Not expected to provide an explicit loss for TL. Use 'triplet_loss'"

        self.triplet_loss = triplet_loss

        def triplet_loss_function(y_true, y_pred, alpha=0.4):

            anchor = y_pred[:, 0:self.latent_dim]
            positive = y_pred[:, self.latent_dim:self.latent_dim * 2]
            negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3]

            if triplet_loss == "euclidean":
                pos_dist = euclidean_loss(positive, anchor)
                neg_dist = euclidean_loss(negative, anchor)
            elif triplet_loss == "cosine":
                pos_dist = cosine_loss(positive, anchor)
                neg_dist = cosine_loss(negative, anchor)
            else:
                raise Exception("Unexpected: " + triplet_loss)

            basic_loss = pos_dist - neg_dist + alpha
            loss = K.maximum(basic_loss, 0.0)
            return loss

        loss = triplet_loss_function

        self.siamese.compile(optimizer, loss, metrics, loss_weights,
                             sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            minibatch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        This is basically the same as in vanilla Keras.

        Additional args:
            minibatch_size (int): The model internally does some sampling. The *minibatch_size* specifies how many candidates to use in order to create a triplet for training.
        """

        assert minibatch_size != None, "ERROR! Must provide 'minibatch_size'."
        assert steps_per_epoch != None, "ERROR! Must provide 'steps_per_epoch'."
        assert validation_steps != None, "ERROR! Must provide 'validation_steps'."

        y_dummy = np.zeros((batch_size, self.latent_dim * 3))

        # Template generator.
        def triplet_loss_generator(x_generator, y_generator, model, sampling):

            # Get the classes.
            classes = sorted(list(set(y_generator)))

            # Sort by classes for easy indexing.
            class_indices = {}
            for c in classes:
                class_indices[c] = []
            for index, c in enumerate(y_generator):
                class_indices[c].append(index)

            # Compute the complements.
            class_complements = {}
            for c in classes:
                class_complements[c] = [c2 for c2 in classes if c2 != c]

            # Generator loop.
            while True:

                x_input_anchors = []
                x_input_positives = []
                x_input_negatives = []

                # Generate a whole batch.
                for _ in range(batch_size):
                    anchor_class = random.choice(classes)
                    anchor_index = random.choice(class_indices[anchor_class])
                    anchor_input = x_generator[anchor_index]
                    #print("anchor_class", anchor_class)
                    anchor_latent = self.base.predict(
                        np.expand_dims(anchor_input, axis=0))[0]

                    # Generate some positive candidates.
                    positive_candidates = []
                    while len(positive_candidates) < minibatch_size:
                        positive_class = anchor_class
                        positive_index = random.choice(
                            class_indices[positive_class])
                        positive_input = x_generator[positive_index]
                        assert positive_class == y_generator[positive_index]
                        #print("positive_class", positive_class)
                        positive_candidates.append(positive_input)

                    # Find the farthest positive candidate.
                    positive_candidates = np.array(positive_candidates)
                    positive_latents = self.base.predict(positive_candidates)
                    positive_extremum = compute_latent_extremum(
                        anchor_latent, positive_latents, "argmax",
                        self.triplet_loss)
                    positive_input = positive_candidates[positive_extremum]

                    # Generate some negative candidates.
                    negative_candidates = []
                    while len(negative_candidates) < minibatch_size:
                        negative_class = random.choice(
                            class_complements[anchor_class])
                        negative_index = random.choice(
                            class_indices[negative_class])
                        negative_input = x_generator[negative_index]
                        assert negative_class == y_generator[negative_index]
                        #print("negative_class", negative_class)
                        negative_candidates.append(negative_input)

                    # Find the closest negative candidate.
                    negative_candidates = np.array(negative_candidates)
                    negative_latents = self.base.predict(negative_candidates)
                    negative_extremum = compute_latent_extremum(
                        anchor_latent, negative_latents, "argmin",
                        self.triplet_loss)
                    negative_input = negative_candidates[negative_extremum]

                    # Done.
                    x_input_anchors.append(anchor_input)
                    x_input_positives.append(positive_input)
                    x_input_negatives.append(negative_input)

                x_input_anchors = np.array(x_input_anchors)
                x_input_positives = np.array(x_input_positives)
                x_input_negatives = np.array(x_input_negatives)
                x_input = [
                    x_input_anchors, x_input_positives, x_input_negatives
                ]

                yield x_input, y_dummy

        # Create the generators.
        training_generator = triplet_loss_generator(x, y, batch_size,
                                                    self.siamese)
        if validation_data != None:
            validation_generator = triplet_loss_generator(
                validation_data[0], validation_data[1], batch_size,
                self.siamese)
        else:
            validation_generator = None

        # Create the history.
        history_keys = ["loss", "val_loss"]
        history = {}
        for history_key in history_keys:
            history[history_key] = []

        # Training the model
        for epoch in range(epochs):

            print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...")

            # Generating data for training.
            training_input, training_output = next(training_generator)
            if validation_generator != None:
                validation_input, validation_output = next(
                    validation_generator)

            model_history = self.siamese.fit(
                training_input,
                training_output,
                validation_data=(validation_input, validation_output),
                epochs=1,
                steps_per_epoch=steps_per_epoch,
                verbose=0,
                validation_steps=validation_steps)

            # Update the history.
            for history_key in history_keys:
                history_value = model_history.history[history_key]
                history[history_key].append(history_value)
                print(history_key, history_value)

        return history

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """
        Coming soon...
        """

        print("TODO: implement fit_generator!")

        raise Exception("Not implemented!")

        return self.siamese.fit_generator(generator, steps_per_epoch, epochs,
                                          verbose, callbacks, validation_data,
                                          validation_steps, class_weight,
                                          max_queue_size, workers,
                                          use_multiprocessing, shuffle,
                                          initial_epoch)

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """
        Evaluates the model. Same as vanilla Keras.
        """

        return self.siamese.evaluate(x,
                                     y,
                                     batch_size,
                                     verbose,
                                     sample_weight,
                                     steps=None)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. Same as vanilla Keras.
        """

        return self.siamese.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """

        print("Basemodel:")
        self.base.summary()
        print("Siamese model:")
        self.siamese.summary()

    def save(self, path):
        """
        Saves the TL.

        This includes the whole Siamese Net plus the base-model.

        This code

        >>> tl.save("myae.h5")

        will create the files *tl.h5*, and *tl-base.h5*.

        """
        self.siamese.save(path)
        self.base.save(append_to_filepath(path, "-base"))
Exemple #40
0
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(dropouted)
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(cnn)
flattened = Flatten()(cnn)
dense = Dense(100, activation='tanh')(flattened)

predict = Dense(2, activation='softmax')(dense)
model = Model(input=[word, distance_e1, distance_e2], output=predict)

# opt = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
#    opt = Adagrad(lr=0.01, epsilon=1e-06)
#    opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06)
#    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=opt)
train_instances = [line.strip() for line in lines]
label_array_t, word_array_t, dis_e1_array_t, dis_e2_array_t = rep.represent_instances(
    train_instances)

model.fit([word_array_t, dis_e1_array_t, dis_e2_array_t],
          label_array_t,
          batch_size=128,
          epochs=epoch_size)
model.save(output_file)
label_array_ans = model.predict([word_array_t, dis_e1_array_t, dis_e2_array_t],
                                batch_size=128)
print(label_array_ans)
print("训练完成!!")
eval_mulclass(label_array_t, label_array_ans)
Exemple #41
0
class GAN(Model):
    """ Generative Adversarial Network (GAN). """
    def __init__(self, generator, discriminator):
        super(GAN, self).__init__()

        assert generator != None
        assert discriminator != None
        assert discriminator.optimizer != None, "Discriminator must be compiled!"

        self.generator = generator
        self.discriminator = discriminator

        # Create the GAN.
        z_shape = generator.inputs[0].shape[1:]
        gan_input = layers.Input(shape=z_shape)
        gan_output = gan_input
        gan_output = self.generator(gan_output)
        self.discriminator.trainable = False
        gan_output = self.discriminator(gan_output)
        self.gan = Model(gan_input, gan_output)

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the model. Same as vanilla Keras.
        """

        self.gan.compile(optimizer, loss, metrics, loss_weights,
                         sample_weight_mode, weighted_metrics, **kwargs)

    def fit(
            self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            sample_interval=None,  # TODO document!
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        Trains the GAN.

        This is almost the same as in vanilla Keras.
        """

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # Select a random batch of images
            idx = np.random.randint(0, x.shape[0], batch_size)
            imgs = x[idx]

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Generate a batch of new images.
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Train the generator (to have the discriminator label samples as valid).
            g_loss = self.gan.train_on_batch(noise, valid)
            if type(g_loss) == list:
                g_loss = g_loss[0]

            # Plot the progress.
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss),
                  end="\r")

            # If at save interval => save generated image samples
            if sample_interval != None and epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """
        Samples images.
        """

        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, 100))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        #fig.savefig("images/%d.png" % epoch)
        plt.show()
        plt.close()

    def summary(self):
        """
        Provides a summary.
        """

        print("Generator:")
        self.generator.summary()
        print("Discriminator:")
        self.discriminator.summary()
        print("GAN:")
        self.gan.summary()

    def save(self, path):
        """
        Saves the GAN.

        This includes the whole autoencoder plus the encoder and the decoder.
        The encoder and decoder use the path plus a respective annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and *myae-decoder.h5*.

        """
        self.gan.save(path)
        self.generator.save(append_to_filepath(path, "-generator"))
        self.discriminator.save(append_to_filepath(path, "-discriminator"))
class AIPlayer(Player):
    
    def __init__(self, buffer_size, sim_count, train=True, model="", tau = 1, compile=False):
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau

    @staticmethod
    def create_if_nonexistant(config):
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move)
            ai.save(config.data.model_location+"model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train
    
    @staticmethod
    def clear():
        K.clear_session()
    
    def load(self, file, compile=False):
        try:
            del self.network
        except Exception:
            pass
        self.network = load_model(file, custom_objects={"objective_function_for_policy":AIPlayer.objective_function_for_policy,
                                                        "objective_function_for_value":AIPlayer.objective_function_for_value}, compile=compile)
        
    def save(self, file):
        self.network.save(file)
    
    def create_network(self):
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)

        res_out = x
        
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8*8+1, activation="softmax", name="policy_out")(x)

        x = Conv2D(filters=1, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out =  Dense(1, activation="tanh", name="value_out")(x)
        
        self.network = Model(x_in, [policy_out, value_out], name="reversi_model")
        self.compile()
      
    def _build_residual_block(self, x):
        in_x = x
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x
        
    def compile(self):
        losses = [AIPlayer.objective_function_for_policy, AIPlayer.objective_function_for_value]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9), loss=losses)
      
    def update_lr(self, lr):
         K.set_value(self.network.optimizer.lr, lr)
        
    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)
        
    def update_buffer(self, winner):
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))
    
    def train_batches(self, batch_size, batches=-1, verbose=2):
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size*batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer], batch_size=batch_size, epochs=1, verbose=verbose)
        return history
    
    def preprocess_input(self, board, side):
        state = np.zeros((3, 8, 8), dtype=np.int)
        for i in range(8):
            for j in range(8):
                if board[i,j] == 1:
                    state[0,i,j] = 1
                elif board[i,j] == -1:
                    state[1,i,j] = 1
                if side == 1:
                    state[2,i,j] = 1
        return state
    
    def evaluate(self, game, side):
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis,:])
        return pred[1][0]
    
    def pick_move(self, game, side):
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1,-1))
        monte_prob = self.monte_carlo(game, side)
        
        if self.train:
            self.temp_state.append((self.preprocess_input(game.board, side), np.divide(monte_prob, np.sum(monte_prob))))
        
        monte_prob = np.float_power(monte_prob, 1/self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))
        
        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]
            
    def monte_carlo(self, game, side):
        N = defaultdict(lambda: 0)
        W = defaultdict(lambda: 0)
        Q = defaultdict(lambda: 0)
        P = defaultdict(lambda: 0)
        
        
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            policy = np.zeros((65))
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy
        
        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis,:])
        policy = pred[0][0]
        
        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]
          
        for move in possible_moves:
            P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
        
        for i in range(self.sim_count):
            #print("Sim #%d"% i)
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1,-1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid, mid)] + P[(sid, mid)]/(N[(sid, mid)]+1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move
                
                #print(best_move)
                
                if N[(sid, Othello.move_id(best_move))] == 0:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner()*side
                            Q[node] = W[node]/N[node]
                        break
                    
                    current_input = self.preprocess_input(clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis,:])
                    policy = pred[0][0]
                    value = pred[1][0]
                    
                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1,-1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]
                      
                    for move in possible_moves:
                        P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
                    
                    for node in visited:
                        N[node] += 1
                        W[node] += value*side
                        Q[node] = W[node]/N[node]
                    #print()
                    break
                else:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner()*side
                            Q[node] = W[node]/N[node]
                        break
                             
        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid,mid)]
        
        return policy
Exemple #43
0
class AdditionNPIModel(NPIStep):
    model = None
    f_enc = None

    def __init__(self,
                 system: RuntimeSystem,
                 model_path: str = None,
                 program_set: AdditionProgramSet = None):
        self.system = system
        self.model_path = model_path
        self.program_set = program_set
        self.batch_size = 1
        self.build()
        self.weight_loaded = False
        self.load_weights()

    def build(self):
        enc_size = self.size_of_env_observation()
        argument_size = IntegerArguments.size_of_arguments
        input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size),
                               name='input_enc')
        input_arg = InputLayer(batch_input_shape=(self.batch_size,
                                                  argument_size),
                               name='input_arg')
        input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE,
                              output_dim=PROGRAM_KEY_VEC_SIZE,
                              input_length=1,
                              batch_input_shape=(self.batch_size, 1))

        f_enc = Sequential(name='f_enc')
        f_enc.add(Merge([input_enc, input_arg], mode='concat'))
        f_enc.add(MaxoutDense(128, nb_feature=4))
        self.f_enc = f_enc

        program_embedding = Sequential(name='program_embedding')
        program_embedding.add(input_prg)

        f_enc_convert = Sequential(name='f_enc_convert')
        f_enc_convert.add(f_enc)
        f_enc_convert.add(RepeatVector(1))

        f_lstm = Sequential(name='f_lstm')
        f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
        f_lstm.add(
            LSTM(256,
                 return_sequences=False,
                 stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_1'))
        f_lstm.add(RepeatVector(1))
        f_lstm.add(
            LSTM(256,
                 return_sequences=False,
                 stateful=True,
                 W_regularizer=l2(0.0000001)))
        f_lstm.add(Activation('relu', name='relu_lstm_2'))
        # plot(f_lstm, to_file='f_lstm.png', show_shapes=True)

        f_end = Sequential(name='f_end')
        f_end.add(f_lstm)
        f_end.add(Dense(1, W_regularizer=l2(0.001)))
        f_end.add(Activation('sigmoid', name='sigmoid_end'))

        f_prog = Sequential(name='f_prog')
        f_prog.add(f_lstm)
        f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu"))
        f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001)))
        f_prog.add(Activation('softmax', name='softmax_prog'))
        # plot(f_prog, to_file='f_prog.png', show_shapes=True)

        f_args = []
        for ai in range(1, IntegerArguments.max_arg_num + 1):
            f_arg = Sequential(name='f_arg%s' % ai)
            f_arg.add(f_lstm)
            f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001)))
            f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
            f_args.append(f_arg)
        # plot(f_arg, to_file='f_arg.png', show_shapes=True)

        self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                           [f_end.output, f_prog.output] +
                           [fa.output for fa in f_args],
                           name="npi")
        self.compile_model()
        plot(self.model, to_file='model.png', show_shapes=True)

    def reset(self):
        super(AdditionNPIModel, self).reset()
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        arg_num = IntegerArguments.max_arg_num
        optimizer = Adam(lr=lr)
        loss = ['binary_crossentropy', 'categorical_crossentropy'
                ] + ['categorical_crossentropy'] * arg_num
        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           loss_weights=[0.25, 0.25] + [arg_weight] * arg_num)

    def fit(self, steps_list, epoch=3000):
        # 过滤一些问题
        def filter_question(condition_func):
            sub_steps_list = []
            for steps_dict in steps_list:
                question = steps_dict['q']
                if condition_func(question['in1'], question['in2']):
                    sub_steps_list.append(steps_dict)
            return sub_steps_list

        if not self.weight_loaded:
            self.train_f_enc(
                filter_question(lambda a, b: 10 <= a < 100 and 10 <= b < 100),
                epoch=100)
        self.f_enc.trainable = False

        self.update_learning_rate(0.0001)

        q_type = "training questions of a<100 and b<100"
        print(q_type)
        pr = 0.8
        all_ok = self.fit_to_subset(
            filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr)
        print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

        while True:
            if self.test_and_learn([10, 100, 1000]):
                break

            q_type = "training questions of ALL"
            print(q_type)

            q_num = 100
            skip_correct = False
            pr = 1.0
            questions = filter_question(lambda a, b: True)
            np.random.shuffle(questions)
            questions = questions[:q_num]
            all_ok = self.fit_to_subset(questions,
                                        pass_rate=pr,
                                        skip_correct=skip_correct)
            print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok))

    def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False):
        for i in range(10):
            all_ok = self.do_learn(steps_list,
                                   100,
                                   pass_rate=pass_rate,
                                   skip_correct=skip_correct)
            if all_ok:
                return True
        return False

    def test_and_learn(self, num_questions):
        for num in num_questions:
            print("test all type of %d questions" % num)
            cc, wc, wrong_questions = self.test_to_subset(
                create_random_questions(num))
            acc_rate = cc / (cc + wc)
            print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc))
            if wc > 0:
                self.fit_to_subset(wrong_questions,
                                   pass_rate=1.0,
                                   skip_correct=False)
                return False
        return True

    def test_to_subset(self, questions):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        teacher = AdditionTeacher(self.program_set)
        npi_runner = TerminalNPIRunner(None, self)
        teacher_runner = TerminalNPIRunner(None, teacher)
        correct_count = wrong_count = 0
        wrong_steps_list = []
        for idx, question in enumerate(questions):
            question = copy(question)
            if self.question_test(addition_env, npi_runner, question):
                correct_count += 1
            else:
                self.question_test(addition_env, teacher_runner, question)
                wrong_steps_list.append({
                    "q": question,
                    "steps": teacher_runner.step_list
                })
                wrong_count += 1
        return correct_count, wrong_count, wrong_steps_list

    @staticmethod
    def dict_to_str(d):
        return str(tuple([(k, d[k]) for k in sorted(d)]))

    def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False):
        addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH)
        npi_runner = TerminalNPIRunner(None, self)
        last_weights = None
        correct_count = Counter()
        no_change_count = 0
        last_loss = 1000
        for ep in range(1, epoch + 1):
            correct_new = wrong_new = 0
            losses = []
            ok_rate = []
            np.random.shuffle(steps_list)
            for idx, steps_dict in enumerate(steps_list):
                question = copy(steps_dict['q'])
                question_key = self.dict_to_str(question)
                if self.question_test(addition_env, npi_runner, question):
                    if correct_count[question_key] == 0:
                        correct_new += 1
                    correct_count[question_key] += 1
                    print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" %
                          (ep, idx, self.dict_to_str(question),
                           correct_count[question_key]))
                    ok_rate.append(1)
                    cc = correct_count[question_key]
                    if skip_correct or int(math.sqrt(cc))**2 != cc:
                        continue
                else:
                    ok_rate.append(0)
                    if correct_count[question_key] > 0:
                        print(
                            "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0"
                            % (ep, idx, self.dict_to_str(question),
                               correct_count[question_key]))
                        correct_count[question_key] = 0
                        wrong_new += 1

                steps = steps_dict['steps']
                xs = []
                ys = []
                ws = []
                for step in steps:
                    xs.append(self.convert_input(step.input))
                    y, w = self.convert_output(step.output)
                    ys.append(y)
                    ws.append(w)

                self.reset()

                for i, (x, y, w) in enumerate(zip(xs, ys, ws)):
                    loss = self.model.train_on_batch(x, y, sample_weight=w)
                    if not np.isfinite(loss):
                        print("Loss is not finite!, Last Input=%s" %
                              ([i, (x, y, w)]))
                        self.print_weights(last_weights, detail=True)
                        raise RuntimeError("Loss is not finite!")
                    losses.append(loss)
                    last_weights = self.model.get_weights()
            if losses:
                cur_loss = np.average(losses)
                print(
                    "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)"
                    % (ep, np.average(ok_rate) * 100, correct_new, wrong_new,
                       cur_loss, len(steps_list)))
                # self.print_weights()
                if correct_new + wrong_new == 0:
                    no_change_count += 1
                else:
                    no_change_count = 0

                if math.fabs(1 - cur_loss /
                             last_loss) < 0.001 and no_change_count > 5:
                    print(
                        "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:"
                    )
                    return False
                last_loss = cur_loss
                print("=" * 80)
            self.save()
            if np.average(ok_rate) >= pass_rate:
                return True
        return False

    def update_learning_rate(self, learning_rate, arg_weight=1.):
        print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight))
        self.compile_model(learning_rate, arg_weight=arg_weight)

    def train_f_enc(self, steps_list, epoch=50):
        print("training f_enc")
        f_add0 = Sequential(name='f_add0')
        f_add0.add(self.f_enc)
        f_add0.add(Dense(FIELD_DEPTH))
        f_add0.add(Activation('softmax', name='softmax_add0'))

        f_add1 = Sequential(name='f_add1')
        f_add1.add(self.f_enc)
        f_add1.add(Dense(FIELD_DEPTH))
        f_add1.add(Activation('softmax', name='softmax_add1'))

        env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output],
                          name="env_model")
        env_model.compile(optimizer='adam',
                          loss=['categorical_crossentropy'] * 2)

        for ep in range(epoch):
            losses = []
            for idx, steps_dict in enumerate(steps_list):
                prev = None
                for step in steps_dict['steps']:
                    x = self.convert_input(step.input)[:2]
                    env_values = step.input.env.reshape((4, -1))
                    in1 = np.clip(env_values[0].argmax() - 1, 0, 9)
                    in2 = np.clip(env_values[1].argmax() - 1, 0, 9)
                    carry = np.clip(env_values[2].argmax() - 1, 0, 9)
                    y_num = in1 + in2 + carry
                    now = (in1, in2, carry)
                    if prev == now:
                        continue
                    prev = now
                    y0 = to_one_hot_array((y_num % 10) + 1, FIELD_DEPTH)
                    y1 = to_one_hot_array((y_num // 10) + 1, FIELD_DEPTH)
                    y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]]
                    loss = env_model.train_on_batch(x, y)
                    losses.append(loss)
            print("ep %3d: loss=%s" % (ep, np.average(losses)))
            if np.average(losses) < 1e-06:
                break

    def question_test(self, addition_env, npi_runner, question):
        addition_env.reset()
        self.reset()
        try:
            run_npi(addition_env, npi_runner, self.program_set.ADD, question)
            if question['correct']:
                return True
        except StopIteration:
            pass
        return False

    def convert_input(self, p_in: StepInput):
        x_pg = np.array((p_in.program.program_id, ))
        x = [
            xx.reshape((self.batch_size, -1))
            for xx in (p_in.env, p_in.arguments.values, x_pg)
        ]
        return x

    def convert_output(self, p_out: StepOutput):
        y = [np.array((p_out.r, ))]
        weights = [[1.]]
        if p_out.program:
            arg_values = p_out.arguments.values
            arg_num = len(p_out.program.args or [])
            y += [p_out.program.to_one_hot(PROGRAM_VEC_SIZE)]
            weights += [[1.]]
        else:
            arg_values = IntegerArguments().values
            arg_num = 0
            y += [np.zeros((PROGRAM_VEC_SIZE, ))]
            weights += [[1e-10]]

        for v in arg_values:  # split by each args
            y += [v]
        weights += [[1.]] * arg_num + [[1e-10]] * (len(arg_values) - arg_num)
        weights = [np.array(w) for w in weights]
        return [yy.reshape((self.batch_size, -1)) for yy in y], weights

    def step(self, env_observation: np.ndarray, pg: Program,
             arguments: IntegerArguments) -> StepOutput:
        x = self.convert_input(StepInput(env_observation, pg, arguments))
        results = self.model.predict(
            x, batch_size=1)  # if batch_size==1, returns single row

        r, pg_one_hot, arg_values = results[0], results[1], results[2:]
        program = self.program_set.get(pg_one_hot.argmax())
        ret = StepOutput(r, program,
                         IntegerArguments(values=np.stack(arg_values)))
        return ret

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    @staticmethod
    def size_of_env_observation():
        return FIELD_ROW * FIELD_DEPTH
Exemple #44
0
class AIPlayer(Player):
    def __init__(self,
                 buffer_size,
                 sim_count,
                 train=True,
                 model="",
                 tau=1,
                 compile=False):
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau

    @staticmethod
    def create_if_nonexistant(config):
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size,
                          config.game.simulation_num_per_move)
            ai.save(config.data.model_location + "model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train

    @staticmethod
    def clear():
        K.clear_session()

    def load(self, file, compile=False):
        try:
            del self.network
        except Exception:
            pass
        self.network = load_model(file,
                                  custom_objects={
                                      "objective_function_for_policy":
                                      AIPlayer.objective_function_for_policy,
                                      "objective_function_for_value":
                                      AIPlayer.objective_function_for_value
                                  },
                                  compile=compile)

    def save(self, file):
        self.network.save(file)

    def create_network(self):
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)

        res_out = x

        x = Conv2D(filters=2, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8 * 8 + 1, activation="softmax",
                           name="policy_out")(x)

        x = Conv2D(filters=1, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out = Dense(1, activation="tanh", name="value_out")(x)

        self.network = Model(x_in, [policy_out, value_out],
                             name="reversi_model")
        self.compile()

    def _build_residual_block(self, x):
        in_x = x
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    def compile(self):
        losses = [
            AIPlayer.objective_function_for_policy,
            AIPlayer.objective_function_for_value
        ]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                             loss=losses)

    def update_lr(self, lr):
        K.set_value(self.network.optimizer.lr, lr)

    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)

    def update_buffer(self, winner):
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))

    def train_batches(self, batch_size, batches=-1, verbose=2):
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size * batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer],
                                   batch_size=batch_size,
                                   epochs=1,
                                   verbose=verbose)
        return history

    def preprocess_input(self, board, side):
        state = np.zeros((3, 8, 8), dtype=np.int)
        for i in range(8):
            for j in range(8):
                if board[i, j] == 1:
                    state[0, i, j] = 1
                elif board[i, j] == -1:
                    state[1, i, j] = 1
                if side == 1:
                    state[2, i, j] = 1
        return state

    def evaluate(self, game, side):
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis, :])
        return pred[1][0]

    def pick_move(self, game, side):
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1, -1))
        monte_prob = self.monte_carlo(game, side)

        if self.train:
            self.temp_state.append((self.preprocess_input(game.board, side),
                                    np.divide(monte_prob, np.sum(monte_prob))))

        monte_prob = np.float_power(monte_prob, 1 / self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))

        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]

    def monte_carlo(self, game, side):
        N = defaultdict(lambda: 0)
        W = defaultdict(lambda: 0)
        Q = defaultdict(lambda: 0)
        P = defaultdict(lambda: 0)

        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            policy = np.zeros((65))
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy

        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis, :])
        policy = pred[0][0]

        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]

        for move in possible_moves:
            P[(sid,
               Othello.move_id(move))] = policy[Othello.move_id(move)] / total

        for i in range(self.sim_count):
            #print("Sim #%d"% i)
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1, -1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid,
                                mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move

                #print(best_move)

                if N[(sid, Othello.move_id(best_move))] == 0:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

                    current_input = self.preprocess_input(
                        clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis, :])
                    policy = pred[0][0]
                    value = pred[1][0]

                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1, -1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]

                    for move in possible_moves:
                        P[(sid, Othello.move_id(move)
                           )] = policy[Othello.move_id(move)] / total

                    for node in visited:
                        N[node] += 1
                        W[node] += value * side
                        Q[node] = W[node] / N[node]
                    #print()
                    break
                else:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid, mid)]

        return policy
Exemple #45
0
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                            W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss3')

        lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss4')

        lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25)  # cross
        self.model = Model(input=financial_time_series_input,
                           output=[time_series_predictions1, time_series_predictions2,
                                   time_series_predictions3, time_series_predictions4,
                                   time_series_predictions5],
                           name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_status()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = [custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy']
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

        self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                       shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0,i,0]*y_hat[0,i,0]>0 else count_true
            print(y[0,i,0],y_hat[0,i,0])
        print(count_all,count_true)
x = Flatten()(x)
x = Dense(512, activation=None)(x)
x = BatchNormalization()(x)
x = advanced_activations.LeakyReLU(alpha=0.1)(x)

logits = Dense(num_classes, activation=None)(x)
output = Activation('softmax')(logits)

opt = keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

model = Model(input_layer, output)
model.summary()
plot_model(model, show_shapes=True, to_file='teacher_model.png')
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

print('Using real-time data augmentation.')

datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    zca_epsilon=1e-06,  # epsilon for ZCA whitening
    rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
    # randomly shift images horizontally (fraction of total width)
    width_shift_range=0.1,
    # randomly shift images vertically (fraction of total height)
Exemple #47
0
def test_model_methods():
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # test fit
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                        epochs=1,
                        batch_size=4,
                        validation_data=([input_a_np, input_b_np], ))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer,
                  loss='mse',
                  loss_weights={
                      'dense_1': 0.2,
                      'dropout': 0.8
                  })
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer,
                  loss='mse',
                  sample_weight_mode={
                      'dense_1': None,
                      'dropout': 'temporal'
                  })
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator will throw an exception if steps is unspecified for regular generator
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.fit_generator(generator=gen_data(),
                                  epochs=5,
                                  initial_epoch=0,
                                  validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer,
                                loss,
                                metrics=[],
                                sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
Exemple #48
0
def test_model_methods():
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # test fit
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4),
                              samples_per_epoch=10,
                              nb_epoch=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {'mse_squared': K.pow(m, 2), 'mse_cubed': K.pow(m, 3)}

    model.compile(optimizer,
                  loss,
                  metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss, per layer: loss + 3 metrics
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
Exemple #49
0
def decoder_stochastic_wrn(
        label_sizes,
        nb_bits=12,
        data_shape=(1, 64, 64),
        activation=lambda: Activation('relu'),
        normalization=lambda: BatchNormalization(axis=1, mode=0),
        weight_init='he_normal',
        weight_decay=0.0001,
        dropout_probability=0.,
        wrn_depth=58,
        wrn_k=2,
        death_rate=0.5,
        optimizer='adam'):
    def norm_act_block():
        def f(inputs):
            x = normalization()(inputs)
            x = activation()(x)
            return x

        return f

    def conv2(nb_filter, nb_row, nb_col, subsample=(1, 1), bias=True):
        return Convolution2D(nb_filter,
                             nb_row,
                             nb_col,
                             init=weight_init,
                             border_mode='same',
                             subsample=subsample,
                             bias=bias,
                             W_regularizer=l2(weight_decay))

    def dropout(p=None):
        if p is None:
            p = dropout_probability

        def f(inputs):
            if p > 0.:
                inputs = Dropout(p)(inputs)
            return inputs

        return f

    def equal_gate_shape(input_shapes):
        assert (input_shapes[0] == input_shapes[1])
        return input_shapes[1]

    def residual_block(nb_filter, stochastic=False, stochastic_layers=None):
        def f(inputs):
            x = norm_act_block()(inputs)
            x = conv2(nb_filter, 3, 3)(x)
            x = dropout()(x)
            x = norm_act_block()(x)
            x = conv2(nb_filter, 3, 3)(x)

            if inputs._keras_shape != x._keras_shape:
                inputs = conv2(nb_filter, 1, 1, bias=False)(inputs)

            if not stochastic:
                return merge((inputs, x), mode='sum')

            scale = ScaleInTestPhase(death_rate)
            x = scale(x)

            out = merge([inputs, x],
                        mode="sum",
                        output_shape=x._keras_shape[1:])
            rs = RandomSwitch(death_rate)
            stochastic_layers.append((rs.death_rate, scale.death_rate))
            return rs([out, inputs])

        return f

    def residual_reduction_block(nb_filter):
        def f(inputs):
            x = norm_act_block()(inputs)
            x = conv2(nb_filter, 3, 3, subsample=(2, 2))(x)
            x = dropout()(x)
            x = norm_act_block()(x)
            x = conv2(nb_filter, 3, 3)(x)

            inputs_bottleneck = conv2(nb_filter,
                                      1,
                                      1,
                                      subsample=(2, 2),
                                      bias=False)(inputs)

            s = merge((inputs_bottleneck, x), mode='sum')
            return s

        return f

    def skip_connection(inputs,
                        residual,
                        stochastic=False,
                        stochastic_layers=None):
        inputs_filters = inputs._keras_shape[1]
        residual_filters = residual._keras_shape[1]
        subsample = np.array(inputs._keras_shape[2:]) // np.array(
            residual._keras_shape[2:])

        inputs = dropout()(inputs)
        if (inputs_filters != residual_filters) or np.any(subsample > 1):
            skip = conv2(residual_filters,
                         1,
                         1,
                         subsample=subsample,
                         bias=False)(inputs)
        else:
            skip = inputs

        if not stochastic:
            return merge((skip, residual), mode='sum')
        else:
            scale = ScaleInTestPhase(death_rate)
            skip = scale(skip)

            out = merge([skip, residual], mode="sum")
            rs = RandomSwitch(death_rate)
            stochastic_layers.append((rs.death_rate, scale.death_rate))
            return rs([out, residual])

    n = (wrn_depth - 4) // 6
    stochastic_layers = []

    input = Input(shape=data_shape)

    m_stem = conv2(16, 3, 3, subsample=(2, 2))(input)
    m_stem = norm_act_block()(m_stem)

    m_b1 = residual_block(nb_filter=16 * wrn_k,
                          stochastic_layers=stochastic_layers)(m_stem)
    for _ in range(n - 1):
        m_b1 = residual_block(nb_filter=16 * wrn_k,
                              stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b1)
    m_b1 = skip_connection(input,
                           m_b1,
                           stochastic=True,
                           stochastic_layers=stochastic_layers)

    m_b2 = residual_reduction_block(nb_filter=32 * wrn_k)(m_b1)
    for _ in range(n - 1):
        m_b2 = residual_block(nb_filter=32 * wrn_k,
                              stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b2)
    m_b2 = skip_connection(m_b1,
                           m_b2,
                           stochastic=True,
                           stochastic_layers=stochastic_layers)

    m_b3 = residual_reduction_block(nb_filter=64 * wrn_k)(m_b2)
    for _ in range(n - 1):
        m_b3 = residual_block(nb_filter=64 * wrn_k,
                              stochastic=True,
                              stochastic_layers=stochastic_layers)(m_b3)
    m_b3 = skip_connection(m_b2,
                           m_b3,
                           stochastic=True,
                           stochastic_layers=stochastic_layers)
    m_b3 = skip_connection(input,
                           m_b3,
                           stochastic=True,
                           stochastic_layers=stochastic_layers)

    x = norm_act_block()(m_b3)
    x = AveragePooling2D(pool_size=(8, 8))(x)
    x = dropout()(x)

    for i, (tb, ts) in enumerate(stochastic_layers, start=0):
        K.set_value(tb, i / len(stochastic_layers) * death_rate)
        K.set_value(ts, i / len(stochastic_layers) * death_rate)

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits, activation,
                                        weight_decay)

    model = Model(input, list(outputs.values()))
    model.compile(
        optimizer,
        loss=list(losses.values()),
        loss_weights={k: decoder_loss_weights(k)
                      for k in losses.keys()})
    return model
Exemple #50
0
class SeqGAN:
    def __init__(self, g, d, m, g_optimizer, d_optimizer):
        self.g = g
        self.d = d
        self.m = m

        self.z, self.seq_input = self.g.inputs
        self.fake_prob, = self.g.outputs
        with trainable(m, False):
            m_input = merge([self.seq_input, self.fake_prob], mode='concat', concat_axis=1)
            self.m_realness = self.m(m_input)
            self.model_fit_g = Model([self.z, self.seq_input], [self.m_realness])
            self.model_fit_g.compile(g_optimizer, K.binary_crossentropy)

        self.d.compile(d_optimizer, loss=K.binary_crossentropy)

    def z_shape(self, batch_size=64):
        layer, _, _ = self.z._keras_history
        return (batch_size,) + layer.output_shape[1:]

    def sample_z(self, batch_size=64):
        shape = self.z_shape(batch_size)
        return np.random.uniform(-1, 1, shape)

    def generate(self, z, seq_input, batch_size=32):
        return self.g.predict([z, seq_input], batch_size=batch_size)

    def train_on_batch(self, seq_input, real, d_target=None):
        nb_real = len(real)
        nb_fake = len(seq_input)
        if d_target is None:
            d_target = np.concatenate([
                np.zeros((nb_fake, 1)),
                np.ones((nb_real, 1))
            ])
        fake_prob = self.generate(self.sample_z(nb_fake), seq_input)
        fake = np.concatenate([seq_input, prob_to_sentence(fake_prob)], axis=1)
        fake_and_real = np.concatenate([fake, real], axis=0)
        d_loss = self.d.train_on_batch(fake_and_real, d_target)
        d_realness = self.d.predict(fake)
        m_loss = self.m.train_on_batch(
            np.concatenate([seq_input, fake_prob], axis=1), d_realness)
        g_loss = self.model_fit_g.train_on_batch([self.sample_z(nb_fake), seq_input],
                                                 np.ones((nb_fake, 1)))
        return g_loss, d_loss, m_loss

    def fit_generator(self, generator, nb_epoch, nb_batches_per_epoch, callbacks=[],
                      batch_size=None,
                      verbose=False):
        if batch_size is None:
            batch_size = 2*len(next(generator)[0])

        out_labels = ['g', 'd', 'm']

        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + callbacks + [self.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)
        callbacks._set_model(self)
        callbacks._set_params({
            'nb_epoch': nb_epoch,
            'nb_sample': nb_batches_per_epoch*batch_size,
            'verbose': verbose,
            'metrics': out_labels,
        })
        callbacks.on_train_begin()

        for e in range(nb_epoch):
            callbacks.on_epoch_begin(e)
            for batch_index, (seq_input, real) in enumerate(generator):
                callbacks.on_batch_begin(batch_index)
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(real) + len(seq_input
                                                     )
                outs = self.train_on_batch(seq_input, real)

                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if batch_index + 1 == nb_batches_per_epoch:
                    break

            callbacks.on_epoch_end(e)
        callbacks.on_train_end()
def build_CNN_model(inputType, do_training=False, model_inputs=None, loss_func='binary_crossentropy',
                    optimize_proc='adam', is_IntermediateModel=False, load_weight_path=None, **kwargs):
    """

    :param inputType:
    :param do_training:
    :param model_inputs:
    :param loss_func:
    :param optimize_proc:
    :param is_IntermediateModel:
    :param load_weight_path:
    :param kwargs:
    :return:
    """

    # assert not do_training and model_inputs, "if do_training then must pass in model_inputs dictionary"

    EMBEDDING_TYPE = 'embeddingMatrix'
    ONEHOT_TYPE = '1hotVector'

    defined_input_types = {EMBEDDING_TYPE, ONEHOT_TYPE}

    assert inputType in defined_input_types, "unknown input type {0}".format(inputType)

    if inputType is ONEHOT_TYPE:

        review_input = Input(shape=(modelParameters.MaxLen_w,), dtype='float32',
                             name="ONEHOT_INPUT")

        layer = Embedding(modelParameters.VocabSize_w + modelParameters.INDEX_FROM, embedding_dims,
                          embeddings_initializer=embedding_init, embeddings_regularizer=embedding_reg,
                          input_length=modelParameters.MaxLen_w, name='1hot_embeddingLayer')(review_input)

        layer = SpatialDropout1D(0.50)(layer)

    elif inputType is EMBEDDING_TYPE:
        review_input = Input(shape=(modelParameters.MaxLen_w, embedding_dims), dtype="float32", name="EMBEDDING_INPUT")
        layer = review_input

    else:
        raise ValueError("Bad inputType arg to build_CNN_model")

    layer = Convolution1D(filters=num_filters1,
                          kernel_size=filter_length1,
                          padding=region,
                          strides=1,
                          activation=conv_activation1,
                          kernel_initializer='glorot_uniform',
                          bias_initializer='zeros',
                          kernel_regularizer=conv_reg1,
                          dilation_rate=1,
                          name='ConvLayer1')(layer)

    layer = SpatialDropout1D(0.50)(layer)

    layer = MaxPooling1D(pool_size=pool_len1)(layer)

    # layer = Convolution1D(filters=num_filters2,
    #                       kernel_size=filter_length2,
    #                       padding=region,
    #                       strides=1,
    #                       activation=conv_activation2,
    #                       kernel_initializer=conv_init2,
    #                       kernel_regularizer=conv_reg2,
    #                       dilation_rate=1,
    #                       name='ConvLayer2')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len2)(layer)

    # layer = Convolution1D(filters=num_filters3,
    #                       kernel_size=filter_length3,
    #                       padding=region,
    #                       activation=conv_activation3,
    #                       kernel_initializer=conv_init3,
    #                       kernel_regularizer=conv_reg3,
    #                       dilation_rate=1,
    #                       name='ConvLayer3')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len3)(layer)



    # #layer = GlobalMaxPool1D()(layer)
    #
    # layer = Convolution1D(filters=num_filters4,
    #                       kernel_size=filter_length4,
    #                       padding=region,
    #                       activation=conv_activation4,
    #                       kernel_initializer=conv_init4,
    #                       kernel_regularizer=conv_reg4,
    #                       dilation_rate=1,
    #                       name='ConvLayer4')(layer)
    #
    # #layer = leaky_relu(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len4)(layer)
    # #layer = GlobalMaxPool1D()(layer)
    #
    # # layer = BatchNormalization()(layer)

    layer = Flatten()(layer)

    layer = Dense(dense_dims0, activation=dense_activation0, kernel_regularizer=dense_reg0,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense0')(layer)

    layer = Dropout(0.50)(layer)

    layer = Dense(dense_dims1, activation=dense_activation1, kernel_regularizer=dense_reg1,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense1')(layer)

    layer = Dropout(0.50)(layer)

    # layer = Dense(dense_dims2, activation=dense_activation2, kernel_regularizer=dense_reg2,
    #               kernel_initializer=dense_init2,
    #               name='dense2')(layer)
    #
    #
    # layer = Dropout(0.50)(layer)
    #
    # layer = Dense(dense_dims3, activation=dense_activation3, kernel_regularizer=dense_reg3,
    #               kernel_initializer=dense_init3,
    #               name='dense3_outA')(layer)
    # #layer = leaky_relu(layer)
    #
    if is_IntermediateModel:
        return Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    #
    # layer = Dropout(0.5)(layer)

    layer = Dense(dense_dims_final, activation=dense_activation_final, kernel_initializer=dense_init_final,
                  kernel_regularizer=dense_reg0,
                  name='output_Full')(layer)

    CNN_model = Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    CNN_model.compile(optimizer=Adam(lr=0.001, decay=0.0), loss=loss_func, metrics=[binary_accuracy])

    if load_weight_path is not None:
        CNN_model.load_weights(load_weight_path)

    hist = ""
    if do_training:
        weightPath = os.path.join(modelParameters.WEIGHT_PATH, filename)
        configPath = os.path.join(modelParameters.WEIGHT_PATH, filename_config)

        with open(configPath + ".json", 'wb') as f:
            f.write(CNN_model.to_json())

        checkpoint = ModelCheckpoint(weightPath + '_W.{epoch:02d}-{val_loss:.4f}.hdf5',
                                     verbose=1, save_best_only=True, save_weights_only=False, monitor='val_loss')

        earlyStop = EarlyStopping(patience=3, verbose=1, monitor='val_loss')

        LRadjuster = ReduceLROnPlateau(monitor='val_loss', factor=0.30, patience=0, verbose=1, cooldown=1,
                                       min_lr=0.00001, epsilon=1e-2)

        call_backs = [checkpoint, earlyStop, LRadjuster]

        CNN_model.summary()

        hist = CNN_model.fit(*model_inputs['training'],
                             batch_size=batch_size,
                             epochs=nb_epoch, verbose=1,
                             validation_data=model_inputs['dev'],
                             callbacks=call_backs)

    return {"model": CNN_model, "hist": hist}
Exemple #52
0
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np,
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.test_on_batch(None,
                              output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([],
                               output_a_np)
    out = model.train_on_batch({},
                               output_a_np)

    # test fit
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None,
                         output_a_np, batch_size=10)
    out = model.evaluate(None,
                         output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.test_on_batch(None,
                              output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([],
                               output_a_np)
    out = model.train_on_batch({},
                               output_a_np)

    # test fit
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None,
                         output_a_np, batch_size=10)
    out = model.evaluate(None,
                         output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
Exemple #53
0
numerical_inputs = Input(shape=(11, ), name='num')
numerical_logits = numerical_inputs
numerical_logits = BatchNormalization()(numerical_logits)

numerical_logits = Dense(128, activation='relu')(numerical_logits)
numerical_logits = Dropout(0.5)(numerical_logits)
numerical_logits = BatchNormalization()(numerical_logits)
numerical_logits = Dense(128, activation='relu')(numerical_logits)
numerical_logits = Dense(64, activation='relu')(numerical_logits)

logits = Concatenate()([numerical_logits, categorical_logits])
logits = Dense(64, activation='relu')(logits)
out = Dense(1, activation='sigmoid')(logits)

model = Model(inputs=categorical_inputs + [numerical_inputs], outputs=out)
model.compile(optimizer='adam', loss=binary_crossentropy)

# In[ ]:


def get_input(market_train, indices):
    X_num = market_train.loc[indices, num_cols].values
    X = {'num': X_num}
    for cat in cat_cols:
        X[cat] = market_train.loc[indices, cat_cols].values
    y = (market_train.loc[indices, 'returnsOpenNextMktres10'] >= 0).values
    r = market_train.loc[indices, 'returnsOpenNextMktres10'].values
    u = market_train.loc[indices, 'universe']
    d = market_train.loc[indices, 'time'].dt.date
    return X, y, r, u, d
Exemple #54
0
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))
        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
Exemple #55
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
Exemple #56
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    input_a_df = pd.DataFrame(input_a_np)
    input_b_df = pd.DataFrame(input_b_np)

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    output_a_df = pd.DataFrame(output_a_np)
    output_b_df = pd.DataFrame(output_b_np)

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.train_on_batch([input_a_df, input_b_df],
                               [output_a_df, output_b_df])

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)
    out = model.fit([input_a_df, input_b_df],
                    [output_a_df, output_b_df], epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.test_on_batch([input_a_df, input_b_df],
                              [output_a_df, output_b_df])

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
    out = model.predict_on_batch([input_a_df, input_b_df])

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    out = model.predict([input_a_df, input_b_df], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=12, epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              validation_steps=12, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
Exemple #57
0
class AE(Model):
    """
    Autoencoder. This is a simple autoencoder consisting of an encoder and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = Autoencoder(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)

    """
    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading.
        if encoder != None and decoder != None and autoencoder != None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[
            1:], str(encoder.outputs[0].shape) + " " + str(
                decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the model.

        This is the same as compilation in Keras.

        """

        assert "reconstruction_loss" not in kwargs, "Not expected to use reconstruction_loss in AE."

        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        Trains the autoencoder.
        """

        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """
        Trains the autoencoder with a generator.
        """

        return self.autoencoder.fit_generator(
            generator,
            steps_per_epoch,
            epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """
        Evaluates the autoencoder.
        """

        return self.autoencoder.evaluate(x,
                                         y,
                                         batch_size,
                                         verbose,
                                         sample_weight,
                                         steps=None)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`
        """

        return self.predict_reconstruct_from_samples(x, batch_size, verbose,
                                                     steps)

    def predict_reconstruct_from_samples(self,
                                         x,
                                         batch_size=None,
                                         verbose=0,
                                         steps=None):
        """
        Reconstructs samples.

        Samples are firstly mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction space via the decoder.
        """

        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self,
                                          x,
                                          batch_size=None,
                                          verbose=0,
                                          steps=None):
        """
        Embeds samples into latent space using the encoder.
        """

        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self,
                                        x,
                                        batch_size=None,
                                        verbose=0,
                                        steps=None):
        """
        Maps latent vectors to reconstruction space using the decoder.
        """

        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """

        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the decoder.
        The encoder and decoder use the path plus a respective annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and *myae-decoder.h5*.

        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)
    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])
    out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {
            'mse_squared': K.pow(m, 2),
            'mse_cubed': K.pow(m, 3)
        }

    model.compile(optimizer, loss, metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss, per layer: loss + 3 metrics
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height 
        self.l2_const = 1e-4  # coef of l2 penalty 
        self.create_policy_value_net()   
        self._loss_train_op()

        if model_file:
            net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)
        
    def create_policy_value_net(self):
        """create the policy value network """   
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width*self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value
        
    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        """

        # get the train op   
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            return loss[0], entropy
        
        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()        
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)