Example 1
def DNN_auto(x_train):

    encoding_dim = 128  #128 original
    input_img = Input(shape=(673, ))

    encoded = Dense(450,
                    activation='relu')(input_img)  # 450-unit hidden layer
    #encoded = Dense(250, activation='relu')(encoded)  # optional extra hidden layer
    encoded = Dense(250,
                    activation='relu')(encoded)  # 250-unit hidden layer
    encoder_output = Dense(encoding_dim)(
        encoded)  # 128-unit encoding layer
    # decoder layers
    decoded = Dense(250, activation='relu')(encoder_output)
    #decoded = Dense(250, activation='relu')(decoded)
    decoded = Dense(450, activation='relu')(decoded)
    decoded = Dense(673, activation='tanh')(decoded)

    autoencoder = Model(inputs=input_img, outputs=decoded)

    encoder = Model(inputs=input_img, outputs=encoder_output)

    autoencoder.compile(optimizer='adam', loss='mse')

    autoencoder.fit(
        x_train, x_train, epochs=20, batch_size=100, shuffle=True
    )  # the second x_train replaces the labels: an autoencoder reconstructs its input

    #batch_size=100 original
    encoded_imgs = encoder.predict(x_train)

    return encoder_output, encoded_imgs
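
A minimal usage sketch for the function above, assuming the standard Keras functional-API imports; the synthetic 673-feature data is illustrative only:

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

x_train = np.random.random((1000, 673)).astype('float32')  # dummy rows matching Input(shape=(673,))
encoder_output, encoded_imgs = DNN_auto(x_train)
print(encoded_imgs.shape)  # (1000, 128) since encoding_dim = 128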
Example 2
def test_sparse_input_validation_split():
    test_input = sparse.random(6, 3, density=0.25).tocsr()
    in1 = Input(shape=(3,), sparse=True)
    out1 = Dense(4)(in1)
    test_output = np.random.random((6, 4))
    model = Model(in1, out1)
    model.compile('rmsprop', 'mse')
    model.fit(test_input, test_output, epochs=1, batch_size=2, validation_split=0.2)
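
The sparse-input tests here and in Examples 4-5 assume roughly this import set; a sketch, not the tests' actual module header:

import numpy as np
from scipy import sparse
from keras.layers import Input, Dense, Dropout
from keras.models import Model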
Example 4
def test_sparse_placeholder_fit():
    test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)]
    test_outputs = [sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)]
    in1 = Input(shape=(3,))
    in2 = Input(shape=(3,), sparse=True)
    out1 = Dropout(0.5, name='dropout')(in1)
    out2 = Dense(4, name='dense_1')(in2)
    model = Model([in1, in2], [out1, out2])
    model.compile('rmsprop', 'mse')
    model.fit(test_inputs, test_outputs, epochs=1, batch_size=2, validation_split=0.2)
Example 5
def test_sparse_placeholder_fit():
    test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)]
    test_outputs = [sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)]
    in1 = Input(shape=(3,))
    in2 = Input(shape=(3,), sparse=True)
    out1 = Dropout(0.5, name='dropout')(in1)
    out2 = Dense(4, name='dense_1')(in2)
    model = Model([in1, in2], [out1, out2])
    model.predict(test_inputs, batch_size=2)
    model.compile('rmsprop', 'mse')
    model.fit(test_inputs, test_outputs, epochs=1, batch_size=2, validation_split=0.5)
    model.evaluate(test_inputs, test_outputs, batch_size=2)
Example 6
class MyModel:
    model = None

    def __init__(self, input_shape, use_dropout=True, use_vat=True):
        self.input_shape = input_shape
        self.use_dropout = use_dropout
        self.use_vat = use_vat

    def build(self):
        input_layer = Input(self.input_shape)
        output_layer = self.core_data_flow(input_layer)
        if self.use_vat:
            self.model = VATModel(input_layer, output_layer).setup_vat_loss()
        else:
            self.model = Model(input_layer, output_layer)
        return self

    def core_data_flow(self, input_layer):
        x = Conv2D(nb_filters, kernel_size, padding='valid')(input_layer)
        x = Activation('relu')(x)
        x = Conv2D(nb_filters, kernel_size)(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(pool_size=pool_size)(x)
        if self.use_dropout:
            x = Dropout(0.25)(x)

        x = Flatten()(x)
        x = Dense(128, activation="relu")(x)
        if self.use_dropout:
            x = Dropout(0.5)(x)
        x = Dense(nb_classes, activation='softmax')(x)
        return x

    def training(self, X_train, y_train, X_test, y_test):
        self.model.compile(loss=K.categorical_crossentropy,
                           optimizer='adadelta',
                           metrics=['accuracy'])
        np.random.seed(1337)  # for reproducibility
        self.model.fit(X_train,
                       y_train,
                       batch_size=batch_size,
                       epochs=nb_epoch,
                       verbose=1,
                       validation_data=(X_test, y_test))
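
MyModel reads several module-level names that the snippet never defines (nb_filters, kernel_size, pool_size, nb_classes, batch_size, nb_epoch, and the VATModel class). A plausible set of values, assumed purely for illustration:

# Assumed module-level hyperparameters (illustrative, not from the source):
nb_filters = 32
kernel_size = (3, 3)
pool_size = (2, 2)
nb_classes = 10
batch_size = 128
nb_epoch = 12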
Example 7
def test_trainable_weights_count_consistency():
    """Tests the trainable weights consistency check of Model.

    This verifies that a warning is shown if model.trainable is modified
    and the model is summarized/run without a new call to .compile()

    Reproduce issue #8121
    """
    a = Input(shape=(3, ), name='input_a')
    model1 = Model(inputs=a, outputs=Dense(1)(a))

    model1.trainable = False
    b = Input(shape=(3, ), name='input_b')
    y = model1(b)
    model2 = Model(inputs=b, outputs=Dense(1)(y))

    model2.compile(optimizer='adam', loss='mse')

    model1.trainable = True

    # Should warn on .summary()
    with pytest.warns(UserWarning) as w:
        model2.summary()
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And on .fit()
    with pytest.warns(UserWarning) as w:
        model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1)))
    warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w])
    assert warning_raised, 'No warning raised when trainable is modified without .compile.'

    # And shouldn't warn if we recompile
    model2.compile(optimizer='adam', loss='mse')
    with pytest.warns(None) as w:
        model2.summary()
    assert len(
        w
    ) == 0, "Warning raised even when .compile() is called after modifying .trainable"
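
The warning-consistency test above assumes pytest plus the usual Keras imports; a sketch of the header it relies on:

import numpy as np
import pytest
from keras.layers import Input, Dense, Dropout
from keras.models import Model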
Example 8
def test_model_with_partial_loss():
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dropout': 'mse'}
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])

    # Same without dropout.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a_2)
    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = {'dense_2': 'mse'}
    model.compile(optimizer, loss, metrics={'dense_1': 'mae'})

    # test train_on_batch
    out = model.train_on_batch(input_a_np, output_a_np)
    out = model.test_on_batch(input_a_np, output_a_np)
    # fit
    out = model.fit(input_a_np, [output_a_np])
    # evaluate
    out = model.evaluate(input_a_np, [output_a_np])
Example 11
class Network():
    def __init__(self, conf):
        # Some Hyperparameters
        self._board_size = conf['board_size']  # the size of the playing board
        self._lr = conf['learning_rate']  # learning rate of SGD (2e-3)
        self._momentum = conf['momentum']  # nesterov momentum (1e-1)
        self._l2_coef = conf['l2']  # coefficient of L2 penalty (1e-4)
        # Define Network
        self._build_network()
        # File Location
        self._net_para_file = conf['net_para_file']
        # If we use previous model or not
        self._use_previous_model = conf['use_previous_model']
        if self._use_previous_model:
            # load_weights() loads in place and returns None, so no set_weights() call is needed
            self._model.load_weights(self._net_para_file)

    def _build_network(self):
        # Input_Layer
        init_x = Input((3, self._board_size, self._board_size))
        x = init_x
        # Convolutional Layer
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format='channels_first',
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        # Residual Layer
        x = self._residual_block(x)
        x = self._residual_block(x)
        x = self._residual_block(x)
        # Policy Head
        policy = Conv2D(filters=2,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        padding='same',
                        data_format='channels_first',
                        kernel_regularizer=l2(self._l2_coef))(x)
        policy = BatchNormalization()(policy)
        policy = Activation('relu')(policy)
        policy = Flatten()(policy)
        policy = Dense(self._board_size * self._board_size,
                       kernel_regularizer=l2(self._l2_coef))(policy)
        self._policy = Activation('softmax')(policy)
        # Value Head
        value = Conv2D(filters=1,
                       kernel_size=(1, 1),
                       strides=(1, 1),
                       padding='same',
                       data_format="channels_first",
                       kernel_regularizer=l2(self._l2_coef))(x)
        value = BatchNormalization()(value)
        value = Activation('relu')(value)
        value = Flatten()(value)
        value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value)
        value = Activation('relu')(value)
        value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value)
        self._value = Activation('tanh')(value)
        # Define Network
        self._model = Model(inputs=init_x, outputs=[self._policy, self._value])
        # Define the Loss Function
        opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True)
        losses_type = ['categorical_crossentropy', 'mean_squared_error']
        self._model.compile(optimizer=opt, loss=losses_type)

    def _residual_block(self, x):
        x_shortcut = x
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format="channels_first",
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters=32,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   padding='same',
                   data_format="channels_first",
                   kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = add([x, x_shortcut])  # Skip Connection
        x = Activation('relu')(x)
        return x

    def predict(self, board, color, random_flip=False):
        if random_flip:
            b_t, method_index = input_transform(board)
            tensor_t = board2tensor(b_t, color, reshape_flag=True)
            prob_tensor_t, value_tensor = self._model.predict_on_batch(
                tensor_t)
            policy = output_decode(prob_tensor_t, method_index, board.shape[0])
            value = value_tensor[0][0]
            return policy, value
        else:
            tensor = board2tensor(board, color)
            policy, value_tensor = self._model.predict_on_batch(tensor)
            value = value_tensor[0][0]
            return policy, value

    def train(self, board_list, color_list, pi_list, z_list):
        # Prepare training data
        tensor_list = np.array([
            board2tensor(board_list[i], color_list[i], reshape_flag=False)
            for i in range(len(board_list))
        ])
        pi_list = np.array(pi_list)
        z_list = np.array(z_list)
        # Training
        self._model.fit(tensor_list, [pi_list, z_list],
                        epochs=20,
                        batch_size=len(color_list),
                        verbose=1)
        # Calculate Loss Explicitly
        loss = self._model.evaluate(tensor_list, [pi_list, z_list],
                                    batch_size=len(board_list),
                                    verbose=0)
        loss = loss[0]
        return loss

    def get_para(self):
        net_para = self._model.get_weights()
        return net_para

    def save_model(self):
        """ save model para to file """
        self._model.save_weights(self._net_para_file)

    def load_model(self):
        self._model.load_weights(self._net_para_file)
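
Network expects a conf dictionary; a sketch assembled from the hyperparameters named in the constructor comments (the board size and file path are assumptions):

conf = {
    'board_size': 15,                # assumed board size
    'learning_rate': 2e-3,           # value quoted in the source comment
    'momentum': 1e-1,                # value quoted in the source comment
    'l2': 1e-4,                      # value quoted in the source comment
    'net_para_file': 'net_para.h5',  # hypothetical weights path
    'use_previous_model': False,
}
net = Network(conf)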
Example 12
class QNetwork:
    def __init__(self, config: Config) -> None:
        self.config = config
        self.digest = None

    def build(self) -> None:
        mc = self.config.model
        in_x = x = Input((4, 5, 5))

        x = Conv2D(filters=mc.cnn_filter_num,
                   kernel_size=mc.cnn_filter_size,
                   padding="same",
                   data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)

        for _ in range(mc.res_layer_num):
            x = self._build_residual_block(x)

        res_out = x
        # for policy output
        x = Conv2D(filters=2,
                   kernel_size=1,
                   data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        # no output for 'pass'
        out = Dense(100,
                    kernel_regularizer=l2(mc.l2_reg),
                    activation="softmax",
                    name="out")(x)

        # x = Dense(mc.value_fc_size, kernel_regularizer=l2(mc.l2_reg),
        #          activation="relu")(x)
        # value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg),
        #                   activation="tanh", name="value_out")(x)

        self.model = Model(in_x, out, name="slipe_model")
        self.model.compile(loss='mse', optimizer=Adam(lr=mc.learning_rate))
        self.model.summary()

    def _build_residual_block(self, x):
        mc = self.config.model
        in_x = x
        x = Conv2D(filters=mc.cnn_filter_num,
                   kernel_size=mc.cnn_filter_size,
                   padding="same",
                   data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=mc.cnn_filter_num,
                   kernel_size=mc.cnn_filter_size,
                   padding="same",
                   data_format="channels_first",
                   kernel_regularizer=l2(mc.l2_reg))(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    # train the weights from replayed experience
    def replay(self, memory: Memory, batch_size: int, gamma: float,
               targetQN: 'QNetwork') -> None:
        inputs = np.zeros((batch_size, 4, 5, 5))
        targets = np.zeros((batch_size, 100))
        mini_batch = memory.sample(batch_size)

        for i, (state_b, action_b, reward_b,
                next_state_b) in enumerate(mini_batch):
            inputs[i] = state_b  # shape=(4, 5, 5)
            target = reward_b  # type: int

            # if not (next_state_b == 0).all():
            # value computation: the action-selection Q-network and the value-estimation
            # Q-network are kept separate so this also supports DDQN
            retmainQs = self.model.predict(next_state_b)
            next_action = np.argmax(retmainQs)  # pick the action with the highest predicted value
            target = reward_b + gamma * \
                targetQN.model.predict(next_state_b)[0][next_action]

            targets[i] = self.model.predict(state_b)[0]  # current Q-values from the main network
            # training target; action_b: int <= 100
            targets[i, action_b] = target
        # epochs is the number of passes over the data; verbose=0 silences output
        self.model.fit(inputs, targets, epochs=1, verbose=0)

    @staticmethod
    def fetch_digest(weight_path: str):
        if os.path.exists(weight_path):
            m = hashlib.sha256()
            with open(weight_path, "rb") as f:
                m.update(f.read())
            return m.hexdigest()

    def load(self, config_path: str, weight_path: str) -> bool:
        if os.path.exists(weight_path):  # os.path.exists(config_path) and
            logger.debug(f"loading model from {config_path}")
            with open(config_path, "rt") as f:
                self.model = Model.from_config(json.load(f))
            self.model.load_weights(weight_path)
            self.model.compile(
                loss='mse', optimizer=Adam(lr=self.config.model.learning_rate))
            self.model.summary()
            self.digest = self.fetch_digest(weight_path)
            logger.debug(f"loaded model digest = {self.digest}")
            return True
        else:
            logger.debug(
                f"model files do not exist at {config_path} and {weight_path}"
            )
            return False

    def save(self, config_path: str, weight_path: str) -> None:
        logger.debug(f"save model to {config_path}")
        with open(config_path, "wt") as f:
            json.dump(self.model.get_config(), f)
        self.model.save_weights(weight_path)
        self.digest = self.fetch_digest(weight_path)
        logger.debug(f"saved model digest {self.digest}")
Example 13
class AIPlayer(Player):
    
    def __init__(self, buffer_size, sim_count, train=True, model="", tau=1, compile=False):
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau

    @staticmethod
    def create_if_nonexistant(config):
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move)
            ai.save(config.data.model_location+"model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train
    
    @staticmethod
    def clear():
        K.clear_session()
    
    def load(self, file, compile=False):
        try:
            del self.network
        except Exception:
            pass
        self.network = load_model(file, custom_objects={"objective_function_for_policy":AIPlayer.objective_function_for_policy,
                                                        "objective_function_for_value":AIPlayer.objective_function_for_value}, compile=compile)
        
    def save(self, file):
        self.network.save(file)
    
    def create_network(self):
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)

        res_out = x
        
        x = Conv2D(filters=2, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8*8+1, activation="softmax", name="policy_out")(x)

        x = Conv2D(filters=1, kernel_size=1, data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out = Dense(1, activation="tanh", name="value_out")(x)
        
        self.network = Model(x_in, [policy_out, value_out], name="reversi_model")
        self.compile()
      
    def _build_residual_block(self, x):
        in_x = x
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128, kernel_size=(3,3), padding="same", data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x
        
    def compile(self):
        losses = [AIPlayer.objective_function_for_policy, AIPlayer.objective_function_for_value]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9), loss=losses)
      
    def update_lr(self, lr):
        K.set_value(self.network.optimizer.lr, lr)
        
    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)
        
    def update_buffer(self, winner):
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))
    
    def train_batches(self, batch_size, batches=-1, verbose=2):
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size*batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer], batch_size=batch_size, epochs=1, verbose=verbose)
        return history
    
    def preprocess_input(self, board, side):
        state = np.zeros((3, 8, 8), dtype=int)  # np.int was removed in NumPy 1.24
        for i in range(8):
            for j in range(8):
                if board[i,j] == 1:
                    state[0,i,j] = 1
                elif board[i,j] == -1:
                    state[1,i,j] = 1
                if side == 1:
                    state[2,i,j] = 1
        return state
    
    def evaluate(self, game, side):
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis,:])
        return pred[1][0]
    
    def pick_move(self, game, side):
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1,-1))
        monte_prob = self.monte_carlo(game, side)
        
        if self.train:
            self.temp_state.append((self.preprocess_input(game.board, side), np.divide(monte_prob, np.sum(monte_prob))))
        
        monte_prob = np.float_power(monte_prob, 1/self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))
        
        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]
            
    def monte_carlo(self, game, side):
        N = defaultdict(lambda: 0)
        W = defaultdict(lambda: 0)
        Q = defaultdict(lambda: 0)
        P = defaultdict(lambda: 0)
        
        
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            policy = np.zeros((65))
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy
        
        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis,:])
        policy = pred[0][0]
        
        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]
          
        for move in possible_moves:
            P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
        
        for i in range(self.sim_count):
            #print("Sim #%d"% i)
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1,-1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid, mid)] + P[(sid, mid)]/(N[(sid, mid)]+1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move
                
                #print(best_move)
                
                if N[(sid, Othello.move_id(best_move))] == 0:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner()*side
                            Q[node] = W[node]/N[node]
                        break
                    
                    current_input = self.preprocess_input(clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis,:])
                    policy = pred[0][0]
                    value = pred[1][0]
                    
                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1,-1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]
                      
                    for move in possible_moves:
                        P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)]/total
                    
                    for node in visited:
                        N[node] += 1
                        W[node] += value*side
                        Q[node] = W[node]/N[node]
                    #print()
                    break
                else:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner()*side
                            Q[node] = W[node]/N[node]
                        break
                             
        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid,mid)]
        
        return policy
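
monte_carlo selects moves with the score Q[(sid, mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1), a simplified PUCT rule: a prior-weighted exploration bonus that decays with visit count. The same scoring in isolation, as a hedged sketch:

def selection_score(Q, P, N, sid, mid):
    # exploitation term Q plus a prior-weighted bonus that shrinks as (sid, mid) is visited
    return Q[(sid, mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)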
Example 14
class PolicyValueNet():
    """Policy-value network"""

    #def __init__(self, board_width, board_height, model_file=None):
    def __init__(self, policy_infer_size, model_file=None):
        #self.board_width = board_width
        #self.board_height = board_height
        self.policy_infer_size = policy_infer_size
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()
        self._loss_train_op()

        self.load_model_done = True
        if model_file and os.path.exists(model_file):
            self.load_model_done = False
            self.load_model(model_file)

    def load_model(self, model_file):
        """重新加载模型(仅用于selfplay时load new model)"""
        try:
            #net_params = pickle.load(open(model_file, 'rb'), encoding='bytes') #iso-8859-1')
            net_params = utils.pickle_load(model_file)
            self.model.set_weights(net_params)
            self.load_model_done = True
        except:
            logging.error("load_model fail! {}\t{}".format(
                model_file, utils.get_trace()))
            self.load_model_done = False
        # abort if the model file exists but loading still failed
        if os.path.exists(model_file) and self.load_model_done is False:
            exit(-1)
        return self.load_model_done

    def create_policy_value_net(self):
        """创建policy-value网络"""
        # 输入层
        #in_x = network = Input((4, self.board_width, self.board_height))
        in_x = network = Input((4, 1, self.policy_infer_size))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        # infer self.board_width * self.board_height action_probs
        #self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        self.policy_net = Dense(self.policy_infer_size,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        # infer one current state score
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        # build the model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return move-policy and value predictions
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            #print(state_input_union)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """使用模型预测棋盘所有actionid的价值概率"""
        # 棋盘所有可移动action_ids
        legal_positions = board.availables
        #print(legal_positions)
        # 当前玩家角度的actions过程
        current_actions = board.current_actions()
        #print(current_actions)
        # 使用模型预测走子策略和价值概率
        #print(self.policy_infer_size)
        #act_probs, value = self.policy_value(current_actions.reshape(-1, 4, self.board_width, self.board_height))
        act_probs, value = self.policy_value(
            current_actions.reshape(-1, 4, 1, self.policy_infer_size))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, prob)] pairs plus the value for the current player
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """初始化损失
        3个损失函数因子
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value损失函数 + policy损失函数 + 惩罚项
        """
        # 定义优化器和损失函数
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """输出训练过程中的结果"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            # predict
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """获得模型参数"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """保存模型参数到文件"""
        net_params = self.get_policy_param()
        #pickle.dump(net_params, open(model_file, 'wb'), protocol=4)
        utils.pickle_dump(net_params, model_file)
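
A hedged instantiation sketch for PolicyValueNet above; policy_infer_size=64 is an arbitrary assumption:

net = PolicyValueNet(policy_infer_size=64)  # builds the net and the training op
probs, value = net.policy_value(np.zeros((1, 4, 1, 64), dtype='float32'))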
Example 15
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        if model_file:
            #   net_params = pickle.load(open(model_file, 'rb'))
            #   self.model.set_weights(net_params)
            self.model = load_model(model_file)
        else:
            # self.create_policy_value_net()
            self.create_policy_value_resnet()
        self._loss_train_op()

    def create_policy_value_resnet(self):
        def _conv_bn_relu(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size,
                              filters=filters,
                              padding="same",
                              data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return Activation("relu")(norm)

            return f

        def _conv_bn(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size,
                              filters=filters,
                              padding="same",
                              data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return norm

            return f

        def _basic_block(nb_filters):
            def f(input):
                conv1 = _conv_bn_relu(nb_filters, (3, 3))(input)
                conv2 = _conv_bn(nb_filters, (3, 3))(conv1)
                shortcut = keras.layers.add([conv1, conv2])
                return Activation("relu")(shortcut)

            return f

        in_x = network = Input((4, self.board_width, self.board_height))

        network = _basic_block(64)(network)
        network = _basic_block(128)(network)
        '''
        layer1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first",
                         activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        '''

        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        '''
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        '''

        layer1 = Conv2D(filters=64,
                        kernel_size=(3, 3),
                        padding="same",
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64,
                        kernel_size=(3, 3),
                        padding="same",
                        data_format="channels_first",
                        activation="relu",
                        kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)

        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        '''
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value
        '''

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        # net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
        # self.model.save_weights(model_file)
        self.model.save(model_file)

    def _shortcut(self, input, residual):
        # not a @staticmethod: it takes self and is called as self._shortcut(...)
        stride_width = input._keras_shape[2] // residual._keras_shape[2]
        stride_height = input._keras_shape[3] // residual._keras_shape[3]
        equal_channels = residual._keras_shape[1] == input._keras_shape[1]

        shortcut = input
        if stride_width > 1 or stride_height > 1 or not equal_channels:
            shortcut = Conv2D(filters=residual._keras_shape[1],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              kernel_initializer="he_normal",
                              padding="valid")(input)

        return keras.layers.add([shortcut, residual])

    def _residual_block(self,
                        block_function,
                        nb_filters,
                        repetations,
                        is_first_layer=False):
        def f(input):
            for i in range(repetations):
                init_subsample = (1, 1)
                if i == 0 and not is_first_layer:
                    init_subsample = (2, 2)
                input = block_function(nb_filters=nb_filters,
                                       init_subsample=init_subsample)(input)
            return input

        return f

    def resnet(self):
        from keras.layers.convolutional import MaxPooling2D, AveragePooling2D

        input = Input(shape=(3, 224, 224))

        conv1 = self._conv_bn_relu(nb_filter=64,
                                   nb_row=7,
                                   nb_col=7,
                                   subsample=(2, 2))(input)
        pool1 = MaxPooling2D(pool_size=(3, 3),
                             strides=(2, 2),
                             padding="same")(conv1)

        # Build residual blocks..
        block_fn = self._basic_block
        block1 = self._residual_block(block_fn,
                                      nb_filters=64,
                                      repetations=3,
                                      is_first_layer=True)(pool1)
        block2 = self._residual_block(block_fn, nb_filters=128,
                                      repetations=4)(block1)
        block3 = self._residual_block(block_fn, nb_filters=256,
                                      repetations=6)(block2)
        block4 = self._residual_block(block_fn, nb_filters=512,
                                      repetations=3)(block3)

        # Classifier block
        pool2 = AveragePooling2D(pool_size=(7, 7),
                                 strides=(1, 1),
                                 padding="same")(block4)
        flatten1 = Flatten()(pool2)
        dense = Dense(1000, kernel_initializer="he_normal",
                      activation="softmax")(flatten1)

        model = Model(inputs=input, outputs=dense)
        return model
Example 16
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(dropouted)
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(cnn)
flattened = Flatten()(cnn)
dense = Dense(100, activation='tanh')(flattened)

predict = Dense(2, activation='softmax')(dense)
model = Model(inputs=[word, distance_e1, distance_e2], outputs=predict)

# opt = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
#    opt = Adagrad(lr=0.01, epsilon=1e-06)
#    opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06)
#    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=opt)
train_instances = [line.strip() for line in lines]
label_array_t, word_array_t, dis_e1_array_t, dis_e2_array_t = rep.represent_instances(
    train_instances)

model.fit([word_array_t, dis_e1_array_t, dis_e2_array_t],
          label_array_t,
          batch_size=128,
          epochs=epoch_size)
model.save(output_file)
label_array_ans = model.predict([word_array_t, dis_e1_array_t, dis_e2_array_t],
                                batch_size=128)
print(label_array_ans)
print("训练完成!!")
eval_mulclass(label_array_t, label_array_ans)
Example 17
def build_CNN_model(inputType, do_training=False, model_inputs=None, loss_func='binary_crossentropy',
                    optimize_proc='adam', is_IntermediateModel=False, load_weight_path=None, **kwargs):
    """Build (and optionally train) the CNN text model.

    :param inputType: '1hotVector' or 'embeddingMatrix'
    :param do_training: if True, fit on model_inputs['training'] and validate on model_inputs['dev']
    :param model_inputs: dict of training/dev data (required when do_training is True)
    :param loss_func: loss passed to compile()
    :param optimize_proc: optimizer name (unused below; Adam is hard-coded)
    :param is_IntermediateModel: if True, return the uncompiled feature-extraction model
    :param load_weight_path: optional path to pre-trained weights
    :param kwargs:
    :return: dict with the compiled model and its training history
    """

    # assert not do_training and model_inputs, "if do_training then must pass in model_inputs dictionary"

    EMBEDDING_TYPE = 'embeddingMatrix'
    ONEHOT_TYPE = '1hotVector'

    defined_input_types = {EMBEDDING_TYPE, ONEHOT_TYPE}

    assert inputType in defined_input_types, "unknown input type {0}".format(inputType)

    if inputType == ONEHOT_TYPE:  # compare strings by equality, not identity

        review_input = Input(shape=(modelParameters.MaxLen_w,), dtype='float32',
                             name="ONEHOT_INPUT")

        layer = Embedding(modelParameters.VocabSize_w + modelParameters.INDEX_FROM, embedding_dims,
                          embeddings_initializer=embedding_init, embeddings_regularizer=embedding_reg,
                          input_length=modelParameters.MaxLen_w, name='1hot_embeddingLayer')(review_input)

        layer = SpatialDropout1D(0.50)(layer)

    elif inputType == EMBEDDING_TYPE:
        review_input = Input(shape=(modelParameters.MaxLen_w, embedding_dims), dtype="float32", name="EMBEDDING_INPUT")
        layer = review_input

    else:
        raise ValueError("Bad inputType arg to build_CNN_model")

    layer = Convolution1D(filters=num_filters1,
                          kernel_size=filter_length1,
                          padding=region,
                          strides=1,
                          activation=conv_activation1,
                          kernel_initializer='glorot_uniform',
                          bias_initializer='zeros',
                          kernel_regularizer=conv_reg1,
                          dilation_rate=1,
                          name='ConvLayer1')(layer)

    layer = SpatialDropout1D(0.50)(layer)

    layer = MaxPooling1D(pool_size=pool_len1)(layer)

    # layer = Convolution1D(filters=num_filters2,
    #                       kernel_size=filter_length2,
    #                       padding=region,
    #                       strides=1,
    #                       activation=conv_activation2,
    #                       kernel_initializer=conv_init2,
    #                       kernel_regularizer=conv_reg2,
    #                       dilation_rate=1,
    #                       name='ConvLayer2')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len2)(layer)

    # layer = Convolution1D(filters=num_filters3,
    #                       kernel_size=filter_length3,
    #                       padding=region,
    #                       activation=conv_activation3,
    #                       kernel_initializer=conv_init3,
    #                       kernel_regularizer=conv_reg3,
    #                       dilation_rate=1,
    #                       name='ConvLayer3')(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len3)(layer)



    # #layer = GlobalMaxPool1D()(layer)
    #
    # layer = Convolution1D(filters=num_filters4,
    #                       kernel_size=filter_length4,
    #                       padding=region,
    #                       activation=conv_activation4,
    #                       kernel_initializer=conv_init4,
    #                       kernel_regularizer=conv_reg4,
    #                       dilation_rate=1,
    #                       name='ConvLayer4')(layer)
    #
    # #layer = leaky_relu(layer)
    #
    # layer = SpatialDropout1D(0.50)(layer)
    #
    # layer = MaxPooling1D(pool_size=pool_len4)(layer)
    # #layer = GlobalMaxPool1D()(layer)
    #
    # # layer = BatchNormalization()(layer)

    layer = Flatten()(layer)

    layer = Dense(dense_dims0, activation=dense_activation0, kernel_regularizer=dense_reg0,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense0')(layer)

    layer = Dropout(0.50)(layer)

    layer = Dense(dense_dims1, activation=dense_activation1, kernel_regularizer=dense_reg1,
                  kernel_initializer='glorot_normal', bias_initializer='zeros',
                  name='dense1')(layer)

    layer = Dropout(0.50)(layer)

    # layer = Dense(dense_dims2, activation=dense_activation2, kernel_regularizer=dense_reg2,
    #               kernel_initializer=dense_init2,
    #               name='dense2')(layer)
    #
    #
    # layer = Dropout(0.50)(layer)
    #
    # layer = Dense(dense_dims3, activation=dense_activation3, kernel_regularizer=dense_reg3,
    #               kernel_initializer=dense_init3,
    #               name='dense3_outA')(layer)
    # #layer = leaky_relu(layer)
    #
    if is_IntermediateModel:
        return Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    #
    # layer = Dropout(0.5)(layer)

    layer = Dense(dense_dims_final, activation=dense_activation_final, kernel_initializer=dense_init_final,
                  kernel_regularizer=dense_reg0,
                  name='output_Full')(layer)

    CNN_model = Model(inputs=[review_input], outputs=[layer], name="CNN_model")

    CNN_model.compile(optimizer=Adam(lr=0.001, decay=0.0), loss=loss_func, metrics=[binary_accuracy])

    if load_weight_path is not None:
        CNN_model.load_weights(load_weight_path)

    hist = ""
    if do_training:
        weightPath = os.path.join(modelParameters.WEIGHT_PATH, filename)
        configPath = os.path.join(modelParameters.WEIGHT_PATH, filename_config)

        with open(configPath + ".json", 'wb') as f:
            f.write(CNN_model.to_json())

        checkpoint = ModelCheckpoint(weightPath + '_W.{epoch:02d}-{val_loss:.4f}.hdf5',
                                     verbose=1, save_best_only=True, save_weights_only=False, monitor='val_loss')

        earlyStop = EarlyStopping(patience=3, verbose=1, monitor='val_loss')

        LRadjuster = ReduceLROnPlateau(monitor='val_loss', factor=0.30, patience=0, verbose=1, cooldown=1,
                                       min_lr=0.00001, epsilon=1e-2)

        call_backs = [checkpoint, earlyStop, LRadjuster]

        CNN_model.summary()

        hist = CNN_model.fit(*model_inputs['training'],
                             batch_size=batch_size,
                             epochs=nb_epoch, verbose=1,
                             validation_data=model_inputs['dev'],
                             callbacks=call_backs)

    return {"model": CNN_model, "hist": hist}
Esempio n. 18
0
def test_pandas_dataframe():
    input_a = Input(shape=(3, ), name='input_a')
    input_b = Input(shape=(3, ), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    y = Dense(3, name='dense_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))

    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])

    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
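The test works because Keras feeds pandas objects by taking the underlying NumPy array, so DataFrames and plain arrays are interchangeable; a minimal illustration:

# Equivalent calls: Keras extracts the ndarray behind each DataFrame.
model_1.fit(input_a_df, output_a_df)
model_1.fit(input_a_df.values, output_a_df.values)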
Esempio n. 19
0
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data))
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat')
        all_news_rep = bi_lstm(news_input)
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)  # metrics=['mse']
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                           shuffle=True, callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val),
                           shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
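A minimal driver for the class above, assuming the module-level hyperparameters it reads (nb_hidden_units, dropout, l2_norm_alpha) are defined and that X has shape (samples, nb_time_step, dim_data) with one target per sample:

# Sketch only: shapes and the weights file name are assumptions.
analysis = FinancialNewsAnalysisModel(nb_time_step=10, dim_data=100,
                                      batch_size=32, model_path='news_weights.h5')
analysis.compile_model(lr=1e-4)
analysis.fit_model(X, y)              # falls back to an 80/20 validation split
analysis.save()
analysis.model_eval(X_test, y_test)   # prints sign-agreement counts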
Esempio n. 20
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # enable verbose for evaluate_generator
    out = model.evaluate_generator(gen_data(4), steps=3, verbose=1)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a 2-tuple nor a 3-tuple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # output arrays must be at least 3D when using temporal sample weights.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=3, epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              validation_steps=3, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch defaults to the length of the Sequence when unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3), epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator raises an exception if steps_per_epoch is unspecified for a regular generator
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.fit_generator(generator=gen_data(), epochs=5,
                                  initial_epoch=0, validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # Check if generator is only accessed an expected number of times
    gen_counters = [0, 0]

    def gen_data(i):
        while True:
            gen_counters[i] += 1
            yield ([np.random.random((1, 3)), np.random.random((1, 3))],
                   [np.random.random((1, 4)), np.random.random((1, 3))])
    out = model.fit_generator(generator=gen_data(0), epochs=3,
                              steps_per_epoch=2,
                              validation_data=gen_data(1),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)

    # Need range check here as filling of the queue depends on sleep in the enqueuers
    assert 6 <= gen_counters[0] <= 8
    # 12 = (epoch * workers * validation steps * max_queue_size)
    assert 3 <= gen_counters[1] <= 12

    gen_counters = [0]
    out = model.fit_generator(generator=RandomSequence(3), epochs=3,
                              validation_data=gen_data(0),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)

    # 12 = (epoch * workers * validation steps * max_queue_size)
    # Need range check here as filling of the queue depends on sleep in the enqueuers
    assert 3 <= gen_counters[0] <= 12

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(RandomSequence(batch_size,
                                                 sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(RandomSequence(batch_size,
                                                 sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer, loss, metrics=[], sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(RandomSequence(batch_size,
                                                sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(RandomSequence(batch_size,
                                                sequence_length=sequence_length))
    assert np.shape(out) == shape_0
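The tests above rely on a RandomSequence helper that is not shown. A sketch consistent with how the tests use it: 12 batches by default (hence trained_batches == list(range(12)) * 5), and sequence_length batches of batch_size samples for the shape checks.

from keras.utils import Sequence

class RandomSequence(Sequence):
    """Sketch of the missing helper: random two-input / two-output batches."""

    def __init__(self, batch_size, sequence_length=12):
        self.batch_size = batch_size
        self.sequence_length = sequence_length

    def __len__(self):
        return self.sequence_length

    def __getitem__(self, idx):
        b = self.batch_size
        return ([np.random.random((b, 3)), np.random.random((b, 3))],
                [np.random.random((b, 4)), np.random.random((b, 3))])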
class PolicyValueNet():
    """策略价值网络"""

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty 
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            if platform.python_version().split('.')[0] == '3': #python3
                net_params = pickle.load(open(model_file, 'rb'), encoding='iso-8859-1')
            else:
                net_params = pickle.load(open(model_file, 'rb'))
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """创建policy-value网络"""
        # 输入层
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height, activation="softmax", kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_first", activation="relu", kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh", kernel_regularizer=l2(self.l2_const))(value_net)

        # create the network model
        self.model = Model(in_x, [self.policy_net, self.value_net])

        # return move probabilities and the state value
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """使用模型预测棋盘所有可落子位置价值概率"""
        # 棋盘所有可落子位置
        legal_positions = board.availables
        # board state from the current player's perspective
        current_state = board.current_state()
        # predict move probabilities and the state value with the model
        act_probs, value = self.policy_value(current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        # return [(action, probability)] pairs plus the state value for the current player
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """初始化损失
        3个损失函数因子
        loss = (z - v)^2 + pi^T * log(p) + c||theta||^2
        loss = value损失函数 + policy损失函数 + 惩罚项
        """
        # 定义优化器和损失函数
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            """输出训练过程中的结果"""
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # evaluate
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            # predict
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        """获得模型参数"""
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """保存模型参数到文件"""
        net_params = self.get_policy_param()
        pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
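A minimal driver for PolicyValueNet, assuming a board object that exposes availables and current_state() as used above; the batch names and learning rate are illustrative:

# Sketch only: the board object and training batches are assumptions.
net = PolicyValueNet(board_width=8, board_height=8)
act_probs, value = net.policy_value_fn(board)          # per-move probabilities
loss, entropy = net.train_step(state_batch, mcts_probs_batch,
                               winner_batch, learning_rate=2e-3)
net.save_model('current_policy.model')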
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data))

        lstm_layer_1 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True)
        lstm_layer_2 = LSTM(output_dim=nb_hidden_units,
                            dropout_U=dropout,
                            dropout_W=dropout,
                            inner_activation='sigmoid',
                            W_regularizer=l2(l2_norm_alpha),
                            b_regularizer=l2(l2_norm_alpha),
                            activation='tanh',
                            return_sequences=True)

        h1 = lstm_layer_1(financial_time_series_input)
        h2 = lstm_layer_2(h1)
        time_series_predictions = TimeDistributedDense(1)(h2)
        self.model = Model(
            financial_time_series_input,
            time_series_predictions,
            name="deep rnn for financial time series forecasting")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=3):
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=3,
                                       verbose=0)
        if X_val is None:
            self.model.fit(X,
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_split=0.2,
                           shuffle=True,
                           callbacks=[early_stopping])
        else:
            self.model.fit(X,
                           y,
                           batch_size=self.batch_size,
                           nb_epoch=epoch,
                           validation_data=(X_val, y_val),
                           shuffle=True,
                           callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" %
                  (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            count_true = count_true + 1 if y[0, i, 0] * y_hat[
                0, i, 0] > 0 else count_true
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
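TimeDistributedDense and the Keras 1 keyword names above were removed in Keras 2; a sketch of the same layer stack under Keras 2 naming, assuming the same module-level hyperparameters:

from keras.layers import LSTM, TimeDistributed, Dense
from keras.regularizers import l2

lstm_layer = LSTM(nb_hidden_units,                       # was output_dim
                  dropout=dropout,                       # was dropout_W
                  recurrent_dropout=dropout,             # was dropout_U
                  recurrent_activation='sigmoid',        # was inner_activation
                  kernel_regularizer=l2(l2_norm_alpha),  # was W_regularizer
                  bias_regularizer=l2(l2_norm_alpha),    # was b_regularizer
                  activation='tanh',
                  return_sequences=True)
time_series_predictions = TimeDistributed(Dense(1))  # was TimeDistributedDense(1)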
class FinancialNewsAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        news_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                    W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha),
                    activation='tanh', name='h1')
        bi_lstm = Bidirectional(lstm, input_shape=(nb_time_step, dim_data), merge_mode='concat', name='h1')
        all_news_rep = bi_lstm(news_input)
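        # merge_mode='concat' joins the forward and backward states, so the
        # Dense(1) head below sees 2 * nb_hidden_units features.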
        news_predictions = Dense(1, activation='linear')(all_news_rep)
        self.model = Model(news_input, news_predictions, name="deep rnn for financial news analysis")

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, loss_weights=0.1):
        optimizer = Adam(lr=lr)
        loss = 'mse'
        # loss = custom_objective
        self.model.compile(optimizer=optimizer, loss=loss)  # metrics=['mse']
        plot(self.model, to_file='model.png')

    def fit_model(self, X, y, X_val=None, y_val=None, epoch=500):
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
        if X_val is None:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                           shuffle=True, callbacks=[early_stopping])
        else:
            self.model.fit(X, y, batch_size=self.batch_size, nb_epoch=epoch, validation_data=(X_val, y_val),
                           shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)
        count_true = 0
        count_all = y.shape[0]
        for i in range(y.shape[0]):
            count_true = count_true + 1 if y[i, 0] * y_hat[i, 0] > 0 else count_true
            print(y[i, 0], y_hat[i, 0])
        print(count_all, count_true)
Esempio n. 24
0
def test_model_methods():
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # test fit
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a 2-tuple nor a 3-tuple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                        epochs=1,
                        batch_size=4,
                        validation_data=([input_a_np, input_b_np], ))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer,
                  loss='mse',
                  loss_weights={
                      'dense_1': 0.2,
                      'dropout': 0.8
                  })
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer,
                  loss='mse',
                  sample_weight_mode={
                      'dense_1': None,
                      'dropout': 'temporal'
                  })
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # output arrays must be at least 3D when using temporal sample weights.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch defaults to the length of the Sequence when unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator raises an exception if steps_per_epoch is unspecified for a regular generator
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.fit_generator(generator=gen_data(),
                                  epochs=5,
                                  initial_epoch=0,
                                  validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer,
                                loss,
                                metrics=[],
                                sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
Esempio n. 25
0
        cos_inner = np.pi * (t % (self.T // self.M)
                             )  # t - 1 is used when t has 1-based indexing.
        cos_inner /= self.T // self.M
        cos_out = np.cos(cos_inner) + 1
        return float(self.alpha_zero / 2 * cos_out)
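The fragment above is the tail of a cosine-annealing learning-rate schedule in the snapshot-ensembles style. A self-contained sketch of the same computation, with T the total number of iterations, M the number of cycles, and alpha_zero the initial rate (names taken from the attributes used above):

import numpy as np

def snapshot_cosine_lr(t, T, M, alpha_zero):
    """Cosine-annealed learning rate with M warm restarts over T iterations."""
    cycle_len = T // M
    cos_inner = np.pi * (t % cycle_len) / cycle_len
    return float(alpha_zero / 2 * (np.cos(cos_inner) + 1))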


# In[ ]:

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

check_point = ModelCheckpoint('model.hdf5', verbose=True, save_best_only=True)
early_stop = EarlyStopping(patience=5, verbose=True)
model.fit(X_train,
          y_train.astype(int),
          validation_data=(X_valid, y_valid.astype(int)),
          epochs=5,
          verbose=True,
          callbacks=[early_stop, check_point])

# In[ ]:

from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# distribution of confidence that will be used as submission
model.load_weights('model.hdf5')
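# map the predicted probability from [0, 1] to a signed confidence in [-1, 1]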
confidence_valid = model.predict(X_valid)[:, 0] * 2 - 1
print(accuracy_score(confidence_valid > 0, y_valid))
plt.hist(confidence_valid, bins='auto')
plt.title("predicted confidence")
plt.show()

# In[ ]:
Esempio n. 26
0
class Network:
    def __init__(self, conf):
        # All hyperparameters used in the model
        self._board_size = conf['board_size']  # the size of the playing board
        self._lr = conf['learning_rate']  # learning rate of SGD (2e-3)
        self._momentum = conf['momentum']  # nesterov momentum (1e-1)
        self._l2_coef = conf['l2']  # coefficient of L2 penalty (1e-4)
        self._mini_batch_size = conf['mini_batch_size']  # the size of batch when training the network
        self._fit_epochs = conf['fit_epochs']  # the number of iteration

        # Define Network
        self._build_network()

        # The location of the file which stores the parameters of the network
        self._net_para_file = conf['net_para_file']
        self._fit_history_file = conf['fit_history_file']

        # Whether we use previous model or not
        self._use_previous_model = conf['use_previous_model']
        if self._use_previous_model:
            if os.path.exists(self._net_para_file):
                self._model.load_weights(self._net_para_file)
            else:
                print('> error: [use_previous_model] = True, ' + self._net_para_file + ' not found')

    @log
    def _build_network(self):
        # Input_Layer
        init_x = Input((3, self._board_size, self._board_size))  # input tensor of shape 3 x board_size x board_size (e.g. 3 x 15 x 15)
        x = init_x

        # First Convolutional Layer with 32 filters
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Three Residual Blocks
        x = self._residual_block(x)
        x = self._residual_block(x)
        x = self._residual_block(x)

        # Policy Head for generating prior probability vector for each action
        policy = Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), padding='same',
                        data_format='channels_first', kernel_regularizer=l2(self._l2_coef))(x)
        policy = BatchNormalization()(policy)
        policy = Activation('relu')(policy)
        policy = Flatten()(policy)
        policy = Dense(self._board_size*self._board_size, kernel_regularizer=l2(self._l2_coef))(policy)
        self._policy = Activation('softmax')(policy)

        # Value Head for generating value of each action
        value = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='same',
                       data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        value = BatchNormalization()(value)
        value = Activation('relu')(value)
        value = Flatten()(value)
        value = Dense(32, kernel_regularizer=l2(self._l2_coef))(value)
        value = Activation('relu')(value)
        value = Dense(1, kernel_regularizer=l2(self._l2_coef))(value)
        self._value = Activation('tanh')(value)

        # Define Network
        self._model = Model(inputs=init_x, outputs=[self._policy, self._value])

        # Define the Loss Function
        opt = SGD(lr=self._lr, momentum=self._momentum, nesterov=True)  # stochastic gradient descent with momentum
        losses_type = ['categorical_crossentropy', 'mean_squared_error']  # cross-entropy and MSE are weighted equally
        self._model.compile(optimizer=opt, loss=losses_type)

    def _residual_block(self, x):
        x_shortcut = x
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x) 
        x = Activation('relu')(x)
        x = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                   data_format="channels_first", kernel_regularizer=l2(self._l2_coef))(x)
        x = BatchNormalization()(x) 
        x = add([x, x_shortcut])  # Skip Connection
        x = Activation('relu')(x)
        return x
        
    def predict(self, board, color, last_move):
        if sum(sum(board)) == 0 and color == WHITE:
            print('error: network.predict')
        if sum(sum(board)) == 1 and color == BLACK:
            print('error: network.predict')
        tensor = board2tensor(board, color, last_move)
        policy, value_tensor = self._model.predict_on_batch(tensor)
        value = value_tensor[0][0]
        return policy, value

    def train(self, board_list, color_list, last_move_list, pi_list, z_list):
        size = len(color_list)
        for i in range(size):
            if sum(sum(board_list[i])) == 0 and color_list[i] == WHITE:
                print('error: network.train')
            if sum(sum(board_list[i])) == 1 and color_list[i] == BLACK:
                print('error: network.train')

        # Data Augmentation through symmetry and rotation transformations
        board_aug = []
        color_aug = []
        last_move_aug = []
        pi_aug = []
        z_aug = []
        for i in range(len(board_list)):
            new_board, new_color, new_last_move, new_pi, new_z = \
                data_augmentation(board_list[i], color_list[i], last_move_list[i], pi_list[i], z_list[i])
            board_aug.extend(new_board)
            color_aug.extend(new_color)
            last_move_aug.extend(new_last_move)
            pi_aug.extend(new_pi)
            z_aug.extend(new_z)
        board_list.extend(board_aug)
        color_list.extend(color_aug)
        last_move_list.extend(last_move_aug)
        pi_list.extend(pi_aug)
        z_list.extend(z_aug)

        # Convert data to network input tensors
        board_list = np.array([board2tensor(board_list[i], color_list[i], last_move_list[i], reshape_flag=False)
                               for i in range(len(board_list))])
        pi_list = np.array(pi_list)
        z_list = np.array(z_list)

        # Training
        hist = self._model.fit(board_list, [pi_list, z_list], epochs=self._fit_epochs, batch_size=self._mini_batch_size, verbose=1)
        hist_path = self._fit_history_file + '_' + str(self._fit_epochs) + '_' + str(self._mini_batch_size) + '.txt'
        with open(hist_path, 'a') as f:
            f.write(str(hist.history))
            return hist.history['loss'][0]  # return only the loss of the first epoch
        
    def get_para(self):
        net_para = self._model.get_weights() 
        return net_para

    def save_model(self):
        """ save model para to file """
        self._model.save_weights(self._net_para_file)

    def load_model(self):
        if os.path.exists(self._net_para_file):
            self._model.load_weights(self._net_para_file)
        else:
            print('> error: ' + self._net_para_file + ' not found')
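Below, a minimal self-contained sketch (not part of the snippet above) of the same dual-head idea: one softmax policy output and one tanh value output compiled with equally weighted cross-entropy and MSE, as in _build_network. Board size and layer widths are made up, and channels_last is used so the toy runs on any backend.

import numpy as np
from keras.layers import Input, Conv2D, Flatten, Dense
from keras.models import Model

board_size = 15  # assumed, matching the 15x15 comment above
inp = Input((board_size, board_size, 3))  # three feature planes
feat = Conv2D(8, (3, 3), padding='same', activation='relu')(inp)
flat = Flatten()(feat)
policy = Dense(board_size * board_size, activation='softmax', name='policy')(flat)
value = Dense(1, activation='tanh', name='value')(flat)

net = Model(inputs=inp, outputs=[policy, value])
net.compile(optimizer='sgd',
            loss=['categorical_crossentropy', 'mean_squared_error'])

x = np.random.random((4, board_size, board_size, 3))
pi = np.random.random((4, board_size * board_size))
pi /= pi.sum(axis=1, keepdims=True)   # normalized move probabilities
z = np.random.uniform(-1, 1, (4, 1))  # game outcomes in [-1, 1]
net.train_on_batch(x, [pi, z])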
Esempio n. 27
0
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, and predict
    by passing data only for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np,
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=10)
    out = model.fit(input_b_np,
                    [output_a_np, output_b_np], epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np], batch_size=10)
    out = model.evaluate(input_b_np,
                         [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.test_on_batch(None,
                              output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([],
                               output_a_np)
    out = model.train_on_batch({},
                               output_a_np)

    # test fit
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None,
                         output_a_np, batch_size=10)
    out = model.evaluate(None,
                         output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.train_on_batch(None,
                               output_a_np)
    out = model.test_on_batch(None,
                              output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([],
                               output_a_np)
    out = model.train_on_batch({},
                               output_a_np)

    # test fit
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)
    out = model.fit(None,
                    output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None,
                         output_a_np, batch_size=10)
    out = model.evaluate(None,
                         output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
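The pattern this test exercises: a Keras Input can wrap an existing backend tensor, after which that input needs no feed data and predict iterates by steps instead of batch_size. A minimal sketch under the same TF-1-era Keras assumptions as the test:

import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model

data = np.random.random((10, 3))
x = Input(tensor=tf.Variable(data, dtype=tf.float32))  # tensor-fed input
y = Dense(4)(x)
m = Model(x, y)
m.compile('rmsprop', 'mse')

targets = np.random.random((10, 4))
m.fit(None, targets, epochs=1, batch_size=10)  # no input data passed
preds = m.predict(None, steps=1)               # one pass over the variable: shape (10, 4)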
Esempio n. 28
0
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, and predict
    by passing data only for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)
    out = model.fit(input_b_np, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np}, [output_a_np, output_b_np],
                         batch_size=10)
    out = model.evaluate(input_b_np, [output_a_np, output_b_np], batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
Esempio n. 29
0
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3,), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))
        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None, None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None, None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
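The mechanism behind these assertions: model.add_loss registers an arbitrary tensor as an extra loss term, so compile(loss=None) is legal and training minimizes only the added and regularization losses; data-driven calls then use batch_size, while data-free calls need steps. A minimal sketch:

import numpy as np
from keras import backend as K
from keras.layers import Input, Dense
from keras.models import Model

a = Input(shape=(3,))
h = Dense(4)(a)
m = Model(a, h)
m.add_loss(K.mean(K.square(h)))  # activity penalty as the only loss
m.compile(optimizer='rmsprop', loss=None)

x = np.random.random((10, 3))
m.fit(x, None, epochs=1, batch_size=5)  # no targets needed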
Esempio n. 30
0
def test_model_with_external_loss():
    # None loss, only regularization loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4,
                name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))
        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
Esempio n. 31
0
class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coef of l2 penalty
        self.create_policy_value_net()

        if model_file:
            print("[Notice] load model from file")
            self.model = load_model(model_file)
        else:
            print("[Notice] create model")
        self._loss_train_op()

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128,
                         kernel_size=(3, 3),
                         padding="same",
                         data_format="channels_first",
                         activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4,
                            kernel_size=(1, 1),
                            data_format="channels_first",
                            activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(
                                    self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2,
                           kernel_size=(1, 1),
                           data_format="channels_first",
                           activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1,
                               activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input),
                           verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model to file """
        print("save model file")
        self.model.save(model_file)
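A hedged usage sketch for the class above (it assumes the Keras imports from the original file, which are not shown, and a backend that supports channels_first convolutions): one training update with dummy self-play tensors.

import numpy as np

w, h = 8, 8
net = PolicyValueNet(w, h)                      # class defined above
states = np.random.random((16, 4, w, h))        # four stacked board planes per sample
mcts_probs = np.random.random((16, w * h))
mcts_probs /= mcts_probs.sum(axis=1, keepdims=True)
winners = np.random.uniform(-1, 1, 16)          # z in [-1, 1]

loss, entropy = net.train_step(states, mcts_probs, winners,
                               learning_rate=1e-3)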
Esempio n. 32
0
def test_pandas_dataframe():
    input_a = Input(shape=(3,), name='input_a')
    input_b = Input(shape=(3,), name='input_b')

    x = Dense(4, name='dense_1')(input_a)
    y = Dense(3, name='dense_2')(input_b)

    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])

    optimizer = 'rmsprop'
    loss = 'mse'

    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)

    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))

    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))

    model_1.fit(input_a_df,
                output_a_df)
    model_2.fit([input_a_df, input_b_df],
                [output_a_df, output_b_df])
    model_1.fit([input_a_df],
                [output_a_df])
    model_1.fit({'input_a': input_a_df},
                output_a_df)
    model_2.fit({'input_a': input_a_df, 'input_b': input_b_df},
                [output_a_df, output_b_df])

    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})

    model_1.evaluate(input_a_df,
                     output_a_df)
    model_2.evaluate([input_a_df, input_b_df],
                     [output_a_df, output_b_df])
    model_1.evaluate([input_a_df],
                     [output_a_df])
    model_1.evaluate({'input_a': input_a_df},
                     output_a_df)
    model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df},
                     [output_a_df, output_b_df])

    model_1.train_on_batch(input_a_df,
                           output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df],
                           [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df},
                           output_a_df)
    model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                           [output_a_df, output_b_df])

    model_1.test_on_batch(input_a_df,
                          output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df],
                          [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df},
                          output_a_df)
    model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df},
                          [output_a_df, output_b_df])
Esempio n. 33
0
def my_loss_function(y_true, y_pred):  # def line assumed from the compile() call below
    # mean_square_loss and loss_tensor are computed earlier in the original source
    return mean_square_loss + K.mean(loss_tensor, axis=-1)
    # return K.mean(y_pred - tf.reshape(loss_tensor, [-1, 3]), axis=-1)
    # return mean_square_loss

    # return mean_squared_error(y_true, y_pred)

# Compiling the model using 'adam' optimizer and MSE as loss function
# sgd = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)

# model.compile(optimizer=sgd, loss=my_loss_function,  metrics=['mse', 'mae', 'mape'],  loss_weights=[1.0, 1.0, 1.0])
model.compile(optimizer='adam', loss=my_loss_function,  metrics=['mse', 'mae', 'mape'],  loss_weights=[1.0, 1.0, 1.0])
# model.compile(optimizer='adam', loss='mean_squared_error',  metrics=['mse', 'mae', 'mape'],  loss_weights=[1.0, 1.0, 1.0])

# multi_outputs shape = tasks x train_samples
callbacks = []
model.fit(x=dat_train, y=[label_train_1, label_train_2, label_train_3], epochs=5000, batch_size=32)

pred1, pred2, pred3 = model.predict(dat_test)

# inv_y = scaler.inverse_transform(inv_y)

plot_x = dat_test[:, 0]
plot_y = pred1.flatten() - label_test_1
# plt.scatter(x=plot_x, y=plot_y)

final_data = np.array([
    grid_cells_test,
    label_test_1,
    pred1.flatten(),
    grid_cells_id_test
]).transpose()
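For reference, a well-formed version of the pattern this fragment uses: a custom Keras loss with the standard (y_true, y_pred) signature applied to every head of a multi-output model. All names below are placeholders, not the original variables.

import numpy as np
from keras import backend as K
from keras.layers import Input, Dense
from keras.models import Model

def my_loss(y_true, y_pred):
    # hand-written MSE; any differentiable tensor expression works here
    return K.mean(K.square(y_true - y_pred), axis=-1)

inp = Input(shape=(5,))
outs = [Dense(1, name='task_%d' % i)(inp) for i in range(3)]
m = Model(inp, outs)
m.compile(optimizer='adam', loss=my_loss,
          metrics=['mse', 'mae'], loss_weights=[1.0, 1.0, 1.0])

x = np.random.random((32, 5))
ys = [np.random.random((32, 1)) for _ in range(3)]
m.fit(x, ys, epochs=1, batch_size=8)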
Esempio n. 34
0
class AIPlayer(Player):
    def __init__(self,
                 buffer_size,
                 sim_count,
                 train=True,
                 model="",
                 tau=1,
                 compile=False):
        self.buffer = ReplayBuffer(buffer_size)
        self.temp_state = deque()
        self.train = train
        self.loss = 0
        self.acc = 0
        self.batch_count = 0
        self.sim_count = sim_count
        if model != "":
            self.load(model, compile)
        else:
            self.create_network()
        self.tau = tau

    @staticmethod
    def create_if_nonexistant(config):
        models = glob.glob(config.data.model_location + "*.h5")
        if len(models) == 0:
            ai = AIPlayer(config.buffer_size,
                          config.game.simulation_num_per_move)
            ai.save(config.data.model_location + "model_0.h5")
            del ai

    def set_training(self, train):
        self.train = train

    @staticmethod
    def clear():
        K.clear_session()

    def load(self, file, compile=False):
        try:
            del self.network
        except Exception:
            pass
        self.network = load_model(file,
                                  custom_objects={
                                      "objective_function_for_policy":
                                      AIPlayer.objective_function_for_policy,
                                      "objective_function_for_value":
                                      AIPlayer.objective_function_for_value
                                  },
                                  compile=compile)

    def save(self, file):
        self.network.save(file)

    def create_network(self):
        x_in = Input((3, 8, 8))
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x_in)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        for _ in range(10):
            x = self._build_residual_block(x)

        res_out = x

        x = Conv2D(filters=2, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        policy_out = Dense(8 * 8 + 1, activation="softmax",
                           name="policy_out")(x)

        x = Conv2D(filters=1, kernel_size=1,
                   data_format="channels_first")(res_out)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Flatten()(x)
        x = Dense(64, activation="relu")(x)
        value_out = Dense(1, activation="tanh", name="value_out")(x)

        self.network = Model(x_in, [policy_out, value_out],
                             name="reversi_model")
        self.compile()

    def _build_residual_block(self, x):
        in_x = x
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation("relu")(x)
        x = Conv2D(filters=128,
                   kernel_size=(3, 3),
                   padding="same",
                   data_format="channels_first")(x)
        x = BatchNormalization(axis=1)(x)
        x = Add()([in_x, x])
        x = Activation("relu")(x)
        return x

    def compile(self):
        losses = [
            AIPlayer.objective_function_for_policy,
            AIPlayer.objective_function_for_value
        ]
        self.network.compile(optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                             loss=losses)

    def update_lr(self, lr):
        K.set_value(self.network.optimizer.lr, lr)

    @staticmethod
    def objective_function_for_policy(y_true, y_pred):
        # can use categorical_crossentropy??
        return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

    @staticmethod
    def objective_function_for_value(y_true, y_pred):
        return mean_squared_error(y_true, y_pred)

    def update_buffer(self, winner):
        if self.train:
            while len(self.temp_state) > 0:
                t = self.temp_state.pop()
                self.buffer.add((t[0], t[1], winner))

    def train_batches(self, batch_size, batches=-1, verbose=2):
        if batches == -1:
            s_buffer = np.array([_[0] for _ in self.buffer.buffer])
            p_buffer = np.array([_[1] for _ in self.buffer.buffer])
            v_buffer = np.array([_[2] for _ in self.buffer.buffer])
        else:
            sample_size = batch_size * batches
            sample = []
            while sample_size > 0:
                sample += self.buffer.sample(sample_size)
                sample_size -= self.buffer.size()
            s_buffer = np.array([_[0] for _ in sample])
            p_buffer = np.array([_[1] for _ in sample])
            v_buffer = np.array([_[2] for _ in sample])
        history = self.network.fit(s_buffer, [p_buffer, v_buffer],
                                   batch_size=batch_size,
                                   epochs=1,
                                   verbose=verbose)
        return history

    def preprocess_input(self, board, side):
        state = np.zeros((3, 8, 8), dtype=int)
        for i in range(8):
            for j in range(8):
                if board[i, j] == 1:
                    state[0, i, j] = 1
                elif board[i, j] == -1:
                    state[1, i, j] = 1
                if side == 1:
                    state[2, i, j] = 1
        return state

    def evaluate(self, game, side):
        current_input = self.preprocess_input(game.board, side)
        pred = self.network.predict(current_input[np.newaxis, :])
        return pred[1][0]

    def pick_move(self, game, side):
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            possible_moves.append((-1, -1))
        monte_prob = self.monte_carlo(game, side)

        if self.train:
            self.temp_state.append((self.preprocess_input(game.board, side),
                                    np.divide(monte_prob, np.sum(monte_prob))))

        monte_prob = np.float_power(monte_prob, 1 / self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))

        r = random()
        for i, move in enumerate(possible_moves):
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]

    def monte_carlo(self, game, side):
        N = defaultdict(lambda: 0)
        W = defaultdict(lambda: 0)
        Q = defaultdict(lambda: 0)
        P = defaultdict(lambda: 0)

        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            policy = np.zeros((65))
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            policy = np.zeros((65))
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy

        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis, :])
        policy = pred[0][0]

        total = 1e-10
        for i, move in enumerate(possible_moves):
            total += policy[Othello.move_id(move)]

        for move in possible_moves:
            P[(sid,
               Othello.move_id(move))] = policy[Othello.move_id(move)] / total

        for i in range(self.sim_count):
            #print("Sim #%d"% i)
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1, -1))
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid,
                                mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move

                #print(best_move)

                if N[(sid, Othello.move_id(best_move))] == 0:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

                    current_input = self.preprocess_input(
                        clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis, :])
                    policy = pred[0][0]
                    value = pred[1][0]

                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1, -1))
                    total = 1e-10
                    for i, move in enumerate(possible_moves):
                        total += policy[Othello.move_id(move)]

                    for move in possible_moves:
                        P[(sid, Othello.move_id(move)
                           )] = policy[Othello.move_id(move)] / total

                    for node in visited:
                        N[node] += 1
                        W[node] += value * side
                        Q[node] = W[node] / N[node]
                    #print()
                    break
                else:
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

        policy = np.zeros((65))
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid, mid)]

        return policy
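The selection rule inside monte_carlo scores each edge as Q + P / (N + 1): exploitation (the mean value Q) plus a prior-weighted exploration bonus that decays with the visit count N, a simplified PUCT. A tiny numeric sketch of that scoring with made-up statistics:

edges = {            # move: (Q, P, N), all hypothetical
    (2, 3): (0.10, 0.50, 4),
    (4, 4): (0.30, 0.20, 1),
    (0, 7): (0.05, 0.25, 0),
}

def score(q, p, n):
    return q + p / (n + 1)  # same formula as qu_val above

best = max(edges, key=lambda mv: score(*edges[mv]))
print(best)  # (4, 4): 0.30 + 0.20 / 2 = 0.40 beats 0.20 and 0.30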
Esempio n. 35
0
class FinancialTimeSeriesAnalysisModel(object):
    model = None

    def __init__(self, nb_time_step, dim_data, batch_size=1, model_path=None):
        self.model_path = model_path
        self.batch_size = batch_size
        self.size_of_input_data_dim = dim_data
        self.size_of_input_timesteps = nb_time_step
        self.build()
        self.weight_loaded = False
        if model_path is not None:
            self.load_weights()

    def build(self):
        dim_data = self.size_of_input_data_dim
        nb_time_step = self.size_of_input_timesteps
        financial_time_series_input = Input(shape=(nb_time_step, dim_data), name='x1')
        lstm_layer_1 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                            W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                            return_sequences=True, name='lstm_layer1')
        lstm_layer_21 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss1')
        lstm_layer_22 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss2')
        lstm_layer_23 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss3')

        lstm_layer_24 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss4')

        lstm_layer_25 = LSTM(output_dim=nb_hidden_units, dropout_U=dropout, dropout_W=dropout,
                             W_regularizer=l2(l2_norm_alpha), b_regularizer=l2(l2_norm_alpha), activation='tanh',
                             return_sequences=True, name='lstm_layer2_loss5')
        h1 = lstm_layer_1(financial_time_series_input)
        h21 = lstm_layer_21(h1)
        h22 = lstm_layer_22(h1)
        h23 = lstm_layer_23(h1)
        h24 = lstm_layer_24(h1)
        h25 = lstm_layer_25(h1)
        time_series_predictions1 = TimeDistributed(Dense(1), name="p1")(h21)  # custom 1
        time_series_predictions2 = TimeDistributed(Dense(1), name="p2")(h22)  # custom 2
        time_series_predictions3 = TimeDistributed(Dense(1), name="p3")(h23)  # mse
        time_series_predictions4 = TimeDistributed(Dense(1, activation='sigmoid'), name="p4")(h24)  # logloss
        time_series_predictions5 = TimeDistributed(Dense(nb_labels, activation='softmax'), name="p5")(h25)  # cross
        self.model = Model(input=financial_time_series_input,
                           output=[time_series_predictions1, time_series_predictions2,
                                   time_series_predictions3, time_series_predictions4,
                                   time_series_predictions5],
                           name="multi-task deep rnn for financial time series forecasting")
        plot(self.model, to_file='model.png')

    def reset(self):
        for l in self.model.layers:
            if type(l) is LSTM:
                l.reset_states()

    def compile_model(self, lr=0.0001, arg_weight=1.):
        optimizer = Adam(lr=lr)
        loss = [custom_objective1, custom_objective2, 'mse', 'binary_crossentropy', 'categorical_crossentropy']
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit_model(self, X, y, y_label, epoch=300):
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0)

        self.model.fit(X, [y]*3 + [y > 0] + [y_label], batch_size=self.batch_size, nb_epoch=epoch, validation_split=0.2,
                       shuffle=True, callbacks=[early_stopping])

    def save(self):
        self.model.save_weights(self.model_path, overwrite=True)

    def load_weights(self):
        if os.path.exists(self.model_path):
            self.model.load_weights(self.model_path)
            self.weight_loaded = True

    def print_weights(self, weights=None, detail=False):
        weights = weights or self.model.get_weights()
        for w in weights:
            print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)))
        if detail:
            for w in weights:
                print("%s: %s" % (w.shape, w))

    def model_eval(self, X, y):
        y_hat = self.model.predict(X, batch_size=1)[0]
        count_true = 0
        count_all = y.shape[1]
        for i in range(y.shape[1]):
            # count timesteps where prediction and target share the same sign
            if y[0, i, 0] * y_hat[0, i, 0] > 0:
                count_true += 1
            print(y[0, i, 0], y_hat[0, i, 0])
        print(count_all, count_true)
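fit_model above builds its five target arrays as [y]*3 + [y > 0] + [y_label]: the two custom losses and MSE share the real-valued target, the log-loss head gets its sign indicator, and the cross-entropy head gets class labels. A sketch of just that construction with dummy shapes (nb_time_step and nb_labels are assumed):

import numpy as np

batch, nb_time_step, nb_labels = 2, 10, 3          # assumed sizes
y = np.random.randn(batch, nb_time_step, 1)        # real-valued series target
y_label = np.random.random((batch, nb_time_step, nb_labels))
y_label /= y_label.sum(axis=-1, keepdims=True)     # soft class target per step

targets = [y] * 3 + [(y > 0).astype('float32')] + [y_label]
print([t.shape for t in targets])                  # four (2, 10, 1) arrays, then (2, 10, 3)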
Esempio n. 36
0
def test_model_methods():
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # test fit
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    nb_epoch=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    nb_epoch=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4),
                              samples_per_epoch=10,
                              nb_epoch=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {'mse_squared': K.pow(m, 2), 'mse_cubed': K.pow(m, 3)}

    model.compile(optimizer,
                  loss,
                  metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss, plus per output: loss + 3 metrics
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
Esempio n. 37
0
class BasicModel(object):
    def __init__(self, multi_class=False):
        self.multi_class = multi_class

    def build(self,
              input_shape,
              nn_type='Dense',
              bidirectional=True,
              vat=True):
        """ build model
        :param input_shape: shape=(number of input rows, 1)
        :param nn_type: select 'Dense' or 'RNN' or 'GRU' or 'LSTM'
        :param bidirectional: use_flag for Bidirectional rnn
        :param vat: use_flag for VAT
        :return: self
        """
        input_layer = Input(input_shape)
        output_layer = self.core_data_flow(input_layer, nn_type, bidirectional)
        if vat:
            self.model = VATModel(input_layer, output_layer).setup_vat_loss()
        else:
            self.model = Model(input_layer, output_layer)
        return self

    def core_data_flow(self, input_layer, nn_type, bidirectional):
        """ build nn model
        :param input_layer: required for Model()
        :return: layer
        """
        if nn_type == 'Dense':
            x = Dense(64, activation='relu')(input_layer)
            x = Dropout(0.5)(x)
            x = Dense(64, activation='relu')(x)
            x = Dropout(0.5)(x)
            x = Flatten()(x)
            if self.multi_class:
                x = Dense(5, activation='softmax')(x)
            else:
                x = Dense(1, activation='sigmoid')(x)
        else:
            x = Dense(160)(input_layer)
            x = BatchNormalization()(x)
            x = LeakyReLU()(x)
            if nn_type == 'RNN':
                x = Bidirectional(SimpleRNN(256))(x)
            elif nn_type == 'GRU':
                x = Bidirectional(GRU(256))(x)
            elif nn_type == 'LSTM':
                x = Bidirectional(LSTM(256))(x)
            x = BatchNormalization()(x)
            x = LeakyReLU()(x)
            if self.multi_class:
                x = Dense(5, activation='softmax')(x)
            else:
                x = Dense(1, activation='sigmoid')(x)
        return x

    def train(self,
              X_train,
              X_test,
              y_train,
              y_test,
              batch_size=128,
              epochs=100,
              early_stop=True):
        """ train rnn model
        :param X_train, X_test, y_train, y_test: X is the feature vector; y is the label
        :param batch_size: number of samples per gradient update
        :param epochs: number of training iterations
        :param early_stop: use_flag for EarlyStopping
        :return: history data
        """
        if self.multi_class:
            self.model.compile(loss='categorical_crossentropy',
                               optimizer=SGD(lr=0.01,
                                             decay=1e-6,
                                             momentum=0.9,
                                             nesterov=True),
                               metrics=['accuracy'])
        else:
            self.model.compile(loss='binary_crossentropy',
                               optimizer='RMSprop',
                               metrics=['accuracy'])

        np.random.seed(1337)  # for reproducibility
        if early_stop:
            early_stopping = EarlyStopping(monitor='val_loss',
                                           mode='auto',
                                           patience=5)
            return self.model.fit(X_train,
                                  y_train,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  validation_data=(X_test, y_test),
                                  callbacks=[early_stopping])
        else:
            return self.model.fit(X_train,
                                  y_train,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  validation_data=(X_test, y_test))

    def predict(self, X):
        return self.model.predict(X)

    def evaluate(self, X, y):
        # the sigmoid output has shape (n, 1); take column 0 as the probability
        return self._score(y, self.model.predict(X)[:, 0])

    def _score(self, true_label, predicted_prob):
        """ calculate the performance score for binary calssification
        :param true_label: the ground truth score
        :param predicted_label: the predicted probability
        :return: a dict of scores
        """
        score_dict = dict()
        score_dict['AUC'] = metrics.roc_auc_score(true_label, predicted_prob)
        predicted_label = [0 if prob < 0.5 else 1 for prob in predicted_prob]
        score_dict['Accuracy'] = metrics.accuracy_score(
            true_label, predicted_label)
        cm = metrics.confusion_matrix(true_label, predicted_label)
        score_dict['Confusion Matrix'] = cm
        score_dict['TPR'] = cm[1, 1] / float(cm[1, 0] + cm[1, 1])
        score_dict['FPR'] = cm[0, 1] / float(cm[0, 0] + cm[0, 1])
        return score_dict
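
A minimal usage sketch for BasicModel, assuming the Keras layers imported by the original module are in scope; the toy data, shapes, and sizes below are placeholders, not part of the original.

import numpy as np

# Toy data: 200 samples of 20 rows, reshaped to (rows, 1) as build() expects.
X = np.random.random((200, 20, 1))
y = np.random.randint(0, 2, size=(200,))
X_train, X_test, y_train, y_test = X[:160], X[160:], y[:160], y[160:]

clf = BasicModel(multi_class=False).build(input_shape=(20, 1),
                                          nn_type='LSTM',
                                          bidirectional=True,
                                          vat=False)
history = clf.train(X_train, X_test, y_train, y_test,
                    batch_size=32, epochs=5)
print(clf.evaluate(X_test, y_test))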
Esempio n. 38
0
class ResiCNN:  # My JackNet with residual block
    cnn_filter_num = 64
    cnn_kernel_size = 3

    def __init__(self, channels=3):
        self.model = None
        self.optimizer = None
        self.channels = channels

    def build(self):  # build model
        image_in = Input((None, None, self.channels))

        conv = Conv2D(filters=self.cnn_filter_num,
                      kernel_size=self.cnn_kernel_size,
                      strides=(1, 1),
                      padding='same',
                      data_format='channels_last')(image_in)
        conv = Activation('relu')(conv)

        x = conv

        for _ in range(8):
            x = self._build_residual_block(x)

        conv_out = Conv2D(filters=self.channels,
                          kernel_size=self.cnn_kernel_size,
                          strides=(1, 1),
                          padding='same',
                          data_format='channels_last')(x)

        output = Add()([image_in, conv_out])

        self.model = Model(image_in, output, name='model')

    def _build_residual_block(self, x):  # build residual block
        x_in = x

        x = Conv2D(filters=self.cnn_filter_num,
                   kernel_size=self.cnn_kernel_size,
                   strides=(1, 1),
                   padding='same',
                   data_format='channels_last')(x)
        x = BatchNormalization(axis=-1)(x)
        x = Activation('relu')(x)
        x = Conv2D(filters=self.cnn_filter_num,
                   kernel_size=self.cnn_kernel_size,
                   strides=(1, 1),
                   padding='same',
                   data_format='channels_last')(x)
        x = BatchNormalization(axis=-1)(x)
        x = Add()([x_in, x])
        x = Activation("relu")(x)
        return x

    def predict(self, x):  # denoise on input x
        if x.ndim == 3:
            x = x.reshape(1, x.shape[0], x.shape[1], self.channels)
        return self.model.predict_on_batch(x)

    def load(self, config_path, model_path):  # load model
        print('restore model...')
        if os.path.exists(config_path) and os.path.exists(model_path):
            with open(config_path, 'r') as fp:
                self.model = Model.from_config(json.load(fp))
                self.model.load_weights(model_path)
            return True
        return False

    def save(self, config_path, model_path):  # save model
        with open(config_path, 'w') as fp:
            json.dump(self.model.get_config(), fp)
        self.model.save_weights(model_path)

    def compile(self):  # choose adam optimizer and set learning rate
        self.optimizer = Adam(lr=1e-2)
        self.model.compile(optimizer=self.optimizer, loss=self.loss)

    def train_generator(self,
                        data,
                        epochs=1,
                        steps_per_epoch=None,
                        callbacks=None):
        self.model.fit_generator(iter(data),
                                 epochs=epochs,
                                 steps_per_epoch=steps_per_epoch,
                                 callbacks=callbacks)

    def train(self, data, epochs=1, callbacks=None):
        self.model.fit(x=data[0],
                       y=data[1],
                       epochs=epochs,
                       batch_size=8,
                       callbacks=callbacks)

    @staticmethod
    def loss(y_true, y_pred):  # loss function: half the summed squared error
        return 0.5 * K.sum(K.square(y_pred - y_true), axis=-1)
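
A minimal usage sketch for ResiCNN on toy data; the array shapes and the Gaussian noise model are illustrative assumptions, not part of the original.

import numpy as np

# Toy denoising task: clean RGB patches plus Gaussian noise.
clean = np.random.random((16, 32, 32, 3)).astype('float32')
noisy = (clean + np.random.normal(scale=0.1, size=clean.shape)).astype('float32')

net = ResiCNN(channels=3)
net.build()
net.compile()
net.train((noisy, clean), epochs=1)   # data is an (x, y) pair
denoised = net.predict(noisy[0])      # a 3-D input is reshaped internally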
Esempio n. 39
0
def training_CNN(model: training.Model,
                 train,
                 val,
                 num_epochs: int,
                 save=True,
                 learning_rate=0.5,
                 verbose=0,
                 batch_size=32) -> Tuple[History, list]:
    '''
  cnn.training_CNN()
  This function takes the model architecture and compiles it using the SGD optimizer and the binary cross-entropy loss function.

  The function defines the optimizer, stochastic gradient descent, using the learning rate. The learning rate is described in the thesis.
  For information on the two hyperparameters momentum and decay see https://keras.io/api/optimizers/.

  The function then compiles the model with all the hyperparameters. Here, callbacks are defined. These are used for plotting, saving etc.
  Then, the model is trained on the training data and validated on the validation data.

  Input:
      model: the saved model architecture from keras. Made using e.g. cnn.s3_model or cnn.s1_model.
      train: Training data. Made using e.g. cnn.make_dataset
      val: Validation data. Made using e.g. cnn.make_dataset.
      num_epochs: Number of epochs
      save [True/False]: If True, the model will be saved to disk.
      learning_rate [float]: Value between 0 and 1.
      verbose [int]: if verbose=0, nothing will be printed. If verbose=1, information will be printed.
      batch_size: hyperparameter used in keras.

  Output:
      history: object containing the training history, the weights, plots and more
      weight_files: list of paths to the saved weight files.

  Example:
      history, weight_files = training_CNN(model, train_data, val_data, 40, save=True, learning_rate=0.001, verbose=1)
      history, weight_files = training_CNN(model, train_data, val_data, 40, save=True, learning_rate=0.9, verbose=1)

  Author:
    Kristian Soerensen
    July 2020
    [email protected]
  '''
    #printing info if verbose >0
    if verbose > 0:
        print(model.name + ' is being trained.\n')
    #setting tensorflow verbosity.
    tf.autograph.set_verbosity(0)
    #defining the SGD optimizer with the given learning rate.
    optimizer1 = keras.optimizers.SGD(lr=learning_rate,
                                      momentum=0.9,
                                      decay=0.01)
    #compiling the model with the hyperparameters.
    model.compile(optimizer=optimizer1,
                  loss="binary_crossentropy",
                  metrics=[
                      "accuracy", "AUC",
                      tf.keras.metrics.BinaryCrossentropy(),
                      tf.keras.metrics.Precision(),
                      tf.keras.metrics.BinaryAccuracy()
                  ])
    #path to save weights to.
    filepath = 'weights/' + model.name + '.{epoch:02d}-{loss:.2f}.hdf5'
    #checkpoint and tensorboard callbacks are used in the fitting to collect info.
    checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                                 monitor='loss',
                                                 verbose=verbose,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 mode='auto')
    tensor_board = keras.callbacks.TensorBoard(log_dir='logs/',
                                               histogram_freq=0)
    #fitting the data to the model!
    history = model.fit(train,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        verbose=verbose,
                        callbacks=[checkpoint, tensor_board],
                        validation_data=val)
    weight_files = glob.glob(os.path.join(os.getcwd(), 'weights/*'))
    #making a folder to save models to.
    if not os.path.exists(model.name):
        os.mkdir(model.name)
    #saving models and weights..
    if save:
        # convert history dict to pandas to save
        hist_df = pd.DataFrame(history.history)
        # save to json:
        hist_json_file = model.name + '/history.json'
        with open(hist_json_file, mode='w') as f:
            hist_df.to_json(f)
        hist_csv_file = model.name + '/history.csv'
        with open(hist_csv_file, mode='w') as f:
            hist_df.to_csv(f)
        np.save(model.name + '/history.npy', history.history)
        #saving the entire model, weights included.
        model.save(model.name + "/model_try.h5")
        print("Model is saved.")

    return history, weight_files
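
A follow-up sketch of restoring the latest checkpoint from the returned `weight_files` list; `model`, `train_data`, and `val_data` are assumed to exist, and picking the lexicographically last file is a simplifying assumption given the epoch/loss pattern in `filepath` above.

history, weight_files = training_CNN(model, train_data, val_data,
                                     num_epochs=40, save=True,
                                     learning_rate=0.001, verbose=1)
if weight_files:
    # filenames embed epoch and loss, e.g. name.07-0.12.hdf5
    model.load_weights(sorted(weight_files)[-1])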
Esempio n. 40
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test with a custom metric function
    mse = lambda y_true, y_pred: K.mean(K.pow(y_true - y_pred, 2))
    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, nb_epoch=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
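
Since the dict-based calls above are keyed by layer names, a small sketch (not part of the original test) of checking those keys up front, using the `input_names`/`output_names` attributes that Keras functional models expose:

# Sketch: verify dict keys against the model's feed names before calling
# fit/train_on_batch with dictionary inputs (names as used in the test).
inputs = {'input_a': input_a_np, 'input_b': input_b_np}
targets = {'dense_1': output_a_np, 'dropout': output_b_np}
assert set(inputs) == set(model.input_names)
assert set(targets) == set(model.output_names)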
Esempio n. 41
0
class AE(Model):
    """
    Autoencoder. This is a simple autoencoder consisting of an encoder and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = AE(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)

    """
    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading.
        if encoder is not None and decoder is not None and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[
            1:], str(encoder.outputs[0].shape) + " " + str(
                decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                **kwargs):
        """
        Compiles the model.

        This is the same as compilation in Keras.

        """

        assert "reconstruction_loss" not in kwargs, "Not expected to use reconstruction_loss in AE."

        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        Trains the autoencoder.
        """

        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """
        Trains the autoencoder with a generator.
        """

        return self.autoencoder.fit_generator(
            generator,
            steps_per_epoch,
            epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """
        Evaluates the autoencoder.
        """

        return self.autoencoder.evaluate(x,
                                         y,
                                         batch_size,
                                         verbose,
                                         sample_weight,
                                         steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`
        """

        return self.predict_reconstruct_from_samples(x, batch_size, verbose,
                                                     steps)

    def predict_reconstruct_from_samples(self,
                                         x,
                                         batch_size=None,
                                         verbose=0,
                                         steps=None):
        """
        Reconstructs samples.

        Samples are firstly mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction space via the decoder.
        """

        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self,
                                          x,
                                          batch_size=None,
                                          verbose=0,
                                          steps=None):
        """
        Embeds samples into latent space using the encoder.
        """

        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self,
                                        x,
                                        batch_size=None,
                                        verbose=0,
                                        steps=None):
        """
        Maps latent vectors to reconstruction space using the decoder.
        """

        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """

        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the decoder.
        The encoder and decoder use the path plus a respective annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and *myae-decoder.h5*.

        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
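
A minimal end-to-end sketch for AE; the MNIST-like 784/32 shapes and layer sizes are illustrative assumptions, not part of the original.

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

# Encoder 784 -> 32 and decoder 32 -> 784.
enc_in = Input(shape=(784,))
encoder = Model(enc_in, Dense(32, activation='relu')(enc_in))
dec_in = Input(shape=(32,))
decoder = Model(dec_in, Dense(784, activation='sigmoid')(dec_in))

ae = AE(encoder=encoder, decoder=decoder)
ae.compile(optimizer='adam', loss='mse')

x = np.random.random((256, 784))
ae.fit(x, x, batch_size=32, epochs=1)   # reconstruction: targets == inputs
z = ae.predict_embed_samples_into_latent(x)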
Esempio n. 42
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    input_a_df = pd.DataFrame(input_a_np)
    input_b_df = pd.DataFrame(input_b_np)

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    output_a_df = pd.DataFrame(output_a_np)
    output_b_df = pd.DataFrame(output_b_np)

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np], [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.train_on_batch([input_a_df, input_b_df],
                               [output_a_df, output_b_df])

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)
    out = model.fit([input_a_df, input_b_df],
                    [output_a_df, output_b_df], epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=(
                        {'input_a': input_a_np, 'input_b': input_b_np},
                        {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})
    out = model.test_on_batch([input_a_df, input_b_df],
                              [output_a_df, output_b_df])

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})
    out = model.predict_on_batch([input_a_df, input_b_df])

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    out = model.predict([input_a_df, input_b_df], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse', sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # with temporal sample weights, the output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    out = model.fit_generator(generator=RandomSequence(3), steps_per_epoch=12, epochs=5,
                              initial_epoch=0, validation_data=RandomSequence(4),
                              validation_steps=12, callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
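
`RandomSequence` is used above but defined elsewhere in the test suite; a plausible minimal sketch (batch shapes inferred from the model's two 3-wide inputs and its 4- and 3-wide outputs, everything else assumed) would be:

import numpy as np
from keras.utils import Sequence

class RandomSequence(Sequence):
    """Random batches shaped for the two-input/two-output model above."""
    def __init__(self, batch_size, sequence_length=12):
        self.batch_size = batch_size
        self.sequence_length = sequence_length

    def __len__(self):
        return self.sequence_length

    def __getitem__(self, idx):
        b = self.batch_size
        return ([np.random.random((b, 3)), np.random.random((b, 3))],
                [np.random.random((b, 4)), np.random.random((b, 3))])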
Esempio n. 43
0
class TL(Model):
    """
    Triplet-Loss trained Neural Network.

    https://arxiv.org/abs/1503.03832
    """
    def __init__(self, base=None, siamese=None):
        super(TL, self).__init__()

        # Store the base model.
        assert (base != None)
        self.base = base

        # For loading.
        if base != None and siamese != None:
            self.base = base
            self.siamese = siamese
            self.latent_dim = self.base.outputs[0].shape[1]
            return

        # Get the latent dimension.
        assert len(self.base.outputs) == 1
        assert len(self.base.outputs[0].shape) == 2
        self.latent_dim = self.base.outputs[0].shape[1]

        # Get the input shape.
        input_shape = self.base.inputs[0].shape.as_list()[1:]

        # Create the anchor.
        input_anchor = layers.Input(shape=input_shape)
        output_anchor = input_anchor
        output_anchor = self.base(output_anchor)

        # Create the positive.
        input_positive = layers.Input(shape=input_shape)
        output_positive = input_positive
        output_positive = self.base(output_positive)

        # Create the negative.
        input_negative = layers.Input(shape=input_shape)
        output_negative = input_negative
        output_negative = self.base(output_negative)

        # Create a dummy output.
        output = layers.concatenate(
            [output_anchor, output_positive, output_negative])

        # Create the model.
        self.siamese = Model([input_anchor, input_positive, input_negative],
                             output,
                             name="triplet_model")

    def compile(self,
                optimizer,
                loss=None,
                metrics=None,
                loss_weights=None,
                sample_weight_mode=None,
                weighted_metrics=None,
                target_tensors=None,
                triplet_loss="mse",
                **kwargs):
        """
        Compiles the TL.

        Additionally to the default functionality of *compile*, it adds the triplet-loss.
        In order to do so you have to provide it via the parameter *triplet_loss*.

        The triplet loss is similar to

        >>> triplet_loss = max(0.0, pos_dist - neg_dist + alpha)

        See the literature for details.

        Additional args:
            triplet_loss (string): The base-loss for the triplet-loss. Values are either *euclidean* for euclidean norm or *cosine* for cosine similarity.

        """
        assert loss is None, "Not expected to provide an explicit loss for TL. Use 'triplet_loss'"

        self.triplet_loss = triplet_loss

        def triplet_loss_function(y_true, y_pred, alpha=0.4):

            anchor = y_pred[:, 0:self.latent_dim]
            positive = y_pred[:, self.latent_dim:self.latent_dim * 2]
            negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3]

            if triplet_loss == "euclidean":
                pos_dist = euclidean_loss(positive, anchor)
                neg_dist = euclidean_loss(negative, anchor)
            elif triplet_loss == "cosine":
                pos_dist = cosine_loss(positive, anchor)
                neg_dist = cosine_loss(negative, anchor)
            else:
                raise Exception("Unexpected: " + triplet_loss)

            basic_loss = pos_dist - neg_dist + alpha
            loss = K.maximum(basic_loss, 0.0)
            return loss

        loss = triplet_loss_function

        self.siamese.compile(optimizer, loss, metrics, loss_weights,
                             sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            minibatch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            **kwargs):
        """
        This is basically the same as in vanilla Keras.

        Additional args:
            minibatch_size (int): The model internally does some sampling. The *minibatch_size* specifies how many candidates to use in order to create a triplet for training.
        """

        assert minibatch_size is not None, "ERROR! Must provide 'minibatch_size'."
        assert steps_per_epoch is not None, "ERROR! Must provide 'steps_per_epoch'."
        assert validation_steps is not None, "ERROR! Must provide 'validation_steps'."
        assert validation_data is not None, "ERROR! Must provide 'validation_data'."

        y_dummy = np.zeros((batch_size, self.latent_dim * 3))

        # Template generator (batch_size and minibatch_size are taken from
        # the enclosing scope).
        def triplet_loss_generator(x_generator, y_generator):

            # Get the classes.
            classes = sorted(list(set(y_generator)))

            # Sort by classes for easy indexing.
            class_indices = {}
            for c in classes:
                class_indices[c] = []
            for index, c in enumerate(y_generator):
                class_indices[c].append(index)

            # Compute the complements.
            class_complements = {}
            for c in classes:
                class_complements[c] = [c2 for c2 in classes if c2 != c]

            # Generator loop.
            while True:

                x_input_anchors = []
                x_input_positives = []
                x_input_negatives = []

                # Generate a whole batch.
                for _ in range(batch_size):
                    anchor_class = random.choice(classes)
                    anchor_index = random.choice(class_indices[anchor_class])
                    anchor_input = x_generator[anchor_index]
                    #print("anchor_class", anchor_class)
                    anchor_latent = self.base.predict(
                        np.expand_dims(anchor_input, axis=0))[0]

                    # Generate some positive candidates.
                    positive_candidates = []
                    while len(positive_candidates) < minibatch_size:
                        positive_class = anchor_class
                        positive_index = random.choice(
                            class_indices[positive_class])
                        positive_input = x_generator[positive_index]
                        assert positive_class == y_generator[positive_index]
                        #print("positive_class", positive_class)
                        positive_candidates.append(positive_input)

                    # Find the farthest positive candidate.
                    positive_candidates = np.array(positive_candidates)
                    positive_latents = self.base.predict(positive_candidates)
                    positive_extremum = compute_latent_extremum(
                        anchor_latent, positive_latents, "argmax",
                        self.triplet_loss)
                    positive_input = positive_candidates[positive_extremum]

                    # Generate some negative candidates.
                    negative_candidates = []
                    while len(negative_candidates) < minibatch_size:
                        negative_class = random.choice(
                            class_complements[anchor_class])
                        negative_index = random.choice(
                            class_indices[negative_class])
                        negative_input = x_generator[negative_index]
                        assert negative_class == y_generator[negative_index]
                        #print("negative_class", negative_class)
                        negative_candidates.append(negative_input)

                    # Find the closest negative candidate.
                    negative_candidates = np.array(negative_candidates)
                    negative_latents = self.base.predict(negative_candidates)
                    negative_extremum = compute_latent_extremum(
                        anchor_latent, negative_latents, "argmin",
                        self.triplet_loss)
                    negative_input = negative_candidates[negative_extremum]

                    # Done.
                    x_input_anchors.append(anchor_input)
                    x_input_positives.append(positive_input)
                    x_input_negatives.append(negative_input)

                x_input_anchors = np.array(x_input_anchors)
                x_input_positives = np.array(x_input_positives)
                x_input_negatives = np.array(x_input_negatives)
                x_input = [
                    x_input_anchors, x_input_positives, x_input_negatives
                ]

                yield x_input, y_dummy

        # Create the generators.
        training_generator = triplet_loss_generator(x, y)
        if validation_data is not None:
            validation_generator = triplet_loss_generator(
                validation_data[0], validation_data[1])
        else:
            validation_generator = None

        # Create the history.
        history_keys = ["loss", "val_loss"]
        history = {}
        for history_key in history_keys:
            history[history_key] = []

        # Training the model
        for epoch in range(epochs):

            print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...")

            # Generating data for training.
            training_input, training_output = next(training_generator)
            if validation_generator is not None:
                validation_input, validation_output = next(
                    validation_generator)

            model_history = self.siamese.fit(
                training_input,
                training_output,
                validation_data=(validation_input, validation_output),
                epochs=1,
                steps_per_epoch=steps_per_epoch,
                verbose=0,
                validation_steps=validation_steps)

            # Update the history.
            for history_key in history_keys:
                history_value = model_history.history[history_key]
                history[history_key].append(history_value)
                print(history_key, history_value)

        return history

    def fit_generator(self,
                      generator,
                      steps_per_epoch=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      validation_steps=None,
                      class_weight=None,
                      max_queue_size=10,
                      workers=1,
                      use_multiprocessing=False,
                      shuffle=True,
                      initial_epoch=0):
        """
        Coming soon...
        """

        print("TODO: implement fit_generator!")

        raise Exception("Not implemented!")

        return self.siamese.fit_generator(generator, steps_per_epoch, epochs,
                                          verbose, callbacks, validation_data,
                                          validation_steps, class_weight,
                                          max_queue_size, workers,
                                          use_multiprocessing, shuffle,
                                          initial_epoch)

    def evaluate(self,
                 x=None,
                 y=None,
                 batch_size=None,
                 verbose=1,
                 sample_weight=None,
                 steps=None):
        """
        Evaluates the model. Same as vanilla Keras.
        """

        return self.siamese.evaluate(x,
                                     y,
                                     batch_size,
                                     verbose,
                                     sample_weight,
                                     steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. Same as vanilla Keras.
        """

        return self.siamese.predict(x, batch_size, verbose, steps)

    def summary(self):
        """
        Provides a summary.
        """

        print("Basemodel:")
        self.base.summary()
        print("Siamese model:")
        self.siamese.summary()

    def save(self, path):
        """
        Saves the TL.

        This includes the whole Siamese Net plus the base-model.

        This code

        >>> tl.save("myae.h5")

        will create the files *tl.h5*, and *tl-base.h5*.

        """
        self.siamese.save(path)
        self.base.save(append_to_filepath(path, "-base"))
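
A minimal usage sketch for TL, assuming the module-level helpers it relies on (euclidean_loss etc.) are importable; the base network, data shapes, and class layout below are illustrative assumptions.

import numpy as np
from keras import layers
from keras.models import Model

# Base embedding network: 64-dim features -> 16-dim latent vectors.
base_in = layers.Input(shape=(64,))
base = Model(base_in, layers.Dense(16)(base_in))

tl = TL(base=base)
tl.compile(optimizer='adam', triplet_loss='euclidean')

x = np.random.random((500, 64))
y = np.random.randint(0, 10, size=(500,))   # ten classes
history = tl.fit(x, y, batch_size=32, minibatch_size=8, epochs=2,
                 steps_per_epoch=10, validation_steps=10,
                 validation_data=(x[:100], y[:100]))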
Esempio n. 44
0
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], nb_epoch=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np], [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    nb_epoch=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np, 'input_b': input_b_np}, {'dense_1': output_a_np, 'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np, 'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)

    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)
    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], nb_epoch=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)), np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)), np.random.random((batch_sz, 3))])
    out = model.fit_generator(gen_data(4), samples_per_epoch=10, nb_epoch=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    def mse_powers(y_true, y_pred):
        m = mse(y_true, y_pred)
        return {
            'mse_squared': K.pow(m, 2),
            'mse_cubed': K.pow(m, 3)
        }
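    # (aside, inferred from the assertions below) a metric function returning
    # a dict expands into one reported value per key, so each output here
    # contributes its loss plus mse, mse_squared and mse_cubed.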

    model.compile(optimizer, loss, metrics=[mse, mse_powers],
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * 4  # total loss + per output: loss, mse, mse_squared, mse_cubed
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)


# The imports below were added so this snippet runs standalone; they follow
# directly from the API calls used in the class.
import pickle

import numpy as np

from keras import backend as K
from keras.layers import Conv2D, Dense, Flatten, Input
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2


class PolicyValueNet():
    """policy-value network """
    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coefficient of the L2 weight penalty
        self.create_policy_value_net()
        self._loss_train_op()

        if model_file:
            with open(model_file, 'rb') as f:
                net_params = pickle.load(f)
            self.model.set_weights(net_params)

    def create_policy_value_net(self):
        """Create the policy-value network."""
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)
        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results
        self.policy_value = policy_value
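
        # (aside) policy_value returns [act_probs, value]: act_probs has shape
        # (batch, board_width * board_height) from the softmax policy head and
        # value has shape (batch, 1) from the tanh value head.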
        
    def policy_value_fn(self, board):
        """
        Input: a board object.
        Output: a list of (action, probability) tuples for each available
        action, plus the score of the board state.
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]
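
    # Note (not in the original source): the policy softmax spans every board
    # cell; indexing by legal_positions keeps only the legal moves without
    # renormalizing, so callers needing a strict probability distribution over
    # legal moves are assumed to renormalize themselves.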

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c * ||theta||^2
        """
        # build the train op: crossentropy for the policy head, MSE for the
        # value head; the L2 term comes from the kernel regularizers
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))
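
        # (aside) self_entropy reports H = -mean over the batch of
        # sum(p * log p), a diagnostic of how peaked the policy output is;
        # it is monitored, not optimized.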

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy
        
        self.train_step = train_step
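
        # Illustrative usage (hypothetical variable names, not from the
        # original source):
        #     loss, entropy = net.train_step(state_batch, mcts_probs_batch,
        #                                    winner_batch, learning_rate=2e-3)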

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """Save model params to file."""
        net_params = self.get_policy_param()
        with open(model_file, 'wb') as f:
            pickle.dump(net_params, f, protocol=2)
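

# A minimal usage sketch, not part of the original source: the board size,
# file name and random input below are hypothetical.
if __name__ == '__main__':
    net = PolicyValueNet(board_width=8, board_height=8)
    dummy_state = np.random.random((1, 4, 8, 8)).astype('float32')
    act_probs, value = net.policy_value(dummy_state)
    print(act_probs.shape, value.shape)  # expect (1, 64) and (1, 1)
    net.save_model('policy_value_net.model')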