예제 #1
0
    input_validation = tf.keras.preprocessing.sequence.pad_sequences(
        list_tokenized_validation, maxlen=maxlen)

    # Network input layers
    inputs = tf.keras.Input(shape=(maxlen, ), name='embds_input')
    act_input = tf.keras.Input((num_actions, ), name='actions_input')
    if MODEL == 'DRL':
        # Create an environment
        ENV = Environment(input_train, y_train)

        # Actor-Critic

        # Train
        critic_model = Network(num_actions, embeddings_matrix, maxlen=maxlen)
        _ = critic_model(inputs=[inputs, act_input])
        critic_model.compile(loss='mse', optimizer='adam')

        # Decision
        actor_q_model = tf.keras.Model(
            inputs=critic_model.input,
            outputs=critic_model.get_layer('q_outputs').output)

        if MODE == 'train':

            def train(samples):

                if len(samples) < BATCH_SIZE:
                    return
                samples = np.array(samples)
                states, actions, old_q, rewards, next_states = zip(*samples)
                states, actions, old_q, rewards = np.array(states), np.array(actions).reshape(-1, 1),\