# Model-input setup and the start of the DRL (actor-critic) branch.
#
# NOTE(review): this span appears to have lost its original line breaks and
# indentation. Read literally, Python treats everything after the first `#`
# on the line below as a comment, so only the `pad_sequences` call executes.
# The intended statement sequence, as far as it is visible here, is:
#
#   1. Pad `list_tokenized_validation` to `maxlen` with Keras `pad_sequences`
#      and store the result in `input_validation`.
#   2. Declare two Keras inputs: `inputs` with shape (maxlen,) named
#      'embds_input', and `act_input` with shape (num_actions,) named
#      'actions_input'.
#   3. When MODEL == 'DRL':
#        - build `ENV = Environment(input_train, y_train)`;
#        - build `critic_model = Network(num_actions, embeddings_matrix,
#          maxlen=maxlen)`, call it once on `[inputs, act_input]` (result
#          discarded), and compile it with loss='mse', optimizer='adam';
#        - build `actor_q_model`, a `tf.keras.Model` that reuses
#          `critic_model.input` and outputs the critic's 'q_outputs' layer.
#   4. When MODE == 'train', define `train(samples)`, which returns early if
#      fewer than BATCH_SIZE samples are given, converts `samples` with
#      `np.array`, and unzips them into
#      (states, actions, old_q, rewards, next_states).
#
# The final assignment ends on a `\` continuation; its remaining right-hand
# side lies outside this span.
#
# NOTE(review): `np.array(samples)` presumably receives tuples whose fields
# differ in shape (e.g. padded state vectors next to scalar actions/rewards);
# if so, recent NumPy versions reject such ragged input unless dtype=object
# is given - TODO confirm against the caller that fills `samples`.
input_validation = tf.keras.preprocessing.sequence.pad_sequences( list_tokenized_validation, maxlen=maxlen) # Network input layers inputs = tf.keras.Input(shape=(maxlen, ), name='embds_input') act_input = tf.keras.Input((num_actions, ), name='actions_input') if MODEL == 'DRL': # Create an environment ENV = Environment(input_train, y_train) # Actor-Critic # Train critic_model = Network(num_actions, embeddings_matrix, maxlen=maxlen) _ = critic_model(inputs=[inputs, act_input]) critic_model.compile(loss='mse', optimizer='adam') # Decision actor_q_model = tf.keras.Model( inputs=critic_model.input, outputs=critic_model.get_layer('q_outputs').output) if MODE == 'train': def train(samples): if len(samples) < BATCH_SIZE: return samples = np.array(samples) states, actions, old_q, rewards, next_states = zip(*samples) states, actions, old_q, rewards = np.array(states), np.array(actions).reshape(-1, 1),\