예제 #1
0
파일: train.py 프로젝트: L3afMe/NEATCaptcha
def train(height = CAPTCHA_HEIGHT, width = CAPTCHA_WIDTH, y_size = len(CAPTCHA_LIST) * CAPTCHA_LEN):
    """Train the captcha CNN, checkpointing whenever accuracy beats a rising bar.

    Each step runs the optimizer on a batch of 64 samples with
    ``keep_prob`` 0.75. Every 100 steps a fresh batch of 100 samples is
    evaluated with ``keep_prob`` 1.0; when that accuracy exceeds the current
    threshold (starting at 0.95) a checkpoint is written to
    ``./model/captcha.model`` and the threshold is raised by 0.005. Training
    stops once the threshold reaches 1.

    :param height: image height; ``height * width`` is the width of the
        flattened input placeholder and ``(height, width)`` is forwarded to
        ``cnn_graph``.
    :param width: image width (see ``height``).
    :param y_size: width of the label placeholder.
    :return: None. Progress is printed and checkpoints are written to disk.
    """
    acc_rate = 0.95

    x = placeholder(float32, [None, height * width])
    y = placeholder(float32, [None, y_size])
    keep_prob = placeholder(float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    optimizer = optimize_graph(y, y_conv)
    accuracy = accuracy_graph(y, y_conv)
    saver = Saver()
    sess = Session()
    # The loop below is unbounded, so an interrupt or a failing batch is the
    # normal way out for many runs; always release the session's resources.
    try:
        sess.run(global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
            if step % 100 == 0:
                # NOTE(review): evaluation uses another get_next_batch() draw;
                # whether that is held-out data depends on that helper.
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
                print(datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
                if acc > acc_rate:
                    if not isdir('./model'):
                        mkdir('./model')

                    print('Saving to model/captcha.model')
                    saver.save(sess, './model/captcha.model', global_step=step)
                    print('Saved to model/captcha.model')
                    # Raise the bar so the next checkpoint must be better.
                    acc_rate += 0.005
                    if acc_rate >= 1:
                        break
            step += 1
    finally:
        sess.close()
def train_mfmodel_without_ipw(sess: tf.Session,
                              model: MFMODEL,
                              data: str,
                              train: np.ndarray,
                              val: np.ndarray,
                              test: np.ndarray,
                              max_iters: int = 500,
                              batch_size: int = 2**9,
                              model_name: str = 'mf',
                              seed: int = 0) -> Tuple:
    """Train and evaluate the MF model with all propensity scores fixed to 1.

    Every iteration performs one mini-batch update, then records the
    validation loss and the test MSE/MAE. The reported test metrics are those
    of the iteration with the lowest validation loss.

    Args:
        sess: session used for every ``run`` call; it is closed before
            returning.
        model: model exposing ``users``/``items``/``labels``/``scores``
            inputs, ``apply_grads``, ``loss``, ``weighted_mse``, ``mse``,
            ``mae`` and the embedding/bias variables fetched at the end.
        data: not used by this function.
        train: array whose columns 0, 1, 2 are fed as users, items, labels.
        val: validation array with the same column layout.
        test: test array with the same column layout; only the 95% side of a
            ``train_test_split`` of it is evaluated.
        max_iters: number of training iterations.
        batch_size: number of rows sampled (with replacement) per iteration.
        model_name: not used by this function.
        seed: not used by this function; the split and the NumPy RNG are
            seeded from the free name ``rand_seed_val``, which must be
            defined outside this function.

    Returns:
        ``(min_val_loss, test_mse, test_mae, u_emb, i_emb, u_bias, i_bias,
        g_bias)`` where the two test metrics are taken at the iteration of
        minimum validation loss.

    Raises:
        ValueError: from ``np.min`` when ``max_iters`` is 0 (no losses were
            recorded).
    """
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    num_train = train.shape[0]
    # The 5% side of the split is not needed here (no propensity estimation);
    # the split is kept so that evaluation uses the 95% side of `test`.
    _, test = train_test_split(test,
                               test_size=0.95,
                               random_state=rand_seed_val)

    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)

    # We just use 1 as propensity score for all records. Built once with the
    # builtin int: the np.int alias no longer exists in current NumPy.
    unit_scores_batch = np.ones((int(batch_size), 1))
    unit_scores_val = np.ones((len(labels_val), 1))

    # Start training a recommender
    np.random.seed(rand_seed_val)

    for _ in np.arange(max_iters):
        # Sample mini-batch
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, labels_batch = train[idx], labels_train[idx]
        # Update user-item latent factors
        sess.run(
            [model.apply_grads, model.loss, model.weighted_mse],
            feed_dict={
                model.users: train_batch[:, 0],
                model.items: train_batch[:, 1],
                model.labels: labels_batch,
                model.scores: unit_scores_batch
            })
        # Calculate validation loss
        val_loss = sess.run(
            model.loss,
            feed_dict={
                model.users: val[:, 0],
                model.items: val[:, 1],
                model.labels: labels_val,
                model.scores: unit_scores_val
            })
        val_loss_list.append(val_loss)
        # Calculate test loss
        mse_score, mae_score = sess.run(
            [model.mse, model.mae],
            feed_dict={
                model.users: test[:, 0],
                model.items: test[:, 1],
                model.labels: labels_test
            })
        test_mse_list.append(mse_score)
        test_mae_list.append(mae_score)

    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Model selection: report the test metrics at the best validation loss.
    best_iter = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_iter],
            test_mae_list[best_iter], u_emb, i_emb, u_bias,
            i_bias, g_bias)
def train_mfmodel_with_at(sess: tf.Session,
                          model: MFMODEL,
                          mfmodel1: MFMODEL,
                          mfmodel2: MFMODEL,
                          data: str,
                          train: np.ndarray,
                          val: np.ndarray,
                          test: np.ndarray,
                          epsilon: float,
                          pre_iters: int = 500,
                          post_iters: int = 50,
                          post_steps: int = 5,
                          batch_size: int = 2**9,
                          model_name: str = 'naive-at',
                          seed: int = 0) -> Tuple:
    """Train and evaluate an MF model with asymmetric tri-training.

    Pre-training: for ``pre_iters`` iterations, ``model`` is updated with all
    propensity scores set to 1 while ``mfmodel1`` and ``mfmodel2`` are updated
    on independently sampled batches using the estimated propensity scores.

    Pseudo-labelling: for ``post_iters * post_steps`` iterations, random
    (user, item) pairs are scored by ``mfmodel1`` and ``mfmodel2``; pairs whose
    predictions differ by at most ``epsilon`` are kept with ``mfmodel1``'s
    prediction as pseudo-label. All three models are then updated on batches
    of those pairs (scores set to 1), and validation loss and test MSE/MAE of
    ``model`` are recorded.

    Args:
        sess: session used for every ``run`` call; it is closed before
            returning.
        model: final prediction model.
        mfmodel1: first pseudo-labelling model.
        mfmodel2: second pseudo-labelling model.
        data: not used by this function.
        train: array whose columns 0, 1, 2 are fed as users, items, labels.
        val: validation array with the same column layout.
        test: test array with the same column layout; a
            ``train_test_split`` sends its 5% side to ``estimate_pscore`` as
            ``train_mcar`` and its 95% side to evaluation.
        epsilon: maximum absolute prediction gap for a pair to be
            pseudo-labelled.
        pre_iters: number of pre-training iterations.
        post_iters: number of outer pseudo-labelling iterations.
        post_steps: number of inner steps per outer iteration.
        batch_size: number of rows sampled (with replacement) per update.
        model_name: forwarded to ``estimate_pscore``.
        seed: not used by this function; the split and the NumPy RNG are
            seeded from the free name ``rand_seed_val``, which must be
            defined outside this function.

    Returns:
        ``(min_val_loss, test_mse, test_mae, u_emb, i_emb, u_bias, i_bias,
        g_bias)`` where the two test metrics are taken at the pseudo-labelling
        step with minimum validation loss and the remaining entries are the
        fetched variables of ``model``.

    Raises:
        ValueError: from ``np.random.choice`` when no sampled pair satisfies
            the ``epsilon`` criterion, or from ``np.min`` when no
            pseudo-labelling step was run.
    """
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    # Count the num of training data and estimate the propensity scores
    num_train = train.shape[0]
    train_mcar, test = train_test_split(test,
                                        test_size=0.95,
                                        random_state=rand_seed_val)
    pscore_train, pscore_val = estimate_pscore(train=train,
                                               train_mcar=train_mcar,
                                               val=val,
                                               model_name=model_name)
    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)
    # Propensity score of 1 for every record in a batch. Built once with the
    # builtin int: the np.int alias no longer exists in current NumPy.
    unit_scores = np.ones((int(batch_size), 1))

    ### Start training a recommender
    np.random.seed(rand_seed_val)
    ## Start pre-training step
    for _ in np.arange(pre_iters):
        # Sample one independent mini-batch per model
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        idx1 = np.random.choice(np.arange(num_train), size=batch_size)
        idx2 = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, train_batch1, train_batch2 = (train[idx], train[idx1],
                                                   train[idx2])
        labels_batch, labels_batch1, labels_batch2 = (labels_train[idx],
                                                      labels_train[idx1],
                                                      labels_train[idx2])
        pscore_batch1, pscore_batch2 = pscore_train[idx1], pscore_train[idx2]
        # Update user-item latent factors
        sess.run(
            [model.apply_grads, model.loss, model.weighted_mse],
            feed_dict={
                model.users: train_batch[:, 0],
                model.items: train_batch[:, 1],
                model.labels: labels_batch,
                model.scores: unit_scores
            })
        sess.run(
            [mfmodel1.apply_grads, mfmodel1.loss, mfmodel1.weighted_mse],
            feed_dict={
                mfmodel1.users: train_batch1[:, 0],
                mfmodel1.items: train_batch1[:, 1],
                mfmodel1.labels: labels_batch1,
                mfmodel1.scores: pscore_batch1
            })
        sess.run(
            [mfmodel2.apply_grads, mfmodel2.loss, mfmodel2.weighted_mse],
            feed_dict={
                mfmodel2.users: train_batch2[:, 0],
                mfmodel2.items: train_batch2[:, 1],
                mfmodel2.labels: labels_batch2,
                mfmodel2.scores: pscore_batch2
            })

    ## Start pseudo-labeling and final prediction steps
    # Cast to integer to avoid an error
    train = train.astype(int)
    val = val.astype(int)

    # Enumerate every (user, item) pair up to the largest ids seen in train.
    all_data = pd.DataFrame(
        np.zeros((train[:, 0].max() + 1, train[:, 1].max() + 1)))
    all_data = all_data.stack().reset_index().values[:, :2]
    for _ in np.arange(post_iters):
        for _ in np.arange(post_steps):
            idx = np.random.choice(np.arange(all_data.shape[0]),
                                   size=num_train * 5)
            batch_data = all_data[idx]
            # Create pseudo-labeled dataset
            preds1 = sess.run(mfmodel1.preds,
                              feed_dict={
                                  mfmodel1.users: batch_data[:, 0],
                                  mfmodel1.items: batch_data[:, 1]
                              })
            preds2 = sess.run(mfmodel2.preds,
                              feed_dict={
                                  mfmodel2.users: batch_data[:, 0],
                                  mfmodel2.items: batch_data[:, 1]
                              })

            # Extract records whose prediction difference between model1 and
            # model2 is less than or equal to epsilon
            agree = np.array(np.abs(preds1 - preds2) <= epsilon).flatten()
            target_data = np.c_[batch_data[agree, 0], batch_data[agree, 1],
                                preds1[agree]]
            num_target = target_data.shape[0]
            # Sample one independent mini-batch of pseudo-labels per model
            idx = np.random.choice(np.arange(num_target), size=batch_size)
            idx1 = np.random.choice(np.arange(num_target), size=batch_size)
            idx2 = np.random.choice(np.arange(num_target), size=batch_size)
            pseudo_train_batch = target_data[idx]
            pseudo_train_batch1 = target_data[idx1]
            pseudo_train_batch2 = target_data[idx2]
            # Update user-item latent factors of the final prediction model
            sess.run(
                [model.apply_grads, model.loss],
                feed_dict={
                    model.users: pseudo_train_batch[:, 0],
                    model.items: pseudo_train_batch[:, 1],
                    model.labels: np.expand_dims(pseudo_train_batch[:, 2], 1),
                    model.scores: unit_scores
                })
            # Calculate validation loss during the pseudo-labeling step
            val_loss = sess.run(
                model.loss,
                feed_dict={
                    model.users: val[:, 0],
                    model.items: val[:, 1],
                    model.scores: pscore_val,
                    model.labels: labels_val
                })
            # Calculate test losses during the pseudo-labeling step
            mse_score, mae_score = sess.run(
                [model.mse, model.mae],
                feed_dict={
                    model.users: test[:, 0],
                    model.items: test[:, 1],
                    model.labels: labels_test
                })
            val_loss_list.append(val_loss)
            test_mse_list.append(mse_score)
            test_mae_list.append(mae_score)
            # Re-update the model parameters of pre-trained models using
            # pseudo-labeled data
            sess.run(mfmodel1.apply_grads,
                     feed_dict={
                         mfmodel1.users:
                         pseudo_train_batch1[:, 0],
                         mfmodel1.items:
                         pseudo_train_batch1[:, 1],
                         mfmodel1.labels:
                         np.expand_dims(pseudo_train_batch1[:, 2], 1),
                         mfmodel1.scores:
                         unit_scores
                     })
            sess.run(mfmodel2.apply_grads,
                     feed_dict={
                         mfmodel2.users:
                         pseudo_train_batch2[:, 0],
                         mfmodel2.items:
                         pseudo_train_batch2[:, 1],
                         mfmodel2.labels:
                         np.expand_dims(pseudo_train_batch2[:, 2], 1),
                         mfmodel2.scores:
                         unit_scores
                     })

    # Obtain user-item embeddings
    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Model selection: report the test metrics at the best validation loss.
    best_step = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_step],
            test_mae_list[best_step], u_emb, i_emb, u_bias,
            i_bias, g_bias)
예제 #4
0
class PpoGraph:
    """
    Proximal Policy Optimization (PPO) implementation in tensorflow.
    https://arxiv.org/abs/1707.06347 ("Proximal Policy Optimization Algorithms", J. Schulman et al, 2017)

    This class encapsulates all tensorflow interactions: it builds its own
    graph, owns the session bound to that graph, and exposes prediction,
    training, pre-training and save/restore methods on top of them.
    """
    def __init__(self, observation_size, net_arch, initializer, activation,
                 clip_range, value_coef, entropy_coef, learning_rate,
                 pre_training_learning_rate, action_bounds, policy):
        """
        Build the graph, open a session on it and initialise all variables.

        :param observation_size: size of the second dimension enforced on the
            decoded observation tensor
        :param net_arch: forwarded to ``net_core`` for both the policy and the
            value network
        :param initializer: forwarded to ``net_core``; also the kernel
            initializer of the value head
        :param activation: forwarded to ``net_core``
        :param clip_range: the probability ratio is clipped to
            ``[1 - clip_range, 1 + clip_range]``
        :param value_coef: weight of the value loss in the total loss
        :param entropy_coef: weight of the entropy loss in the total loss
        :param learning_rate: default value of the learning-rate placeholder
            used by the Adam optimizer
        :param pre_training_learning_rate: default value of the learning-rate
            placeholder used by the pre-training gradient-descent optimizer
        :param action_bounds: ``[low, high]`` used to clip the action;
            ``[0.0, 1.5]`` when None
        :param policy: policy object providing ``construct``, ``action``,
            ``neg_logits``, ``neg_logits_from_actions``, ``entropy``,
            ``dist_params`` and ``det_action``; ``GaussFull()`` when None
        """
        """Set class constants"""
        self.observation_size = observation_size
        self.net_arch = net_arch
        self.initializer = initializer
        self.activation = activation
        self.clip_range = clip_range
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef

        if action_bounds is None:
            action_bounds = [0.0, 1.5]
        self.action_bounds = action_bounds
        self.learning_rate = learning_rate
        self.pre_training_learning_rate = pre_training_learning_rate

        if policy is None:
            policy = GaussFull()
        self.policy = policy
        """Set up the tensorflow graph"""
        self.graph = Graph()

        with self.graph.as_default():
            self.sess = Session(graph=self.graph)
            """ core """
            # placeholders
            self.observation_string_ph = placeholder(
                shape=(None, 1), dtype=string, name="observation_string_ph")
            self.action_ph = placeholder(dtype=float32,
                                         shape=(None, 1),
                                         name="action_ph")
            self.old_neg_logits = placeholder(dtype=float32,
                                              shape=(None, 1),
                                              name="old_neg_logits")
            self.advantage_ph = placeholder(dtype=float32,
                                            shape=(None, 1),
                                            name="advantage_ph")
            self.value_target_ph = placeholder(dtype=float32,
                                               shape=(None, 1),
                                               name="value_target_ph")
            # learning rate tensors: the constructor values are the defaults,
            # a different value can be fed per call
            self.learning_rate_ph = placeholder_with_default(
                input=self.learning_rate, shape=())
            self.pre_training_learning_rate_ph = placeholder_with_default(
                input=self.pre_training_learning_rate, shape=())

            # observation tensor: string observations are base64-decoded into
            # raw float32 values. '/' -> '_' and '+' -> '-' are substituted
            # first; presumably this converts standard base64 into the
            # URL-safe alphabet decode_base64 expects -- TODO confirm.
            replaced1 = regex_replace(self.observation_string_ph, "/", "_")
            replaced2 = regex_replace(replaced1, r"\+", "-")
            byte_tensor = decode_base64(replaced2)
            decoded = decode_raw(byte_tensor, out_type=float32)
            # drop axis 1, which comes from the (None, 1) string placeholder
            squeezed = squeeze(decoded, axis=1)
            # observation_input is also fed directly (bypassing the string
            # decoding) by predict() and by the train steps when
            # obs_as_string is False
            self.observation_input = ensure_shape(
                squeezed,
                shape=(None, self.observation_size),
                name="observation_input")

            # policy net
            latent_policy = net_core(self.observation_input, self.net_arch,
                                     self.initializer, self.activation)
            self.policy.construct(latent_policy=latent_policy)

            self.clipped_action = clip_by_value(
                cast(self.policy.action, float32), self.action_bounds[0],
                self.action_bounds[1], "clipped_action")

            # value net: a second net_core call on the same input, followed by
            # a single linear output unit
            latent_value = net_core(self.observation_input, self.net_arch,
                                    self.initializer, self.activation)
            self.value = identity(
                input=Dense(units=1,
                            activation=None,
                            kernel_initializer=self.initializer)(latent_value),
                name="value")
            """loss calculation"""
            # policy loss: ratio = exp(old_neg_logits - neg_logits)
            # NOTE(review): this is the new/old probability ratio provided
            # neg_logits are negative log-probabilities -- confirm against the
            # policy implementation.
            self.neg_logits = self.policy.neg_logits_from_actions(
                self.action_ph)
            ratio = exp(self.old_neg_logits - self.neg_logits)

            # advantages are standardized per fed batch; 1e-8 guards against
            # division by a zero standard deviation
            standardized_adv = (self.advantage_ph - reduce_mean(
                self.advantage_ph)) / (reduce_std(self.advantage_ph) + 1e-8)
            raw_policy_loss = -standardized_adv * ratio
            clipped_policy_loss = -standardized_adv * clip_by_value(
                ratio, 1 - self.clip_range, 1 + self.clip_range)
            # element-wise maximum of the two losses, averaged over the batch
            self.policy_loss = reduce_mean(
                maximum(raw_policy_loss, clipped_policy_loss))

            # value loss
            self.value_loss = mean_squared_error(self.value_target_ph,
                                                 self.value)

            # entropy loss: negated so that minimizing it increases entropy
            self.entropy_loss = -reduce_mean(self.policy.entropy)

            # total loss
            self.total_loss = self.policy_loss + self.value_coef * self.value_loss + self.entropy_coef * self.entropy_loss

            # optimizer
            optimizer = AdamOptimizer(learning_rate=self.learning_rate_ph)

            # training ops
            self.training_op = optimizer.minimize(self.total_loss)

            # pre training: regress the policy's distribution parameters onto
            # externally supplied targets with plain gradient descent
            self.dist_param_target_ph = placeholder(
                dtype=float32,
                shape=(None, self.policy.dist_params.shape[1]),
                name="dist_param_label_ph")
            self.pre_training_loss = mean_squared_error(
                self.dist_param_target_ph, self.policy.dist_params)
            pre_training_optimizer = GradientDescentOptimizer(
                learning_rate=self.pre_training_learning_rate_ph)
            self.pre_training_op = pre_training_optimizer.minimize(
                self.pre_training_loss)
            """utility nodes"""
            # inspect model weights
            self.trainable_variables = trainable_variables()

            # saver used by save() and restore()
            self.saver = Saver()

            # tensorboard summaries
            self.summary = merge([
                histogram("values", self.value),
                histogram("advantages", standardized_adv),
                histogram("actions", self.clipped_action),
                histogram("det_actions",
                          replace_nan(self.policy.det_action, 0.0)),
                histogram("value_targets", self.value_target_ph),
                scalar("policy_loss", self.policy_loss),
                scalar("value_loss", self.value_loss),
                scalar("entropy_loss", self.entropy_loss)
            ])

            self.pre_summary = merge([
                histogram("pretraining_actions", self.clipped_action),
                scalar("pretraining_loss", self.pre_training_loss)
            ])

            # initialization
            init = global_variables_initializer()
            self.sess.run(init)

    def predict(self, observation):
        """
        Evaluate the policy and value networks for the given observations.

        The observation is fed to ``observation_input`` directly, i.e. as
        already decoded values rather than as base64 strings.

        :param observation: input environment state
        :return: clipped action, distribution parameters of the policy,
            the policy's ``neg_logits`` tensor value, value prediction
        """

        fetches = [
            self.clipped_action, self.policy.dist_params,
            self.policy.neg_logits, self.value
        ]
        action, dist_params, neg_logit, value = self.sess.run(
            fetches, {self.observation_input: observation})

        return action, dist_params, neg_logit, value

    def train_step(self,
                   observations,
                   actions,
                   old_neg_logits,
                   value_targets,
                   advantages,
                   obs_as_string=False,
                   learning_rate=None,
                   additional_fetches=None):
        """
        Run one optimizer step on the total PPO loss.

        :param observations: fed to the string placeholder when
            ``obs_as_string`` is True, otherwise to ``observation_input``
        :param actions: fed to ``action_ph``
        :param old_neg_logits: fed to the ``old_neg_logits`` placeholder
        :param value_targets: fed to ``value_target_ph``
        :param advantages: fed to ``advantage_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default learning rate for this
            step when not None
        :param additional_fetches: list of extra tensors/ops to evaluate in
            the same run, or None
        :return: list with the results of the training op, the merged
            summary, and then any additional fetches, in that order
        """
        fetches = [self.training_op, self.summary] + (
            [] if additional_fetches is None else additional_fetches)
        obs_tensor = self.observation_string_ph if obs_as_string else self.observation_input
        feed_dict = {
            obs_tensor: observations,
            self.action_ph: actions,
            self.old_neg_logits: old_neg_logits,
            self.value_target_ph: value_targets,
            self.advantage_ph: advantages
        }

        if learning_rate is not None:
            feed_dict.update({self.learning_rate_ph: learning_rate})

        return self.sess.run(fetches, feed_dict)

    def pre_train_step(self,
                       observations,
                       dist_param_targets,
                       obs_as_string=False,
                       learning_rate=None,
                       additional_fetches=None):
        """
        Run one gradient-descent step on the pre-training loss.

        :param observations: fed to the string placeholder when
            ``obs_as_string`` is True, otherwise to ``observation_input``
        :param dist_param_targets: fed to ``dist_param_target_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default pre-training learning
            rate for this step when not None
        :param additional_fetches: list of extra tensors/ops to evaluate in
            the same run, or None
        :return: list with the results of the pre-training op, the merged
            pre-training summary, and then any additional fetches, in that
            order
        """
        fetches = [self.pre_training_op, self.pre_summary] + (
            [] if additional_fetches is None else additional_fetches)
        obs_tensor = self.observation_string_ph if obs_as_string else self.observation_input
        feed_dict = {
            obs_tensor: observations,
            self.dist_param_target_ph: dist_param_targets
        }

        if learning_rate is not None:
            feed_dict.update(
                {self.pre_training_learning_rate_ph: learning_rate})

        return self.sess.run(fetches, feed_dict)

    def simple_save(self, path):
        """
        Export the model to ``path`` via ``simple_save`` with input "obs"
        (``observation_input``) and output "action" (``clipped_action``).

        :param path: export location
        """
        with self.graph.as_default():
            simple_save(self.sess,
                        path,
                        inputs={"obs": self.observation_input},
                        outputs={"action": self.clipped_action})

    def save(self, path):
        """
        Write the session's variables to ``path`` using the graph's saver.

        :param path: save path passed to the saver
        """
        with self.graph.as_default():
            self.saver.save(sess=self.sess, save_path=path)

    def restore(self, path):
        """
        Load variables from ``path`` into the session using the graph's saver.

        :param path: save path passed to the saver
        """
        with self.graph.as_default():
            self.saver.restore(sess=self.sess, save_path=path)

    def close_session(self):
        """Close the tensorflow session owned by this object."""
        self.sess.close()

    def get_trainable_variables(self):
        """
        :return: current values of the graph's trainable variables, as
            evaluated by the session
        """
        return self.sess.run(self.trainable_variables)