def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH,
          y_size=len(CAPTCHA_LIST) * CAPTCHA_LEN):
    """Train the captcha CNN, checkpointing whenever accuracy beats a rising bar.

    The loop runs one optimizer step per iteration on a batch of 64 samples.
    Every 100 steps it evaluates accuracy on a fresh batch of 100 samples; if
    the accuracy exceeds the current threshold (initially 0.95) the model is
    saved under ``./model/captcha.model`` and the threshold is raised by
    0.005. Training stops once the threshold reaches 1 or more.

    :param height: image height; the input placeholder is ``height * width``
        wide and ``(height, width)`` is passed to ``cnn_graph``.
    :param width: image width (see ``height``).
    :param y_size: width of the label placeholder.
    """
    acc_rate = 0.95

    # Graph inputs.
    x = placeholder(float32, [None, height * width])
    y = placeholder(float32, [None, y_size])
    keep_prob = placeholder(float32)

    # Model, training op and evaluation op.
    y_conv = cnn_graph(x, keep_prob, (height, width))
    optimizer = optimize_graph(y, y_conv)
    accuracy = accuracy_graph(y, y_conv)

    saver = Saver()
    sess = Session()
    # try/finally so the session is released even when training is
    # interrupted (exception, Ctrl-C) instead of only on the normal exit path.
    try:
        sess.run(global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer,
                     feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})

            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy,
                               feed_dict={x: batch_x_test,
                                          y: batch_y_test,
                                          keep_prob: 1.0})
                print(datetime.now().strftime('%c'), ' step:', step,
                      ' accuracy:', acc)

                if acc > acc_rate:
                    if not isdir('./model'):
                        mkdir('./model')
                    print('Saving to model/captcha.model')
                    saver.save(sess, './model/captcha.model',
                               global_step=step)
                    print('Saved to model/captcha.model')
                    # Raise the bar for the next checkpoint.
                    # NOTE(review): repeated float addition may not land
                    # exactly on 1.0 - confirm the intended stop condition.
                    acc_rate += 0.005
                    if acc_rate >= 1:
                        break
            step += 1
    finally:
        sess.close()
def train_mfmodel_without_ipw(sess: tf.Session,
                              model: MFMODEL,
                              data: str,
                              train: np.ndarray,
                              val: np.ndarray,
                              test: np.ndarray,
                              max_iters: int = 500,
                              batch_size: int = 2**9,
                              model_name: str = 'mf',
                              seed: int = 0) -> Tuple:
    """Train and evaluate the MF model without inverse propensity weighting.

    Every record is fed a propensity score of 1. Columns 0, 1 and 2 of
    ``train``/``val``/``test`` are fed to the model as users, items and
    labels respectively.

    Args:
        sess: session used for every graph execution; it is closed before
            this function returns.
        model: model exposing ``users``, ``items``, ``labels``, ``scores``
            inputs and ``apply_grads``, ``loss``, ``weighted_mse``, ``mse``,
            ``mae`` and embedding/bias outputs.
        data: not used in this function.
        train: training records.
        val: validation records.
        test: test records; 5% is split off and discarded, the remaining
            95% is used for evaluation.
        max_iters: number of mini-batch updates.
        batch_size: number of records sampled (with replacement) per update.
        model_name: not used in this function.
        seed: not used in this function.
            NOTE(review): seeding uses ``rand_seed_val``, which is not
            defined in this function - confirm whether ``seed`` was meant.

    Returns:
        Tuple of (minimum validation loss, test MSE and test MAE at the
        iteration with minimum validation loss, user embeddings, item
        embeddings, user bias, item bias, global bias).
    """
    train_loss_list = []
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    # Count the num of training data
    num_train = train.shape[0]
    # The held-out 5% is not needed here; only the 95% remainder is evaluated.
    _, test = train_test_split(test,
                               test_size=0.95,
                               random_state=rand_seed_val)

    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)

    # We just use 1 as propensity score for all records. Builtin int()/len()
    # replace np.int, which was removed from NumPy in 1.24.
    batch_scores = np.ones((int(batch_size), 1))
    val_scores = np.ones((len(labels_val), 1))

    # Start training a recommender
    np.random.seed(rand_seed_val)
    for _ in np.arange(max_iters):
        # Sample mini-batch
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, labels_batch = train[idx], labels_train[idx]

        # Update user-item latent factors
        _, loss, _ = sess.run(
            [model.apply_grads, model.loss, model.weighted_mse],
            feed_dict={
                model.users: train_batch[:, 0],
                model.items: train_batch[:, 1],
                model.labels: labels_batch,
                model.scores: batch_scores
            })
        train_loss_list.append(loss)

        # Calculate validation loss
        val_loss = sess.run(model.loss,
                            feed_dict={
                                model.users: val[:, 0],
                                model.items: val[:, 1],
                                model.labels: labels_val,
                                model.scores: val_scores
                            })
        val_loss_list.append(val_loss)

        # Calculate test loss
        mse_score, mae_score = sess.run(
            [model.mse, model.mae],
            feed_dict={
                model.users: test[:, 0],
                model.items: test[:, 1],
                model.labels: labels_test
            })
        test_mse_list.append(mse_score)
        test_mae_list.append(mae_score)

    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Report the test metrics at the iteration with the best validation loss.
    best_iter = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_iter],
            test_mae_list[best_iter], u_emb, i_emb, u_bias, i_bias, g_bias)
def train_mfmodel_with_at(sess: tf.Session,
                          model: MFMODEL,
                          mfmodel1: MFMODEL,
                          mfmodel2: MFMODEL,
                          data: str,
                          train: np.ndarray,
                          val: np.ndarray,
                          test: np.ndarray,
                          epsilon: float,
                          pre_iters: int = 500,
                          post_iters: int = 50,
                          post_steps: int = 5,
                          batch_size: int = 2**9,
                          model_name: str = 'naive-at',
                          seed: int = 0) -> Tuple:
    """Train and evaluate an MF model with asymmetric tri-training.

    Two phases are run:

    1. Pre-training: ``model`` is updated with all-ones propensity scores,
       while ``mfmodel1`` and ``mfmodel2`` are updated with the scores
       returned by ``estimate_pscore``; each model gets its own mini-batch.
    2. Pseudo-labelling: user-item pairs are sampled from the full
       (max user id + 1) x (max item id + 1) grid; pairs on which
       ``mfmodel1`` and ``mfmodel2`` predictions differ by at most
       ``epsilon`` are labelled with the ``mfmodel1`` prediction, and all
       three models are updated on batches of those pseudo-labelled pairs
       with all-ones scores.

    Columns 0, 1 and 2 of ``train``/``val``/``test`` are used as users,
    items and labels respectively.

    Args:
        sess: session used for every graph execution; it is closed before
            this function returns.
        model: final prediction model.
        mfmodel1: first pseudo-labelling model.
        mfmodel2: second pseudo-labelling model.
        data: not used in this function.
        train: training records.
        val: validation records.
        test: test records; 5% is split off and handed to
            ``estimate_pscore``, the remaining 95% is used for evaluation.
        epsilon: maximum absolute prediction difference for a pair to be
            pseudo-labelled.
        pre_iters: number of pre-training updates.
        post_iters: number of outer pseudo-labelling iterations.
        post_steps: number of pseudo-labelling steps per outer iteration.
        batch_size: number of records sampled (with replacement) per update.
        model_name: forwarded to ``estimate_pscore``.
        seed: not used in this function.
            NOTE(review): seeding uses ``rand_seed_val``, which is not
            defined in this function - confirm whether ``seed`` was meant.

    Returns:
        Tuple of (minimum validation loss, test MSE and test MAE at the
        step with minimum validation loss, user embeddings, item embeddings,
        user bias, item bias, global bias) of ``model``.
    """
    train_loss_list = []
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    # Count the num of training data and estimate the propensity scores
    num_train = train.shape[0]
    train_mcar, test = train_test_split(test,
                                        test_size=0.95,
                                        random_state=rand_seed_val)
    pscore_train, pscore_val = estimate_pscore(train=train,
                                               train_mcar=train_mcar,
                                               val=val,
                                               model_name=model_name)
    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)

    # All-ones propensity scores, shared by every unweighted update below.
    # Builtin int() replaces np.int, which was removed from NumPy in 1.24.
    unit_scores = np.ones((int(batch_size), 1))

    ### Start training a recommender
    np.random.seed(rand_seed_val)

    ## Start pre-training step
    for _ in np.arange(pre_iters):
        # Sample mini-batch (one independent draw per model)
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        idx1 = np.random.choice(np.arange(num_train), size=batch_size)
        idx2 = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, train_batch1, train_batch2 = (train[idx], train[idx1],
                                                   train[idx2])
        labels_batch, labels_batch1, labels_batch2 = (labels_train[idx],
                                                      labels_train[idx1],
                                                      labels_train[idx2])
        pscore_batch1, pscore_batch2 = pscore_train[idx1], pscore_train[idx2]

        # Update user-item latent factors
        sess.run([model.apply_grads, model.loss, model.weighted_mse],
                 feed_dict={
                     model.users: train_batch[:, 0],
                     model.items: train_batch[:, 1],
                     model.labels: labels_batch,
                     model.scores: unit_scores
                 })
        sess.run(
            [mfmodel1.apply_grads, mfmodel1.loss, mfmodel1.weighted_mse],
            feed_dict={
                mfmodel1.users: train_batch1[:, 0],
                mfmodel1.items: train_batch1[:, 1],
                mfmodel1.labels: labels_batch1,
                mfmodel1.scores: pscore_batch1
            })
        sess.run(
            [mfmodel2.apply_grads, mfmodel2.loss, mfmodel2.weighted_mse],
            feed_dict={
                mfmodel2.users: train_batch2[:, 0],
                mfmodel2.items: train_batch2[:, 1],
                mfmodel2.labels: labels_batch2,
                mfmodel2.scores: pscore_batch2
            })

    ## Start pseudo-labeling and final prediction steps
    # Cast to integer to avoid an error
    train = train.astype(int)
    val = val.astype(int)

    # Enumerate every (user, item) pair of the full grid.
    all_data = pd.DataFrame(
        np.zeros((train[:, 0].max() + 1, train[:, 1].max() + 1)))
    all_data = all_data.stack().reset_index().values[:, :2]

    for _ in np.arange(post_iters):
        for _ in np.arange(post_steps):
            idx = np.random.choice(np.arange(all_data.shape[0]),
                                   size=num_train * 5)
            batch_data = all_data[idx]

            # Create pseudo-labeled dataset
            preds1 = sess.run(mfmodel1.preds,
                              feed_dict={
                                  mfmodel1.users: batch_data[:, 0],
                                  mfmodel1.items: batch_data[:, 1]
                              })
            preds2 = sess.run(mfmodel2.preds,
                              feed_dict={
                                  mfmodel2.users: batch_data[:, 0],
                                  mfmodel2.items: batch_data[:, 1]
                              })

            # Extract records whose prediction difference between model1
            # and model2 is less than or equal to epsilon
            idx = np.array(np.abs(preds1 - preds2) <= epsilon).flatten()
            target_users, target_items, pseudo_labels = (batch_data[idx, 0],
                                                         batch_data[idx, 1],
                                                         preds1[idx])
            target_data = np.c_[target_users, target_items, pseudo_labels]

            # Store information during the pseudo-labeling step
            # NOTE(review): if no pair passes the epsilon filter the
            # sampling below raises ValueError - confirm this is acceptable.
            num_target = target_data.shape[0]

            # Sample mini-batch for the pseudo-labeling step
            idx = np.random.choice(np.arange(num_target), size=batch_size)
            idx1 = np.random.choice(np.arange(num_target), size=batch_size)
            idx2 = np.random.choice(np.arange(num_target), size=batch_size)
            pseudo_train_batch = target_data[idx]
            pseudo_train_batch1 = target_data[idx1]
            pseudo_train_batch2 = target_data[idx2]

            # Update user-item latent factors of the final prediction model
            _, train_loss = sess.run(
                [model.apply_grads, model.loss],
                feed_dict={
                    model.users: pseudo_train_batch[:, 0],
                    model.items: pseudo_train_batch[:, 1],
                    model.labels: np.expand_dims(pseudo_train_batch[:, 2], 1),
                    model.scores: unit_scores
                })

            # Calculate validation loss during the pseudo-labeling step
            val_loss = sess.run(model.loss,
                                feed_dict={
                                    model.users: val[:, 0],
                                    model.items: val[:, 1],
                                    model.scores: pscore_val,
                                    model.labels: labels_val
                                })

            # Calculate test losses during the pseudo-labeling step
            mse_score, mae_score = sess.run(
                [model.mse, model.mae],
                feed_dict={
                    model.users: test[:, 0],
                    model.items: test[:, 1],
                    model.labels: labels_test
                })

            train_loss_list.append(train_loss)
            val_loss_list.append(val_loss)
            test_mse_list.append(mse_score)
            test_mae_list.append(mae_score)

            # Re-update the model parameters of pre-trained models using
            # pseudo-labeled data
            sess.run(mfmodel1.apply_grads,
                     feed_dict={
                         mfmodel1.users: pseudo_train_batch1[:, 0],
                         mfmodel1.items: pseudo_train_batch1[:, 1],
                         mfmodel1.labels:
                         np.expand_dims(pseudo_train_batch1[:, 2], 1),
                         mfmodel1.scores: unit_scores
                     })
            sess.run(mfmodel2.apply_grads,
                     feed_dict={
                         mfmodel2.users: pseudo_train_batch2[:, 0],
                         mfmodel2.items: pseudo_train_batch2[:, 1],
                         mfmodel2.labels:
                         np.expand_dims(pseudo_train_batch2[:, 2], 1),
                         mfmodel2.scores: unit_scores
                     })

    # Obtain user-item embeddings
    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Report the test metrics at the step with the best validation loss.
    best_step = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_step],
            test_mae_list[best_step], u_emb, i_emb, u_bias, i_bias, g_bias)
class PpoGraph:
    """
    Proximal Policy Implementation in tensorflow.
    https://arxiv.org/abs/1707.06347
    ("Proximal Policy Optimization Algorithms", J. Schulman et al, 2017)

    This class encapsulates all tensorflow interactions
    """

    def __init__(self, observation_size, net_arch, initializer, activation,
                 clip_range, value_coef, entropy_coef, learning_rate,
                 pre_training_learning_rate, action_bounds, policy):
        """
        Build the graph, create the session and initialise all variables.

        :param observation_size: second dimension enforced on the decoded
            observation tensor
        :param net_arch: forwarded to ``net_core`` for policy and value nets
        :param initializer: forwarded to ``net_core``; also the kernel
            initializer of the value head
        :param activation: forwarded to ``net_core``
        :param clip_range: the probability ratio is clipped to
            ``[1 - clip_range, 1 + clip_range]``
        :param value_coef: weight of the value loss in the total loss
        :param entropy_coef: weight of the entropy loss in the total loss
        :param learning_rate: default value of the Adam learning rate
        :param pre_training_learning_rate: default value of the pre-training
            gradient-descent learning rate
        :param action_bounds: ``[low, high]`` bounds used to clip the action;
            ``[0.0, 1.5]`` when None
        :param policy: policy object; ``GaussFull()`` when None
        """
        # Set class constants
        self.observation_size = observation_size
        self.net_arch = net_arch
        self.initializer = initializer
        self.activation = activation
        self.clip_range = clip_range
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef
        if action_bounds is None:
            action_bounds = [0.0, 1.5]
        self.action_bounds = action_bounds
        self.learning_rate = learning_rate
        self.pre_training_learning_rate = pre_training_learning_rate
        if policy is None:
            policy = GaussFull()
        self.policy = policy

        # Set up the tensorflow graph
        self.graph = Graph()
        with self.graph.as_default():
            self.sess = Session(graph=self.graph)

            # --- core ---
            # place holders
            self.observation_string_ph = placeholder(
                shape=(None, 1), dtype=string, name="observation_string_ph")
            self.action_ph = placeholder(dtype=float32,
                                         shape=(None, 1),
                                         name="action_ph")
            self.old_neg_logits = placeholder(dtype=float32,
                                              shape=(None, 1),
                                              name="old_neg_logits")
            self.advantage_ph = placeholder(dtype=float32,
                                            shape=(None, 1),
                                            name="advantage_ph")
            self.value_target_ph = placeholder(dtype=float32,
                                               shape=(None, 1),
                                               name="value_target_ph")

            # learning rate tensors (overridable per step via feed_dict)
            self.learning_rate_ph = placeholder_with_default(
                input=self.learning_rate, shape=())
            self.pre_training_learning_rate_ph = placeholder_with_default(
                input=self.pre_training_learning_rate, shape=())

            # observation tensor: '/' -> '_' and '+' -> '-' are substituted
            # before decode_base64, then the bytes are read as float32
            replaced1 = regex_replace(self.observation_string_ph, "/", "_")
            replaced2 = regex_replace(replaced1, r"\+", "-")
            byte_tensor = decode_base64(replaced2)
            decoded = decode_raw(byte_tensor, out_type=float32)
            squeezed = squeeze(decoded, axis=1)
            self.observation_input = ensure_shape(
                squeezed,
                shape=(None, self.observation_size),
                name="observation_input")

            # policy net
            latent_policy = net_core(self.observation_input, self.net_arch,
                                     self.initializer, self.activation)
            self.policy.construct(latent_policy=latent_policy)
            self.clipped_action = clip_by_value(
                cast(self.policy.action, float32), self.action_bounds[0],
                self.action_bounds[1], "clipped_action")

            # value net (separate net_core call from the policy net)
            latent_value = net_core(self.observation_input, self.net_arch,
                                    self.initializer, self.activation)
            self.value = identity(
                input=Dense(units=1,
                            activation=None,
                            kernel_initializer=self.initializer)(latent_value),
                name="value")

            # --- loss calculation ---
            # policy loss
            self.neg_logits = self.policy.neg_logits_from_actions(
                self.action_ph)
            ratio = exp(self.old_neg_logits - self.neg_logits)

            # advantages are standardised; 1e-8 guards against zero std
            standardized_adv = (self.advantage_ph - reduce_mean(
                self.advantage_ph)) / (reduce_std(self.advantage_ph) + 1e-8)
            raw_policy_loss = -standardized_adv * ratio
            clipped_policy_loss = -standardized_adv * clip_by_value(
                ratio, 1 - self.clip_range, 1 + self.clip_range)
            self.policy_loss = reduce_mean(
                maximum(raw_policy_loss, clipped_policy_loss))

            self.value_loss = mean_squared_error(self.value_target_ph,
                                                 self.value)

            # entropy loss
            self.entropy_loss = -reduce_mean(self.policy.entropy)

            # total loss
            self.total_loss = (self.policy_loss +
                               self.value_coef * self.value_loss +
                               self.entropy_coef * self.entropy_loss)

            # optimizer
            optimizer = AdamOptimizer(learning_rate=self.learning_rate_ph)

            # training ops
            self.training_op = optimizer.minimize(self.total_loss)

            # pre training: regress the distribution parameters onto targets
            self.dist_param_target_ph = placeholder(
                dtype=float32,
                shape=(None, self.policy.dist_params.shape[1]),
                name="dist_param_label_ph")
            self.pre_training_loss = mean_squared_error(
                self.dist_param_target_ph, self.policy.dist_params)
            pre_training_optimizer = GradientDescentOptimizer(
                learning_rate=self.pre_training_learning_rate_ph)
            self.pre_training_op = pre_training_optimizer.minimize(
                self.pre_training_loss)

            # --- utility nodes ---
            # inspect model weights
            self.trainable_variables = trainable_variables()

            # saver
            self.saver = Saver()

            # tensorboard summaries
            self.summary = merge([
                histogram("values", self.value),
                histogram("advantages", standardized_adv),
                histogram("actions", self.clipped_action),
                histogram("det_actions",
                          replace_nan(self.policy.det_action, 0.0)),
                histogram("value_targets", self.value_target_ph),
                scalar("policy_loss", self.policy_loss),
                scalar("value_loss", self.value_loss),
                scalar("entropy_loss", self.entropy_loss)
            ])

            self.pre_summary = merge([
                histogram("pretraining_actions", self.clipped_action),
                scalar("pretraining_loss", self.pre_training_loss)
            ])

            # initialization
            init = global_variables_initializer()
            self.sess.run(init)

    def predict(self, observation):
        """
        :param observation: input environment state, fed to the decoded
            observation tensor
        :return: clipped action, distribution parameters, negative log dist
            value, value prediction
        """
        fetches = [
            self.clipped_action, self.policy.dist_params,
            self.policy.neg_logits, self.value
        ]
        action, dist_params, neg_logit, value = self.sess.run(
            fetches, {self.observation_input: observation})
        return action, dist_params, neg_logit, value

    def train_step(self,
                   observations,
                   actions,
                   old_neg_logits,
                   value_targets,
                   advantages,
                   obs_as_string=False,
                   learning_rate=None,
                   additional_fetches=None):
        """
        Run one optimisation step on the total loss.

        :param observations: fed to the string placeholder when
            ``obs_as_string`` is true, otherwise to the decoded observation
            tensor
        :param actions: fed to ``action_ph``
        :param old_neg_logits: fed to the ``old_neg_logits`` placeholder
        :param value_targets: fed to ``value_target_ph``
        :param advantages: fed to ``advantage_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default learning rate when given
        :param additional_fetches: optional iterable of extra fetches
            appended after the training op and the summary
        :return: result of ``sess.run`` for
            ``[training_op, summary, *additional_fetches]``
        """
        # list(...) accepts any iterable (e.g. a tuple), not just a list.
        extra = [] if additional_fetches is None else list(additional_fetches)
        fetches = [self.training_op, self.summary] + extra
        obs_tensor = (self.observation_string_ph
                      if obs_as_string else self.observation_input)
        feed_dict = {
            obs_tensor: observations,
            self.action_ph: actions,
            self.old_neg_logits: old_neg_logits,
            self.value_target_ph: value_targets,
            self.advantage_ph: advantages
        }
        if learning_rate is not None:
            feed_dict[self.learning_rate_ph] = learning_rate
        return self.sess.run(fetches, feed_dict)

    def pre_train_step(self,
                       observations,
                       dist_param_targets,
                       obs_as_string=False,
                       learning_rate=None,
                       additional_fetches=None):
        """
        Run one optimisation step on the pre-training loss.

        :param observations: fed to the string placeholder when
            ``obs_as_string`` is true, otherwise to the decoded observation
            tensor
        :param dist_param_targets: fed to ``dist_param_target_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default pre-training learning
            rate when given
        :param additional_fetches: optional iterable of extra fetches
            appended after the pre-training op and its summary
        :return: result of ``sess.run`` for
            ``[pre_training_op, pre_summary, *additional_fetches]``
        """
        # list(...) accepts any iterable (e.g. a tuple), not just a list.
        extra = [] if additional_fetches is None else list(additional_fetches)
        fetches = [self.pre_training_op, self.pre_summary] + extra
        obs_tensor = (self.observation_string_ph
                      if obs_as_string else self.observation_input)
        feed_dict = {
            obs_tensor: observations,
            self.dist_param_target_ph: dist_param_targets
        }
        if learning_rate is not None:
            feed_dict[self.pre_training_learning_rate_ph] = learning_rate
        return self.sess.run(fetches, feed_dict)

    def simple_save(self, path):
        """Export via the module-level ``simple_save`` with input "obs" and output "action"."""
        with self.graph.as_default():
            simple_save(self.sess,
                        path,
                        inputs={"obs": self.observation_input},
                        outputs={"action": self.clipped_action})

    def save(self, path):
        """Save the session's variables to ``path`` with the graph's saver."""
        with self.graph.as_default():
            self.saver.save(sess=self.sess, save_path=path)

    def restore(self, path):
        """Restore the session's variables from ``path`` with the graph's saver."""
        with self.graph.as_default():
            self.saver.restore(sess=self.sess, save_path=path)

    def close_session(self):
        """Close the underlying session."""
        self.sess.close()

    def get_trainable_variables(self):
        """Return the current values of the graph's trainable variables."""
        return self.sess.run(self.trainable_variables)