def usfaces_df(queue):
    """Preprocess and augment the 10k US Adult Faces images into data/.

    Reads the "Filename" and "Attractive" columns from the demographic
    labels spreadsheet, drops duplicate filenames, and for every face
    writes a resized copy plus a horizontally flipped ("-F.jpg") and a
    noisy ("-N.jpg") variant into data/. Faces for which any preprocessing
    step raises are removed from the table.

    Args:
        queue: object with a ``put`` method; it receives the resulting
            DataFrame (columns "Face" and "Rating").

    Returns:
        None. The DataFrame is delivered through ``queue.put``.
    """
    ratings = pd.read_excel(
        "Full Attribute Scores/demographic & others labels/demographic-others-labels.xlsx")
    ratings = ratings[["Filename", "Attractive"]]
    ratings = ratings.drop_duplicates(["Filename"])

    failed = []
    for face in ratings["Filename"]:
        base = os.path.splitext(face)[0]
        resized = "data/{0}".format(face)
        try:
            preprocess.resize("10k US Adult Faces Database/Face Images/{0}".format(face), resized)
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: a face that cannot be preprocessed is left out of
            # the table. KeyboardInterrupt/SystemExit still propagate.
            failed.append(face)
    # Filter once instead of rebuilding the frame on every failure.
    ratings = ratings[~ratings["Filename"].isin(failed)]

    # Derive augmented names the same way the files were written above
    # (os.path.splitext), so they also match for non 4-character extensions.
    bases = pd.Series(
        [os.path.splitext(name)[0] for name in ratings["Filename"]],
        index=ratings.index,
        dtype=object,
    )
    flipped = ratings.copy()
    noisy = ratings.copy()
    flipped["Filename"] = bases + "-F.jpg"
    noisy["Filename"] = bases + "-N.jpg"

    df = pd.concat([ratings, flipped, noisy], ignore_index=True)
    df.columns = ["Face", "Rating"]
    # Rescale ratings by 10/5 (5 point scale -> 10 point scale).
    df["Rating"] *= 10.0 / 5.0
    queue.put(df)
def scutfbp_df(queue):
    """Preprocess and augment SCUT-FBP faces into data/.

    Loads the attractiveness spreadsheet, drops its "Standard Deviation"
    column, and for every ``.jpg`` in Data_Collection writes a resized copy
    plus flipped ("-F.jpg") and noisy ("-N.jpg") variants into data/.
    Images whose preprocessing raises are removed from the table, matched
    by the number at the end of the file name.

    Args:
        queue: object with a ``put`` method; it receives the resulting
            DataFrame (columns "Face" and "Rating").

    Returns:
        None. The DataFrame is delivered through ``queue.put``.
    """
    ratings = pd.read_excel("Rating_Collection/Attractiveness label.xlsx")
    # Image numbers are compared and concatenated as strings below.
    ratings["#Image"] = ratings["#Image"].astype(str)
    # Keyword axis: the positional form is not accepted by recent pandas.
    ratings = ratings.drop("Standard Deviation", axis=1)

    failed = []
    for face in os.listdir("Data_Collection"):
        if not face.endswith(".jpg"):
            continue
        base = os.path.splitext(face)[0]
        # Trailing digits of the file name identify the spreadsheet row.
        match = re.match(r".*?([0-9]+)$", base)
        if match is None:
            # No image number, so the file cannot be tied to a rating row.
            continue
        img_num = match.group(1)
        resized = "data/{0}".format(face)
        try:
            preprocess.resize("Data_Collection/{0}".format(face), resized)
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: drop the rating of an image that failed to
            # preprocess. KeyboardInterrupt/SystemExit still propagate.
            failed.append(img_num)
    ratings = ratings[~ratings["#Image"].isin(failed)]

    flipped = ratings.copy()
    noisy = ratings.copy()
    flipped["#Image"] = "SCUT-FBP-" + flipped["#Image"] + "-F.jpg"
    noisy["#Image"] = "SCUT-FBP-" + noisy["#Image"] + "-N.jpg"
    ratings["#Image"] = "SCUT-FBP-" + ratings["#Image"] + ".jpg"

    df = pd.concat([ratings, flipped, noisy], ignore_index=True)
    # Rename #Image -> Face and Attractiveness label -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 5 point scale to 10 point scale
    df["Rating"] *= 10.0 / 5.0
    queue.put(df)
def chicago_df(queue):
    """Preprocess and augment Chicago Face Database faces into data/.

    Reads the "Target" and "Attractive" columns of the norming spreadsheet.
    For each target directory, every image whose name ends in "N.jpg"
    (neutral faces) is resized to ``data/<target>.jpg`` and augmented with
    flipped ("-F.jpg") and noisy ("-N.jpg") variants. Targets whose
    preprocessing raises are removed from the table.

    Args:
        queue: object with a ``put`` method; it receives the resulting
            DataFrame (columns "Face" and "Rating").

    Returns:
        None. The DataFrame is delivered through ``queue.put``.
    """
    norms = pd.read_excel("CFD Version 2.0/CFD 2.0 Norming Data and Codebook.xlsx", skiprows=4)
    norms = norms[["Target", "Attractive"]]

    images_root = "CFD Version 2.0/CFD 2.0 Images"
    failed = []
    for target in os.listdir(images_root):
        target_dir = "{0}/{1}".format(images_root, target)
        # Skip stray files such as .DS_Store; only directories hold faces.
        if not os.path.isdir(target_dir):
            continue
        for face in os.listdir(target_dir):
            # Neutral faces
            if not face.endswith("N.jpg"):
                continue
            resized = "data/{0}.jpg".format(target)
            try:
                preprocess.resize("{0}/{1}".format(target_dir, face), resized)
                preprocess.hflip(resized, "data/{0}-F.jpg".format(target))
                preprocess.add_noise(resized, "data/{0}-N.jpg".format(target))
            except Exception:
                # Best effort: drop targets that cannot be preprocessed.
                # KeyboardInterrupt/SystemExit still propagate.
                failed.append(target)
    norms = norms[~norms["Target"].isin(failed)]

    flipped = norms.copy()
    noisy = norms.copy()
    flipped["Target"] = flipped["Target"] + "-F.jpg"
    noisy["Target"] = noisy["Target"] + "-N.jpg"
    norms["Target"] = norms["Target"] + ".jpg"

    df = pd.concat([norms, flipped, noisy], ignore_index=True)
    # Rename Target -> Face and Attractive -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 7 point scale to 10 point scale
    df["Rating"] *= 10.0 / 7.0
    queue.put(df)
def models_df(queue):
    """Download, preprocess and augment models.com "newfaces" images into data/.

    Walks listing pages 1..215, downloads every ``img`` tag with class
    "attachment-square" under a random hex file name, and writes resized,
    flipped ("-F.jpg") and noisy ("-N.jpg") versions into data/. Every
    image that preprocesses successfully is recorded with a rating of 10.

    Args:
        queue: object with a ``put`` method; it receives the resulting
            DataFrame (columns "Face" and "Rating").

    Returns:
        None. The DataFrame is delivered through ``queue.put``.
    """
    imgs = []
    for i in range(1, 216):
        url = "http://models.com/newfaces/page/{0}".format(i)
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page, "lxml")
        for tag in soup.findAll("img", {"class": "attachment-square"}):
            src = "http:{0}".format(tag["src"])
            base = uuid.uuid4().hex
            filename = base + ".jpg"
            original = "data/{0}".format(filename)
            flipped = "data/{0}-F.jpg".format(base)
            noisy = "data/{0}-N.jpg".format(base)
            urllib.urlretrieve(src, original)
            try:
                preprocess.resize(original, original)
                preprocess.hflip(original, flipped)
                preprocess.add_noise(original, noisy)
            except Exception:
                # Best effort: discard this image, including any augmented
                # file written before the failure, so data/ holds no orphans.
                for path in (original, flipped, noisy):
                    if os.path.exists(path):
                        os.remove(path)
                continue
            imgs.append({"Face": filename, "Rating": 10})
            imgs.append({"Face": "{0}-F.jpg".format(base), "Rating": 10})
            imgs.append({"Face": "{0}-N.jpg".format(base), "Rating": 10})
    # Explicit columns keep the schema stable even when nothing was scraped.
    df = pd.DataFrame(imgs, columns=["Face", "Rating"])
    queue.put(df)
def eccv_df(queue):
    """Preprocess and augment the Gray et al. (ECCV 2010) faces into data/.

    Parses ``hotornot_face_all.xml``; for every child element, resizes the
    referenced image (without cropping) to ``data/<base>.jpg`` and writes
    flipped ("-F.jpg") and noisy ("-N.jpg") variants. Entries whose
    preprocessing raises are skipped.

    Args:
        queue: object with a ``put`` method; it receives the resulting
            DataFrame (columns "Face" and "Rating").

    Returns:
        None. The DataFrame is delivered through ``queue.put``.
    """
    root = ET.parse("eccv2010_beauty_data/hotornot_face_all.xml").getroot()
    rows = []
    for child in root:
        filename = os.path.split(child.attrib["filename"])[-1]
        base = os.path.splitext(filename)[0]
        # Augment from the file that resize actually wrote (always .jpg),
        # not from the original file name, whose extension may differ.
        resized = "data/{0}.jpg".format(base)
        try:
            preprocess.resize(
                "eccv2010_beauty_data/{0}".format(child.attrib["filename"]),
                resized,
                crop=False,
            )
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: skip images that cannot be preprocessed.
            # KeyboardInterrupt/SystemExit still propagate.
            continue
        score = float(child.attrib["score"])
        rows.append([base + "-F.jpg", score])
        rows.append([base + "-N.jpg", score])
        rows.append([base + ".jpg", score])
    df = pd.DataFrame(rows, columns=["Face", "Rating"])
    # Shift by 4 and scale by 10/8, i.e. map a -4..4 score onto 0..10.
    df["Rating"] += 4
    df["Rating"] *= 10.0 / 8.0
    queue.put(df)
def train(gamma, double_q, n_step_q, exp_fraction, final_eps, kp_type, colour_input, patch_sizes,
          lsp_layers, batch_size, num_iters, learning_starts, train_freq, kpt_encoder_type,
          kpt_cnn_channels, agent_size, learning_rate, max_grad_norm, mask_threshold, tau,
          window_size, ckpts_prefix, ckpt_load_dir, vis_load, test_every, mp_num_steps, img_size,
          replay_buffer_size, seed, noise_type, _run):
    """Train the keypoint-based recurrent Q-learning agent with Horovod.

    Builds the train (and, on local rank 0, eval) environment, the vision
    and agent models, an optimizer and an episodic replay buffer. It then
    alternates between acting in the environment, storing transitions and
    running Q-learning updates on trajectories sampled from the buffer.
    Local rank 0 periodically evaluates and checkpoints the agent modules.

    Args:
        gamma, double_q, n_step_q: forwarded to ``q_learning``.
        exp_fraction, final_eps: forwarded to ``exploration_policy``.
        kp_type, colour_input, patch_sizes, lsp_layers, img_size,
        mask_threshold, mp_num_steps, kpt_encoder_type: forwarded to the
            vision / keypoint-encoding helpers. ``kpt_encoder_type == "gnn"``
            additionally enables the node-encoder and position-net
            checkpoints.
        batch_size: replay sample batch size.
        num_iters: number of environment steps to run.
        learning_starts, train_freq: updates run when ``t > learning_starts``
            and ``t % train_freq == 0``.
        kpt_cnn_channels: forwarded to ``build_agent_model``.
        agent_size: width of the recurrent state tensors ``(1, agent_size)``.
        learning_rate: base learning rate; multiplied by ``hvd.size()``.
        max_grad_norm: global-norm gradient clipping threshold.
        tau: forwarded to ``update_target_networks``.
        window_size: with ``n_step_q`` determines the sampled trajectory
            length (``window_size + n_step_q + 1``).
        ckpts_prefix: prefix for the saved checkpoint paths.
        ckpt_load_dir, vis_load: forwarded to ``load_vision_model``.
        test_every: evaluation period in environment steps.
        replay_buffer_size: replay buffer capacity (cast to int).
        seed: base seed; offset by ``hvd.local_rank()`` per process.
        noise_type: any value other than the string "none" makes
            ``add_noise`` be applied to each observation.
        _run: run handle passed to ``add_sacred_log``.

    Returns:
        None.
    """
    model_init_start = time.time()
    process_seed = seed + hvd.local_rank()

    # init Gym environments
    train_env = make_env(mode="train", seed=process_seed)
    if hvd.local_rank() == 0:  # eval only on 1 node (horovod)
        eval_env = make_env(mode="eval", seed=20 * (process_seed + 1))
    n_actions = train_env.action_space.n

    # build models
    vision_model_dict = build_vision_model()
    agent_model_dict = build_agent_model(n_actions=n_actions,
                                         kpt_cnn_channels=kpt_cnn_channels)
    target_agent_model_dict = build_agent_model(n_actions=n_actions,
                                                kpt_cnn_channels=kpt_cnn_channels)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = get_optimizer(learning_rate=learning_rate * hvd.size())

    # setting up ckpts for all the modules
    node_enc_ckpt, pos_enc_ckpt = None, None
    policy_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                      model=agent_model_dict["agent_net"])
    kpt_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                       model=agent_model_dict["kpt_encoder"])
    if kpt_encoder_type == "gnn":
        node_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                            model=agent_model_dict["node_enc"])
        pos_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                           model=agent_model_dict["pos_net"])

    # load pre-trained vision module
    vision_model_dict = load_vision_model(vision_model_dict, kp_type, colour_input,
                                          batch_size, lsp_layers, patch_sizes,
                                          ckpt_load_dir, vis_load)
    if hvd.local_rank() == 0:
        print("initializing models and env took %4.5f s" % (time.time() - model_init_start))

    def train_step(inputs):
        # Minimize the TD error on a batch sampled from replay buffer.
        with tf.GradientTape() as tape:
            step_loss, extra = q_learning(
                vision_model_dict, agent_model_dict, target_agent_model_dict, inputs,
                batch_size, kp_type, agent_size, mask_threshold, patch_sizes,
                kpt_encoder_type, mp_num_steps, img_size, lsp_layers, window_size,
                gamma, double_q, n_step_q)
        # Horovod: add Horovod Distributed GradientTape.
        tape = hvd.DistributedGradientTape(tape)
        # collecting trainable params of all modules
        params = []
        for agent_model in agent_model_dict.values():
            params = params + list(agent_model.trainable_variables)
        # compute grads
        grads = tape.gradient(step_loss, params)
        # apply grad clipping
        grads, _ = tf.clip_by_global_norm(grads, clip_norm=max_grad_norm)
        # update agent
        optimizer.apply_gradients(zip(grads, params))
        return step_loss, extra

    # load weights using var assignment
    source_vars, target_vars = update_target_networks(agent_model_dict,
                                                      target_agent_model_dict, tau)

    # init replay buffer
    data_spec = (specs.TensorSpec([84, 84, 3], tf.int32, 'obs_tm1'),
                 specs.TensorSpec([1], tf.int32, 'a_tm1'),
                 specs.TensorSpec([1], tf.float32, 'r_tm1'),
                 specs.TensorSpec([2], tf.float32, 'begin_end'))
    # each process has its own smaller replay buffer
    replay_buffer = EpisodicReplayBuffer(
        capacity=int(replay_buffer_size),
        buffer_size=8,
        dataset_drop_remainder=False,
        data_spec=data_spec,
        begin_episode_fn=lambda x: bool(x[3][0, 0]),
        end_episode_fn=lambda x: bool(x[3][0, 1]))
    # create tf.Dataset object from replay_buffer and sample
    rb_ds = replay_buffer.as_dataset(sample_batch_size=batch_size,
                                     num_steps=window_size + n_step_q + 1)
    # dataset iterator sampling trajectories from replay_buffer
    episode_ids = replay_buffer.create_episode_ids(1)
    rb_ds = rb_ds.prefetch(buffer_size=AUTOTUNE)
    rb_iterator = iter(rb_ds)

    episode_rewards = [0.0]
    obs = train_env.reset()
    reset = False
    exploration = exploration_policy(num_iters, exp_fraction, final_eps)
    avg_td_error = 0.0
    # init lstm_agent state
    c_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
    h_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
    best_eval_score = -float("inf")

    # TRAINING LOOP
    for t in range(int(num_iters)):
        # Horovod: broadcast initial variable states from rank 0 to all other
        # processes. This is necessary to ensure consistent initialization of
        # all workers when training is started with random weights or restored
        # from a checkpoint.
        if t == 0:
            hvd.broadcast_variables(source_vars, root_rank=0)
            hvd.broadcast_variables(target_vars, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)

        # convert obs to float and scale to 0-1
        obs_float = np.asarray(obs[None, :, :, :], dtype=np.float32) / 255.0
        # sometimes add distractors
        # Compare by value: identity checks against a string literal are not
        # reliable (and eval_step uses != for the same test).
        if noise_type != "none":
            obs_float = add_noise(obs_float[0, :, :, :], noise_type)
            obs_float = obs_float[None, :, :, :]

        # exploration
        update_eps = tf.constant(exploration.value(t))

        # compute forward pass of input obs over vision + attention modules
        bottom_up_masks, encoder_features, kpt_centers = vision_forward_pass(
            obs_float, vision_model_dict, lsp_layers, kp_type, patch_sizes, img_size)
        # compute keypoint encodings; .get() passes None if a module is not available
        bottom_up_features = encode_keypoints(
            bottom_up_masks, encoder_features, kpt_centers, mask_threshold, kp_type,
            kpt_encoder_type, mp_num_steps, q_learn=False,
            pos_net=agent_model_dict.get("pos_net"),
            node_encoder=agent_model_dict.get("node_enc"),
            kpt_encoder=agent_model_dict.get("kpt_encoder"))

        # agent step
        action, h_t, c_t = agent_model_dict["agent_net"].step(
            bottom_up_features, [h_tm1, c_tm1], update_eps, training=True, stochastic=True)

        # env step
        new_obs, rew, done, _ = train_env.step(action)
        episode_rewards[-1] += rew

        # store transitions in replay buffer
        # making data_tuple compatible for add_batch() method
        obs = img_as_ubyte(np.array(obs_float[0, :, :, :], dtype=float))
        action = np.array(action, dtype=np.int32)
        rew = np.array(rew, ndmin=1, dtype=np.float32)
        end = np.array(done, ndmin=1, dtype=np.float32)
        begin = np.array(reset, ndmin=1, dtype=np.float32)
        begin_end = np.concatenate((begin, end), axis=0)
        # pack the transition and add a leading batch dimension to each element
        values = (obs, action, rew, begin_end)
        values_batched = tf.nest.map_structure(lambda b: tf.stack([b]), values)
        # add batch of transitions of episode_ids to replay_buffer
        episode_ids = replay_buffer.add_batch(values_batched, episode_ids)

        obs = new_obs
        h_tm1 = h_t
        c_tm1 = c_t
        reset = False

        # episode termination
        if done:
            # saving cumulative returns at end of episode
            print("Episode Return: %3.3f" % (episode_rewards[-1]))
            print(episode_ids.numpy(), update_eps.numpy())
            obs = train_env.reset()
            # reset lstm state at episode end
            c_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
            h_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
            episode_rewards.append(0.0)
            reset = True

        # Q_LEARNING UPDATES BEGIN
        if t > learning_starts and t % train_freq == 0:
            batch_q_start = time.time()
            # sample a batch of trajectories from replay_buffer for recurrent-dqn
            inputs = next(rb_iterator)
            step_loss, extra = train_step(inputs)
            step_loss = hvd.allreduce(step_loss)
            # soft-update target networks
            source_vars, target_vars = update_target_networks(
                agent_model_dict, target_agent_model_dict, tau)
            td_error = tf.reduce_mean(hvd.allreduce(extra.td_error), axis=0)
            if hvd.local_rank() == 0:
                print(
                    "Iteration: %5d Step loss: %4.4f, TD_error: %3.4f took %4.5f s"
                    % (t, step_loss, td_error, time.time() - batch_q_start))
                # logging step losses to sacred
                add_sacred_log("train.t", int((t - learning_starts) / train_freq), _run)
                add_sacred_log("train.step_loss", float(step_loss), _run)
                add_sacred_log("train.step_td_error", float(td_error), _run)
                # only rank 0 consumes this accumulator (see evaluation below)
                avg_td_error = avg_td_error + np.abs(td_error)

        # VALIDATION/CKPT
        if t > learning_starts and t % test_every == 0:
            # trigger evaluation run on only 1 node
            if hvd.local_rank() == 0:
                eval_start = time.time()
                # NOTE(review): eval_step is called with three arguments here;
                # its remaining parameters are presumably injected elsewhere
                # (e.g. by a config-capture decorator) - confirm.
                mean_ep_rew, var_ep_rew, _, _ = eval_step(
                    eval_env, vision_model_dict, agent_model_dict)
                # NOTE(review): divides by the total number of updates so far,
                # although the accumulator is reset after every evaluation -
                # confirm this is the intended average.
                avg_td_error = avg_td_error / float((t - learning_starts) / train_freq)
                print(
                    "Evaluation after: %5d steps avg_ep_return: %4.5f running_avg_td_error: %4.5f took %4.5f s"
                    % (t, mean_ep_rew, avg_td_error, time.time() - eval_start))
                # logging avg. episodic rewards to sacred
                add_sacred_log("test.t", int((t - learning_starts) / train_freq), _run)
                add_sacred_log("test.mean_ep_return", float(mean_ep_rew), _run)
                add_sacred_log("test.var_ep_return", float(var_ep_rew), _run)
                add_sacred_log("test.avg_td_error", float(avg_td_error), _run)
                avg_td_error = 0.0

                # ckpt model based on eval-run scores
                if mean_ep_rew > 0.95 * best_eval_score:
                    best_eval_score = mean_ep_rew
                    # Horovod: save checkpoints only on worker 0 to prevent
                    # other workers from corrupting it.
                    policy_ckpt.save(ckpts_prefix + '_agent_net')
                    kpt_enc_ckpt.save(ckpts_prefix + '_kpt_encoder')
                    if kpt_encoder_type == "gnn":
                        node_enc_ckpt.save(ckpts_prefix + '_node_enc')
                        pos_enc_ckpt.save(ckpts_prefix + '_pos_net')

    if hvd.local_rank() == 0:
        print("Training complete!!!")
def eval_step(eval_env, vision_model_dict, agent_model_dict, eval_eps, max_eval_ep, agent_size,
              lsp_layers, kp_type, mask_threshold, patch_sizes, img_size, kpt_encoder_type,
              noise_type, mp_num_steps):
    """Roll out ``max_eval_ep`` evaluation episodes and summarise their returns.

    Each step runs the observation through the vision module and keypoint
    encoder, lets the agent network pick an action (exploration value
    ``eval_eps``, ``training=False``) and advances ``eval_env``. The
    recurrent state is re-initialised to zeros whenever an episode ends.

    Returns:
        Tuple ``(mean, std, min, max)`` of the completed episodes' returns.
    """

    def zero_state():
        # Fresh, non-trainable recurrent state of shape (1, agent_size).
        return tf.Variable(tf.zeros((1, agent_size)), trainable=False)

    returns = [0.0]
    frame = eval_env.reset()
    episodes_done = 0
    cell_state = zero_state()
    hidden_state = zero_state()
    agent_net = agent_model_dict["agent_net"]

    while episodes_done < max_eval_ep:
        # scale pixel values to 0-1 and add a batch axis
        obs_float = np.asarray(frame[None, :, :, :], dtype=np.float32) / 255.0
        # optionally add distractors
        if noise_type != "none":
            obs_float = add_noise(obs_float[0, :, :, :], noise_type)[None, :, :, :]

        # vision-module forward pass
        masks, features, centers = vision_forward_pass(
            tf.constant(obs_float), vision_model_dict, lsp_layers, kp_type,
            patch_sizes, img_size)

        # keypoint encodings; .get() hands over None for absent modules
        keypoint_features = encode_keypoints(
            masks, features, centers, mask_threshold, kp_type, kpt_encoder_type,
            mp_num_steps,
            q_learn=False,
            pos_net=agent_model_dict.get("pos_net"),
            node_encoder=agent_model_dict.get("node_enc"),
            kpt_encoder=agent_model_dict.get("kpt_encoder"))

        # agent step followed by env step
        action, next_hidden, next_cell = agent_net.step(
            keypoint_features, [hidden_state, cell_state], eval_eps,
            training=False, stochastic=True)
        frame, rew, done, _ = eval_env.step(action)
        returns[-1] += rew
        hidden_state, cell_state = next_hidden, next_cell

        if not done:
            continue

        # episode finished: restart env, clear recurrent state, open new slot
        frame = eval_env.reset()
        cell_state = zero_state()
        hidden_state = zero_state()
        episodes_done += 1
        returns.append(0.0)

    # the trailing entry belongs to an episode that never started
    finished = np.array(returns[0:-1])
    return (np.mean(finished, axis=0), np.std(finished, axis=0),
            np.amin(finished, axis=0), np.amax(finished, axis=0))
def sample_datasets(hyperparams):
    """Load, combine and normalise real and synthetic sweep datasets.

    Real sweeps are read from ``../../data_training/<name>.npz`` (key
    'sweeps'), truncated to ``num_examples`` rows per file, offset-corrected
    and extended with four zero columns plus the labels loaded from
    ``<name>_labels.npz`` (zeros with 3 columns when labels are missing).
    Synthetic sweeps are read from ``../../data_synthetic/<name>.npz``,
    truncated to ``num_synthetic_examples`` rows per file, passed through
    ``preprocess.add_noise`` / ``preprocess.add_offset`` and extended with
    three zero columns. The first ``2 * n_inputs`` columns of the combined
    array are then centred and scaled by their mean and peak-to-peak.

    Args:
        hyperparams: mapping providing 'n_inputs', 'num_examples',
            'num_synthetic_examples', 'datasets' and 'datasets_synthetic';
            it is also forwarded to the ``preprocess`` helpers.

    Returns:
        ``(data_train, data_test, data_valid, data_mean, data_ptp)`` where
        the first three come from ``preprocess.shuffle_split_data`` and the
        last two are the length ``2 * n_inputs`` scaling vectors.

    Raises:
        ValueError: if no sweeps were loaded (e.g. both example counts are 0).
    """
    print("Getting data...", end=" ")
    sys.stdout.flush()

    n_inputs = hyperparams['n_inputs']
    # Number of examples to sample from each dataset
    num_examples = hyperparams['num_examples']
    # Number of synthetic examples to sample from each dataset
    num_synthetic_examples = hyperparams['num_synthetic_examples']

    sweeps = []
    if num_examples != 0:
        real_sweeps = []
        real_labels = []
        for data_file in hyperparams['datasets']:
            temp_data = np.load("../../data_training/" + data_file + ".npz")['sweeps']
            # Slicing already caps at the array length.
            temp_data = temp_data[:num_examples]
            # Remove offsets
            # NOTE(review): 256/512/32 are hard-coded; presumably n_inputs == 256 - confirm.
            temp_data[:, 256:512] -= np.mean(temp_data[:, 256:256 + 32], axis=1, keepdims=True)
            real_sweeps.append(temp_data)
            try:
                temp_data_labels = np.load(
                    "../../data_training/" + data_file + "_labels.npz")['labels']
                # Keep labels row-aligned with the truncated sweeps; otherwise
                # the column-wise concatenate below fails on a row mismatch.
                temp_data_labels = temp_data_labels[:temp_data.shape[0]]
            except (IOError, OSError, KeyError):
                # Missing file or missing 'labels' entry: fall back to zeros.
                print("Labels not found for {}".format(data_file))
                temp_data_labels = np.zeros((temp_data.shape[0], 3))
            real_labels.append(temp_data_labels)
        sweeps_labels = np.concatenate(real_labels, axis=0)
        sweeps = np.concatenate(real_sweeps, axis=0)
        # Add 4 zeros after each sweep -- first zero is a flag indicating whether the
        # following physical parameters (ne, Vp, Te) are included in the loss function
        # calculation. They are not included for (real) sweeps because they have not
        # been analyzed yet. The remaining 3 zeros are the physical parameters
        # specified above.
        sweeps = np.concatenate([sweeps, np.zeros((sweeps.shape[0], 4))], axis=1)
        sweeps = np.concatenate([sweeps, sweeps_labels], axis=1)
        print("Real examples: {}...".format(sweeps.shape[0]), end=" ")
        sys.stdout.flush()

    if num_synthetic_examples != 0:
        sweeps_synthetic = []
        for data_file in hyperparams['datasets_synthetic']:
            temp_data = np.load("../../data_synthetic/" + data_file + ".npz")['sweeps']
            temp_data = temp_data[:num_synthetic_examples]
            temp_data[:, 0:n_inputs * 2] = preprocess.add_noise(
                temp_data[:, 0:n_inputs * 2], hyperparams, epoch=0)
            temp_data[:, 0:n_inputs * 2] = preprocess.add_offset(
                temp_data[:, 0:n_inputs * 2], hyperparams, epoch=0)
            sweeps_synthetic.append(temp_data)
        sweeps_synthetic = np.concatenate(sweeps_synthetic, axis=0)
        # Pad with three zero columns.
        # NOTE(review): the column count must match the real sweeps for the
        # concatenation below to succeed - confirm against the data files.
        sweeps_synthetic = np.concatenate(
            [sweeps_synthetic, np.zeros((sweeps_synthetic.shape[0], 3))], axis=1)
        print("Synthetic examples: {}...".format(sweeps_synthetic.shape[0]), end=" ")
        sys.stdout.flush()
        if len(sweeps) != 0:
            sweeps = np.concatenate([sweeps, sweeps_synthetic])
        else:
            sweeps = sweeps_synthetic
        del sweeps_synthetic

    if len(sweeps) == 0:
        raise ValueError(
            "No sweeps were loaded: 'num_examples' and 'num_synthetic_examples' "
            "must not both select zero rows")

    # Find the voltage sweep and current means and peak-to-peaks so the model is
    # easier to train.
    vsweep_mean = np.full(n_inputs, np.mean(sweeps[:, 0:n_inputs]))
    vsweep_ptp = np.full(n_inputs, np.ptp(sweeps[:, 0:n_inputs]))
    current_mean = np.full(n_inputs, np.mean(sweeps[:, n_inputs:n_inputs * 2]))
    current_ptp = np.full(n_inputs, np.ptp(sweeps[:, n_inputs:n_inputs * 2]))
    # Combine the two so we have a nice neat X, y, and scalings tuple returned by
    # the function.
    data_mean = np.concatenate((vsweep_mean, current_mean))
    data_ptp = np.concatenate((vsweep_ptp, current_ptp))

    # Voltage and current sweeps are already concatenated.
    # Centering and scaling the input so that it's easier to train.
    sweeps[:, 0:n_inputs * 2] = (sweeps[:, 0:n_inputs * 2] - data_mean) / data_ptp
    data_train, data_test, data_valid = preprocess.shuffle_split_data(sweeps, hyperparams)
    print("Done.")
    return data_train, data_test, data_valid, data_mean, data_ptp