Code example #1
File: dataset.py Project: jamesaanderson/vitruvian
def usfaces_df(queue):
    """Preprocess and augment US Face Database faces to data/. Returns pandas dataframe"""
    usfaces_df = pd.read_excel("Full Attribute Scores/demographic & others labels/demographic-others-labels.xlsx")
    usfaces_df = usfaces_df[["Filename", "Attractive"]]

    usfaces_df = usfaces_df.drop_duplicates(["Filename"])

    for face in usfaces_df["Filename"]:
        base = os.path.splitext(face)[0]

        try:
            preprocess.resize("10k US Adult Faces Database/Face Images/{0}".format(face), "data/{0}".format(face))
            preprocess.hflip("data/{0}".format(face), "data/{0}-F.jpg".format(base))
            preprocess.add_noise("data/{0}".format(face), "data/{0}-N.jpg".format(base))
        except Exception:
            # preprocessing failed (e.g. no face detected); drop this image
            usfaces_df = usfaces_df[usfaces_df.Filename != face]

    flipped_df = usfaces_df.copy()
    noisy_df = usfaces_df.copy()
    flipped_df["Filename"] = flipped_df["Filename"].str[:-4] + "-F.jpg"
    noisy_df["Filename"] = noisy_df["Filename"].str[:-4] + "-N.jpg"

    df = pd.concat([usfaces_df, flipped_df, noisy_df], ignore_index=True)
    df.columns = ["Face", "Rating"]
    df["Rating"] *= 10.0 / 5.0

    queue.put(df)
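
The preprocess module these examples import is not shown on this page. Judging by the calls, it exposes resize, hflip, and add_noise, and resize apparently involves face detection (the try/except blocks drop images where it fails). A minimal sketch of what such helpers could look like, assuming Pillow and numpy; the size and sigma defaults, and the omission of face detection, are illustrative assumptions:

import numpy as np
from PIL import Image

def resize(src, dst, size=(128, 128), crop=True):
    """Resize an image (optionally center-cropped to a square) and save to dst."""
    img = Image.open(src)
    if crop:
        side = min(img.size)
        left = (img.width - side) // 2
        top = (img.height - side) // 2
        img = img.crop((left, top, left + side, top + side))
    img.resize(size, Image.LANCZOS).save(dst)

def hflip(src, dst):
    """Save a horizontally mirrored copy of src to dst."""
    Image.open(src).transpose(Image.FLIP_LEFT_RIGHT).save(dst)

def add_noise(src, dst, sigma=8.0):
    """Save a copy of src with additive Gaussian pixel noise."""
    arr = np.asarray(Image.open(src), dtype=np.float32)
    noisy = np.clip(arr + np.random.normal(0.0, sigma, arr.shape), 0, 255)
    Image.fromarray(noisy.astype(np.uint8)).save(dst)
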
Code example #2
File: dataset.py Project: jamesaanderson/vitruvian
def scutfbp_df(queue):
    """Preprocess and augment SCUT-FBP faces to data/. Returns pandas dataframe"""
    scutfbp_df = pd.read_excel("Rating_Collection/Attractiveness label.xlsx")

    # Convert type of #Image column to str
    scutfbp_df["#Image"] = scutfbp_df["#Image"].astype(str)
    # Drop column Standard Deviation
    scutfbp_df = scutfbp_df.drop(columns="Standard Deviation")

    for face in os.listdir("Data_Collection"):
        if face.endswith(".jpg"):
            base = os.path.splitext(face)[0]
            # Regex to find numbers at end of string
            img_num = re.match(".*?([0-9]+)$", base).group(1)

            try:
                preprocess.resize("Data_Collection/{0}".format(face), "data/{0}".format(face))
                preprocess.hflip("data/{0}".format(face), "data/{0}-F.jpg".format(base))
                preprocess.add_noise("data/{0}".format(face), "data/{0}-N.jpg".format(base))
            except Exception:
                # preprocessing failed; drop this image's row
                scutfbp_df = scutfbp_df[scutfbp_df["#Image"] != img_num]

    flipped_df = scutfbp_df.copy()
    noisy_df = scutfbp_df.copy()
    flipped_df["#Image"] = "SCUT-FBP-" + flipped_df["#Image"] + "-F.jpg"
    noisy_df["#Image"] = "SCUT-FBP-" + noisy_df["#Image"] + "-N.jpg"
    scutfbp_df["#Image"] = "SCUT-FBP-" + scutfbp_df["#Image"] + ".jpg"

    df = pd.concat([scutfbp_df, flipped_df, noisy_df], ignore_index=True)
    # Rename #Image -> Face and Attractiveness label -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 5 point scale to 10 point scale
    df["Rating"] *= 10.0 / 5.0

    queue.put(df)
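
As a quick illustration, the regex above extracts the trailing digits from a filename stem; the lazy .*? gives up characters to the anchored digit group (the sample filename is hypothetical):

import re

assert re.match(".*?([0-9]+)$", "SCUT-FBP-42").group(1) == "42"
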
Code example #3
File: dataset.py Project: jamesaanderson/vitruvian
def chicago_df(queue):
    """Preprocess and augment Chicago faces to data/. Returns pandas dataframe"""
    chicago_df = pd.read_excel("CFD Version 2.0/CFD 2.0 Norming Data and Codebook.xlsx", skiprows=4)
    chicago_df = chicago_df[["Target", "Attractive"]]

    for subdir in os.listdir("CFD Version 2.0/CFD 2.0 Images"):
        if subdir == ".DS_Store":
            continue

        for face in os.listdir("CFD Version 2.0/CFD 2.0 Images/{0}".format(subdir)):
            # Neutral faces only
            if face.endswith("N.jpg"):
                try:
                    preprocess.resize(
                        "CFD Version 2.0/CFD 2.0 Images/{0}/{1}".format(subdir, face), "data/{0}.jpg".format(subdir)
                    )
                    preprocess.hflip("data/{0}.jpg".format(subdir), "data/{0}-F.jpg".format(subdir))
                    preprocess.add_noise("data/{0}.jpg".format(subdir), "data/{0}-N.jpg".format(subdir))
                except Exception:
                    # preprocessing failed (e.g. no single face detected); drop this target
                    chicago_df = chicago_df[chicago_df.Target != subdir]

    flipped_df = chicago_df.copy()
    noisy_df = chicago_df.copy()
    flipped_df["Target"] = flipped_df["Target"] + "-F.jpg"
    noisy_df["Target"] = noisy_df["Target"] + "-N.jpg"
    chicago_df["Target"] = chicago_df["Target"] + ".jpg"

    df = pd.concat([chicago_df, flipped_df, noisy_df], ignore_index=True)
    # Rename Target -> Face and Attractive -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 7 point scale to 10 point scale
    df["Rating"] *= 10.0 / 7.0

    queue.put(df)
Code example #4
File: dataset.py Project: jamesaanderson/vitruvian
def models_df(queue):
    """Preprocess and augment models.com faces to data/. Returns pandas dataframe"""
    imgs = []

    for i in range(1, 216):
        url = "http://models.com/newfaces/page/{0}".format(i)
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page, "lxml")

        for tag in soup.findAll("img", {"class": "attachment-square"}):
            src = "http:{0}".format(tag["src"])
            base = uuid.uuid4().hex
            filename = base + ".jpg"

            urllib.urlretrieve(src, "data/{0}".format(filename))

            try:
                preprocess.resize("data/{0}".format(filename), "data/{0}".format(filename))
                preprocess.hflip("data/{0}".format(filename), "data/{0}-F.jpg".format(base))
                preprocess.add_noise("data/{0}".format(filename), "data/{0}-N.jpg".format(base))
            except:
                os.remove("data/{0}".format(filename))
                continue

            imgs.append({"Face": filename, "Rating": 10})
            imgs.append({"Face": "{0}-F.jpg".format(base), "Rating": 10})
            imgs.append({"Face": "{0}-N.jpg".format(base), "Rating": 10})

    df = pd.DataFrame(imgs)
    queue.put(df)
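
Example #4 is written for Python 2 (urllib2.urlopen, urllib.urlretrieve). A hypothetical Python 3 equivalent of the download step, using the requests library; download_image and its arguments are invented for illustration:

import uuid
import requests

def download_image(src, dest_dir="data"):
    """Fetch one image URL and save it under a fresh UUID filename."""
    base = uuid.uuid4().hex
    filename = "{0}.jpg".format(base)
    resp = requests.get(src, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of saving bad files
    with open("{0}/{1}".format(dest_dir, filename), "wb") as f:
        f.write(resp.content)
    return base, filename
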
Code example #5
File: dataset.py Project: jamesaanderson/vitruvian
def eccv_df(queue):
    """Preprocess and augment Gray et al. dataset to data/. Returns pandas dataframe"""
    root = ET.parse("eccv2010_beauty_data/hotornot_face_all.xml").getroot()

    childs = []
    for child in root:
        filename = os.path.split(child.attrib["filename"])[-1]
        base = os.path.splitext(filename)[0]

        try:
            preprocess.resize(
                "eccv2010_beauty_data/{0}".format(child.attrib["filename"]),
                "data/{0}.jpg".format(base),
                crop=False,
            )
            preprocess.hflip("data/{0}.jpg".format(base), "data/{0}-F.jpg".format(base))
            preprocess.add_noise("data/{0}.jpg".format(base), "data/{0}-N.jpg".format(base))
        except Exception:
            # preprocessing failed; skip this image entirely
            continue

        childs.append([base + "-F.jpg", float(child.attrib["score"])])
        childs.append([base + "-N.jpg", float(child.attrib["score"])])
        childs.append([base + ".jpg", float(child.attrib["score"])])

    df = pd.DataFrame(childs, columns=["Face", "Rating"])
    df["Rating"] += 4
    df["Rating"] *= 10.0 / 8.0

    queue.put(df)
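
Each of the five builders above takes a queue argument and ends with queue.put(df), which suggests they are meant to run in parallel. A minimal driver sketch, assuming the functions live in the same dataset.py (the driver itself is not part of the listed code):

import multiprocessing as mp
import pandas as pd

if __name__ == "__main__":
    queue = mp.Queue()
    builders = [usfaces_df, scutfbp_df, chicago_df, models_df, eccv_df]
    procs = [mp.Process(target=f, args=(queue,)) for f in builders]
    for p in procs:
        p.start()
    # drain the queue before joining so no child blocks on a full pipe
    frames = [queue.get() for _ in builders]
    for p in procs:
        p.join()
    df = pd.concat(frames, ignore_index=True)  # unified Face/Rating table
    print(df.shape)
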
Code example #6
def train(gamma, double_q, n_step_q, exp_fraction, final_eps, kp_type,
          colour_input, patch_sizes, lsp_layers, batch_size, num_iters,
          learning_starts, train_freq, kpt_encoder_type, kpt_cnn_channels,
          agent_size, learning_rate, max_grad_norm, mask_threshold, tau,
          window_size, ckpts_prefix, ckpt_load_dir, vis_load, test_every,
          mp_num_steps, img_size, replay_buffer_size, seed, noise_type, _run):

    model_init_start = time.time()
    process_seed = seed + hvd.local_rank()

    # init Gym environments
    train_env = make_env(mode="train", seed=process_seed)
    if hvd.local_rank() == 0:  # eval only on 1 node (horovod)
        eval_env = make_env(mode="eval", seed=20 * (process_seed + 1))
    n_actions = train_env.action_space.n

    # build models
    vision_model_dict = build_vision_model()
    agent_model_dict = build_agent_model(n_actions=n_actions,
                                         kpt_cnn_channels=kpt_cnn_channels)
    target_agent_model_dict = build_agent_model(
        n_actions=n_actions, kpt_cnn_channels=kpt_cnn_channels)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = get_optimizer(learning_rate=learning_rate * hvd.size())

    # setting up ckpts for all the modules
    query_ckpt, attn_ckpt, pos_enc_ckpt, node_enc_ckpt, \
    scene_ckpt, kpt_enc_ckpt = None, None, None, None, None, None

    policy_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                      model=agent_model_dict["agent_net"])

    kpt_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                       model=agent_model_dict["kpt_encoder"])
    if kpt_encoder_type == "gnn":
        node_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                            model=agent_model_dict["node_enc"])
        pos_enc_ckpt = tf.train.Checkpoint(optimizer=optimizer,
                                           model=agent_model_dict["pos_net"])

    # load pre-trained vision module
    vision_model_dict = load_vision_model(vision_model_dict, kp_type,
                                          colour_input, batch_size, lsp_layers,
                                          patch_sizes, ckpt_load_dir, vis_load)

    if hvd.local_rank() == 0:
        print("initializing models and env took %4.5f s" %
              (time.time() - model_init_start))

    def train_step(inputs):
        # Minimize the TD error on a batch sampled from replay buffer.
        with tf.GradientTape() as tape:
            step_loss, extra = q_learning(
                vision_model_dict, agent_model_dict, target_agent_model_dict,
                inputs, batch_size, kp_type, agent_size, mask_threshold,
                patch_sizes, kpt_encoder_type, mp_num_steps, img_size,
                lsp_layers, window_size, gamma, double_q, n_step_q)
        w_update_start = time.time()
        # Horovod: add Horovod Distributed GradientTape.
        tape = hvd.DistributedGradientTape(tape)

        # collecting trainable params of all modules
        params = []
        for agent_model in agent_model_dict.values():
            params = params + list(agent_model.trainable_variables)

        # compute grads
        grads = tape.gradient(step_loss, params)
        # apply grad clipping
        grads, global_norm = tf.clip_by_global_norm(grads,
                                                    clip_norm=max_grad_norm)
        # update agent
        optimizer.apply_gradients(zip(grads, params))
        # print("grad comp + weight updates take %4.5f" % (time.time() - w_update_start))
        return step_loss, extra

    # load weights using var assignment
    source_vars, target_vars = update_target_networks(agent_model_dict,
                                                      target_agent_model_dict,
                                                      tau)

    # init replay buffer
    data_spec = (specs.TensorSpec([84, 84, 3], tf.int32, 'obs_tm1'),
                 specs.TensorSpec([1], tf.int32, 'a_tm1'),
                 specs.TensorSpec([1], tf.float32, 'r_tm1'),
                 specs.TensorSpec([2], tf.float32, 'begin_end'))
    # each process has its own smaller replay_buffer
    replay_buffer = EpisodicReplayBuffer(
        capacity=int(replay_buffer_size),
        buffer_size=8,
        dataset_drop_remainder=False,
        data_spec=data_spec,
        begin_episode_fn=lambda x: bool(x[3][0, 0]),
        end_episode_fn=lambda x: bool(x[3][0, 1]))

    # create tf.Dataset object from replay_buffer and sample
    rb_ds = replay_buffer.as_dataset(sample_batch_size=batch_size,
                                     num_steps=window_size + n_step_q + 1)

    # dataset iterator sampling trajectories from replay_buffer
    episode_ids = replay_buffer.create_episode_ids(1)
    rb_ds = rb_ds.prefetch(buffer_size=AUTOTUNE)
    rb_iterator = iter(rb_ds)

    episode_rewards = [0.0]
    obs = train_env.reset()
    reset = False

    # lists for logging exp results
    eps = 0.1
    episode_timestep = 0
    exploration = exploration_policy(num_iters, exp_fraction, final_eps)
    avg_td_error = 0.0
    # init lstm_agent state
    c_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
    h_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
    best_eval_score = -float("inf")

    # TRAINING LOOP
    for t in range(int(num_iters)):
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        if t == 0:
            hvd.broadcast_variables(source_vars, root_rank=0)
            hvd.broadcast_variables(target_vars, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)

        online_step_start = time.time()

        # convert obs to float and scale to 0-1
        obs_float = np.asarray(obs[None, :, :, :], dtype=np.float32) / 255.0
        # sometimes add distractors
        if noise_type is not "none":
            obs_float = add_noise(obs_float[0, :, :, :], noise_type)
            obs_float = obs_float[None, :, :, :]
        # exploration
        update_eps = tf.constant(exploration.value(t))

        # compute forward pass of input obs over vision + attention modules
        bottom_up_masks, encoder_features, kpt_centers = vision_forward_pass(
            obs_float, vision_model_dict, lsp_layers, kp_type, patch_sizes,
            img_size)

        # compute keypoint encodings

        bottom_up_features = encode_keypoints(
            bottom_up_masks,
            encoder_features,
            kpt_centers,
            mask_threshold,
            kp_type,
            kpt_encoder_type,
            mp_num_steps,
            q_learn=False,
            pos_net=agent_model_dict.get("pos_net"),
            node_encoder=agent_model_dict.get("node_enc"),
            kpt_encoder=agent_model_dict.get(
                "kpt_encoder"))  # passes None if not available

        # agent step
        action, h_t, c_t = agent_model_dict["agent_net"].step(
            bottom_up_features, [h_tm1, c_tm1],
            update_eps,
            training=True,
            stochastic=True)
        # env step
        new_obs, rew, done, _ = train_env.step(action)

        episode_timestep = episode_timestep + 1
        episode_rewards[-1] += rew

        # store transitions in replay buffer
        store_exp_start = time.time()
        # making data_tuple compatible for add_batch() method
        obs = img_as_ubyte(np.array(obs_float[0, :, :, :], dtype=float))
        action = np.array(action, dtype=np.int32)
        rew = np.array(rew, ndmin=1, dtype=np.float32)
        end = np.array(done, ndmin=1, dtype=np.float32)
        begin = np.array(reset, ndmin=1, dtype=np.float32)
        begin_end = np.concatenate((begin, end), axis=0)
        # pack the transition into a tuple matching data_spec
        values = (obs, action, rew, begin_end)
        values_batched = tf.nest.map_structure(lambda b: tf.stack([b]), values)
        # add batch of transitions of episode_ids to replay_buffer
        episode_ids = replay_buffer.add_batch(values_batched, episode_ids)

        obs = new_obs
        h_tm1 = h_t
        c_tm1 = c_t
        reset = False
        # episode termination
        if done:
            # report cumulative return at the end of the episode
            print("Episode Return: %3.3f" % (episode_rewards[-1]))
            print(episode_ids.numpy(), update_eps.numpy())
            obs = train_env.reset()
            episode_timestep = 0
            # reset lstm state at episode end
            c_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
            h_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
            episode_rewards.append(0.0)
            reset = True

        # Q_LEARNING UPDATES BEGIN
        if t > learning_starts and t % train_freq == 0:
            batch_q_start = time.time()
            # sample a batch of trajectories from replay_buffer for recurrent-dqn
            inputs = next(rb_iterator)
            step_loss, extra = train_step(inputs)
            step_loss = hvd.allreduce(step_loss)

            # soft-update target networks
            update_start = time.time()
            source_vars, target_vars = update_target_networks(
                agent_model_dict, target_agent_model_dict, tau)
            # print("Target network updates take %4.5f" % (time.time() - update_start))
            td_error = tf.reduce_mean(hvd.allreduce(extra.td_error), axis=0)

            if hvd.local_rank() == 0:
                print(
                    "Iteration: %5d Step loss: %4.4f, TD_error: %3.4f took %4.5f s"
                    % (t, step_loss, td_error, time.time() - batch_q_start))

                # logging step losses to sacred
                add_sacred_log("train.t",
                               int((t - learning_starts) / train_freq), _run)
                add_sacred_log("train.step_loss", float(step_loss), _run)
                add_sacred_log("train.step_td_error", float(td_error), _run)

            avg_td_error = avg_td_error + np.abs(td_error)
        # VALIDATION/CKPT
        if t > learning_starts and t % test_every == 0:
            # trigger evaluation run on only 1 node
            if hvd.local_rank() == 0:
                eval_start = time.time()
                mean_ep_rew, var_ep_rew, _, _ = eval_step(
                    eval_env, vision_model_dict, agent_model_dict)
                avg_td_error = avg_td_error / float(
                    (t - learning_starts) / train_freq)

                print(
                    "Evaluation after: %5d steps avg_ep_return: %4.5f running_avg_td_error: %4.5f took %4.5f s"
                    % (t, mean_ep_rew, avg_td_error, time.time() - eval_start))

                # logging avg. episodic rewards to sacred
                add_sacred_log("test.t", int(
                    (t - learning_starts) / train_freq), _run)
                add_sacred_log("test.mean_ep_return", float(mean_ep_rew), _run)
                add_sacred_log("test.var_ep_return", float(var_ep_rew), _run)
                add_sacred_log("test.avg_td_error", float(avg_td_error), _run)

                avg_td_error = 0.0

                # ckpt model based on eval-run scores
                if mean_ep_rew > 0.95 * best_eval_score:
                    best_eval_score = mean_ep_rew
                    # Horovod: save checkpoints only on worker 0 to prevent other workers from
                    # corrupting it.
                    policy_ckpt.save(ckpts_prefix + '_agent_net')
                    kpt_enc_ckpt.save(ckpts_prefix + '_kpt_encoder')
                    if kpt_encoder_type == "gnn":
                        node_enc_ckpt.save(ckpts_prefix + '_node_enc')
                        pos_enc_ckpt.save(ckpts_prefix + '_pos_net')

    if hvd.local_rank() == 0:
        print("Training complete!!!")
Code example #7
def eval_step(eval_env, vision_model_dict, agent_model_dict, eval_eps,
              max_eval_ep, agent_size, lsp_layers, kp_type, mask_threshold,
              patch_sizes, img_size, kpt_encoder_type, noise_type,
              mp_num_steps):

    # Run max_eval_ep number of episodes using greedy-policy inferred
    # from q-function and compute avg. episodic reward
    eval_ep_rewards = [0.0]
    obs = eval_env.reset()
    reset = True
    num_ep = 0
    eval_c_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)
    eval_h_tm1 = tf.Variable(tf.zeros((1, agent_size)), trainable=False)

    while num_ep < max_eval_ep:
        obs_float = np.asarray(obs[None, :, :, :], dtype=np.float32) / 255.0
        # sometimes add distractors
        if noise_type != "none":
            obs_float = add_noise(obs_float[0, :, :, :], noise_type)
            obs_float = obs_float[None, :, :, :]
        # vision-module forward pass
        bottom_up_maps, encoder_features, kpt_centers = vision_forward_pass(
            tf.constant(obs_float), vision_model_dict, lsp_layers, kp_type,
            patch_sizes, img_size)

        # compute keypoint encodings
        bottom_up_features = encode_keypoints(
            bottom_up_maps,
            encoder_features,
            kpt_centers,
            mask_threshold,
            kp_type,
            kpt_encoder_type,
            mp_num_steps,
            q_learn=False,
            pos_net=agent_model_dict.get("pos_net"),
            node_encoder=agent_model_dict.get("node_enc"),
            kpt_encoder=agent_model_dict.get(
                "kpt_encoder"))  # passes None if not available

        # agent step
        action, eval_h_t, eval_c_t = agent_model_dict["agent_net"].step(
            bottom_up_features, [eval_h_tm1, eval_c_tm1],
            eval_eps,
            training=False,
            stochastic=True)
        # env step
        new_obs, rew, done, _ = eval_env.step(action)
        eval_ep_rewards[-1] += rew
        obs = new_obs
        eval_h_tm1, eval_c_tm1 = eval_h_t, eval_c_t
        # episode termination
        if done:
            obs = eval_env.reset()
            # reset lstm cell state at episode end
            eval_c_tm1 = tf.Variable(tf.zeros((1, agent_size)),
                                     trainable=False)
            eval_h_tm1 = tf.Variable(tf.zeros((1, agent_size)),
                                     trainable=False)
            num_ep = num_ep + 1
            # if hvd.local_rank() == 0:
            # 	print(eval_ep_rewards[-1])
            eval_ep_rewards.append(0.0)
            reset = True

    # log episodic return stats
    avg_eval_ep_return = np.mean(np.array(eval_ep_rewards[0:-1]), axis=0)
    std_ep_return = np.std(np.array(eval_ep_rewards[0:-1]), axis=0)
    min_ep_return = np.amin(np.array(eval_ep_rewards[0:-1]), axis=0)
    max_ep_return = np.amax(np.array(eval_ep_rewards[0:-1]), axis=0)
    return avg_eval_ep_return, std_ep_return, min_ep_return, max_ep_return
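
The train loop in example #6 builds exploration = exploration_policy(num_iters, exp_fraction, final_eps) and reads exploration.value(t). That signature is consistent with the linear epsilon schedule common in DQN code; a sketch under that assumption (initial_eps is an invented default):

class LinearSchedule:
    """Epsilon decayed linearly over the first exp_fraction of training."""

    def __init__(self, num_iters, exp_fraction, final_eps, initial_eps=1.0):
        self.schedule_steps = max(1, int(num_iters * exp_fraction))
        self.final_eps = final_eps
        self.initial_eps = initial_eps

    def value(self, t):
        # fraction of the schedule elapsed, capped at 1
        frac = min(float(t) / self.schedule_steps, 1.0)
        return self.initial_eps + frac * (self.final_eps - self.initial_eps)
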
Code example #8
def sample_datasets(hyperparams):
    print("Getting data...", end=" ")
    sys.stdout.flush()

    seed = hyperparams['seed']
    n_inputs = hyperparams['n_inputs']
    # Number of examples to sample from each dataset
    num_examples = hyperparams['num_examples']
    # Number of synthetic examples to sample from each dataset
    num_synthetic_examples = hyperparams['num_synthetic_examples']

    sweeps = []
    sweeps_labels = []
    if num_examples != 0:
        for i, data_file in enumerate(hyperparams['datasets']):
            temp_data = np.load("../../data_training/" + data_file + ".npz")['sweeps']
            # np.random.seed(seed + i)
            # np.random.shuffle(temp_data)
            temp_data = temp_data[:num_examples]  # numpy slicing clamps to the array length
            # Remove offsets
            temp_data[:, 256:512] -= np.mean(temp_data[:, 256:256 + 32], axis=1, keepdims=True)
            sweeps.append(temp_data)

            try:
                temp_data_labels = np.load("../../data_training/" + data_file + "_labels.npz")['labels']
            except Exception:
                print("Labels not found for {}".format(data_file))
                temp_data_labels = np.zeros((temp_data.shape[0], 3))
            sweeps_labels.append(temp_data_labels)
        sweeps_labels = np.concatenate(sweeps_labels, axis=0)

        sweeps = np.concatenate(sweeps, axis=0)
        # Add 4 zeros after each sweep -- the first zero is a flag indicating whether the
        #   physical parameters (ne, Vp, Te) are included in the loss function calculation.
        #   They are not included for real sweeps because those have not been analyzed yet.
        #   The remaining 3 zeros are the physical parameters themselves.
        sweeps = np.concatenate([sweeps, np.zeros((sweeps.shape[0], 4))], axis=1)
        sweeps = np.concatenate([sweeps, sweeps_labels], axis=1)

        print("Real examples: {}...".format(sweeps.shape[0]), end=" ")
        sys.stdout.flush()

    if num_synthetic_examples != 0:
        sweeps_synthetic = []
        for i, data_file in enumerate(hyperparams['datasets_synthetic']):
            temp_data = np.load("../../data_synthetic/" + data_file + ".npz")['sweeps']
            # np.random.seed(seed + i + 1000)
            # np.random.shuffle(temp_data)
            temp_data = temp_data[:num_synthetic_examples]  # slicing clamps to the array length
            temp_data[:, 0:n_inputs * 2] = preprocess.add_noise(temp_data[:, 0:n_inputs * 2],
                                                                hyperparams, epoch=0)
            temp_data[:, 0:n_inputs * 2] = preprocess.add_offset(temp_data[:, 0:n_inputs * 2],
                                                                 hyperparams, epoch=0)
            sweeps_synthetic.append(temp_data)
        sweeps_synthetic = np.concatenate(sweeps_synthetic, axis=0)
        # Insert flag indicating that these are not bad sweeps (they're good).
        # sweeps_synthetic = np.insert(sweeps_synthetic, n_inputs * 2 + 1, 0, axis=1)
        sweeps_synthetic = np.concatenate([sweeps_synthetic,
                                           np.zeros((sweeps_synthetic.shape[0], 3))], axis=1)

        print("Synthetic examples: {}...".format(sweeps_synthetic.shape[0]), end=" ")
        sys.stdout.flush()

        if len(sweeps) != 0:
            sweeps = np.concatenate([sweeps, sweeps_synthetic])
        else:
            sweeps = sweeps_synthetic
        del sweeps_synthetic

    # Find the voltage sweep and current means and peak-to-peaks so the model is easier to train.
    vsweep_mean = np.full(hyperparams['n_inputs'], np.mean(sweeps[:, 0:n_inputs]))
    vsweep_ptp = np.full(hyperparams['n_inputs'], np.ptp(sweeps[:, 0:n_inputs]))
    current_mean = np.full(hyperparams['n_inputs'], np.mean(sweeps[:, n_inputs:n_inputs * 2]))
    current_ptp = np.full(hyperparams['n_inputs'], np.ptp(sweeps[:, n_inputs:n_inputs * 2]))
    # Combine the two so we have a nice neat X, y, and scalings tuple returned by the function.
    data_mean = np.concatenate((vsweep_mean, current_mean))
    data_ptp = np.concatenate((vsweep_ptp, current_ptp))

    # Voltage and current sweeps are already concatenated.
    # Centering and scaling the input so that it's easier to train.
    sweeps[:, 0:n_inputs * 2] = (sweeps[:, 0:n_inputs * 2] - data_mean) / data_ptp
    data_train, data_test, data_valid = preprocess.shuffle_split_data(sweeps, hyperparams)
    print("Done.")

    return data_train, data_test, data_valid, data_mean, data_ptp
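
preprocess.shuffle_split_data is not shown on this page; its call site implies a (train, test, valid) return. A minimal sketch under that assumption (the split fractions are invented for illustration):

import numpy as np

def shuffle_split_data(sweeps, hyperparams, frac_test=0.1, frac_valid=0.1):
    """Shuffle rows with the experiment seed, then split into train/test/valid."""
    rng = np.random.default_rng(hyperparams['seed'])
    idx = rng.permutation(sweeps.shape[0])
    n_test = int(frac_test * len(idx))
    n_valid = int(frac_valid * len(idx))
    data_test = sweeps[idx[:n_test]]
    data_valid = sweeps[idx[n_test:n_test + n_valid]]
    data_train = sweeps[idx[n_test + n_valid:]]
    return data_train, data_test, data_valid
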