def run_full_pipeline(metadata: Metadata, model_wrapper: ModelWrapper,
                      model_type: ModelType):
    print_cuda_info()
    restore_transformations = False

    if os.path.exists(os.path.join(metadata.model_dir, MODEL_FILENAME)):
        Metadata.restore_from_json(metadata,
                                   f"{model_wrapper.model_dir}/metadata.json")
        if metadata.training_finished:
            print(
                f"\n\n\nModel at {metadata.model_dir} already finished training."
            )
            return
        else:
            print(
                f"\n\n\nModel at {metadata.model_dir} already exists, restoring this model."
            )
            model_wrapper.load()
            restore_transformations = True
    else:
        os.makedirs(metadata.model_dir, exist_ok=True)

    metadata.num_params = model_wrapper.num_parameters()

    dataset = Dataset(metadata)

    train_loader, val_loader, test_loader = create_data_loaders(
        dataset,
        metadata,
        model=model_type,
        transformations=TransformationsManager.get_transformations(
            metadata.transformations),
    )

    if restore_transformations:
        train_loader.tm.transformations_count = metadata.train_transformations[
            "transformations_count"]
        val_loader.tm.transformations_count = metadata.val_transformations[
            "transformations_count"]

    train_model(
        metadata=metadata,
        wrapper=model_wrapper,
        train_loader=train_loader,
        val_loader=val_loader,
        gan=(model_type == ModelType.SEGAN),
    )

    test_mse_loss = evaluate(model_wrapper, test_loader)
    print(f"Test set mse loss: {test_mse_loss}")

    metadata.test_mse_loss = test_mse_loss
    metadata.training_finished = True
    metadata.test_transformations = test_loader.tm.get_info()

    metadata.save_to_json(TRAINING_RESULTS_FILENAME)


def run_overfit(metadata: Metadata, model_wrapper: ModelWrapper,
                model_type: ModelType):
    print_cuda_info()

    os.makedirs(metadata.model_dir, exist_ok=True)

    if os.path.exists(os.path.join(metadata.model_dir, MODEL_FILENAME)):
        Metadata.restore_from_json(metadata,
                                   f"{model_wrapper.model_dir}/metadata.json")
        if metadata.training_finished:
            print(
                f"\n\n\nModel at {metadata.model_dir} already finished training."
            )
            return
        else:
            print(
                f"\n\n\nModel at {metadata.model_dir} already exists, restoring this model."
            )
            model_wrapper.load()
    else:
        os.makedirs(metadata.model_dir, exist_ok=True)

    metadata.num_params = model_wrapper.num_parameters()

    dataset = Dataset(metadata)

    train_loader, _, _ = create_data_loaders(
        dataset,
        metadata,
        model=model_type,
        transformations=TransformationsManager.get_transformations("none"),
    )

    eval_loader = deepcopy(train_loader)
    # if training a GAN, disable the additional noisy inputs from the dataset
    if model_type == ModelType.SEGAN:
        eval_loader.train_gan = False

    train_model(
        metadata=metadata,
        wrapper=model_wrapper,
        train_loader=train_loader,
        val_loader=eval_loader,
        gan=(model_type == ModelType.SEGAN),
    )

    test_mse_loss = evaluate(model_wrapper, eval_loader)
    print(f"Final mse loss: {test_mse_loss}")

    metadata.final_mse_loss = test_mse_loss
    metadata.training_finished = True
    metadata.save_to_json(TRAINING_RESULTS_FILENAME)
Example 3
def test_all_dataloaders():
    # test dataloaders for autoencoder, wavenet and segan
    # assert that they output correctly padded data (meaning all outputs have the same dimension)
    dataset = Dataset(metadata=Metadata.get_mock())

    # autoencoder
    assert_dataloader_correct(
        DataLoader(Metadata.get_mock(), dataset, train_gan=False))

    # wavenet
    assert_dataloader_correct(
        DataLoader(Metadata.get_mock(), dataset, train_gan=False))

    # segan
    assert_dataloader_correct(
        DataLoader(Metadata.get_mock(), dataset, train_gan=True))
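
assert_dataloader_correct is not shown in this snippet; a minimal sketch of what such a helper might check, assuming each batch is an (input, target) pair of equally padded tensors, could look like:

def assert_dataloader_correct(loader, max_batches=10):
    # Hypothetical helper: every batch drawn from the loader should share the
    # same padded sample shape for both inputs and targets.
    expected_shape = None
    for i, (inputs, targets) in enumerate(loader):
        if i >= max_batches:
            break
        if expected_shape is None:
            expected_shape = inputs.shape[1:]
        assert inputs.shape[1:] == expected_shape
        assert targets.shape[1:] == expected_shape
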
def get_test_set_files(metadata: Dict) -> List[str]:
    @dataclass
    class MockMetadata:
        input_sr: int
        target_sr: int
        random_seed: int
        train_files: int
        val_files: int
        test_files: int

    metadata = MockMetadata(
        input_sr=metadata["input_sr"],
        target_sr=metadata["target_sr"],
        random_seed=metadata["random_seed"],
        train_files=metadata["train_files"],
        val_files=metadata["val_files"],
        test_files=metadata["test_files"],
    )
    dataset = Dataset(metadata)
    test_files = dataset.files[:metadata.test_files]
    return test_files
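
A possible call site (a sketch only; the metadata.json path is an assumption):

import json

with open("model_dir/metadata.json") as fp:  # hypothetical path
    metadata_dict = json.load(fp)
test_files = get_test_set_files(metadata_dict)
print(f"{len(test_files)} held-out test files")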
Example 5
def learn(
        env,
        model_path,
        data_path,
        policy_fn,
        *,
        horizon=150,  # timesteps per actor per update
        rolloutSize=50,
        clip_param=0.2,
        entcoeff=0.02,  # clipping parameter epsilon, entropy coeff
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=32,  # optimization hypers
        gamma=0.99,
        lam=0.95,  # advantage estimation
        max_iters=0,  # time constraint
        adam_epsilon=1e-4,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        retrain=False):

    # Setup losses and policy
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return
    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                             1.0 + clip_param) * atarg
    pol_surr = -tf.reduce_mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards

    p = []  # for saving the rollouts

    if retrain:
        print("Retraining the policy from saved path")
        time.sleep(2)
        U.load_state(model_path)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        print("Collecting samples for policy optimization !! ")
        if iters_so_far > 70:
            render = True
        else:
            render = False
        rollouts = sample_trajectory(pi,
                                     env,
                                     horizon=horizon,
                                     rolloutSize=rolloutSize,
                                     stochastic=True,
                                     render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + 'rollout_data.pkl'
        with open(data_file_name, "wb") as f:
            pickle.dump(p, f)

        add_vtarg_and_adv(rollouts, gamma, lam)

        ob, ac, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts[
            "adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    deterministic=pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
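
The clipped surrogate assembled above with TensorFlow ops is PPO's L^CLIP objective; as a plain-NumPy illustration (not part of the pipeline, the function name is mine):

import numpy as np

def clipped_surrogate(logp_new, logp_old, adv, clip_param=0.2):
    # PPO pessimistic surrogate, negated so it can be minimized
    ratio = np.exp(logp_new - logp_old)          # pnew / pold
    surr1 = ratio * adv
    surr2 = np.clip(ratio, 1.0 - clip_param, 1.0 + clip_param) * adv
    return -np.mean(np.minimum(surr1, surr2))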
Example 6
def main():
    """Main process"""
    args = parse_args()
    data_path = os.path.realpath(args.data_path)
    save_path = os.path.realpath(args.save_path)
    ckpt_path = os.path.realpath(args.ckpt_path)

    # Hyperparameters
    LEARNING_RATE = 0.001
    TOTAL_ITERS = 100
    BATCH_SIZE = 256

    # Prepare data
    (image_train, label_train), (image_val, label_val) = Mnist.load(
        data_path, one_hot=True)
    train_dataset = Dataset(image_train, label_train, BATCH_SIZE)

    # Build network
    input_image = tf.placeholder(
        'float', [None, Mnist.IMAGE_WIDTH, Mnist.IMAGE_HEIGHT, 1])
    input_label = tf.placeholder('float', [None, Mnist.CLASSES])
    logits = conv_net_example(input_image, Mnist.CLASSES)

    # Decompose the graph and get the variables to restore,
    # after building the model and before building the optimizer
    decompose_graph(save_path)
    variables_to_restore = tf.global_variables()
    restorer = tf.train.Saver(variables_to_restore)

    # Loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=input_label))
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    train_op = optimizer.minimize(
        loss_op, global_step=tf.train.get_global_step())
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(input_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    variables_to_init = [
        v for v in tf.global_variables() if v not in variables_to_restore]
    init = tf.variables_initializer(variables_to_init)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        # Restore the decomposed ckpt files for finetuning
        restorer.restore(sess, args.save_path)

        # Train loop
        for step in range(1, TOTAL_ITERS + 1):
            batch_image, batch_label = train_dataset.next_batch()
            sess.run(train_op, feed_dict={
                input_image: batch_image, input_label: batch_label})
            if step == 1 or step % 20 == 0:
                loss, acc = sess.run([loss_op, accuracy], feed_dict={
                    input_image: batch_image, input_label: batch_label})
                print('Step: {}/{}, Train Loss: {:.4f}, Train Accuracy: {:.3f}'
                      .format(step, TOTAL_ITERS, loss, acc))
            if step == TOTAL_ITERS:
                os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
                saver.save(sess, ckpt_path, global_step=step)

        # Validation
        print('Validation Accuracy:', sess.run(accuracy, feed_dict={
            input_image: image_val, input_label: label_val}))

        # Convert to pb file
        graph_def = sess.graph.as_graph_def()
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, graph_def, ['dense/BiasAdd'])
        with tf.io.gfile.GFile(ckpt_path + '.pb', 'wb') as fid:
            fid.write(output_graph_def.SerializeToString())
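
One way the frozen .pb graph written above could be loaded back for inference (a sketch using the same TF1-style API; only the output tensor name 'dense/BiasAdd' comes from the code above, everything else is an assumption):

import tensorflow as tf  # TF1.x API, as in the example above

def load_frozen_graph(pb_path):
    # Read the serialized GraphDef and import it into a fresh graph.
    with tf.io.gfile.GFile(pb_path, 'rb') as fid:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(fid.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

# Hypothetical usage:
# graph = load_frozen_graph(ckpt_path + '.pb')
# logits = graph.get_tensor_by_name('dense/BiasAdd:0')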
Example 7
def learn(env, model_path, data_path, policy_fn, *,
          rolloutSize, num_options=4, horizon=80,
          clip_param=0.025, ent_coeff=0.01,  # clipping parameter epsilon, entropy coeff
          optim_epochs=10, mainlr=3.25e-4, intlr=1e-4, piolr=1e-4, termlr=5e-7, optim_batchsize=100,  # optimization hypers
          gamma=0.99, lam=0.95,  # advantage estimation
          max_iters=20,  # time constraint
          adam_epsilon=1e-5,
          schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
          retrain=False,
          ):
    """
        Core learning function
    """
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space, ac_space, num_options=num_options)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space, num_options=num_options)  # Network for old policy
    atarg = tf.placeholder(dtype=tf.float32, shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(name='lrmult', dtype=tf.float32,
                            shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    ob = U.get_placeholder_cached(name="ob")
    option = U.get_placeholder_cached(name="option")
    term_adv = U.get_placeholder(name='term_adv', dtype=tf.float32, shape=[None])
    op_adv = tf.placeholder(dtype=tf.float32, shape=[None])  # Target advantage function (if applicable)
    betas = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    ac = pi.pdtype.sample_placeholder([None])

    # Setup losses and stuff
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-ent_coeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
    pol_surr = - tf.reduce_mean(tf.minimum(surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)

    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    term_loss = pi.tpred * term_adv

    activated_options = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    pi_w = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    option_hot = tf.one_hot(option, depth=num_options)
    pi_I = (pi.intfc * activated_options) * pi_w / tf.expand_dims(
        tf.reduce_sum((pi.intfc * activated_options) * pi_w, axis=1), 1)
    pi_I = tf.clip_by_value(pi_I, 1e-6, 1 - 1e-6)
    int_loss = - tf.reduce_sum(betas * tf.reduce_sum(pi_I * option_hot, axis=1) * op_adv)

    intfc = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    pi_I = (intfc * activated_options) * pi.op_pi / tf.expand_dims(
        tf.reduce_sum((intfc * activated_options) * pi.op_pi, axis=1), 1)
    pi_I = tf.clip_by_value(pi_I, 1e-6, 1 - 1e-6)
    op_loss = - tf.reduce_sum(betas * tf.reduce_sum(pi_I * option_hot, axis=1) * op_adv)

    log_pi = tf.log(tf.clip_by_value(pi.op_pi, 1e-20, 1.0))
    op_entropy = -tf.reduce_mean(pi.op_pi * log_pi, reduction_indices=1)
    op_loss -= 0.01 * tf.reduce_sum(op_entropy)

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult, option], losses + [U.flatgrad(total_loss, var_list)])
    termgrad = U.function([ob, option, term_adv],
                          [U.flatgrad(term_loss, var_list)])  # Since we will use a different step size.
    opgrad = U.function([ob, option, betas, op_adv, intfc, activated_options],
                        [U.flatgrad(op_loss, var_list)])  # Since we will use a different step size.
    intgrad = U.function([ob, option, betas, op_adv, pi_w, activated_options],
                         [U.flatgrad(int_loss, var_list)])  # Since we will use a different step size.
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function([], [], updates=[tf.assign(oldv, newv)
                                                    for (oldv, newv) in
                                                    zipsame(oldpi.get_variables(), pi.get_variables())])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult, option], losses)

    U.initialize()
    adam.sync()

    episodes_so_far = 0
    timesteps_so_far = 0
    global iters_so_far
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards

    datas = [0 for _ in range(num_options)]

    if retrain:
        print("Retraining to New Task !! ")
        time.sleep(2)
        U.load_state(model_path+'/')

    p = []
    max_timesteps = int(horizon * rolloutSize * max_iters)
    while True:
        if max_iters and iters_so_far >= max_iters:
            break

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        render = False

        rollouts = sample_trajectory(pi, env, horizon=horizon, rolloutSize=rolloutSize, render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + 'rollout_data.pkl'
        with open(data_file_name, "wb") as f:
            pickle.dump(p, f)

        add_vtarg_and_adv(rollouts, gamma, lam, num_options)

        opt_d = []
        for i in range(num_options):
            dur = np.mean(rollouts['opt_dur'][i]) if len(rollouts['opt_dur'][i]) > 0 else 0.
            opt_d.append(dur)

        ob, ac, opts, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts["opts"], rollouts["adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()) / atarg.std()  # standardized advantage function estimate

        if hasattr(pi, "ob_rms"): pi.ob_rms.update(ob)  # update running mean/std for policy
        assign_old_eq_new()  # set old parameter values to new parameter values

        # Optimizing the policy
        for opt in range(num_options):
            indices = np.where(opts == opt)[0]
            print("Option- ", opt, " Batch Size: ", indices.size)
            opt_d[opt] = indices.size
            if not indices.size:
                continue

            datas[opt] = d = Dataset(dict(ob=ob[indices], ac=ac[indices], atarg=atarg[indices], vtarg=tdlamret[indices]), shuffle=not pi.recurrent)

            if indices.size < optim_batchsize:
                print("Too few samples for opt - ", opt)
                continue

            optim_batchsize_corrected = optim_batchsize
            optim_epochs_corrected = np.clip(int(indices.size / optim_batchsize_corrected), 1, optim_epochs)
            print("Optim Epochs:", optim_epochs_corrected)
            logger.log("Optimizing...")
            # Here we do a bunch of optimization epochs over the data

            for _ in range(optim_epochs_corrected):
                losses = []  # list of tuples, each of which gives the loss for a minibatch
                for batch in d.iterate_once(optim_batchsize_corrected):
                    *newlosses, grads = lossandgrad(batch["ob"], batch["ac"], batch["atarg"], batch["vtarg"],
                                                    cur_lrmult, [opt])
                    adam.update(grads, mainlr * cur_lrmult)
                    losses.append(newlosses)

            # Optimize termination functions
            termg = termgrad(rollouts["ob"], rollouts['opts'], rollouts["op_adv"])[0]
            adam.update(termg, termlr)

            # Optimize interest functions
            intgrads = intgrad(rollouts['ob'], rollouts['opts'], rollouts["last_betas"], rollouts["op_adv"], rollouts["op_probs"], rollouts["activated_options"])[0]
            adam.update(intgrads, intlr)

        # Optimize policy over options
        opgrads = opgrad(rollouts['ob'], rollouts['opts'], rollouts["last_betas"], rollouts["op_adv"], rollouts["intfc"], rollouts["activated_options"])[0]
        adam.update(opgrads, piolr)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
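
The pi_I expressions above re-weight the policy over options by the interest functions, restricted to the currently activated options and renormalized; a small NumPy illustration of that normalization (the function name is mine):

import numpy as np

def interest_weighted_option_probs(op_pi, intfc, activated_options, eps=1e-6):
    # op_pi, intfc, activated_options: arrays of shape [batch, num_options]
    weighted = intfc * activated_options * op_pi
    pi_I = weighted / np.sum(weighted, axis=1, keepdims=True)
    return np.clip(pi_I, eps, 1.0 - eps)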
Example 8
    parser.add_argument(
        "-mask_prob",
        default=0.5,
        type=float,
        help="The probability of observed entities",
    )
    args = parser.parse_args()

    return args


if __name__ == "__main__":
    args = get_parameters()
    for arg in vars(args):
        print("{}: {}".format(arg, getattr(args, arg)))
    dataset = Dataset(args.dataset, args.cons_mask, args.mask_prob)
    outKG_trainer = OutKGTrainer(dataset, args)

    print("~~~~ Training ~~~~")
    outKG_trainer.train()

    if args.val or True:
        with torch.no_grad():
            print("~~~~ Select best epoch on validation set ~~~~")
            epochs2test = [
                str(int(args.save_each * i))
                for i in range(args.ne // args.save_each)
            ]
            best_mrr = -1.0
            best_epoch = "0"
            valid_performance = None

L1rate = 75817.94
lumi_bm = 2e-2
lumi_real = 122.792 / 7319
L1rate_bm = L1rate * lumi_bm / lumi_real

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("plotName", help="tag added to the name of saved pdf plot")
    args = parser.parse_args()

    plot_name = args.plotName

    # load taus from VBF dataset
    dataset_eff = Dataset(fileName_eff, treeName_in, treeName_gen)
    taus = dataset_eff.get_taus()
    tau_leading, tau_subleading = get_leading_pair(taus, 20)

    pt_bins = [20, 35, 42, 49, 58, 72, 94, 126, 169, 221, 279, 343, 414, 491]
    ratio = []
    deep_thr = []
    eff_iso = []
    for i in range(0, len(pt_bins)):
        pt_min = pt_bins[i]
        if i != len(pt_bins) - 1:
            pt_max = pt_bins[i + 1]
            bin_mask = (tau_leading.pt >= pt_min) & (tau_leading.pt < pt_max) & (tau_subleading.pt >= pt_min) & (
                    tau_subleading.pt < pt_max)
        else:
            bin_mask = (tau_leading.pt >= pt_min) & (tau_subleading.pt >= pt_min)
Example 10
def learn(env, model_path, data_path, policy_fn, model_learning_params, svm_grid_params, svm_params_interest,
          svm_params_guard, *, modes, rolloutSize, num_options=2,
          horizon,  # timesteps per actor per update
          clip_param, ent_coeff=0.02,  # clipping parameter epsilon, entropy coeff
          optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=160,  # optimization hypers
          gamma=0.99, lam=0.95,  # advantage estimation
          max_iters=0,  # time constraint
          adam_epsilon=1.2e-4,
          schedule='linear',  # annealing for stepsize parameters (epsilon and adam)
          retrain=False
          ):
    """
        Core learning function
    """

    ob_space = env.observation_space
    ac_space = env.action_space
    if retrain:
        model = pickle.load(open(model_path + '/hybrid_model.pkl', 'rb'))
        print("Model graph:", model.transitionGraph.nodes)
        print("Model options:", model.transitionGraph.edges)
    else:
        model = partialHybridModel(env, model_learning_params, svm_grid_params, svm_params_interest, svm_params_guard, horizon, modes, num_options, rolloutSize)
    pi = policy_fn("pi", ob_space, ac_space, model, num_options)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space, model, num_options)  # Network for old policy
    atarg = tf1.placeholder(dtype=tf1.float32, shape=[None])  # Target advantage function (if applicable)
    ret = tf1.placeholder(dtype=tf1.float32, shape=[None])  # Empirical return

    lrmult = tf1.placeholder(name='lrmult', dtype=tf1.float32,
                             shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    # Define placeholders for computing the advantage
    ob = U.get_placeholder_cached(name="ob")
    option = U.get_placeholder_cached(name="option")
    ac = pi.pdtype.sample_placeholder([None])

    # Defining losses for optimization
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf1.reduce_mean(kloldnew)
    meanent = tf1.reduce_mean(ent)
    pol_entpen = (-ent_coeff) * meanent

    ratio = tf1.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf1.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
    pol_surr = - tf1.reduce_mean(tf1.minimum(surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP), negative to convert from a maximization to minimization problem
    vf_loss = tf1.reduce_mean(tf1.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult, option], losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function([], [], updates=[tf1.assign(oldv, newv) for (oldv, newv) in
                                                    zipsame(oldpi.get_variables(), pi.get_variables())])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult, option], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    episodes_so_far = 0
    timesteps_so_far = 0
    global iters_so_far
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=10)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=10)  # rolling buffer for episode rewards

    p = []  # for saving the rollouts

    if retrain:
        print("Retraining to New Task !!")
        time.sleep(2)
        U.load_state(model_path+'/')
        print(pi.eps)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("************* Iteration %i *************" % iters_so_far)
        print("Collecting samples for policy optimization !! ")
        render = False

        rollouts = sample_trajectory(pi, model, env, horizon=horizon, rolloutSize=rolloutSize, render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + '/rollout_data.pkl'
        with open(data_file_name, "wb") as f:
            pickle.dump(p, f)

        # Model update
        print("Updating model !!\n")
        model.updateModel(rollouts, pi)
        print("Model graph:", model.transitionGraph.nodes)
        print("Model options:", model.transitionGraph.edges)
        edges = list(model.transitionGraph.edges)
        for i in range(0, len(edges)):
            print(edges[i][0], " -> ", edges[i][1], " : ", model.transitionGraph[edges[i][0]][edges[i][1]]['weight'])

        datas = [0 for _ in range(num_options)]
        add_vtarg_and_adv(rollouts, pi, gamma, lam, num_options)

        ob, ac, opts, atarg, tdlamret = rollouts["seg_obs"], rollouts["seg_acs"], rollouts["des_opts"], rollouts["adv"], rollouts["tdlamret"]
        old_opts = rollouts["seg_opts"]
        similarity = 0
        for i in range(0, len(old_opts)):
            if old_opts[i] == opts[i]:
                similarity += 1

        print("Percentage similarity of options: ", similarity/len(old_opts) * 100)

        vpredbefore = rollouts["vpreds"]  # predicted value function before update
        atarg = (atarg - atarg.mean()) / atarg.std()  # standardized advantage function estimate
        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy
        assign_old_eq_new()

        pi.eps = pi.eps * gamma  # reduce exploration

        # Optimizing the policy
        print("\nOptimizing policy !! \n")
        for opt in range(num_options):
            indices = np.where(opts == opt)[0]
            print("Option- ", opt, " Batch Size: ", indices.size)
            if not indices.size:
                continue

            datas[opt] = d = Dataset(dict(ob=ob[indices], ac=ac[indices], atarg=atarg[indices], vtarg=tdlamret[indices]), shuffle=not pi.recurrent)

            if indices.size < optim_batchsize:
                print("Too few samples for opt - ", opt)
                continue

            optim_batchsize_corrected = optim_batchsize
            optim_epochs_corrected = np.clip(int(indices.size / optim_batchsize_corrected), 1, optim_epochs)
            print("Optim Epochs:", optim_epochs_corrected)
            logger.log("Optimizing...")
            # Here we do a bunch of optimization epochs over the data
            for _ in range(optim_epochs_corrected):
                losses = []  # list of tuples, each of which gives the loss for a minibatch
                for batch in d.iterate_once(optim_batchsize_corrected):
                    *newlosses, grads = lossandgrad(batch["ob"], batch["ac"], batch["atarg"], batch["vtarg"], cur_lrmult, [opt])
                    if np.isnan(newlosses).any():
                        continue
                    adam.update(grads, optim_stepsize * cur_lrmult)
                    losses.append(newlosses)
        if len(losses) > 0:
            meanlosses, _, _ = mpi_moments(losses, axis=0)
            print("Mean loss ", meanlosses)
            for (lossval, name) in zipsame(meanlosses, loss_names):
                logger.record_tabular("loss_" + name, lossval)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

        '''
        if model_path and not retrain:
            U.save_state(model_path + '/')
            model_file_name = model_path + '/hybrid_model.pkl'
            pickle.dump(model, open(model_file_name, "wb"), pickle.HIGHEST_PROTOCOL)
            print("Policy and Model saved in - ", model_path)
        '''
    return pi, model
Example 11
treeName_gen = "gen_counter"
treeName_in = "final_counter"

Pt_thr_list = [20, 25, 30, 35, 40, 45]
# Pt_thr_list = [20]

isocut_vars = {
    "loose": ["looseIsoAbs", "looseIsoRel"],
    "medium": ["mediumIsoAbs", "mediumIsoRel"],
    "tight": ["tightIsoAbs", "tightIsoRel"]
}
colors = ["green", "red", "orange"]

# get VBF sample
print("Loading sample for efficiency")
dataset_eff = Dataset(data_path + fileName_eff, treeName_in, treeName_gen)
taus = dataset_eff.get_taus()
gen_taus = dataset_eff.get_gen_taus()

# get taus before any selection
original_taus = dataset_eff.get_taus(apply_selection=False)

# get sample for rate computation
print("Loading sample for rate")
if args.qcd:
    # get QCD sample
    QCD_taus_list = []
    QCD_xs_list = []
    QCD_den_list = []
    with open(QCD_fileJson, "r") as json_file:
        samples = json.load(json_file)

treeName_gen = "gen_counter"

L1rate = 75817.94
lumi_bm = 2e-2
lumi_real = 122.792 / 7319
L1rate_bm = L1rate * lumi_bm / lumi_real

def eff_presel_inbin(pt_min, pt_max):
    bin_mask = (tau_leading.pt > pt_min) & (tau_leading.pt < pt_max) & (tau_subleading.pt > pt_min) & (tau_subleading.pt < pt_max)
    eff_presel = ak.sum(bin_mask)
    return eff_presel

if __name__ == '__main__':

    # load taus from VBF and Zprime dataset
    dataset_eff = Dataset(fileName_eff, treeName_in, treeName_gen)
    taus = dataset_eff.get_taus()
    tau_leading, tau_subleading = get_leading_pair(taus)

    n_bins = 100
    eff_mean = 400
    pt_min = 20
    pt_bins = [20]

    for i in range(n_bins-1):
        if eff_presel_inbin(pt_min, 2000) <= eff_mean:
            break
        def f(pt_max):
            eff_presel = eff_presel_inbin(pt_min, pt_max)
            return eff_presel - eff_mean
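        # The snippet is truncated here; a plausible continuation (an assumption,
        # not the original code) would pick the next bin edge as the root of f,
        # so that each bin holds roughly eff_mean tau pairs. f(pt_min) < 0 and
        # f(2000) > 0 because of the break condition above.
        from scipy.optimize import brentq  # hypothetical choice of root finder
        pt_max = brentq(f, pt_min, 2000)
        pt_bins.append(pt_max)
        pt_min = pt_max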
Example 13
def learn(
        env,
        agent,
        optimizer,
        scheduler,
        comm,
        timesteps_per_actorbatch,  # timesteps per actor per update
        clip_param,
        entcoeff,  # clipping parameter epsilon, entropy coeff
        optim_epochs,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        checkpoint_dir,
        model_name,
        max_timesteps=0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,
        schedule='linear'):

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(agent, env, timesteps_per_actorbatch)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    gradient_steps_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "ent"]

    assert sum(
        [max_iters > 0, max_timesteps > 0, max_episodes > 0,
         max_seconds > 0]) == 1, "Only one time constraint permitted"

    while True:
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        logger.log("********** Iteration %i ************" % iters_so_far)

        epsilon_mult_dict = {
            'constant': 1.0,
            'linear': max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        }
        current_clip_param = epsilon_mult_dict[schedule] * clip_param

        seg = next(seg_gen)
        add_vtarg_and_adv(seg, gamma, lam)

        ob, ac, logprobs, adv, tdlamret = seg["ob"], seg["ac"], seg[
            "logprobs"], seg["adv"], seg["tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before update
        adv = (adv - adv.mean()
               ) / adv.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob,
                         ac=ac,
                         logprobs=logprobs,
                         adv=adv,
                         vtarg=tdlamret),
                    deterministic=False)  # nonrecurrent

        logger.log("Optimizing...")
        logger.log(fmt_row(13, loss_names))
        # Here we do a bunch of optimization epochs over the data
        agent.train()
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                pol_surr, pol_entpen, vf_loss, ent = compute_losses(
                    batch, agent, entcoeff, current_clip_param)
                total_loss = pol_surr + pol_entpen + vf_loss

                optimizer.zero_grad()
                total_loss.backward()
                with tc.no_grad():
                    for p in agent.parameters():
                        g_old = p.grad.numpy()
                        g_new = np.zeros_like(g_old)
                        comm.Allreduce(sendbuf=g_old,
                                       recvbuf=g_new,
                                       op=MPI.SUM)
                        p.grad.copy_(
                            tc.tensor(g_new).float() / comm.Get_size())

                optimizer.step()
                scheduler.step()
                gradient_steps_so_far += 1

                # sync agent parameters from process with rank zero. should stay synced automatically,
                # this is just a failsafe
                if gradient_steps_so_far > 0 and gradient_steps_so_far % 100 == 0:
                    with tc.no_grad():
                        for p in agent.parameters():
                            p_data = p.data.numpy()
                            comm.Bcast(p_data, root=0)
                            p.data.copy_(tc.tensor(p_data).float())

                newlosses = (pol_surr.detach().numpy(),
                             pol_entpen.detach().numpy(),
                             vf_loss.detach().numpy(), ent.detach().numpy())
                losses.append(newlosses)
            logger.log(fmt_row(13, np.mean(losses, axis=0)))

        logger.log("Evaluating losses...")
        losses = []
        for batch in d.iterate_once(optim_batchsize):
            newlosses = compute_losses(batch, agent, entcoeff,
                                       current_clip_param)
            losses.append(
                tuple(
                    list(
                        map(lambda loss: loss.detach().numpy(),
                            list(newlosses)))))
        meanlosses, _, _ = mpi_moments(losses, axis=0)
        logger.log(fmt_row(13, meanlosses))
        for (lossval, name) in zipsame(meanlosses, loss_names):
            logger.record_tabular("loss_" + name, lossval)
        logger.record_tabular("ev_tdlam_before",
                              explained_variance(vpredbefore, tdlamret))
        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if comm.Get_rank() == 0:
            logger.dump_tabular()
            if iters_so_far > 0 and iters_so_far % 10 == 0:
                print("Saving checkpoint...")
                os.makedirs(os.path.join(checkpoint_dir, model_name),
                            exist_ok=True)
                tc.save(agent.state_dict(),
                        os.path.join(checkpoint_dir, model_name, 'model.pth'))
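
compute_losses is not shown in this example; a minimal sketch of what it could return, assuming the agent maps observations to an action distribution plus a value prediction and that batch["logprobs"] holds the behaviour-policy log-probabilities (the agent interface here is an assumption, not the original code):

import torch as tc

def compute_losses(batch, agent, entcoeff, clip_param):
    ob = tc.as_tensor(batch["ob"]).float()
    ac = tc.as_tensor(batch["ac"])
    adv = tc.as_tensor(batch["adv"]).float()
    vtarg = tc.as_tensor(batch["vtarg"]).float()
    logp_old = tc.as_tensor(batch["logprobs"]).float()

    pd, vpred = agent(ob)                       # assumed agent interface
    logp_new = pd.log_prob(ac)
    ratio = tc.exp(logp_new - logp_old)         # pnew / pold
    surr1 = ratio * adv
    surr2 = tc.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * adv
    pol_surr = -tc.mean(tc.min(surr1, surr2))   # pessimistic surrogate (L^CLIP)
    ent = pd.entropy().mean()
    pol_entpen = -entcoeff * ent
    vf_loss = tc.mean((vpred.squeeze(-1) - vtarg) ** 2)
    return pol_surr, pol_entpen, vf_loss, ent
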
plot_path = '/plots/newPlots_CMSSW_11_2_0/'
fileName_eff = "/data/newSamples_CMSSW_11_2_0/VBFHToTauTau.root"
treeName_gen = "gen_counter"
treeName_in = "final_counter"

cutbased_vars = {
    "loose": ["looseIsoAbs", "looseIsoRel"],
    "medium": ["mediumIsoAbs", "mediumIsoRel"],
    "tight": ["tightIsoAbs", "tightIsoRel"]
}
colors = ['green', 'red', 'orange']

with PdfPages(plot_path + 'deepTau_output_{}.pdf'.format(plot_name)) as pdf:

    # get trees from file
    dataset_VBF = Dataset(fileName_eff, treeName_in, treeName_gen)
    L2taus = dataset_VBF.get_taus(apply_selection=True)

    # generate ROC curve for deepTau_VSjet
    fpr, tpr, thr, pred, truth = ROC_fromTuples(L2taus)
    score = auc(fpr, tpr)
    print("AUC ROC:", score)
    plt.yscale('log')
    plt.xlabel(r'$\tau$ ID efficiency')
    plt.ylabel('jet misID probability')
    plt.title("deepTau ROC curve in VBF simulation")
    plt.plot(tpr, fpr, '-', label="AUC-ROC score: {}".format(round(score, 4)))
    i = 0
    for key, value in cutbased_vars.items():
        tpr_cut, fpr_cut = cutbased_eff_flattentuples(L2taus, value[0],
                                                      value[1])
Example 15
    def evaluate(self, dataset: Dataset):
        _, x_test, _, y_test = dataset.get_training_test_sets()
        self._test(x_test, y_test, "test")
        print(self._model.score(x_test, y_test))
Example 16
    def fit(self, dataset: Dataset):
        x_train, _, y_train, _ = dataset.get_training_test_sets()
        self._model.fit(x_train, y_train)
        self._test(x_train, y_train, "train")
        mlflow.sklearn.log_model(self._model, "linear_model")
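
These two methods presumably sit on a small model-wrapper class; a sketch of such a surrounding class, with a hypothetical _test helper that logs the split's R^2 score to MLflow (the constructor, model choice, and metric name are assumptions):

import mlflow
from sklearn.linear_model import LinearRegression

class LinearModelWrapper:
    def __init__(self):
        self._model = LinearRegression()

    def _test(self, x, y, split_name):
        # log the R^2 score of the current model on the given split
        mlflow.log_metric(f"{split_name}_r2", self._model.score(x, y))

    # fit(...) and evaluate(...) would be the methods shown in Examples 15 and 16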