def run_full_pipeline(metadata: Metadata, model_wrapper: ModelWrapper, model_type: ModelType):
    print_cuda_info()
    restore_transformations = False
    if os.path.exists(os.path.join(metadata.model_dir, MODEL_FILENAME)):
        Metadata.restore_from_json(metadata, f"{model_wrapper.model_dir}/metadata.json")
        if metadata.training_finished:
            print(f"\n\n\nModel at {metadata.model_dir} already finished training.")
            return
        else:
            print(f"\n\n\nModel at {metadata.model_dir} already exists, restoring this model.")
            model_wrapper.load()
            restore_transformations = True
    else:
        os.makedirs(metadata.model_dir, exist_ok=True)

    metadata.num_params = model_wrapper.num_parameters()
    dataset = Dataset(metadata)
    train_loader, val_loader, test_loader = create_data_loaders(
        dataset,
        metadata,
        model=model_type,
        transformations=TransformationsManager.get_transformations(metadata.transformations),
    )
    if restore_transformations:
        train_loader.tm.transformations_count = metadata.train_transformations["transformations_count"]
        val_loader.tm.transformations_count = metadata.val_transformations["transformations_count"]

    train_model(
        metadata=metadata,
        wrapper=model_wrapper,
        train_loader=train_loader,
        val_loader=val_loader,
        gan=(model_type == ModelType.SEGAN),
    )

    test_mse_loss = evaluate(model_wrapper, test_loader)
    print(f"Test set mse loss: {test_mse_loss}")
    metadata.test_mse_loss = test_mse_loss
    metadata.training_finished = True
    metadata.test_transformations = test_loader.tm.get_info()
    metadata.save_to_json(TRAINING_RESULTS_FILENAME)
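
# A minimal sketch of the `evaluate` helper used by these pipelines (its real
# implementation lives elsewhere in the repository). The signature and the
# returned MSE scalar follow the calls above; the `.model` and `.device`
# attributes on ModelWrapper and the (noisy, clean) batch layout are assumptions.
import torch
import torch.nn.functional as F


def evaluate_sketch(model_wrapper, loader) -> float:
    model = model_wrapper.model
    model.eval()
    total, count = 0.0, 0
    with torch.no_grad():
        for noisy, clean in loader:
            noisy = noisy.to(model_wrapper.device)
            clean = clean.to(model_wrapper.device)
            pred = model(noisy)
            total += F.mse_loss(pred, clean, reduction="sum").item()
            count += clean.numel()
    return total / count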
def run_overfit(metadata: Metadata, model_wrapper: ModelWrapper, model_type: ModelType):
    print_cuda_info()
    os.makedirs(metadata.model_dir, exist_ok=True)
    if os.path.exists(os.path.join(metadata.model_dir, MODEL_FILENAME)):
        Metadata.restore_from_json(metadata, f"{model_wrapper.model_dir}/metadata.json")
        if metadata.training_finished:
            print(f"\n\n\nModel at {metadata.model_dir} already finished training.")
            return
        else:
            print(f"\n\n\nModel at {metadata.model_dir} already exists, restoring this model.")
            model_wrapper.load()
    else:
        os.makedirs(metadata.model_dir, exist_ok=True)

    metadata.num_params = model_wrapper.num_parameters()
    dataset = Dataset(metadata)
    train_loader, _, _ = create_data_loaders(
        dataset,
        metadata,
        model=model_type,
        transformations=TransformationsManager.get_transformations("none"),
    )
    eval_loader = deepcopy(train_loader)
    # if training gan, disable additional noisy inputs from datasets
    if model_type == ModelType.SEGAN:
        eval_loader.train_gan = False

    train_model(
        metadata=metadata,
        wrapper=model_wrapper,
        train_loader=train_loader,
        val_loader=eval_loader,
        gan=(model_type == ModelType.SEGAN),
    )

    test_mse_loss = evaluate(model_wrapper, eval_loader)
    print(f"Final mse loss: {test_mse_loss}")
    metadata.final_mse_loss = test_mse_loss
    metadata.training_finished = True
    metadata.save_to_json(TRAINING_RESULTS_FILENAME)
def test_all_dataloaders():
    # Test the dataloaders for the autoencoder, WaveNet and SEGAN models.
    # Assert that they output correctly padded data (i.e. all outputs have the same dimension).
    dataset = Dataset(metadata=Metadata.get_mock())

    # autoencoder
    assert_dataloader_correct(DataLoader(Metadata.get_mock(), dataset, train_gan=False))
    # wavenet
    assert_dataloader_correct(DataLoader(Metadata.get_mock(), dataset, train_gan=False))
    # segan
    assert_dataloader_correct(DataLoader(Metadata.get_mock(), dataset, train_gan=True))
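
# A minimal sketch of the `assert_dataloader_correct` helper referenced above
# (its real implementation is not shown in this snippet). It only checks the
# padding property the test describes; the (input, target) batch layout is an
# assumption about this codebase.
def assert_dataloader_correct_sketch(loader):
    lengths = set()
    for batch in loader:
        inputs, targets = batch[0], batch[1]
        assert inputs.shape[-1] == targets.shape[-1], "input/target length mismatch"
        lengths.add(inputs.shape[-1])
    assert len(lengths) == 1, f"batches were padded to different lengths: {lengths}"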
def get_test_set_files(metadata: Dict) -> List[str]:
    @dataclass
    class MockMetadata:
        input_sr: int
        target_sr: int
        random_seed: int
        train_files: int
        val_files: int
        test_files: int

    metadata = MockMetadata(
        input_sr=metadata["input_sr"],
        target_sr=metadata["target_sr"],
        random_seed=metadata["random_seed"],
        train_files=metadata["train_files"],
        val_files=metadata["val_files"],
        test_files=metadata["test_files"],
    )
    dataset = Dataset(metadata)
    test_files = dataset.files[:metadata.test_files]
    return test_files
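
# Hypothetical usage of get_test_set_files: the dict is typically what a saved
# metadata.json deserializes to (see the pipelines above). The path below is
# illustrative only, not a real file in this repository.
import json

with open("trained_models/example_run/metadata.json") as fh:
    test_files = get_test_set_files(json.load(fh))
print(f"{len(test_files)} files in the test split")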
def learn(env, model_path, data_path, policy_fn, *,
          horizon=150,  # timesteps per actor per update
          rolloutSize=50,
          clip_param=0.2, entcoeff=0.02,  # clipping parameter epsilon, entropy coeff
          optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=32,  # optimization hypers
          gamma=0.99, lam=0.95,  # advantage estimation
          max_iters=0,  # time constraint
          adam_epsilon=1e-4,
          schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
          retrain=False):
    # Setup losses and policy
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space, ac_space)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(dtype=tf.float32, shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return
    lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[])  # learning rate multiplier, updated with schedule

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
    pol_surr = -tf.reduce_mean(tf.minimum(surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function(
        [], [],
        updates=[tf.assign(oldv, newv)
                 for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables())])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards
    p = []  # for saving the rollouts

    if retrain:
        print("Retraining the policy from saved path")
        time.sleep(2)
        U.load_state(model_path)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        print("Collecting samples for policy optimization !!")
        if iters_so_far > 70:
            render = True
        else:
            render = False
        rollouts = sample_trajectory(pi, env, horizon=horizon, rolloutSize=rolloutSize,
                                     stochastic=True, render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + 'rollout_data.pkl'
        pickle.dump(p, open(data_file_name, "wb"))

        add_vtarg_and_adv(rollouts, gamma, lam)
        ob, ac, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts["adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret), deterministic=pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = []  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"], batch["atarg"],
                                            batch["vtarg"], cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
def main(): """Main process""" args = parse_args() data_path = os.path.realpath(args.data_path) save_path = os.path.realpath(args.save_path) ckpt_path = os.path.realpath(args.ckpt_path) # Hyperparameters LEARNING_RATE = 0.001 TOTAL_ITERS = 100 BATCH_SIZE = 256 # Prepare data (image_train, label_train), (image_val, label_val) = Mnist.load( data_path, one_hot=True) train_dataset = Dataset(image_train, label_train, BATCH_SIZE) # Build network input_image = tf.placeholder( 'float', [None, Mnist.IMAGE_WIDTH, Mnist.IMAGE_HEIGHT, 1]) input_label = tf.placeholder('float', [None, Mnist.CLASSES]) logits = conv_net_example(input_image, Mnist.CLASSES) # Decompse graph and get restore variables, # after building model and before building optimizer decompose_graph(save_path) variables_to_restore = tf.global_variables() restorer = tf.train.Saver(variables_to_restore) # Loss and optimizer loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=input_label)) optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE) train_op = optimizer.minimize( loss_op, global_step=tf.train.get_global_step()) correct = tf.equal(tf.argmax(logits, 1), tf.argmax(input_label, 1)) accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) variables_to_init = [ v for v in tf.global_variables() if v not in variables_to_restore] init = tf.variables_initializer(variables_to_init) saver = tf.train.Saver() with tf.Session() as sess: sess.run(init) # Restore the decomposed ckpt files for finetuning restorer.restore(sess, args.save_path) # Train loop for step in range(1, TOTAL_ITERS + 1): batch_image, batch_label = train_dataset.next_batch() sess.run(train_op, feed_dict={ input_image: batch_image, input_label: batch_label}) if step == 1 or step % 20 == 0: loss, acc = sess.run([loss_op, accuracy], feed_dict={ input_image: batch_image, input_label: batch_label}) print('Step: {}/{}, Train Loss: {:.4f}, Train Accuracy: {:.3f}' .format(step, TOTAL_ITERS, loss, acc)) if step == TOTAL_ITERS: os.makedirs(os.path.dirname(ckpt_path), exist_ok=True) saver.save(sess, ckpt_path, global_step=step) # Validation print('Validation Accuracy:', sess.run(accuracy, feed_dict={ input_image: image_val, input_label: label_val})) # Convert to pb file graph_def = sess.graph.as_graph_def() output_graph_def = tf.graph_util.convert_variables_to_constants( sess, graph_def, ['dense/BiasAdd']) with tf.io.gfile.GFile(ckpt_path + '.pb', 'wb') as fid: fid.write(output_graph_def.SerializeToString())
def learn(env, model_path, data_path, policy_fn, *,
          rolloutSize, num_options=4, horizon=80,
          clip_param=0.025, ent_coeff=0.01,  # clipping parameter epsilon, entropy coeff
          optim_epochs=10, mainlr=3.25e-4, intlr=1e-4, piolr=1e-4, termlr=5e-7,
          optim_batchsize=100,  # optimization hypers
          gamma=0.99, lam=0.95,  # advantage estimation
          max_iters=20,  # time constraint
          adam_epsilon=1e-5,
          schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
          retrain=False,
          ):
    """
    Core learning function
    """
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_fn("pi", ob_space, ac_space, num_options=num_options)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space, num_options=num_options)  # Network for old policy
    atarg = tf.placeholder(dtype=tf.float32, shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return
    lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    ob = U.get_placeholder_cached(name="ob")
    option = U.get_placeholder_cached(name="option")
    term_adv = U.get_placeholder(name='term_adv', dtype=tf.float32, shape=[None])
    op_adv = tf.placeholder(dtype=tf.float32, shape=[None])  # advantage of the policy over options
    betas = tf.placeholder(dtype=tf.float32, shape=[None])  # option termination probabilities
    ac = pi.pdtype.sample_placeholder([None])

    # Setup losses and stuff
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-ent_coeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
    pol_surr = -tf.reduce_mean(tf.minimum(surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    term_loss = pi.tpred * term_adv

    activated_options = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    pi_w = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    option_hot = tf.one_hot(option, depth=num_options)
    pi_I = (pi.intfc * activated_options) * pi_w / tf.expand_dims(
        tf.reduce_sum((pi.intfc * activated_options) * pi_w, axis=1), 1)
    pi_I = tf.clip_by_value(pi_I, 1e-6, 1 - 1e-6)
    int_loss = -tf.reduce_sum(betas * tf.reduce_sum(pi_I * option_hot, axis=1) * op_adv)

    intfc = tf.placeholder(dtype=tf.float32, shape=[None, num_options])
    pi_I = (intfc * activated_options) * pi.op_pi / tf.expand_dims(
        tf.reduce_sum((intfc * activated_options) * pi.op_pi, axis=1), 1)
    pi_I = tf.clip_by_value(pi_I, 1e-6, 1 - 1e-6)
    op_loss = -tf.reduce_sum(betas * tf.reduce_sum(pi_I * option_hot, axis=1) * op_adv)

    log_pi = tf.log(tf.clip_by_value(pi.op_pi, 1e-20, 1.0))
    op_entropy = -tf.reduce_mean(pi.op_pi * log_pi, reduction_indices=1)
    op_loss -= 0.01 * tf.reduce_sum(op_entropy)

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult, option],
                             losses + [U.flatgrad(total_loss, var_list)])
    termgrad = U.function([ob, option, term_adv],
                          [U.flatgrad(term_loss, var_list)])  # Since we will use a different step size.
    opgrad = U.function([ob, option, betas, op_adv, intfc, activated_options],
                        [U.flatgrad(op_loss, var_list)])  # Since we will use a different step size.
    intgrad = U.function([ob, option, betas, op_adv, pi_w, activated_options],
                         [U.flatgrad(int_loss, var_list)])  # Since we will use a different step size.
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function(
        [], [],
        updates=[tf.assign(oldv, newv)
                 for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables())])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult, option], losses)

    U.initialize()
    adam.sync()

    episodes_so_far = 0
    timesteps_so_far = 0
    global iters_so_far
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=5)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=5)  # rolling buffer for episode rewards
    datas = [0 for _ in range(num_options)]

    if retrain:
        print("Retraining to New Task !!")
        time.sleep(2)
        U.load_state(model_path + '/')

    p = []
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)
        render = False

        rollouts = sample_trajectory(pi, env, horizon=horizon, rolloutSize=rolloutSize, render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + 'rollout_data.pkl'
        pickle.dump(p, open(data_file_name, "wb"))

        add_vtarg_and_adv(rollouts, gamma, lam, num_options)

        opt_d = []
        for i in range(num_options):
            dur = np.mean(rollouts['opt_dur'][i]) if len(rollouts['opt_dur'][i]) > 0 else 0.
            opt_d.append(dur)

        ob, ac, opts, atarg, tdlamret = rollouts["ob"], rollouts["ac"], rollouts["opts"], rollouts["adv"], rollouts["tdlamret"]
        atarg = (atarg - atarg.mean()) / atarg.std()  # standardized advantage function estimate

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy
        assign_old_eq_new()  # set old parameter values to new parameter values

        # Optimizing the policy
        for opt in range(num_options):
            indices = np.where(opts == opt)[0]
            print("Option- ", opt, " Batch Size: ", indices.size)
            opt_d[opt] = indices.size
            if not indices.size:
                continue

            datas[opt] = d = Dataset(dict(ob=ob[indices], ac=ac[indices], atarg=atarg[indices],
                                          vtarg=tdlamret[indices]), shuffle=not pi.recurrent)

            if indices.size < optim_batchsize:
                print("Too few samples for opt - ", opt)
                continue

            optim_batchsize_corrected = optim_batchsize
            optim_epochs_corrected = np.clip(int(indices.size / optim_batchsize_corrected), 1, optim_epochs)
            print("Optim Epochs:", optim_epochs_corrected)
            logger.log("Optimizing...")
            # Here we do a bunch of optimization epochs over the data
            for _ in range(optim_epochs_corrected):
                losses = []  # list of tuples, each of which gives the loss for a minibatch
                for batch in d.iterate_once(optim_batchsize_corrected):
                    *newlosses, grads = lossandgrad(batch["ob"], batch["ac"], batch["atarg"],
                                                    batch["vtarg"], cur_lrmult, [opt])
                    adam.update(grads, mainlr * cur_lrmult)
                    losses.append(newlosses)

        # Optimize termination functions
        termg = termgrad(rollouts["ob"], rollouts['opts'], rollouts["op_adv"])[0]
        adam.update(termg, termlr)
        # Optimize interest functions
        intgrads = intgrad(rollouts['ob'], rollouts['opts'], rollouts["last_betas"], rollouts["op_adv"],
                           rollouts["op_probs"], rollouts["activated_options"])[0]
        adam.update(intgrads, intlr)
        # Optimize policy over options
        opgrads = opgrad(rollouts['ob'], rollouts['opts'], rollouts["last_betas"], rollouts["op_adv"],
                         rollouts["intfc"], rollouts["activated_options"])[0]
        adam.update(opgrads, piolr)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    return pi
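
# `flatten_lists`, used in the MPI bookkeeping of the learn() variants above,
# is presumably just a list-of-lists flattener; a one-line sketch (the real
# helper lives elsewhere in this codebase):
def flatten_lists_sketch(listoflists):
    return [el for sublist in listoflists for el in sublist]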
    parser.add_argument(
        "-mask_prob",
        default=0.5,
        type=float,
        help="The probability of observed entities",
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = get_parameters()
    for arg in vars(args):
        print("{}: {}".format(arg, getattr(args, arg)))

    dataset = Dataset(args.dataset, args.cons_mask, args.mask_prob)
    outKG_trainer = OutKGTrainer(dataset, args)

    print("~~~~ Training ~~~~")
    outKG_trainer.train()

    if args.val or True:
        with torch.no_grad():
            print("~~~~ Select best epoch on validation set ~~~~")
            epochs2test = [
                str(int(args.save_each * i)) for i in range(args.ne // args.save_each)
            ]
            best_mrr = -1.0
            best_epoch = "0"
            valid_performance = None
L1rate = 75817.94
lumi_bm = 2e-2
lumi_real = 122.792 / 7319
L1rate_bm = L1rate * lumi_bm / lumi_real

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("plotName", help="tag added to the name of saved pdf plot")
    args = parser.parse_args()
    plot_name = args.plotName

    # load taus from VBF dataset
    dataset_eff = Dataset(fileName_eff, treeName_in, treeName_gen)
    taus = dataset_eff.get_taus()
    tau_leading, tau_subleading = get_leading_pair(taus, 20)

    pt_bins = [20, 35, 42, 49, 58, 72, 94, 126, 169, 221, 279, 343, 414, 491]
    ratio = []
    deep_thr = []
    eff_iso = []

    for i in range(0, len(pt_bins)):
        pt_min = pt_bins[i]
        if i != len(pt_bins) - 1:
            pt_max = pt_bins[i + 1]
            bin_mask = (tau_leading.pt >= pt_min) & (tau_leading.pt < pt_max) & \
                       (tau_subleading.pt >= pt_min) & (tau_subleading.pt < pt_max)
        else:
            bin_mask = (tau_leading.pt >= pt_min) & (tau_subleading.pt >= pt_min)
def learn(env, model_path, data_path, policy_fn, model_learning_params, svm_grid_params,
          svm_params_interest, svm_params_guard, *,
          modes, rolloutSize, num_options=2,
          horizon,  # timesteps per actor per update
          clip_param, ent_coeff=0.02,  # clipping parameter epsilon, entropy coeff
          optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=160,  # optimization hypers
          gamma=0.99, lam=0.95,  # advantage estimation
          max_iters=0,  # time constraint
          adam_epsilon=1.2e-4,
          schedule='linear',  # annealing for stepsize parameters (epsilon and adam)
          retrain=False
          ):
    """
    Core learning function
    """
    ob_space = env.observation_space
    ac_space = env.action_space

    if retrain:
        model = pickle.load(open(model_path + '/hybrid_model.pkl', 'rb'))
        print("Model graph:", model.transitionGraph.nodes)
        print("Model options:", model.transitionGraph.edges)
    else:
        model = partialHybridModel(env, model_learning_params, svm_grid_params, svm_params_interest,
                                   svm_params_guard, horizon, modes, num_options, rolloutSize)
    pi = policy_fn("pi", ob_space, ac_space, model, num_options)  # Construct network for new policy
    oldpi = policy_fn("oldpi", ob_space, ac_space, model, num_options)  # Network for old policy
    atarg = tf1.placeholder(dtype=tf1.float32, shape=[None])  # Target advantage function (if applicable)
    ret = tf1.placeholder(dtype=tf1.float32, shape=[None])  # Empirical return
    lrmult = tf1.placeholder(name='lrmult', dtype=tf1.float32, shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    # Define placeholders for computing the advantage
    ob = U.get_placeholder_cached(name="ob")
    option = U.get_placeholder_cached(name="option")
    ac = pi.pdtype.sample_placeholder([None])

    # Defining losses for optimization
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf1.reduce_mean(kloldnew)
    meanent = tf1.reduce_mean(ent)
    pol_entpen = (-ent_coeff) * meanent

    ratio = tf1.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf1.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
    pol_surr = -tf1.reduce_mean(tf1.minimum(surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP), negative to convert from a maximization to a minimization problem
    vf_loss = tf1.reduce_mean(tf1.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult, option],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    assign_old_eq_new = U.function(
        [], [],
        updates=[tf1.assign(oldv, newv)
                 for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables())])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult, option], losses)

    U.initialize()
    adam.sync()

    # Prepare for rollouts
    episodes_so_far = 0
    timesteps_so_far = 0
    global iters_so_far
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=10)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=10)  # rolling buffer for episode rewards
    p = []  # for saving the rollouts

    if retrain:
        print("Retraining to New Task !!")
        time.sleep(2)
        U.load_state(model_path + '/')
        print(pi.eps)
    max_timesteps = int(horizon * rolloutSize * max_iters)

    while True:
        if max_iters and iters_so_far >= max_iters:
            break
        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("************* Iteration %i *************" % iters_so_far)
        print("Collecting samples for policy optimization !!")
        render = False

        rollouts = sample_trajectory(pi, model, env, horizon=horizon, rolloutSize=rolloutSize, render=render)
        # Save rollouts
        data = {'rollouts': rollouts}
        p.append(data)
        del data
        data_file_name = data_path + '/rollout_data.pkl'
        pickle.dump(p, open(data_file_name, "wb"))

        # Model update
        print("Updating model !!\n")
        model.updateModel(rollouts, pi)
        print("Model graph:", model.transitionGraph.nodes)
        print("Model options:", model.transitionGraph.edges)
        edges = list(model.transitionGraph.edges)
        for i in range(0, len(edges)):
            print(edges[i][0], " -> ", edges[i][1], " : ",
                  model.transitionGraph[edges[i][0]][edges[i][1]]['weight'])

        datas = [0 for _ in range(num_options)]
        add_vtarg_and_adv(rollouts, pi, gamma, lam, num_options)

        ob, ac, opts, atarg, tdlamret = rollouts["seg_obs"], rollouts["seg_acs"], rollouts["des_opts"], rollouts["adv"], rollouts["tdlamret"]
        old_opts = rollouts["seg_opts"]
        similarity = 0
        for i in range(0, len(old_opts)):
            if old_opts[i] == opts[i]:
                similarity += 1
        print("Percentage similarity of options: ", similarity / len(old_opts) * 100)

        vpredbefore = rollouts["vpreds"]  # predicted value function before update
        atarg = (atarg - atarg.mean()) / atarg.std()  # standardized advantage function estimate

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy
        assign_old_eq_new()
        pi.eps = pi.eps * gamma  # reduce exploration

        # Optimizing the policy
        print("\nOptimizing policy !! \n")
        for opt in range(num_options):
            indices = np.where(opts == opt)[0]
            print("Option- ", opt, " Batch Size: ", indices.size)
            if not indices.size:
                continue

            datas[opt] = d = Dataset(dict(ob=ob[indices], ac=ac[indices], atarg=atarg[indices],
                                          vtarg=tdlamret[indices]), shuffle=not pi.recurrent)

            if indices.size < optim_batchsize:
                print("Too few samples for opt - ", opt)
                continue

            optim_batchsize_corrected = optim_batchsize
            optim_epochs_corrected = np.clip(int(indices.size / optim_batchsize_corrected), 1, optim_epochs)
            print("Optim Epochs:", optim_epochs_corrected)
            logger.log("Optimizing...")
            # Here we do a bunch of optimization epochs over the data
            for _ in range(optim_epochs_corrected):
                losses = []  # list of tuples, each of which gives the loss for a minibatch
                for batch in d.iterate_once(optim_batchsize_corrected):
                    *newlosses, grads = lossandgrad(batch["ob"], batch["ac"], batch["atarg"],
                                                    batch["vtarg"], cur_lrmult, [opt])
                    if np.isnan(newlosses).any():
                        continue
                    adam.update(grads, optim_stepsize * cur_lrmult)
                    losses.append(newlosses)

            if len(losses) > 0:
                meanlosses, _, _ = mpi_moments(losses, axis=0)
                print("Mean loss ", meanlosses)
                for (lossval, name) in zipsame(meanlosses, loss_names):
                    logger.record_tabular("loss_" + name, lossval)

        lrlocal = (rollouts["ep_lens"], rollouts["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("Success", rollouts["success"])
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()

    '''
    if model_path and not retrain:
        U.save_state(model_path + '/')
        model_file_name = model_path + '/hybrid_model.pkl'
        pickle.dump(model, open(model_file_name, "wb"), pickle.HIGHEST_PROTOCOL)
        print("Policy and Model saved in - ", model_path)
    '''
    return pi, model
treeName_gen = "gen_counter" treeName_in = "final_counter" Pt_thr_list = [20, 25, 30, 35, 40, 45] # Pt_thr_list = [20] isocut_vars = { "loose": ["looseIsoAbs", "looseIsoRel"], "medium": ["mediumIsoAbs", "mediumIsoRel"], "tight": ["tightIsoAbs", "tightIsoRel"] } colors = ["green", "red", "orange"] # get VBF sample print("Loading sample for efficiency") dataset_eff = Dataset(data_path + fileName_eff, treeName_in, treeName_gen) taus = dataset_eff.get_taus() gen_taus = dataset_eff.get_gen_taus() # get taus before any selection original_taus = dataset_eff.get_taus(apply_selection=False) # get sample for rate computation print("Loading sample for rate") if args.qcd: # get QCD sample QCD_taus_list = [] QCD_xs_list = [] QCD_den_list = [] with open(QCD_fileJson, "r") as json_file: samples = json.load(json_file)
treeName_gen = "gen_counter" L1rate = 75817.94 lumi_bm = 2e-2 lumi_real = 122.792 / 7319 L1rate_bm = L1rate * lumi_bm / lumi_real def eff_presel_inbin(pt_min, pt_max): bin_mask = (tau_leading.pt > pt_min) & (tau_leading.pt < pt_max) & (tau_subleading.pt > pt_min) & (tau_subleading.pt < pt_max) eff_presel = ak.sum(bin_mask) return eff_presel if __name__ == '__main__': # load taus from VBF and Zprime dataset dataset_eff = Dataset(fileName_eff, treeName_in, treeName_gen) taus = dataset_eff.get_taus() tau_leading, tau_subleading = get_leading_pair(taus) n_bins = 100 eff_mean = 400 pt_min = 20 pt_bins = [20] for i in range(n_bins-1): if eff_presel_inbin(pt_min, 2000) <= eff_mean: break def f(pt_max): eff_presel = eff_presel_inbin(pt_min, pt_max) return eff_presel - eff_mean
def learn(
        env,
        agent,
        optimizer,
        scheduler,
        comm,
        timesteps_per_actorbatch,  # timesteps per actor per update
        clip_param,
        entcoeff,  # clipping parameter epsilon, entropy coeff
        optim_epochs,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        checkpoint_dir,
        model_name,
        max_timesteps=0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,
        schedule='linear'):
    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(agent, env, timesteps_per_actorbatch)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    gradient_steps_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "ent"]

    assert sum([max_iters > 0, max_timesteps > 0, max_episodes > 0, max_seconds > 0]) == 1, \
        "Only one time constraint permitted"

    while True:
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        logger.log("********** Iteration %i ************" % iters_so_far)

        epsilon_mult_dict = {
            'constant': 1.0,
            'linear': max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        }
        current_clip_param = epsilon_mult_dict[schedule] * clip_param

        seg = next(seg_gen)
        add_vtarg_and_adv(seg, gamma, lam)

        ob, ac, logprobs, adv, tdlamret = seg["ob"], seg["ac"], seg["logprobs"], seg["adv"], seg["tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before update
        adv = (adv - adv.mean()) / adv.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, logprobs=logprobs, adv=adv, vtarg=tdlamret),
                    deterministic=False)  # nonrecurrent

        logger.log("Optimizing...")
        logger.log(fmt_row(13, loss_names))

        # Here we do a bunch of optimization epochs over the data
        agent.train()
        for _ in range(optim_epochs):
            losses = []  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                pol_surr, pol_entpen, vf_loss, ent = compute_losses(
                    batch, agent, entcoeff, current_clip_param)
                total_loss = pol_surr + pol_entpen + vf_loss

                optimizer.zero_grad()
                total_loss.backward()
                with tc.no_grad():
                    for p in agent.parameters():
                        g_old = p.grad.numpy()
                        g_new = np.zeros_like(g_old)
                        comm.Allreduce(sendbuf=g_old, recvbuf=g_new, op=MPI.SUM)
                        p.grad.copy_(tc.tensor(g_new).float() / comm.Get_size())
                optimizer.step()
                scheduler.step()
                gradient_steps_so_far += 1

                # sync agent parameters from the process with rank zero. They should
                # stay synced automatically; this is just a failsafe.
                if gradient_steps_so_far > 0 and gradient_steps_so_far % 100 == 0:
                    with tc.no_grad():
                        for p in agent.parameters():
                            p_data = p.data.numpy()
                            comm.Bcast(p_data, root=0)
                            p.data.copy_(tc.tensor(p_data).float())

                newlosses = (pol_surr.detach().numpy(), pol_entpen.detach().numpy(),
                             vf_loss.detach().numpy(), ent.detach().numpy())
                losses.append(newlosses)
            logger.log(fmt_row(13, np.mean(losses, axis=0)))

        logger.log("Evaluating losses...")
        losses = []
        for batch in d.iterate_once(optim_batchsize):
            newlosses = compute_losses(batch, agent, entcoeff, current_clip_param)
            losses.append(tuple(map(lambda loss: loss.detach().numpy(), newlosses)))
        meanlosses, _, _ = mpi_moments(losses, axis=0)
        logger.log(fmt_row(13, meanlosses))

        for (lossval, name) in zipsame(meanlosses, loss_names):
            logger.record_tabular("loss_" + name, lossval)
        logger.record_tabular("ev_tdlam_before", explained_variance(vpredbefore, tdlamret))

        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if comm.Get_rank() == 0:
            logger.dump_tabular()

        if iters_so_far > 0 and iters_so_far % 10 == 0:
            print("Saving checkpoint...")
            os.makedirs(os.path.join(checkpoint_dir, model_name), exist_ok=True)
            tc.save(agent.state_dict(), os.path.join(checkpoint_dir, model_name, 'model.pth'))
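
# A minimal sketch of the `compute_losses` helper used in the loop above. The
# batch keys ("ob", "ac", "logprobs", "adv", "vtarg") come from the Dataset
# built there; the agent call signature (returning a torch.distributions
# action distribution and a value estimate) is an assumption about this
# codebase, not something the snippet confirms.
def compute_losses_sketch(batch, agent, entcoeff, clip_param):
    ob = tc.as_tensor(batch["ob"]).float()
    ac = tc.as_tensor(batch["ac"])
    old_logprobs = tc.as_tensor(batch["logprobs"]).float()
    adv = tc.as_tensor(batch["adv"]).float()
    vtarg = tc.as_tensor(batch["vtarg"]).float()

    pd, vpred = agent(ob)                      # assumed: (action distribution, value tensor)
    logprobs = pd.log_prob(ac)
    ent = pd.entropy().mean()
    pol_entpen = -entcoeff * ent

    ratio = tc.exp(logprobs - old_logprobs)    # pi_new / pi_old
    surr1 = ratio * adv
    surr2 = tc.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * adv
    pol_surr = -tc.min(surr1, surr2).mean()    # PPO clipped surrogate

    vf_loss = ((vpred - vtarg) ** 2).mean()
    return pol_surr, pol_entpen, vf_loss, ent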
plot_path = '/plots/newPlots_CMSSW_11_2_0/'
fileName_eff = "/data/newSamples_CMSSW_11_2_0/VBFHToTauTau.root"
treeName_gen = "gen_counter"
treeName_in = "final_counter"

cutbased_vars = {
    "loose": ["looseIsoAbs", "looseIsoRel"],
    "medium": ["mediumIsoAbs", "mediumIsoRel"],
    "tight": ["tightIsoAbs", "tightIsoRel"]
}
colors = ['green', 'red', 'orange']

with PdfPages(plot_path + 'deepTau_output_{}.pdf'.format(plot_name)) as pdf:
    # get trees from file
    dataset_VBF = Dataset(fileName_eff, treeName_in, treeName_gen)
    L2taus = dataset_VBF.get_taus(apply_selection=True)

    # generate ROC curve for deepTau_VSjet
    fpr, tpr, thr, pred, truth = ROC_fromTuples(L2taus)
    score = auc(fpr, tpr)
    print("AUC ROC:", score)

    plt.yscale('log')
    plt.xlabel(r'$\tau$ ID efficiency')
    plt.ylabel('jet misID probability')
    plt.title("deepTau ROC curve in VBF simulation")
    plt.plot(tpr, fpr, '-', label="AUC-ROC score: {}".format(round(score, 4)))

    i = 0
    for key, value in cutbased_vars.items():
        tpr_cut, fpr_cut = cutbased_eff_flattentuples(L2taus, value[0], value[1])
def evaluate(self, dataset: Dataset):
    _, x_test, _, y_test = dataset.get_training_test_sets()
    self._test(x_test, y_test, "test")
    print(self._model.score(x_test, y_test))
def fit(self, dataset: Dataset):
    x_train, _, y_train, _ = dataset.get_training_test_sets()
    self._model.fit(x_train, y_train)
    self._test(x_train, y_train, "train")
    mlflow.sklearn.log_model(self._model, "linear_model")
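
# Hypothetical usage of the two methods above, assuming they belong to a small
# sklearn-wrapper class (here called `LinearModelTrainer`, a made-up name) and
# that an MLflow run is active so `log_model` has something to attach to. The
# import paths and data file are illustrative only.
import mlflow

from my_project.dataset import Dataset
from my_project.models import LinearModelTrainer

dataset = Dataset("data/train.csv")
trainer = LinearModelTrainer()
with mlflow.start_run():
    trainer.fit(dataset)       # trains, logs train metrics, logs the model to MLflow
    trainer.evaluate(dataset)  # logs test metrics and prints the model's test score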