def get_compiled(loss_type, net, optimizer, min_margin):
    """Attach the loss function and metrics matching ``loss_type`` to ``net``.

    Relies on module-level globals: ``config`` (hyper-parameters such as
    ``alpha``, ``max_T``, ``batch_size``, ``target_accuracy``, ``optimizer``),
    ``output_shape``, ``x_train``, ``eager`` and ``use_wandb`` —
    NOTE(review): presumably defined at module scope; confirm before moving
    this function.

    Parameters
    ----------
    loss_type : str
        One of 'hkr', 'hinge', 'bce', 'multihkr', 'multice', 'multiminhkr',
        'multitopkhkr'.
    net :
        Keras-style model to compile; also gains an ``eager_loss`` attribute
        holding the loss object.
    optimizer :
        Optimizer instance passed straight to ``net.compile``.
    min_margin : float
        Margin used by the hinge/HKR-style losses.

    Returns
    -------
    tuple
        ``(net, loss_fn)`` — the compiled model and the loss object.

    Raises
    ------
    ValueError
        If ``loss_type`` is not recognised. (Previously an unknown value fell
        through the if/elif chain and crashed later with a NameError on
        ``loss_fn`` — this makes the failure explicit and immediate.)
    """
    if loss_type == 'hkr':
        # HKR stands for the hinge regularized KR loss
        loss_fn = HKR_loss(alpha=config.alpha, min_margin=min_margin)
        metrics = [
            KR_loss(),  # shows the KR term of the loss
            hinge_margin_loss(min_margin=min_margin),  # shows the hinge term of the loss
            HKR_binary_accuracy,  # shows the classification accuracy
        ]
    elif loss_type == 'hinge':
        loss_fn = hinge_margin_loss(min_margin=min_margin)
        metrics = [
            Histogram(1),  # shows the KR term of the loss
            hinge_margin_loss(min_margin=min_margin),  # shows the hinge term of the loss
            HKR_binary_accuracy,  # shows the classification accuracy
        ]
    elif loss_type == 'bce':
        loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        metrics = [LogitBinaryAccuracy()]
    elif loss_type == 'multihkr':
        # One margin per output class, all scaled by min_margin.
        margins = tf.constant([1.] * output_shape) * min_margin
        loss_fn = MultiClassHKR(alpha=config.alpha, margins=margins)
        metrics = ['accuracy']
    elif loss_type == 'multice':
        temperatures = tf.constant([1.] * output_shape) * config.max_T
        loss_fn = CrossEntropyT(temperatures=temperatures)
        metrics = ['accuracy']
    elif loss_type == 'multiminhkr':
        margins = tf.constant([1.] * output_shape) * min_margin
        loss_fn = MinMarginHKR(
            alpha=config.alpha,
            margins=margins,
            num_batchs=math.ceil(x_train.shape[0] / config.batch_size),
            perc=config.target_accuracy * 100)
        metrics = ['accuracy']
        if config.alpha == 'adaptive':
            # MarginWatcher tracks the adaptive margins during training.
            metrics.append(MarginWatcher(loss_fn, use_wandb))
    elif loss_type == 'multitopkhkr':
        margins = tf.constant([1.] * output_shape) * min_margin
        loss_fn = TopKMarginHKR(alpha=config.alpha, margins=margins,
                                top_k=int(output_shape**0.5))
        metrics = ['accuracy']
    else:
        # Fail fast instead of a later NameError on loss_fn/metrics.
        raise ValueError("Unknown loss_type: {!r}".format(loss_type))
    if 'armijo' in config.optimizer:
        # Armijo line-search optimizers expose a step-size metric on the net.
        metrics.append(ArmijoMetric(net.armijo))
    net.compile(loss=loss_fn, metrics=metrics, optimizer=optimizer,
                run_eagerly=eager)
    net.eager_loss = loss_fn
    return net, loss_fn
# Persist the test split to HDF5: one resizable, chunked dataset per column.
log.info("Saving test data to {}".format(filename_test))
with h5py.File(filename_test, 'w') as hf:
    for var in column_names:
        # maxshape=(None,) keeps each dataset extendable along axis 0;
        # 'lzf' is h5py's fast, low-ratio compression filter.
        hf.create_dataset(
            f'{var}',
            data=np.array(data_test[var]),
            chunks=True,
            maxshape=(None,),
            compression='lzf')

#============================================================================
# Plot reweighted data
#============================================================================
weights_train = [weight_trainEst10, weight_trainEst20, weight_trainEst40]
weights_train_names = ["weight_trainEst10", "weight_trainEst20", "weight_trainEst40"]

### Training data
# One 2x2 figure per reweighting estimate: eta, E_T and <mu> distributions
# (signal vs. background via the project's Plot/Histogram helpers) plus the
# distribution of the weights themselves.
for w, wn in zip(weights_train, weights_train_names):
    fig, ax = plt.subplots(2, 2, figsize=(15, 10))
    ax = ax.flatten()
    fig, ax[0] = Plot(Histogram(data_train['pho_eta'][trainMask], data_train["label"][trainMask], w, 120, -4, 4), fig, ax[0], r"$\eta$", includeN = True)
    ax[0].set_yscale('log')
    fig, ax[1] = Plot(Histogram(data_train['pho_et'][trainMask], data_train["label"][trainMask], w, 90, 0, 50), fig, ax[1], r"$E_T$", includeN = False)
    ax[1].set_yscale('log')
    fig, ax[2] = Plot(Histogram(data_train['correctedScaledActualMu'][trainMask], data_train["label"][trainMask], w, 90, 0, 90), fig, ax[2], r"$\langle\mu\rangle$", includeN = False)
    ax[2].set_yscale('log')
    # Raw weight distribution; np.append pads counts to match the edges
    # array so a post-step plot closes the final bin.
    counts_weight, edges_weight = np.histogram(w, bins=120, range=(0, 40))
    ax[3].step(x=edges_weight, y=np.append(counts_weight, 0), where="post", color = "k");
    # NOTE(review): 'nonposy' was deprecated/removed in newer matplotlib
    # (replaced by 'nonpositive') — confirm the pinned matplotlib version.
    ax[3].set_yscale('log', nonposy='clip')
    ax[3].set(xlabel = wn, ylabel = "Events per bin")
    fig.savefig(args.outdir + "train_reweighted" + wn + ".pdf")

### Validation data
weights_valid = [weight_validEst10, weight_validEst20, weight_validEst40]
maxshape=(None, ), compression='lzf') #============================================================================ # Plot reweighted data #============================================================================ weights_train = [weight_trainEst10, weight_trainEst20, weight_trainEst40] weights_train_names = [ "weight_trainEst10", "weight_trainEst20", "weight_trainEst40" ] ### Training data for w, wn in zip(weights_train, weights_train_names): fig, ax = plt.subplots(2, 2, figsize=(15, 10)) ax = ax.flatten() fig, ax[0] = Plot(Histogram(data_train['muo_eta'][trainMask], data_train["label"][trainMask], w, 120, -4, 4), fig, ax[0], r"$\eta$", includeN=True) fig, ax[1] = Plot(Histogram(data_train['muo_pt'][trainMask], data_train["label"][trainMask], w, 120, -5, 120000), fig, ax[1], "pt", includeN=False) fig, ax[2] = Plot(Histogram( data_train['correctedScaledAverageMu'][trainMask], data_train["label"][trainMask], w, 80, -2, 80), fig,
def train_MetaNetwork():
    """Train the meta network that picks which frozen 'dojo' sub-network acts.

    The meta network chooses one of three pretrained, frozen Q-networks
    (diamond / zombie / explore); the chosen dojo then picks the concrete
    environment action. Only the meta network's weights are trained here.
    Runs a TF1-style graph session, logs scalars to TensorBoard, and
    periodically saves the meta model's weights.
    """
    print("\n ---- Training the Meta Network ----- \n")

    # Model identifiers / checkpoint names. The commented-out sets below are
    # earlier experiment configurations kept for reference.
    MODEL_NAME = "meta_grid16_zero_2"
    MODEL_NAME_save = "meta_grid16_zero_2"
    DIAMOND_MODEL_NAME = "diamond_grid16_4"
    ZOMBIE_MODEL_NAME = "zombie_grid16_2"
    EXPLORE_MODEL_NAME = "explore_grid16_2"
    # EXTRA_MODEL_NAME = "extra15_input6_2"
    # MODEL_NAME = "meta15_input6_1M_unfrozen_dojos"
    # DIAMOND_MODEL_NAME = "diamond15_input4_best_unfrozen_at_1M"
    # ZOMBIE_MODEL_NAME = "zombie15_input4_best_unfrozen_at_1M"
    # EXPLORE_MODEL_NAME = "explore15_input4_best_unfrozen_at_1M"
    # MODEL_NAME = "meta15_input6_1M_random_unfrozen_cointoss"
    # DIAMOND_MODEL_NAME = "diamond15_input4_1M_random_unfrozen_cointoss"
    # ZOMBIE_MODEL_NAME = "zombie15_input4_1M_random_unfrozen_cointoss"
    # EXPLORE_MODEL_NAME = "explore15_input4_1M_random_unfrozen_cointoss"k
    FOLDER = "Impossible"
    DOJO_FOLDER = "Impossible"
    MODEL_PATH_SAVE = "./Models/Tensorflow/"+FOLDER+"/"+MODEL_NAME+"/"+MODEL_NAME+".ckpt"
    LOGDIR = "./Logs/"+FOLDER+"/"+MODEL_NAME_save+""

    USE_SAVED_MODEL_FILE = False

    GRID_SIZE = 16
    LOCAL_GRID_SIZE = 15
    MAP_PATH = None
    RANDOMIZE_MAPS = True
    RENDER_TO_SCREEN = False
    # RENDER_TO_SCREEN = True

    env = Environment(wrap = False, grid_size = GRID_SIZE, local_size = LOCAL_GRID_SIZE, rate = 80, max_time = 120, food_count = 0, obstacle_count = 0, lava_count = 0, zombie_count = 0, history = 100, action_space = 5, map_path = MAP_PATH)

    if RENDER_TO_SCREEN:
        env.prerender()

    # The meta network is trainable; the three dojo networks are loaded from
    # disk and frozen (trainable=False).
    model = MetaNetwork(local_size=LOCAL_GRID_SIZE, name=MODEL_NAME, path="./Models/Tensorflow/"+FOLDER+"/", load=False, trainable=True)
    diamond_net = Network(local_size=LOCAL_GRID_SIZE, name=DIAMOND_MODEL_NAME, path="./Models/Tensorflow/"+DOJO_FOLDER+"/", load=True, trainable=False)
    zombie_net = Network(local_size=LOCAL_GRID_SIZE, name=ZOMBIE_MODEL_NAME, path="./Models/Tensorflow/"+DOJO_FOLDER+"/", load=True, trainable=False)
    explore_net = Network(local_size=LOCAL_GRID_SIZE, name=EXPLORE_MODEL_NAME, path="./Models/Tensorflow/"+DOJO_FOLDER+"/", load=True, trainable=False)
    # extra_net = Network(local_size=LOCAL_GRID_SIZE, name=EXTRA_MODEL_NAME, path="./Models/Tensorflow/"+DOJO_FOLDER+"/", load=False, trainable=True)

    # action_space=3: the meta network chooses among the 3 dojos, not among
    # the environment's 5 primitive actions.
    brain = Brain(epsilon=0.05, action_space=3)

    model.setup(brain)
    diamond_net.setup(brain)
    zombie_net.setup(brain)
    explore_net.setup(brain)
    # extra_net.setup(brain)

    # Scalar placeholders fed once per reporting period for TensorBoard.
    score = tf.placeholder(tf.float32, [])
    avg_t = tf.placeholder(tf.float32, [])
    epsilon = tf.placeholder(tf.float32, [])
    avg_r = tf.placeholder(tf.float32, [])

    tf.summary.scalar('error', tf.squeeze(model.error))
    tf.summary.scalar('score', score)
    tf.summary.scalar('average time', avg_t)
    tf.summary.scalar('epsilon', epsilon)
    tf.summary.scalar('avg reward', avg_r)

    # Running accumulators, reset every `print_episode` episodes.
    avg_time = 0
    avg_score = 0
    avg_error = 0
    avg_reward = 0
    cumulative_reward = 0

    # Number of episodes
    print_episode = 1000
    total_episodes = 100000

    saver = tf.train.Saver()

    # Initialising all variables (weights and biases)
    init = tf.global_variables_initializer()

    # Adds a summary graph of the error over time
    merged_summary = tf.summary.merge_all()

    # Tensorboard capabilties
    writer = tf.summary.FileWriter(LOGDIR)

    # Histogram of dojo choices over training sections.
    histogram = Histogram(3, 10, total_episodes)

    # GPU capabilities
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)

    # Begin session
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        if USE_SAVED_MODEL_FILE:
            saver.restore(sess, MODEL_PATH_SAVE)
            print("Model restored.")
        else:
            sess.run(init)

        writer.add_graph(sess.graph)

        start_time = time.time()

        print("")

        for episode in range(total_episodes):

            if RANDOMIZE_MAPS:
                # Make a random map 0: lava, 1: obstacle
                MAP_PATH = "./Maps/Grid{}/impossible_map{}.txt".format(GRID_SIZE, np.random.randint(5))
                env.set_map(MAP_PATH)

            # state, info = env.reset()
            state, info = env.quick_reset()
            done = False

            # Epsilon is held fixed at 0.05 for this run; decay is disabled.
            # brain.linear_epsilon_decay(total_episodes, episode, start=1.0, end=0.05, percentage=0.5)
            # brain.linear_alpha_decay(total_episodes, episode)

            if RENDER_TO_SCREEN:
                env.render()

            while not done:

                # Retrieve the Q values from the NN in vector form
                Dojo_vector = sess.run(model.q_values, feed_dict={model.input: state})

                # Meta network picks a dojo (0=diamond, 1=zombie, 2=explore).
                dojo = brain.choose_action(state, sess, model)
                histogram.check_section(episode)
                histogram.add(dojo)
                # dojo = np.random.randint(3)
                # dojo = 0
                # print(dojo)

                # The chosen frozen dojo picks the concrete action
                # (0.05 = dojo-level exploration epsilon).
                if dojo == 0:
                    dojo_state = state
                    # dojo_state[2]=0
                    # dojo_state[3]=0
                    # dojo_state = np.delete(dojo_state, 2, 0)# Take out the zombie layer
                    # dojo_state = np.delete(dojo_state, 2, 0)# Take out the history layer
                    action = brain.choose_dojo(dojo_state, sess, diamond_net, env.number_of_actions(), 0.05)
                elif dojo == 1:
                    dojo_state = state
                    # dojo_state[1]=0
                    # dojo_state[3]=0
                    # dojo_state = np.delete(dojo_state, 1, 0)# Take out the diamond layer
                    # dojo_state = np.delete(dojo_state, 2, 0)# Take out the history layer
                    action = brain.choose_dojo(dojo_state, sess, zombie_net, env.number_of_actions(), 0.05)
                elif dojo == 2:
                    dojo_state = state
                    # dojo_state[1]=0
                    # dojo_state[2]=0
                    # dojo_state = np.delete(dojo_state, 1, 0)# Take out the diamond layer
                    # dojo_state = np.delete(dojo_state, 1, 0)# Take out the zombie layer
                    action = brain.choose_dojo(dojo_state, sess, explore_net, env.number_of_actions(), 0.05)
                # elif dojo == 3:
                #     dojo_state = state
                #     action = brain.choose_dojo(dojo_state, sess, extra_net, env.number_of_actions(), 0.05)

                # print(action)

                # Update environment by performing the chosen action.
                new_state, reward, done, info = env.step(action)
                # print(new_state)

                brain.store_transition_dojo(state, action, reward, done, new_state, dojo)

                # print(tf.trainable_variables(scope=None))

                # Dojo-network training is disabled for this run (frozen dojos).
                # if dojo == 0:
                #     e, Q_vector = brain.train_3_dojos(diamond_net, sess, dojo)
                # elif dojo == 1:
                #     e, Q_vector = brain.train_3_dojos(zombie_net, sess, dojo)
                # elif dojo == 2:
                #     e, Q_vector = brain.train_3_dojos(explore_net, sess, dojo)
                # e, Q_vector = brain.train_3(sess, diamond_net, zombie_net, explore_net)
                # e, Q_vector = brain.train(extra_net, sess)

                # Q-learning target for the chosen dojo's slot.
                if done:
                    Dojo_vector[:,dojo] = reward
                    # print("Reward:", reward)
                else:
                    # Gathering the now current state's action-value vector
                    y_prime = sess.run(model.q_values, feed_dict={model.input: new_state})

                    # Equation for training
                    maxq = sess.run(model.y_prime_max, feed_dict={model.actions: y_prime})

                    # RL Equation
                    Dojo_vector[:,dojo] = reward + (brain.GAMMA * maxq)

                # One gradient step on the meta network toward Dojo_vector.
                _, e = sess.run([model.optimizer, model.error], feed_dict={model.input: state, model.actions: Dojo_vector})

                state = new_state

                cumulative_reward += reward

                if RENDER_TO_SCREEN:
                    env.render()

                if done:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    avg_error += e
                    avg_reward += cumulative_reward
                    cumulative_reward = 0

            # Periodic progress report, model save and TensorBoard summary.
            if (episode%print_episode == 0 and episode != 0) or (episode == total_episodes-1):
                current_time = math.floor(time.time()-start_time)
                print("Ep:", episode,
                      "\tavg t: {0:.3f}".format(avg_time/print_episode),
                      "\tavg score: {0:.3f}".format(avg_score/print_episode),
                      "\tErr {0:.3f}".format(avg_error/print_episode),
                      "\tavg_reward {0:.3f}".format(avg_reward/print_episode),  # avg cumulative reward
                      "\tepsilon {0:.3f}".format(brain.EPSILON),
                      end="")
                print_readable_time(current_time)

                # Save the model's weights and biases to .npz file
                model.save(sess, name=MODEL_NAME_save)
                # diamond_net.save(sess, name=DIAMOND_MODEL_NAME+"")
                # zombie_net.save(sess, name=ZOMBIE_MODEL_NAME+"")
                # explore_net.save(sess, name=EXPLORE_MODEL_NAME+"")
                # extra_net.save(sess, name=EXTRA_MODEL_NAME+"")
                # save_path = saver.save(sess, MODEL_PATH_SAVE)

                s = sess.run(merged_summary, feed_dict={model.input: state, model.actions: Dojo_vector, score:avg_score/print_episode, avg_t:avg_time/print_episode, epsilon:brain.EPSILON, avg_r:avg_reward/print_episode})
                writer.add_summary(s, episode)

                avg_time = 0
                avg_score = 0
                avg_error = 0
                avg_reward = 0

        # Final save once training completes.
        model.save(sess, verbose=True, name=MODEL_NAME_save)
        # diamond_net.save(sess, verbose=True, name=DIAMOND_MODEL_NAME+"")
        # zombie_net.save(sess, verbose=True, name=ZOMBIE_MODEL_NAME+"")
        # explore_net.save(sess, verbose=True, name=EXPLORE_MODEL_NAME+"")
        # extra_net.save(sess, verbose=True, name=EXTRA_MODEL_NAME+"")
        # save_path = saver.save(sess, MODEL_PATH_SAVE)
        # print("Model saved in path: %s" % save_path)

    writer.close()

    histogram.plot()
# Quick smoke test for the Histogram utility: fill it with random category
# indices and render the resulting plot.
print("TESTING")

import numpy as np
from math import pi
import sys
from utils import custom_epsilon
import matplotlib.pyplot as plt
# import getch
import csv
import my_mission
from utils import print_readable_time, Histogram

# Histogram(3, 4, 1000): presumably 3 categories, 4 sections, 1000 total
# samples — TODO confirm against utils.Histogram's signature.
histogram = Histogram(3, 4, 1000)
for i in range(1000):
    histogram.add(np.random.randint(3))
histogram.plot()

# print(my_mission.missionXML)
# s = np.array([[[0,0],
#                [0,0]],
#               [[1,1],
#                [1,1]],
#               [[2,2],
#                [2,2]]])
linestyle='dashed', alpha=1, label="Signal reweighted") ax[i].set(xlim=(edges_sig[0], edges_sig[-1]), xlabel=xlab, ylabel=f"Events/{bw:4.2f}") ax[i].legend() else: for i, (var, bin, rang, xlab) in enumerate( zip(variables, bins, ranges, xlabel)): fig, ax[i] = Plot(Histogram( data_train[var][mask], data_train["label"][mask], data_train[weightType + "_" + weightName][mask], bin, rang[0], rang[1]), fig, ax[i], xlab, includeN=True) #fig, ax[1] = Plot(Histogram(data_train['pt'][mask], data_train["label"][mask], data_train[weightType+"_"+weightName][mask], 120, -5, 120), fig, ax[1], "pt", includeN = False) #fig, ax[2] = Plot(Histogram(data_train['invM'][mask], data_train["label"][mask], data_train[weightType+"_"+weightName][mask], 120, 50, 110), fig, ax[2], "invM", includeN = False) #fig, ax[3] = Plot(Histogram(data_train['correctedScaledAverageMu'][mask], data_train["label"][mask], data_train[weightType+"_"+weightName][mask], 80, -2, 80), fig, ax[3], r"$\langle\mu\rangle$", includeN = False) counts_weight, edges_weight = np.histogram( data_train[weightType + "_" + weightName][mask], bins=120, range=(0, 40)) ax[4].step(x=edges_weight, y=np.append(counts_weight, 0),