def prune_benchmark():
    logger = get_logger("prune_pong_agent_benchmark")
    prune_model = DQNPacman(input_size=prune_config.input_size,
                            output_size=prune_config.output_size,
                            model_path=prune_config.model_path,
                            scope=prune_config.scope,
                            epsilon_stop=prune_config.final_epsilon,
                            epsilon=prune_config.initial_epsilon,
                            pruning_end=prune_config.pruning_end,
                            pruning_freq=prune_config.pruning_freq,
                            sparsity_end=prune_config.sparsity_end,
                            target_sparsity=prune_config.target_sparsity,
                            prune_till_death=True)
    target_model = PacmanTargetNet(input_size=dense_config.input_size,
                                   output_size=dense_config.output_size)
    logger.info("loading models")
    print("loading models")
    target_model.load_model(dense_config.ready_path)
    prune_model.load_model(dense_config.ready_path)
    prune_model.change_loss_to_benchmark_loss()
    prune_model.reset_global_step()
    logger.info("Commencing iterative pruning")
    sparsity_vs_accuracy = iterative_pruning(logger, prune_model, target_model,
                                             prune_config.n_epoch, benchmarking=True)
    print("benchmark finished")
    plot_graph(sparsity_vs_accuracy, "sparsity_vs_accuracy_benchmark", figure_num=1)
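
# `plot_graph` is used throughout these scripts but is not defined in this file.
# Below is a minimal matplotlib-based sketch matching the call style used above;
# the signature, defaults, and file naming are assumptions, not the project's
# actual helper.
import matplotlib
matplotlib.use('Agg')  # render to file without requiring a display
import matplotlib.pyplot as plt


def plot_graph_sketch(data, name, figure_num=1, xaxis='Sparsity', yaxis='Accuracy'):
    """Plot data[1] against data[0] and save the figure as <name>.png."""
    plt.figure(figure_num)
    plt.plot(data[0], data[1])
    plt.xlabel(xaxis)
    plt.ylabel(yaxis)
    plt.title(name)
    plt.savefig(name + ".png")
    plt.close(figure_num)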
def main():
    prune_model = SimpleNet(input_size=prune_config.input_size,
                            output_size=prune_config.output_size,
                            model_path=FLAGS.model_path,
                            pruning_start=0,
                            pruning_end=100000,
                            target_sparsity=FLAGS.goal_sparsity,
                            dropout=FLAGS.dropout,
                            pruning_freq=FLAGS.prune_freq,
                            initial_sparsity=prune_config.initial_sparsity,
                            sparsity_start=prune_config.sparsity_start,
                            sparsity_end=FLAGS.end_sparsity,
                            scope='SimpleNetPruned')
    prune_model.load_model(FLAGS.ready_path)
    # Important for stable pruning: the pruning mechanism takes the global step as a parameter.
    prune_model.reset_global_step()
    # fit() interleaves gradual pruning with fine-tuning to reach the best
    # sparsity/accuracy trade-off possible.
    sparsity_vs_accuracy = prune_model.fit(n_epochs=FLAGS.n_epochs,
                                           learning_rate_schedule=prune_config.learning_rate_schedule,
                                           batch_size=FLAGS.batch_size,
                                           prune=True,
                                           config=prune_config)
    plot_graph(sparsity_vs_accuracy, "sparsity_vs_accuracy_simplenet")
    prune_model.load_model(FLAGS.best_path)
    plot_conv_weights(prune_model, title='after')
    prune_model.sess.close()
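
# The pruning above is driven by the global step (hence reset_global_step() before
# fit()). A common schedule that matches the pruning_start / pruning_end /
# initial_sparsity / target_sparsity arguments is the polynomial ramp of
# Zhu & Gurevich (2017). This sketch is illustrative only; it is not necessarily
# the exact schedule SimpleNet implements.
def polynomial_sparsity_sketch(step, pruning_start, pruning_end,
                               initial_sparsity, target_sparsity, power=3):
    """Sparsity ramps from initial_sparsity to target_sparsity as step goes
    from pruning_start to pruning_end, then stays flat."""
    if step <= pruning_start:
        return initial_sparsity
    if step >= pruning_end:
        return target_sparsity
    progress = (step - pruning_start) / (pruning_end - pruning_start)
    return target_sparsity + (initial_sparsity - target_sparsity) * (1 - progress) ** power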
def testEnv():
    env = Env()
    # Fraction of time slots in which packets are successfully delivered over the
    # channel, i.e. no collisions and no idle slots.
    channelThroughPut = 0
    for iteration in range(config.Iterations):
        for t in range(config.TimeSlots):
            initialState = env.reset()
            for user in range(config.N):
                action = slottedAlohaProtocol()
                # Each user changes the inner state of the environment; the environment
                # uses that inner state to keep track of the channels and the ACK signal
                # for each user.
                env.step(action=action, user=user)
            # A reward of one means a packet was successfully delivered over the channel;
            # the sum is therefore at most the number of channels, config.K.
            nextStateForEachUser, rewardForEachUser = env.getNextState()
            channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
    # Estimate the expected value.
    channelThroughPut = channelThroughPut / (config.Iterations * config.TimeSlots)
    print("Channel Utilization average {}".format(channelThroughPut))
    ToPlotX = range(config.Iterations * config.TimeSlots)
    ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
    plot_graph(data=[ToPlotX, ToPlotY], filename="Aloha", title="Aloha",
               xlabel="Time slot", ylabel="Average channel utilization", legend="SlottedAloha")


# def testTimeEnv():
#     env = TimeDependentEnv()
#     # Fraction of time slots in which packets are successfully delivered over the
#     # channel, i.e. no collisions and no idle slots.
#     channelThroughPut = 0
#     for iteration in range(config.Iterations):
#         TimeSPU = env.reset()
#         for t in range(config.TimeSlots):
#             # Reset the internal state of the environment, which keeps track of the
#             # users' actions throughout the time step.
#             env.resetTimeStep()
#             for user in range(config.N):
#                 action = slottedAlohaProtocol()
#                 # Each user changes the inner state of the environment; the environment
#                 # uses that inner state to keep track of the channels and the ACK signal
#                 # for each user.
#                 env.step(action=action, user=user)
#             # A reward of one means a packet was successfully delivered over the channel;
#             # the sum is therefore at most the number of channels, config.K.
#             nextStateForEachUser, rewardForEachUser = env.tstep(timestep=t)
#             channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
#     # Estimate the expected value.
#     channelThroughPut = channelThroughPut / (config.Iterations * config.TimeSlots)
#     print("Channel Utilization average {}".format(channelThroughPut))
#     ToPlotX = range(config.Iterations * config.TimeSlots)
#     ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
#     plot_graph(data=[ToPlotX, ToPlotY], filename="Aloha", title="Aloha",
#                xlabel="Time slot", ylabel="Average channel utilization", legend="SlottedAloha")
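
# `slottedAlohaProtocol` is not defined in this file. In slotted ALOHA each user
# independently transmits in a slot with a fixed probability; with config.N users
# the throughput-maximizing choice is roughly 1/N. A hypothetical sketch (the
# transmission probability and the 0 = "stay silent" convention are assumptions):
def slotted_aloha_protocol_sketch(transmit_prob=1.0 / config.N, num_channels=config.K):
    """Return 0 to stay silent, or a channel index in 1..num_channels chosen uniformly."""
    if np.random.rand() < transmit_prob:
        return np.random.randint(1, num_channels + 1)
    return 0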
def main():
    # ----------------- Setting initial variables Section -----------------
    logger = get_logger(FLAGS.PoPS_dir + "/PoPS_ITERATIVE")
    logger.info(" ------------- START: -------------")
    logger.info("Setting initial data structures")
    accuracy_vs_size = [[], []]
    logger.info("Loading models")
    teacher = CartPoleDQNTarget(input_size=dense_config.input_size,
                                output_size=dense_config.output_size)
    teacher.load_model(path=FLAGS.teacher_path)  # load the teacher
    logger.info("----- evaluating teacher -----")
    print("----- evaluating teacher -----")
    teacher_score = evaluate(agent=teacher, n_epoch=FLAGS.eval_epochs)
    logger.info("----- teacher evaluated with {} -----".format(teacher_score))
    print("----- teacher evaluated with {} -----".format(teacher_score))
    prune_step_path = FLAGS.PoPS_dir + "/prune_step_"
    policy_step_path = FLAGS.PoPS_dir + "/policy_step_"
    initial_path = policy_step_path + "0"
    logger.info("creating policy step 0 model, which is identical in size to the original model")
    # Copy the teacher's weights in order to create the initial model.
    copy_weights(output_path=initial_path, teacher_path=FLAGS.teacher_path)
    compressed_agent = StudentCartpole(input_size=student_config.input_size,
                                       output_size=student_config.output_size,
                                       model_path=initial_path,
                                       tau=student_config.tau,
                                       pruning_freq=student_config.pruning_freq,
                                       sparsity_end=student_config.sparsity_end,
                                       target_sparsity=student_config.target_sparsity)
    compressed_agent.load_model()
    initial_size = compressed_agent.get_number_of_nnz_params()
    accuracy_vs_size[0].append(initial_size)
    accuracy_vs_size[1].append(teacher_score)
    initial_number_of_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
    initial_number_of_nnz = sum(initial_number_of_params_at_each_layer)
    converge = False
    iteration = 0
    convergence_information = deque(maxlen=2)
    convergence_information.append(100)
    precent = 100
    arch_type = 0
    last_measure = initial_size
    while not converge:
        iteration += 1
        print("----- Pruning Step {} -----".format(iteration))
        logger.info(" ----- Pruning Step {} -----".format(iteration))
        path_to_save_pruned_model = prune_step_path + str(iteration)
        # ----------------- Pruning Section -----------------
        if arch_type == 2:
            arch_type = 3  # special arch_type for a prune-oriented learning rate
        sparsity_vs_accuracy = iterative_pruning_policy_distilliation(
            logger=logger,
            agent=compressed_agent,
            target_agent=teacher,
            iterations=FLAGS.iterations,
            config=student_config,
            best_path=path_to_save_pruned_model,
            arch_type=arch_type,
            lower_bound=student_config.LOWER_BOUND,
            accumulate_experience_fn=accumulate_experience_cartpole,
            evaluate_fn=evaluate,
            objective_score=student_config.OBJECTIVE_SCORE)
        plot_graph(data=sparsity_vs_accuracy,
                   name=FLAGS.PoPS_dir + "/initial size {}%, Pruning_step number {}".format(precent, iteration),
                   figure_num=iteration)
        # Load the model that keeps a reasonable score at the highest sparsity.
        compressed_agent.load_model(path_to_save_pruned_model)
        # ----------------- Measuring redundancy Section -----------------
        # The number of non-zero parameters at each layer.
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        # The total number of non-zero parameters.
        nnz_params = sum(nnz_params_at_each_layer)
        # Redundancy is the fraction of parameters we do not need;
        # nnz_params / initial_number_of_nnz is the fraction we do need.
        redundancy = (1 - nnz_params / initial_number_of_nnz) * 100
        print("----- Pruning Step {} finished, got {}% redundancy in net params -----"
              .format(iteration, redundancy))
        logger.info("----- Pruning Step {} finished, got {}% redundancy in net params -----"
                    .format(iteration, redundancy))
        logger.info("----- Pruning Step {} finished with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        print(" ----- Evaluating redundancy at each layer Step {} -----".format(iteration))
        logger.info(" ----- Evaluating redundancy at each layer Step {} -----".format(iteration))
        redundancy_at_each_layer = calculate_redundancy(initial_nnz_params=initial_number_of_params_at_each_layer,
                                                        next_nnz_params=nnz_params_at_each_layer)
        logger.info("----- redundancy for each layer at step {} is {} -----"
                    .format(iteration, redundancy_at_each_layer))
        if iteration == 1:
            # Hardcoded per-layer redundancy for the first step, kept as in the source
            # (it appears to be an experiment-specific override).
            redundancy_at_each_layer = [0.83984375, 0.8346405029296875, 0.83795166015625, 0.83984375]
        # ----------------- Policy distillation Section -----------------
        print(" ----- Creating Model with size according to the redundancy at each layer ----- ")
        logger.info("----- Creating Model with size according to the redundancy at each layer -----")
        policy_distilled_path = policy_step_path + str(iteration)
        # Create the compact model, where every layer's size is determined by the redundancy measure.
        compressed_agent = StudentCartpole(input_size=student_config.input_size,
                                           output_size=student_config.output_size,
                                           model_path=policy_distilled_path,
                                           tau=student_config.tau,
                                           redundancy=redundancy_at_each_layer,
                                           pruning_freq=student_config.pruning_freq,
                                           sparsity_end=student_config.sparsity_end,
                                           target_sparsity=student_config.target_sparsity,
                                           last_measure=last_measure)
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        logger.info("----- Step {}, Created Model with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        iterative_size = compressed_agent.get_number_of_nnz_params()
        last_measure = iterative_size
        precent = (iterative_size / initial_size) * 100
        convergence_information.append(precent)
        print(" ----- Step {}, Created Model with size {} which is {}% of the original size ----- "
              .format(iteration, iterative_size, precent))
        logger.info("----- Created Model with size {} which is {}% of the original size -----"
                    .format(iterative_size, precent))
        # Schedule the learning rate appropriate for the size of the model.
        if precent > 40:
            arch_type = 0
        elif 10 <= precent <= 40:
            arch_type = 1
        else:
            arch_type = 2
        print(" ----- policy distilling Step {} ----- ".format(iteration))
        logger.info("----- policy distilling Step {} -----".format(iteration))
        fit_supervised(logger=logger,
                       arch_type=arch_type,
                       student=compressed_agent,
                       teacher=teacher,
                       n_epochs=FLAGS.n_epoch,
                       evaluate_fn=evaluate,
                       accumulate_experience_fn=accumulate_experience_cartpole,
                       lower_score_bound=student_config.LOWER_BOUND,
                       objective_score=student_config.OBJECTIVE_SCORE)
        policy_distilled_score = evaluate(agent=compressed_agent, n_epoch=FLAGS.eval_epochs)
        compressed_agent.reset_global_step()
        print(" ----- policy distilling Step {} finished with score {} ----- "
              .format(iteration, policy_distilled_score))
        logger.info("----- policy distilling Step {} finished with score {} -----"
                    .format(iteration, policy_distilled_score))
        # Check convergence.
        converge = check_convergence(convergence_information)
        # For debugging purposes.
        accuracy_vs_size[0].append(iterative_size)
        accuracy_vs_size[1].append(policy_distilled_score)
    plot_graph(data=accuracy_vs_size,
               name=FLAGS.PoPS_dir + "/accuracy_vs_size",
               figure_num=iteration + 1,
               xaxis='NNZ params',
               yaxis='Accuracy')
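
# `calculate_redundancy` is defined elsewhere. From how its output is consumed
# above (a per-layer fraction used to size the next compact model), a plausible
# sketch is the per-layer analogue of the scalar redundancy computed in the loop.
# This is an assumption, not the project's actual implementation:
def calculate_redundancy_sketch(initial_nnz_params, next_nnz_params):
    """Per-layer fraction of parameters that the pruning step revealed as unneeded."""
    return [1 - next_layer / initial_layer
            for initial_layer, next_layer in zip(initial_nnz_params, next_nnz_params)]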
def main():
    # Variables used for the model's logic.
    raw_scores, states, actions_booleans = [BEGINING_SCORE], [], []
    episode_number = 0
    update_weights = False  # if too much time has passed, update the weights even if the game is not finished
    grads_sums = get_empty_grads_sums()  # initialize the gradients holder for the trainable variables
    # Variables for debugging:
    manual_prob_use = 0  # consider using the deviations from 1
    prob_deviation_sum = 0
    default_data_counter = 0  # counts exceptions in reading the observations' file (falling back to default data)
    step_counter = 0  # for tests
    # Variables for evaluation:
    best_average_score = 0
    current_average_score = 0
    average_scores_along_the_game = []
    with tf.Session() as sess:
        sess.run(init)
        # sess.run(init2)  # check if this is necessary
        # Load previously saved weights if the file exists.
        if os.path.isfile(WEIGHTS_FILE) and LOAD_WEIGHTS:
            # Load with pickle.
            with open(WEIGHTS_FILE, 'rb') as f:  # BEST_WEIGHTS
                for var, val in zip(tvars, pkl.load(f)):
                    sess.run(tf.assign(var, val))
            print("loaded weights successfully!")
        # Create the weights files if they don't exist:
        if not os.path.isfile(WEIGHTS_FILE):
            open(WEIGHTS_FILE, 'a').close()
        if not os.path.isfile(BEST_WEIGHTS):
            open(BEST_WEIGHTS, 'a').close()
            print("created weights file successfully!")
        while episode_number < MAX_GAMES:
            start_time = time.time()
            # Get an observation and process the score into a reward.
            obsrv, score, bonus, is_dead, request_id, default_obsrv, AI_action, AI_accel = get_observation()
            # If for some reason the bot died and the message about it got lost, we check it here.
            if not is_dead:
                is_dead = check_if_died(raw_scores[-1], score)
            # On a default observation, take the score from the last step.
            if score == 0:
                score = raw_scores[-1]
            raw_scores.append(score)
            # FOR DEBUGGING
            # is_dead = False
            default_data_counter += default_obsrv
            # vars = sess.run(tvars)
            # Append the observation that led to the following action to states.
            states.append(obsrv)
            # Run the policy network and get a distribution over actions.
            action_probs = sess.run(actions_probs, feed_dict={observations: [obsrv]})
            # If - exploration, else - exploitation.
            if np.random.binomial(1, EPSILON_FOR_EXPLORATION, 1)[0]:
                chosen_actions = pick_action_uniformly(action_probs[0])
                logger.write_to_log('Tried exploration!')
            else:
                # np.random.multinomial raises ValueError when the probabilities
                # don't sum exactly to 1.
                try:
                    chosen_actions = np.random.multinomial(1, action_probs[0])
                except ValueError:
                    chosen_actions = pick_random_action_manually(action_probs[0])
                    manual_prob_use += 1
                    prob_deviation_sum += np.abs(np.sum(action_probs) - 1)
            # Save the selected action for later use.
            actions_booleans.append(chosen_actions)
            # Index of the selected action.
            action = np.argmax(actions_booleans[-1])
            # FOR DEBUGGING
            '''
            #print("action_probs: " + str(action_probs))
            print("observation got: " + str(obsrv))
            print("action chosen: " + str(action))
            print("manual_prob_use: " + str(manual_prob_use))
            print("prob_deviation_sum: " + str(prob_deviation_sum))
            print("default_data_counter: " + str(default_data_counter))
            print("step_counter: " + str(step_counter))
            '''
            if step_counter % WRITE_TO_LOG == 0:
                # logger.write_to_log("observation got: " + str(obsrv))
                logger.write_to_log("action_probs: " + str(action_probs))
                logger.write_to_log("action chosen: " + str(action))
            # Step the environment and get new measurements.
            send_action(action, request_id)
            # add reward to rewards for later use in the training step
            # rewards.append(reward)
            step_counter += 1  # this is for tests
            if step_counter % STEPS_UNTIL_BACKPROP == 0:
                update_weights = True
            # Sleep so the model won't read the same frame many times.
            time.sleep(0.25)
            if (is_dead or update_weights) and len(raw_scores) > 2:
                # UPDATE MODEL:
                # Calculate rewards from raw scores:
                # processed_rewards = calc_reward_from_raw(raw_scores, is_dead)
                processed_rewards = get_reward(raw_scores, is_dead)
                # processed_rewards = raw_score_reward(raw_scores, is_dead)
                # FOR DEBUGGING:
                if is_dead:
                    print('just died!')
                    # print("processed_rewards: " + str(processed_rewards))
                # logger.write_to_log("raw_score: " + str(raw_scores))
                logger.write_to_log("processed_rewards: " + str(processed_rewards))
                # Create the reward sums of the reversed rewards array.
                rewards_sums = np.cumsum(processed_rewards[::-1])
                # Discount the reversed sums, then standardize them.
                rewards_sums = decrese_rewards(rewards_sums[::-1])
                rewards_sums -= np.mean(rewards_sums)
                rewards_sums = np.divide(rewards_sums, np.std(rewards_sums))
                # logger.write_to_log("rewards_sums: " + str(rewards_sums))
                modified_rewards_sums = np.reshape(rewards_sums, [1, len(processed_rewards)])
                # Modify actions_booleans to be an array of booleans.
                actions_booleans = np.array(actions_booleans)
                actions_booleans = actions_booleans == 1
                # FOR DEBUGGING:
                '''
                fa_res = sess.run(filtered_actions, feed_dict={observations: states, actions_mask: actions_booleans,
                                                               rewards_arr: modified_rewards_sums})
                pi_res = sess.run(pi, feed_dict={observations: states, actions_mask: actions_booleans,
                                                 rewards_arr: modified_rewards_sums})
                loss_res = sess.run(loss, feed_dict={observations: states, actions_mask: actions_booleans,
                                                     rewards_arr: modified_rewards_sums})
                logger.write_to_log("filtered_actions: " + str(fa_res))
                '''
                # Gradients for the current episode.
                grads = sess.run(Gradients, feed_dict={observations: states,
                                                       actions_mask: actions_booleans,
                                                       rewards_arr: modified_rewards_sums})
                grads_sums += np.array(grads)
                episode_number += 1
                update_weights = False
                # Evaluation:
                current_average_score = np.average(raw_scores)
                average_scores_along_the_game.append(current_average_score)
                logger.write_to_log("average score after " + str(step_counter) +
                                    ' steps: ' + str(current_average_score))
                # Reset step_counter:
                step_counter = 0
                # Do the training step.
                if episode_number % BATCH_SIZE == 0:
                    # if (episode_number % WRITE_TO_LOG == 0):
                    #     logger.write_to_log("learned variables: " + str(vars[0]))
                    print("taking the update step")
                    grad_dict = {Gradients_holder[i]: grads_sums[i] for i in range(VAR_NO)}
                    # TODO choose learning rate?
                    # Take the train step.
                    sess.run(train_step, feed_dict=grad_dict)
                    # Reset grads_sums.
                    grads_sums = get_empty_grads_sums()
                # Evaluate and save:
                if best_average_score < current_average_score:
                    best_average_score = current_average_score
                    # Save with pickle.
                    with open(BEST_WEIGHTS, 'wb') as f:
                        pkl.dump(sess.run(tvars), f, protocol=2)
                    print('Saved best weights successfully!')
                    # print('Current best result for %d episodes: %f.' % (episode_number, best_average_score))
                    logger.write_to_log('Saved best weights successfully!')
                    logger.write_to_log('Current best result for ' + str(episode_number) +
                                        ' episodes: ' + str(best_average_score))
                # Manual save.
                with open(WEIGHTS_FILE, 'wb') as f:
                    pkl.dump(sess.run(tvars), f, protocol=2)
                # print('auto-saved weights successfully.')
                # Reset the relevant variables; the episode number was updated above.
                raw_scores, states, actions_booleans = [BEGINING_SCORE], [], []
                manual_prob_use = 0
                wait_for_game_to_start()
                logger.write_spacer()
        plot_graph(average_scores_along_the_game, "Policy Gradient Average Score During Game",
                   "PG_avg_score_during_game.png", "Episodes No.", "Average Score")
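
# `get_reward` and `decrese_rewards` are defined elsewhere. The block above computes
# reversed cumulative sums, discounts them, and standardizes them, which is the usual
# REINFORCE return computation. A sketch of the discounting step under an assumed
# discount factor gamma (the actual helper may differ):
def decrese_rewards_sketch(rewards_sums, gamma=0.99):
    """Apply exponential discounting: the k-th return is scaled by gamma**k."""
    discounts = gamma ** np.arange(len(rewards_sums))
    return np.asarray(rewards_sums) * discounts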
    # Loop over k steps.
    while agent.step_number < MAX_STEPS:
        agent.take_one_step()
        # Write to the log and add to the plot array:
        if agent.step_number % agent.WRITE_TO_LOG_EVERY == 0:
            avg_scores_per_step.append(np.average(agent.last_raw_scores))
            logger.write_to_log("avg_scores_per_step" + str(avg_scores_per_step))
            # Save weights and best weights
            # (the last loaded weights are overwritten on the first save):
            if not agent.TEST_MODE:
                agent.save_weights(WEIGHTS_FILE)
                if avg_scores_per_step[-1] > best_avg_per_step:
                    best_avg_per_step = avg_scores_per_step[-1]
                    agent.save_weights(BEST_WEIGHTS)
    # Evaluation and plotting.
    plot_graph(avg_scores_per_step, "Average Score Per 100 Steps",
               "DQN_avg_score_per_step_by_epoch_" + str(agent.epoch_no),
               "Step No.", "Average score")
    avg_scores_per_epoch.append(np.average(avg_scores_per_step))
    logger.write_to_log("avg_scores_per_epoch" + str(avg_scores_per_epoch))
    logger.write_spacer()
    agent.epoch_no += 1

plot_graph(avg_scores_per_epoch, "Average Score Per Epoch",
           "DQN_avg_score_per_epoch_for_experiment", "Epoch No.", "Average score")
print("finished experiment")
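
# `agent.save_weights` mirrors the manual pickle-based saving used by the policy
# gradient script above. A hypothetical minimal implementation; the `sess` and
# `tvars` attribute names are assumptions:
import pickle as pkl


def save_weights_sketch(agent, path):
    """Dump the current values of the agent's trainable variables with pickle."""
    with open(path, 'wb') as f:
        pkl.dump(agent.sess.run(agent.tvars), f, protocol=2)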
def main():
    logger = get_logger(FLAGS.PoPS_dir + "/PoPS_ITERATIVE")
    logger.info(" ------------- START: -------------")
    logger.info("Setting initial data structures")
    accuracy_vs_size = [[], []]
    logger.info("Loading models")
    teacher = PongTargetNet(input_size=dense_config.input_size,
                            output_size=dense_config.output_size)
    teacher.load_model(path=FLAGS.teacher_path)  # load the teacher
    logger.info("----- evaluating teacher -----")
    print("----- evaluating teacher -----")
    teacher_score = evaluate(agent=teacher, n_epoch=FLAGS.eval_epochs)
    logger.info("----- teacher evaluated with {} -----".format(teacher_score))
    print("----- teacher evaluated with {} -----".format(teacher_score))
    prune_step_path = FLAGS.PoPS_dir + "/prune_step_"
    policy_step_path = FLAGS.PoPS_dir + "/policy_step_"
    initial_path = policy_step_path + "0"
    # Copy the teacher's weights in order to create the initial model.
    copy_weights(output_path=initial_path, teacher_path=FLAGS.teacher_path)
    compressed_agent = StudentPong(input_size=student_config.input_size,
                                   output_size=student_config.output_size,
                                   model_path=initial_path,
                                   tau=student_config.tau,
                                   prune_till_death=True,
                                   pruning_freq=prune_config.pruning_freq,
                                   sparsity_end=prune_config.sparsity_end,
                                   target_sparsity=prune_config.target_sparsity)
    compressed_agent.load_model()
    initial_size = compressed_agent.get_number_of_nnz_params()
    accuracy_vs_size[0].append(initial_size)
    accuracy_vs_size[1].append(teacher_score)
    initial_number_of_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
    initial_number_of_nnz = sum(initial_number_of_params_at_each_layer)
    converge = False
    iteration = 0
    convergence_information = deque(maxlen=2)
    convergence_information.append(100)
    precent = 100
    arch_type = 0
    last_measure = initial_size
    while not converge:
        iteration += 1
        print("----- Pruning Step {} -----".format(iteration))
        logger.info(" ----- Pruning Step {} -----".format(iteration))
        path_to_save_pruned_model = prune_step_path + str(iteration)
        sparsity_vs_accuracy = iterative_pruning_policy_distilliation(logger=logger,
                                                                      agent=compressed_agent,
                                                                      target_agent=teacher,
                                                                      iterations=FLAGS.iterations,
                                                                      config=student_config,
                                                                      best_path=path_to_save_pruned_model,
                                                                      arch_type=arch_type)
        plot_graph(data=sparsity_vs_accuracy,
                   name=FLAGS.PoPS_dir + "/initial size {}%, Pruning_step number {}".format(precent, iteration),
                   figure_num=iteration)
        # Load the model that keeps a reasonable score at the highest sparsity.
        compressed_agent.load_model(path_to_save_pruned_model)
        # The number of non-zero parameters at each layer.
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        # The total number of non-zero parameters.
        nnz_params = sum(nnz_params_at_each_layer)
        # Redundancy is the fraction of parameters we do not need;
        # nnz_params / initial_number_of_nnz is the fraction we do need.
        redundancy = (1 - nnz_params / initial_number_of_nnz) * 100
        print("----- Pruning Step {} finished, got {}% redundancy in net params -----"
              .format(iteration, redundancy))
        logger.info("----- Pruning Step {} finished, got {}% redundancy in net params -----"
                    .format(iteration, redundancy))
        logger.info("----- Pruning Step {} finished with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        print(" ----- Evaluating redundancy at each layer Step {} -----".format(iteration))
        logger.info(" ----- Evaluating redundancy at each layer Step {} -----".format(iteration))
        redundancy_at_each_layer = calculate_redundancy(initial_nnz_params=initial_number_of_params_at_each_layer,
                                                        next_nnz_params=nnz_params_at_each_layer)
        logger.info("----- redundancy for each layer at step {} is {} -----"
                    .format(iteration, redundancy_at_each_layer))
        print(" ----- Creating Model with size according to the redundancy at each layer ----- ")
        logger.info("----- Creating Model with size according to the redundancy at each layer -----")
        policy_distilled_path = policy_step_path + str(iteration)
        # Create the compact model, where every layer's size is determined by the redundancy measure.
        compressed_agent = StudentPong(input_size=student_config.input_size,
                                       output_size=student_config.output_size,
                                       model_path=policy_distilled_path,
                                       tau=student_config.tau,
                                       redundancy=redundancy_at_each_layer,
                                       pruning_freq=prune_config.pruning_freq,
                                       sparsity_end=prune_config.sparsity_end,
                                       target_sparsity=prune_config.target_sparsity,
                                       prune_till_death=True,
                                       last_measure=last_measure)
        nnz_params_at_each_layer = compressed_agent.get_number_of_nnz_params_per_layer()
        logger.info("----- Step {}, Created Model with {} NNZ params at each layer"
                    .format(iteration, nnz_params_at_each_layer))
        iterative_size = compressed_agent.get_number_of_nnz_params()
        precent = (iterative_size / initial_size) * 100
        convergence_information.append(precent)
        print(" ----- Step {}, Created Model with size {} which is {}% of the original size ----- "
              .format(iteration, iterative_size, precent))
        logger.info("----- Created Model with size {} which is {}% of the original size -----"
                    .format(iterative_size, precent))
        if precent > 10:
            arch_type = 0
        else:
            arch_type = 1
        print(" ----- policy distilling Step {} ----- ".format(iteration))
        logger.info("----- policy distilling Step {} -----".format(iteration))
        fit_supervised(logger=logger,
                       arch_type=arch_type,
                       student=compressed_agent,
                       teacher=teacher,
                       n_epochs=FLAGS.n_epoch)
        compressed_agent.load_model(path=policy_distilled_path)
        policy_distilled_score = evaluate(agent=compressed_agent, n_epoch=FLAGS.eval_epochs)
        compressed_agent.reset_global_step()
        print(" ----- policy distilling Step {} finished with score {} ----- "
              .format(iteration, policy_distilled_score))
        logger.info("----- policy distilling Step {} finished with score {} -----"
                    .format(iteration, policy_distilled_score))
        converge = check_convergence(convergence_information)
        accuracy_vs_size[0].append(iterative_size)
        accuracy_vs_size[1].append(policy_distilled_score)
    plot_graph(data=accuracy_vs_size,
               name=FLAGS.PoPS_dir + "/accuracy_vs_size",
               figure_num=iteration + 1,
               xaxis='NNZ params',
               yaxis='Accuracy')
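
# `check_convergence` is defined elsewhere. Given that convergence_information is a
# deque holding the last two size percentages, a plausible reading is "stop when the
# compressed size stops shrinking meaningfully". A sketch under that assumption;
# the 1% tolerance is illustrative, not the project's actual threshold:
def check_convergence_sketch(convergence_information, tolerance=1.0):
    """Converged when the last two size-percentage measurements differ by < tolerance."""
    if len(convergence_information) < 2:
        return False
    previous, current = convergence_information
    return abs(previous - current) < tolerance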