def get_agent(env):
    if FLAGS.agent == 'q-learner':
        return QLearner(env, FLAGS.discount_factor)
    elif FLAGS.agent == 'deep-q-learner':
        return DeepQLearner(env, FLAGS.discount_factor)
    else:
        raise ValueError('Unknown agent: {}'.format(FLAGS.agent))
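# A minimal sketch (assumption, not from the original) of how the FLAGS object
# used by get_agent() could be declared with absl-style flags. Only the flag
# names 'agent' and 'discount_factor' come from the snippet above; the defaults
# and the absl dependency itself are assumptions.
from absl import flags

flags.DEFINE_enum('agent', 'q-learner', ['q-learner', 'deep-q-learner'],
                  'Which agent get_agent() should construct.')
flags.DEFINE_float('discount_factor', 0.99,
                   'Discount factor passed to the agent constructor.')
FLAGS = flags.FLAGS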
def main():
    prng = np.random.RandomState(ps.PRNG_SEED)
    sensor_decoder = SensorDecoder(n_fragments=ps.N_FRAGMENTS,
                                   n_checksum_bytes=ps.N_CHECKSUM_BYTES,
                                   frame_counter_position=ps.FRAME_COUNTER_POS,
                                   fragment_id_position=ps.FRAGMENT_ID_POS,
                                   img_data_position=ps.IMG_DATA_POS,
                                   img_fragment_length=ps.IMG_FRAGMENT_LENGTH,
                                   action_position=ps.ACTION_POS,
                                   reward_position=ps.REWARD_POS,
                                   n_reward_bytes=ps.N_REWARD_BYTES)
    labeling_net = load_labeling_function(ps.LABELING_NETWORK_FILE_NAME,
                                          ps.MB_SIZE,
                                          ps.LABELING_NETWORK_USE_LAYER)
    state_encoder_fn = labeling_net.get_single_output
    q_function = load_q_network(ps.Q_NETWORK_LOAD_FILENAME, ps.STATE_STM,
                                ps.PERCEPT_LENGTH, ps.Q_HIDDEN_NEURONS,
                                ps.N_ACTIONS, ps.MB_SIZE)
    q_learner = QLearner(q_function,
                         exp_store_size=ps.EXP_STORE_SIZE,
                         percept_length=ps.PERCEPT_LENGTH,
                         n_actions=ps.N_ACTIONS,
                         state_stm=ps.STATE_STM,
                         gamma=ps.GAMMA,
                         minibatch_size=ps.MB_SIZE,
                         prng=prng)
    log_path = ps.LOG_PATH + time.strftime('%Y-%m-%d_%H-%M-%S') + '/'
    copy_parameter_file(log_path)
    # quality_logger = QualityLogger(ps.QUALITY_LOG_PATH)
    main_controller = MainController(
        q_learner,
        sensor_decoder=sensor_decoder,
        state_encoder_fn=state_encoder_fn,
        timeout_period=ps.TIMEOUT_PERIOD,
        remote_host=ps.REMOTE_HOST,
        remote_port=ps.REMOTE_PORT,
        learning_rate=ps.LEARNING_RATE,
        learning_iterations_per_step=ps.LEARNING_ITERATIONS_PER_STEP,
        random_action_duration=ps.RANDOM_ACTION_DURATION,
        epsilon_decrease_duration=ps.EPSILON_DECREASE_DURATION,
        epsilon_start=ps.EPSILON_START,
        epsilon_end=ps.EPSILON_END,
        burn_in=ps.BURN_IN,
        frame_counter_increment=ps.FRAME_COUNTER_INC_STEP,
        prng=prng,
        training_error_smoothing=ps.TRAIN_ERROR_SMOOTHING,
        log_path=log_path,
        reward_smoothing=ps.REWARD_SMOOTHING,
        quality_logger=QualityLogger(ps.QUALITY_LOG_PATH))

    print('Starting main loop.')
    while True:
        main_controller.do()
def main():
    mdp: MarkovDecisionProcess = MarkovDecisionProcess()
    mdp.set_field(1, 1, Field.OBSTACLE)
    mdp.set_field(3, 2, Field.POS_TERMINAL)
    mdp.set_field(3, 1, Field.NEG_TERMINAL)
    print(mdp)

    q_learner: QLearner = QLearner(mdp)
    while True:
        if mdp.terminated:
            mdp.restart()
        q_learner.print_actions()
        input("enter to advance")
        q_learner.step()
        print(q_learner)
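# For reference, a minimal sketch (assumption, not the project's QLearner) of
# what a tabular step() like the one called above typically does: pick an
# epsilon-greedy action, apply it to the MDP, and update Q(s, a) with the
# standard one-step Q-learning rule. The MDP methods current_state, actions()
# and apply() are hypothetical names used only for illustration.
import random
from collections import defaultdict

class TabularQLearner:
    def __init__(self, mdp, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.mdp = mdp
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.q = defaultdict(float)  # maps (state, action) -> estimated value

    def step(self):
        s = self.mdp.current_state
        actions = self.mdp.actions(s)
        if random.random() < self.epsilon:
            a = random.choice(actions)                      # explore
        else:
            a = max(actions, key=lambda a: self.q[(s, a)])  # exploit
        s_next, reward = self.mdp.apply(a)                  # hypothetical MDP API
        best_next = max((self.q[(s_next, a2)] for a2 in self.mdp.actions(s_next)),
                        default=0.0)
        # One-step Q-learning update:
        # Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        self.q[(s, a)] += self.alpha * (reward + self.gamma * best_next - self.q[(s, a)])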
        action_list = json.load(infile)
    else:
        action_list = algs.search(
            structs.PriorityQueue,
            args['size'],
            lambda successor: algs.heuristic(successor))[0]
        outfile = open('path.json', 'w')
        dump = json.dumps(action_list, sort_keys=True, indent=2,
                          separators=(',', ': '))
        outfile.write(dump)
    main(action_list=action_list)
elif args['learn']:
    path = None
    if args['weights']:
        path = 'training/demo.json'
    main(agent=QLearner(import_from=path,
                        export_to='training/weights.json',
                        epsilon=None, ld=1, training=True))
elif args['demo']:
    main(agent=QLearner(import_from='training/demo.json', training=False))
else:
    main()
    fig2 = plt.figure(1)
    ax2 = fig2.add_subplot(1, 1, 1)
    ax2.clear()
    ax2.plot(x, mean_rewards_y)
    plt.savefig(os.path.join(save_dir, "mean_rewards.png"))


if __name__ == "__main__":
    rospy.init_node('learning')
    env = gym.make('Learning-v0')

    # Replay policy
    # sourceQ_file = os.path.join(rospkg.RosPack().get_path('learning'), 'csv/sim', 'replay_policy', 'Q.csv')
    sourceQ_file = None

    if sourceQ_file is not None:
        save_dir = os.path.join(rospkg.RosPack().get_path('learning'), 'csv/sim', 'replay_policy')
    else:
        save_dir = os.path.join(rospkg.RosPack().get_path('learning'), 'csv/sim', 'learn_policy_v7')

    # Additional parameters
    agent = QLearner(env)  # pass sourceQ_file=sourceQ_file to replay a policy
    render = False
    num_episodes = 500000  # roughly 200 episodes per minute

    run(env, agent, render, save_dir, num_episodes)
    rospy.spin()
# def state_encoder_fn(x): return pca_encoder.transform(x)[0]

if q_network_load_filename is not None:
    q_function = QNetwork.load_from_file(q_network_load_filename, MB_SIZE)
else:
    hidden_layer = FullyConnectedLayer(STATE_STM * PERCEPT_LENGTH, Q_HIDDEN_NEURONS)
    output_layer = FullyConnectedLayer(Q_HIDDEN_NEURONS, N_ACTIONS, activation_fn=linear)
    q_function = QNetwork([hidden_layer, output_layer], minibatch_size=MB_SIZE)

q_learner = QLearner(q_function,
                     exp_store_size=EXP_STORE_SIZE,
                     percept_length=PERCEPT_LENGTH,
                     n_actions=N_ACTIONS,
                     state_stm=STATE_STM,
                     gamma=GAMMA,
                     minibatch_size=MB_SIZE,
                     prng=prng)

if enable_plotting:
    bar_plotter = livebarchart.LiveBarPlotter(n_categories=5, n_bars_per_category=5)

PORT = 8888
IP = "0.0.0.0"
REMOTE_HOST = "127.0.0.1"
REMOTE_PORT = 8889

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
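# A small sketch (assumption, not from the original) of how the UDP socket set
# up above is typically used: bind to the local endpoint, receive sensor
# datagrams, and send action packets back to the remote host. The payload
# handling and buffer size are hypothetical.
sock.bind((IP, PORT))
while True:
    packet, addr = sock.recvfrom(4096)       # raw sensor fragment from the remote side
    # ... decode packet and choose an action with q_learner (omitted) ...
    action_bytes = b"\x00"                   # placeholder action payload
    sock.sendto(action_bytes, (REMOTE_HOST, REMOTE_PORT))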
def test_all(self, save_dir, label_types, percentile, num_runs, budget_list,
             percent_sim_data, state_visits):
    self.label_types = label_types
    self.save_dir = save_dir
    self.percentile = percentile
    self.num_runs = num_runs
    self.filenames = {
        "sourceQ": self.sourceQ_file,
        "targetQ": self.targetQ_file,
        "sim_on_real": os.path.join(self.save_dir, "sim_on_real"),
        "data": os.path.join(self.save_dir, "data.csv"),
        "results": os.path.join(self.save_dir, "results.csv"),
        "true_sim": os.path.join(self.save_dir, "true_sim_data.csv"),
        "true_real": os.path.join(self.save_dir, "true_real_data.csv"),
        "acceptable_actions": os.path.join(self.save_dir, "acceptable_actions.csv"),
    }

    self.estimation_baselines = ["dawid_skene", "majority_vote"]
    self.classifier_baselines = ["dawid_skene", "majority_vote", "all_labels"]
    self.estimation_metrics = ["accuracy", "error", "error1s"]
    self.classifier_metrics = ["average_precision_score", "mean_squared_error",
                               "f1_score", "accuracy_score", "precision_score",
                               "recall_score"]  # "roc_auc_score",
    self.test_data_list = ["seen", "unseen", "all"]
    self.oracle_in_loop_baselines = ["model_query", "always_query", "never_query"]
    self.oracle_in_loop_metrics = ["avg_reward", "percent_queries"]

    self.estimation_results = {}
    self.classifier_results = {}
    self.oracle_in_loop_results = {}
    self.data_sizes = {}

    if len(state_visits) > 0:
        self.filenames["sim_on_real"] = state_visits
    else:
        agent = QLearner(self.target_env, sourceQ_file=self.sourceQ_file)
        run(self.target_env, agent, False, self.filenames["sim_on_real"], 10000)

    x_label = "Budget"
    x_list = budget_list

    for label_type in label_types:
        label = label_type[0]
        self.data_sizes[label] = -1

    # Create data structures to store results.
    for label_type in label_types:
        label = label_type[0]
        if label not in self.estimation_results:
            self.estimation_results[label] = {}
        for metric in self.estimation_metrics:
            if metric not in self.estimation_results[label]:
                self.estimation_results[label][metric] = {}
            for baseline in self.estimation_baselines:
                self.estimation_results[label][metric][baseline] = np.zeros(
                    (len(x_list), self.num_runs), dtype=np.float64)

    for label_type in label_types:
        label = label_type[0]
        if label not in self.classifier_results:
            self.classifier_results[label] = {}
        for metric in self.classifier_metrics:
            if metric not in self.classifier_results[label]:
                self.classifier_results[label][metric] = {}
            for test_data in self.test_data_list:
                if test_data not in self.classifier_results[label][metric]:
                    self.classifier_results[label][metric][test_data] = {}
                for baseline in self.classifier_baselines:
                    self.classifier_results[label][metric][test_data][baseline] = np.zeros(
                        (len(x_list), self.num_runs), dtype=np.float64)

    for label_type in label_types:
        label = label_type[0]
        if label not in self.oracle_in_loop_results:
            self.oracle_in_loop_results[label] = {}
        for metric in self.oracle_in_loop_metrics:
            if metric not in self.oracle_in_loop_results[label]:
                self.oracle_in_loop_results[label][metric] = {}
            for baseline in self.oracle_in_loop_baselines:
                self.oracle_in_loop_results[label][metric][baseline] = np.zeros(
                    (len(x_list), self.num_runs), dtype=np.float64)

    # Run the approach num_runs times over the whole range of budget values.
    for num in range(self.num_runs):
        self.target_env.env.generate_training_subset(percent_sim_data)
        self.target_env.env.set_to_training_set()
        for i in range(len(x_list)):
            x = x_list[i]
            self.max_states = -1
            for label_type in label_types:
                print(label_type, " ", x)
                self.test_one_instance(label_type, (i, x), (0, percent_sim_data), num)

        self.write_results(label_types, x_list, num)
    mean_rewards.append(float(np.mean(all_rewards[-100:])))

    if i_episode % save_freq == 0:
        x = range(i_episode + 1)[::interval]
        mean_rewards_y = mean_rewards[::interval]
        agent.saveQ(save_dir)
        agent.save_debug_info(save_dir)

        fig2 = plt.figure(1)
        ax2 = fig2.add_subplot(1, 1, 1)
        ax2.clear()
        ax2.plot(x, mean_rewards_y)
        plt.savefig(os.path.join(save_dir, "mean_rewards.png"))


if __name__ == "__main__":
    # Env name to train on (e.g., MyCatcher-v0)
    env = gym.make(sys.argv[1])

    # If a learned Q-value file is given, the game is rendered and the agent
    # plays according to the learned Q-value function.
    if len(sys.argv) >= 4:
        agent = QLearner(env, sourceQ_file=sys.argv[3])
        render = True
    else:
        agent = QLearner(env)
        render = False

    num_episodes = 10000000
    save_dir = sys.argv[2]
    run(env, agent, render, save_dir, num_episodes)
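# A rough sketch (assumption, not the original run()) of the training loop the
# __main__ blocks above invoke, using the classic Gym step API. The agent
# methods act() and update() are hypothetical names; saveQ() mirrors the call
# that appears in the snippet above.
def run(env, agent, render, save_dir, num_episodes, save_freq=1000):
    all_rewards = []
    for i_episode in range(num_episodes):
        obs = env.reset()
        done = False
        episode_reward = 0.0
        while not done:
            if render:
                env.render()
            action = agent.act(obs)                            # assumed epsilon-greedy action selection
            next_obs, reward, done, info = env.step(action)
            agent.update(obs, action, reward, next_obs, done)  # assumed Q-update hook
            obs = next_obs
            episode_reward += reward
        all_rewards.append(episode_reward)
        if i_episode % save_freq == 0:
            agent.saveQ(save_dir)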