def test_text_init(self):
    "Test the Probe object creation from text"

    # 1. Create Probe object from text
    myobj = probe.Probe(text=aoc_17.from_text(EXAMPLE_TEXT))

    # 2. Make sure it has the expected values
    self.assertEqual(myobj.part2, False)
    self.assertEqual(len(myobj.text), 1)
    self.assertEqual(myobj.start, (0, 0))
    self.assertEqual(myobj.target, [20, 30, -10, -5])
    self.assertEqual(myobj.position, (0, 0))
    self.assertEqual(myobj.velocity, None)
    self.assertEqual(myobj.height, 0)

    # 3. Check methods
    myobj.reload((7, 2))
    self.assertEqual(myobj.position, (0, 0))
    self.assertEqual(myobj.velocity, (7, 2))
    self.assertEqual(myobj.is_in_target(), False)
    self.assertEqual(myobj.is_possible(), True)
    myobj.step()
    self.assertEqual(myobj.position, (7, 2))
    self.assertEqual(myobj.velocity, (6, 1))
    self.assertEqual(myobj.is_in_target(), False)
    self.assertEqual(myobj.is_possible(), True)
    myobj.step()
    self.assertEqual(myobj.position, (13, 3))
    self.assertEqual(myobj.velocity, (5, 0))
    self.assertEqual(myobj.is_in_target(), False)
    self.assertEqual(myobj.is_possible(), True)
    myobj.step()
    myobj.step()
    myobj.step()
    myobj.step()
    self.assertEqual(myobj.position, (27, -3))
    self.assertEqual(myobj.velocity, (1, -4))
    self.assertEqual(myobj.is_in_target(), False)
    self.assertEqual(myobj.is_possible(), True)
    myobj.step()
    self.assertEqual(myobj.position, (28, -7))
    self.assertEqual(myobj.velocity, (0, -5))
    self.assertEqual(myobj.is_in_target(), True)
    self.assertEqual(myobj.is_possible(), True)
    myobj.step()
    self.assertEqual(myobj.position, (28, -12))
    self.assertEqual(myobj.velocity, (0, -6))
    self.assertEqual(myobj.is_in_target(), False)
    self.assertEqual(myobj.is_possible(), False)
    self.assertEqual(myobj.height, 3)
    self.assertEqual(myobj.fire((7, 2)), 3)
    self.assertEqual(myobj.fire((6, 3)), 6)
    self.assertEqual(myobj.fire((9, 0)), 0)
    self.assertEqual(myobj.fire((17, -4)), -1)
    self.assertEqual(myobj.fire((6, 9)), 45)
    self.assertEqual(myobj.highest(), 45)
    self.assertEqual(myobj.count(), 112)
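# The assertions above pin down the kinematics this Probe class is expected
# to implement (Advent of Code 2021, day 17): each step, drag pulls the
# horizontal velocity toward zero and gravity decrements the vertical one.
# A minimal standalone sketch consistent with those values -- an
# illustration only, not the probe module's actual code:
def probe_step(position, velocity):
    """Advance one step: drag on vx, gravity on vy."""
    (x, y), (vx, vy) = position, velocity
    new_vx = vx - 1 if vx > 0 else (vx + 1 if vx < 0 else 0)
    return (x + vx, y + vy), (new_vx, vy - 1)

# Reproduces the first two assertions after reload((7, 2)):
pos, vel = probe_step((0, 0), (7, 2))   # ((7, 2), (6, 1))
pos, vel = probe_step(pos, vel)         # ((13, 3), (5, 0))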
def test_part_two(self):
    "Test part two example of Probe object"

    # 1. Create Probe object from text
    myobj = probe.Probe(part2=True, text=aoc_17.from_text(PART_TWO_TEXT))

    # 2. Check the part two result
    self.assertEqual(myobj.part_two(verbose=False), PART_TWO_RESULT)
def test_part_one(self):
    "Test part one example of Probe object"

    # 1. Create Probe object from text
    myobj = probe.Probe(text=aoc_17.from_text(PART_ONE_TEXT))

    # 2. Check the part one result
    self.assertEqual(myobj.part_one(verbose=False), PART_ONE_RESULT)
def probe_callback(self, data):
    theprobe = probe.Probe(data, self.theta)
    wallOffset = theprobe.offsetBetweenWalls(90, 5)
    forwardOffset = theprobe.offsetBetweenWalls(45, 5)
    slope = -theprobe.theWalls(70, 90, 110, 3)
    forwardDistance = theprobe.averageRanges(-20, 20)
    stopCondition = theprobe.averageRanges(-10, 10) < 0.25

    # Older rear detection, kept for reference; objectDetection arguments:
    # front angle, rear angle, max distance, distance required to detect
    # an object, debug label.
    # leftRearObject = theprobe.objectDetection(-90, -125, 1, 0.9, "Left")
    # rightRearObject = theprobe.objectDetection(90, 125, 1, 0.9, "Right")
    # leftRearObject = theprobe.objectDetection3(25, 110, 135, 1, 0.5, "Right")

    # objectDetection3 arguments: front angle, mid angle, rear angle,
    # max distance, detection cutoff distance, debug label.
    rightSideObj = theprobe.objectDetection3(25, 90, 130, 1.5, 0.5, "Right")
    leftSideObj = theprobe.objectDetection3(-25, -90, -130, 1.5, 0.5, "Left")

    # Disabled debugging helpers:
    # theprobe.edgeDetection(45, 120, 5)
    # print("distance: %f\tangle: %f" % theprobe.closestObjectsAngle())
    # print("distance: %f\tangle: %f" % theprobe.closestObjectsAngleFullRanges())
    # Debug table: Angle / H-Dist (hypotenuse distance to wall from lidar)
    # / A-Dist (adjacent distance, x component of distance to wall):
    # for angle in range(-120, 135, 15):
    #     theprobe.perpendicularLineDistance(angle)
    # rightRearObject = theprobe.objectDetection(-30, 30, 1, 0.9, "Center")
    # theprobe.objectDetection2(45, 90, 135, 2, 1.5, "test")

    overRideSpeed = 0.6
    overRideTurn = rightSideObj
    if leftSideObj != 0:
        overRideTurn = -leftSideObj

    # slope = -theprobe.theWalls(45, 80, 100, 3)
    # theprobe.averageWallSlope(-45, -135)
    if not self.pilotMode:
        self.stopList = []
    self.lidarData = [slope, wallOffset, forwardOffset, forwardDistance,
                      stopCondition, self.stopList, overRideSpeed, overRideTurn]
def read_probe(num=10000):
    # Reading probe data (assumes `import csv` and a module-level
    # `probe_data` list, as in the surrounding module)
    print('Reading probe points ...', num)
    with open('Partition6467ProbePoints.csv', newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for i, row in enumerate(spamreader):
            probe_obj = probe.Probe(row)
            probe_data.append(probe_obj)
            if i > num:
                break
def test_empty_init(self):
    "Test the default Probe creation"

    # 1. Create default Probe object
    myobj = probe.Probe()

    # 2. Make sure it has the default values
    self.assertEqual(myobj.part2, False)
    self.assertEqual(myobj.text, None)
    self.assertEqual(myobj.start, (0, 0))
    self.assertEqual(myobj.target, None)
    self.assertEqual(myobj.position, (0, 0))
    self.assertEqual(myobj.velocity, None)
    self.assertEqual(myobj.height, 0)
def part_two(args, input_lines):
    "Process part two of the puzzle"

    # 1. Create the puzzle solver
    solver = probe.Probe(part2=True, text=input_lines)

    # 2. Determine the solution for part two
    solution = solver.part_two(verbose=args.verbose, limit=args.limit)
    if solution is None:
        print("There is no solution")
    else:
        print("The solution for part two is %s" % (solution))

    # 3. Return result
    return solution is not None
def read_probe():
    # Reading probe data (assumes `import csv` and a module-level
    # `probe_data` list, as in the surrounding module)
    print('reading probe points ...')
    with open('Partition6467ProbePoints.csv', newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
        i = 0
        for row in spamreader:
            probe_obj = probe.Probe(row)
            probe_data.append(probe_obj)
            # x.append(probe_obj.longitude)
            # y.append(probe_obj.latitude)
            # print('No=', i, 'latitude=', probe_obj.latitude, 'longitude=', probe_obj.longitude)
            i = i + 1
            if i > 100:
                break
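# The commented-out lines above assume each parsed row exposes at least
# .latitude and .longitude attributes. A minimal sketch of such a record
# class -- the column indices are an assumption about the CSV layout, not
# taken from the actual probe module:
class ProbePoint:
    def __init__(self, row):
        self.latitude = float(row[3])    # assumed column index
        self.longitude = float(row[4])   # assumed column index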
def main():
    appList = json.load(open('./src/data/applications.json'))
    logging.info('Using SSDP to discover nodes')
    nodes = DIAL.discover()
    logging.info('Found {} nodes'.format(len(nodes)))
    for node in nodes:
        print('Name: {}\nManufacturer: {}\nModel: {}'.format(
            node['friendlyName'], node['manufacturer'], node['model']))
        url = node['application-url']
        logging.info('Probing {}'.format(url))
        p = probe.Probe(url, appList)
        availableApps = p.checkAllApps()
        for app in availableApps:
            print('Found: {}'.format(app))
    logging.info('Done')
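# Under the DIAL protocol, an app's availability is typically checked with
# an HTTP GET of <application-url>/<app name>, where a 200 response means
# the app is installed. A hypothetical sketch of a single such check
# (check_app is an illustration, not the probe module's actual API):
import requests

def check_app(application_url, app_name):
    resp = requests.get('{}/{}'.format(application_url.rstrip('/'), app_name))
    return resp.status_code == 200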
def probe_callback(self, data):
    theprobe = probe.Probe(data, self.theta)
    wallOffset = theprobe.offsetBetweenWalls(90, 5)
    forwardOffset = theprobe.offsetBetweenWalls(45, 5)
    slope = -theprobe.theWalls(70, 90, 110, 3)
    forwardDistance = theprobe.averageRanges(-20, 20)
    stopCondition = theprobe.averageRanges(-10, 10) < 0.25

    # objectDetection arguments: front angle, rear angle, max distance,
    # distance required to detect an object, debug label.
    leftRearObject = theprobe.objectDetection(-90, -125, 1, 0.9, "Left")
    rightRearObject = theprobe.objectDetection(90, 125, 1, 0.9, "Right")

    # slope = -theprobe.theWalls(45, 80, 100, 3)
    # theprobe.averageWallSlope(-45, -135)
    if not self.pilotMode:
        self.stopList = []
    self.lidarData = [slope, wallOffset, forwardOffset, forwardDistance,
                      stopCondition, self.stopList]
def __init__(self, target, ai_settings, screen, probes, treasures):
    super().__init__(target, ai_settings, screen, probes, treasures)
    self.rank = self.ai_settings.black_dragon_rank
    self.image = self.ai_settings.black_dragon_left[0]
    self.image_left = self.ai_settings.black_dragon_left
    self.image_right = self.ai_settings.black_dragon_right
    self.boom_image = self.ai_settings.black_dragon_boom
    self.width = self.ai_settings.black_dragon_width
    self.height = self.ai_settings.black_dragon_height
    self.adjusted_borny = random.uniform(
        self.orig_borny,
        self.orig_borny + ai_settings.maze_block_width - self.width)
    self.adjusted_bornx = random.uniform(
        self.orig_bornx,
        self.orig_bornx + ai_settings.maze_block_height - self.height)
    self.rect = pygame.Rect(self.adjusted_borny, self.adjusted_bornx,
                            self.ai_settings.black_dragon_width,
                            self.ai_settings.black_dragon_height)
    self.health = self.ai_settings.black_dragon_health
    self.speed = ai_settings.black_dragon_speed_factor
    self.atk_distance = self.ai_settings.black_dragon_atk_distance
    self.atk = self.ai_settings.black_dragon_atk
    self.ATKPRT = self.ai_settings.black_dragon_ATKPRBT * random.uniform(0.5, 1.5)
    # Create the first probe of this monster
    self.probe = p.Probe(self.target, self.rect, self.ai_settings, self.screen)
    probes.append(self.probe)
    self.x_speed = 0
    self.y_speed = 0
    self.is_target_within_range = False
def detect(self, target, probes): # The new probe should be created after the last one "dead" if not self.probe.is_alive: self.probe = p.Probe(target, self.rect, self.ai_settings, self.screen) probes.append(self.probe)
def test_function(config, config_suffix=None):
    config_main = config['main']
    config_probe = config['probe']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    config_ablation = config['ablation']
    use_pi_e = config_ablation['use_pi_e']
    phase = config_main['phase']
    assert phase == 'validation' or phase == 'test'
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("test_ablation.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)
    n_state = config_domain['n_state']
    n_action = config_domain['n_action']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_domain['N_test_instances']
    N_episodes = config_domain['N_test_episodes']
    test_steps = config_domain['test_steps']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain, phase)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    assert n_probe_steps < test_steps
    if use_pi_e:
        pi_e = probe.Probe(config_probe, n_state, n_action)
    else:
        # initial z
        z_avg = pickle.load(open('../results/%s/z_avg.p' % dir_name, 'rb'))

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state, n_action, n_probe_steps, seed=seed, **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    saver = tf.train.Saver()
    print("Restoring variables from %s" % dir_name)
    saver.restore(sess, '../results/%s/%s' % (dir_name, model_name))

    reward_total = 0
    cumulative_reward = np.zeros((test_steps, N_instances))

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # N_episodes should be 1, but we let it be flexible in case needed
        for idx_episode in range(1, N_episodes + 1):
            reward_episode = 0
            collected_probe_traj = False
            while not collected_probe_traj:
                # list of (state, action) pairs
                traj_probe = []
                state = hpmdp.reset()
                episode_step = 0
                done = False
                probe_finished_early = False
                # Generate probe trajectory
                for step in range(1, n_probe_steps + 1):
                    if use_pi_e:
                        action = pi_e.run_actor(state, sess)
                    else:
                        action = pi_c.run_actor(state, z_avg, sess, epsilon=0)
                    # print("Probe step %d action %d" % (step, action))
                    action_1hot = np.zeros(n_action)
                    action_1hot[action] = 1
                    traj_probe.append((state, action_1hot))
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step, idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                    if done and step < n_probe_steps:
                        probe_finished_early = True
                        print("test_ablation.py : done is True while generating probe trajectory")
                        break
                if not probe_finished_early:
                    collected_probe_traj = True

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)
            print(z)

            if config_DDQN['activate']:
                # Start control policy
                while not done and episode_step < test_steps:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon=0)
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step, idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                print(reward_episode)

            # If episode ended earlier than test_steps, fill in the
            # rest of the cumulative rewards with the last value
            if episode_step < test_steps:
                remaining = np.ones(test_steps - episode_step) * reward_episode
                cumulative_reward[episode_step:, idx_instance - 1] = remaining

            reward_total += reward_episode

    header = 'Step'
    for idx in range(1, N_instances + 1):
        header += ',R_%d' % idx
    indices = np.arange(1, test_steps + 1).reshape(test_steps, 1)
    concated = np.concatenate([indices, cumulative_reward], axis=1)
    save_loc = '_'.join(dir_name.split('_')[:-1])
    os.makedirs('../results/%s' % save_loc, exist_ok=True)
    run_number = dir_name.split('_')[-1]
    np.savetxt('../results/%s/test_%s.csv' % (save_loc, run_number),
               concated, delimiter=',', fmt='%.3e', header=header)
    print("Avg episode reward", reward_total / float(N_instances * N_episodes))
def train_function(config, config_suffix=None):
    config_main = config['main']
    config_probe = config['probe']
    autoencoder = config_main['autoencoder']
    if autoencoder == 'VAE':
        config_VAE = config['VAE']
    else:
        raise ValueError("Other autoencoders not supported")
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    phase = config_main['phase']
    assert phase == 'train'
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'lander':
        domain_name = "config_lander.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)
    n_state = config_domain['n_state']
    n_action = config_domain['n_action']
    min_samples_before_train = config_domain['min_samples_before_train']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_main['N_instances']
    N_episodes = config_main['N_episodes']
    period = config_main['period']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']
    os.makedirs('../results/%s' % dir_name, exist_ok=True)

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    pi_e = probe.Probe(config_probe, n_state, n_action)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    if autoencoder == 'VAE':
        vae = vae_import.VAE(n_state, n_action, n_probe_steps, seed=seed, **config_VAE)
    else:
        raise ValueError('Other autoencoders not supported')

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])
        epsilon_start = config_DDQN['epsilon_start']
        epsilon_end = config_DDQN['epsilon_end']
        epsilon_decay = np.exp(np.log(epsilon_end / epsilon_start) /
                               (N_instances * N_episodes))
        steps_per_train = config_DDQN['steps_per_train']

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())
    if config_DDQN['activate']:
        sess.run(pi_c.list_initialize_target_ops)
        epsilon = epsilon_start
    if config_VAE['dual']:
        sess.run(vae.list_equate_dual_ops)
    writer = tf.summary.FileWriter('../results/%s' % dir_name, sess.graph)
    saver = tf.train.Saver()

    # Use the DQN version of the replay, so instance_count and
    # BNN-specific params do not matter
    exp_replay_param = {
        'episode_count': N_instances * N_episodes,
        'instance_count': 0,
        'max_task_examples': hpmdp.max_steps_per_episode,
        'ddqn_batch_size': config_DDQN['batch_size'],
        'num_strata_samples': config_PER['num_strata_samples'],
        'PER_alpha': config_PER['alpha'],
        'PER_beta_zero': config_PER['beta_zero'],
        'bnn_batch_size': 0,
        'bnn_start': 0,
        'dqn_start': min_samples_before_train
    }
    buf = ExperienceReplay.ExperienceReplay(exp_replay_param,
                                            buffer_size=config_PER['buffer_size'])

    # Logging
    header = "Episode,R_avg,R_p\n"
    with open("../results/%s/log.csv" % dir_name, 'w') as f:
        f.write(header)
    reward_period = 0
    reward_p_period = 0
    list_trajs = []  # circular buffer to store probe trajectories for VAE
    idx_traj = 0  # counter for list_trajs
    control_step = 0
    train_count_probe = 1
    train_count_vae = 1
    train_count_control = 1
    total_episodes = 0
    t_start = time.time()

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # Iterate through many episodes
        for idx_episode in range(1, N_episodes + 1):
            total_episodes += 1
            # list of (state, action) pairs
            traj_probe = []
            state = hpmdp.reset()
            done = False
            reward_episode = 0

            # Generate probe trajectory
            probe_finished_early = False
            for step in range(1, n_probe_steps + 1):
                action = pi_e.run_actor(state, sess)
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_probe.append((state, action_1hot))
                state_next, reward, done = hpmdp.step(action)
                state = state_next
                reward_episode += reward
                if done and step < n_probe_steps:
                    probe_finished_early = True
                    print("train.py : done is True while generating probe trajectory")
                    break
            if probe_finished_early:
                # Skip over pi_e and VAE training if probe finished early
                continue

            if idx_traj >= len(list_trajs):
                list_trajs.append(traj_probe)
            else:
                list_trajs[idx_traj] = traj_probe
            idx_traj = (idx_traj + 1) % buffer_size_vae

            # Compute probe reward using VAE
            if config_probe['reward'] == 'vae':
                reward_e = vae.compute_lower_bound(traj_probe, sess)
            elif config_probe['reward'] == 'total_variation':
                reward_e = pi_e.compute_reward(traj_probe)
            elif config_probe['reward'] == 'negvae':
                # this reward encourages maximizing entropy
                reward_e = -vae.compute_lower_bound(traj_probe, sess)

            # Write Tensorboard at the final episode of every instance
            summarize = (total_episodes % period == 0)

            # Train probe policy
            pi_e.train_step(sess, traj_probe, reward_e, train_count_probe,
                            summarize, writer)
            train_count_probe += 1

            # Train VAE
            if len(list_trajs) >= batch_size_vae:
                vae.train_step(sess, list_trajs, train_count_vae, summarize, writer)
                train_count_vae += 1

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)

            if config_DDQN['activate']:
                # Start control policy
                summarized = False
                while not done:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon)
                    state_next, reward, done = hpmdp.step(action)
                    control_step += 1
                    reward_episode += reward
                    buf.add(np.reshape(
                        np.array([state, action, reward, state_next, done, z]),
                        (1, 6)))
                    state = state_next

                    if (control_step >= min_samples_before_train
                            and control_step % steps_per_train == 0):
                        batch, IS_weights, indices = buf.sample(control_step)
                        if not summarized:
                            # Write TF summary at first train step of the
                            # last episode of every instance
                            td_loss = pi_c.train_step(sess, batch, IS_weights, indices,
                                                      train_count_control, summarize, writer)
                            summarized = True
                        else:
                            td_loss = pi_c.train_step(sess, batch, IS_weights, indices,
                                                      train_count_control, False, writer)
                        train_count_control += 1
                        if config_PER['activate']:
                            buf.update_priorities(np.hstack(
                                (np.reshape(td_loss, (len(td_loss), -1)),
                                 np.reshape(indices, (len(indices), -1)))))

            reward_period += reward_episode
            reward_p_period += reward_e
            if epsilon > epsilon_end:
                epsilon *= epsilon_decay

            # Logging
            if total_episodes % period == 0:
                s = "%d,%.2f,%.2f\n" % (total_episodes,
                                        reward_period / float(period),
                                        reward_p_period / float(period))
                print(s)
                with open("../results/%s/log.csv" % dir_name, 'a') as f:
                    f.write(s)
                if (config_domain['save_threshold'] and
                        reward_period / float(period) > config_domain['save_threshold']):
                    saver.save(sess, '../results/%s/%s.%d' %
                               (dir_name, model_name, total_episodes))
                reward_period = 0
                reward_p_period = 0

    with open("../results/%s/time.txt" % dir_name, 'a') as f:
        f.write("%.5e" % (time.time() - t_start))
    saver.save(sess, '../results/%s/%s' % (dir_name, model_name))
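# Hypothetical driver for train_function, assuming the config is stored as
# JSON with the sections referenced above ('main', 'probe', 'VAE', 'DDQN',
# 'PER'); the file name 'config.json' is an assumption:
import json

if __name__ == '__main__':
    with open('config.json') as f:
        config = json.load(f)
    train_function(config)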