def main():
    parser = argparse.ArgumentParser(
        description="Export a robot design as a mesh.")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument("rule_sequence", nargs="+", help="Rule sequence")
    parser.add_argument("--output_file", type=str, required=True,
                        help="Output file")
    args = parser.parse_args()

    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]
    graph = make_graph(rules, rule_sequence)
    robot = build_normalized_robot(graph)

    # Simulation is only used to get link/joint transforms
    sim = rd.BulletSimulation()
    sim.add_robot(robot, [0.0, 0.0, 0.0], rd.Quaterniond(1.0, 0.0, 0.0, 0.0))

    obj_file_name = args.output_file
    mtl_file_name = os.path.splitext(args.output_file)[0] + '.mtl'
    with open(obj_file_name, 'w') as obj_file, \
         open(mtl_file_name, 'w') as mtl_file:
        dumper = ObjDumper(obj_file, mtl_file)
        obj_file.write("mtllib {}\n".format(mtl_file_name))
        dump_sim(sim, dumper)
        dumper.finish()
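# Example invocation (hedged: the script name matches the `export_mesh` module
# imported by the viewer below; the grammar path and rule indices are
# placeholders):
#   python export_mesh.py data/designs/grammar_apr30.dot 0 7 1 --output_file robot.obj
# This writes robot.obj plus a robot.mtl material file alongside it.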
def load_terminal_design_data(raw_dataset_path, grammar_file):
    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    preprocessor = Preprocessor(all_labels=all_labels)

    with open(raw_dataset_path, newline='') as log_file:
        reader = csv.DictReader(log_file)
        all_link_features = []
        all_link_adj = []
        all_results = []
        max_nodes = 0
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])
            all_results.append(result)

            # Build a robot from the rule sequence
            robot_graph = make_initial_graph()
            for r in rule_seq:
                matches = rd.find_matches(rules[r].lhs, robot_graph)
                # Always use the first match
                robot_graph = rd.apply_rule(rules[r], robot_graph, matches[0])

            adj_matrix, link_features, _ = preprocessor.preprocess(robot_graph)
            all_link_features.append(link_features)
            all_link_adj.append(adj_matrix)
            max_nodes = max(max_nodes, adj_matrix.shape[0])

    all_adj_matrix_pad, all_link_features_pad, all_masks = [], [], []
    for adj_matrix, link_features in zip(all_link_adj, all_link_features):
        adj_matrix_pad, link_features_pad, masks = preprocessor.pad_graph(
            adj_matrix, link_features, max_nodes=max_nodes)
        all_adj_matrix_pad.append(adj_matrix_pad)
        all_link_features_pad.append(link_features_pad)
        all_masks.append(masks)

    return all_link_features_pad, all_adj_matrix_pad, all_masks, all_results
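# Minimal usage sketch (assumption: a caller converts the returned lists to
# torch tensors for training; the CSV path below is a placeholder). Every
# design comes back padded to the same max_nodes, so stacking is safe.
def _example_load_terminal(raw_dataset_path='logs/designs.csv',
                           grammar_file='data/designs/grammar_apr30.dot'):
    features, adj, masks, results = load_terminal_design_data(
        raw_dataset_path, grammar_file)
    features_t = torch.tensor(features)  # (num_designs, max_nodes, num_features)
    adj_t = torch.tensor(adj)            # (num_designs, max_nodes, max_nodes)
    masks_t = torch.tensor(masks)
    results_t = torch.tensor(results)    # one scalar result per design
    return features_t, adj_t, masks_t, results_t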
def build_robot(args):
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]

    graph = make_initial_graph()
    for r in rule_sequence:
        matches = rd.find_matches(rules[r].lhs, graph)
        if matches:
            graph = rd.apply_rule(rules[r], graph, matches[0])

    robot = build_normalized_robot(graph)
    finalize_robot(robot)
    return robot
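# Usage sketch (hedged: build_robot only reads `grammar_file` and
# `rule_sequence`, so it can be driven without a real CLI; the path and rule
# indices below are placeholders). Trailing commas are tolerated because
# build_robot strips them before calling int().
def _example_build_robot():
    example_args = argparse.Namespace(
        grammar_file='data/designs/grammar_apr30.dot',
        rule_sequence=['0,', '7,', '1'])
    robot = build_robot(example_args)
    print('robot has {} links'.format(len(robot.links)))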
def main():
    parser = argparse.ArgumentParser(description="Robot design viewer.")
    parser.add_argument("task", type=str, help="Task (Python class name)")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument("rule_sequence", nargs="+",
                        help="Rule sequence to apply")
    parser.add_argument("-o", "--optim", default=False, action="store_true",
                        help="Optimize a trajectory")
    parser.add_argument("-s", "--opt_seed", type=int, default=None,
                        help="Trajectory optimization seed")
    parser.add_argument("-e", "--episodes", type=int, default=1,
                        help="Number of optimization episodes")
    parser.add_argument("-j", "--jobs", type=int, required=True,
                        help="Number of jobs/threads")
    parser.add_argument("--input_sequence_file", type=str,
                        help="File to save input sequence to (.csv)")
    parser.add_argument("--save_obj_dir", type=str,
                        help="Directory to save .obj files to")
    parser.add_argument("--save_video_file", type=str,
                        help="File to save video to (.mp4)")
    parser.add_argument("-l", "--episode_len", type=int, default=128,
                        help="Length of episode")
    args = parser.parse_args()

    task_class = getattr(tasks, args.task)
    task = task_class(episode_len=args.episode_len)
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]

    if args.opt_seed is not None:
        opt_seed = args.opt_seed
    else:
        opt_seed = random.getrandbits(32)
        print("Using optimization seed:", opt_seed)

    graph = make_graph(rules, rule_sequence)
    robot = build_normalized_robot(graph)
    finalize_robot(robot)
    if args.optim:
        input_sequence, result = simulate(robot, task, opt_seed, args.jobs,
                                          args.episodes)
        print("Result:", result)
    else:
        input_sequence = None

    if args.input_sequence_file and input_sequence is not None:
        import csv
        with open(args.input_sequence_file, 'w', newline='') as input_seq_file:
            writer = csv.writer(input_seq_file)
            for col in input_sequence.T:
                writer.writerow(col)
        print("Saved input sequence to file:", args.input_sequence_file)

    robot_init_pos, has_self_collision = presimulate(robot)
    if has_self_collision:
        print("Warning: robot self-collides in initial configuration")

    main_sim = rd.BulletSimulation(task.time_step)
    task.add_terrain(main_sim)
    # Rotate 180 degrees around the y axis, so the base points to the right
    main_sim.add_robot(robot, robot_init_pos,
                       rd.Quaterniond(0.0, 0.0, 1.0, 0.0))
    robot_idx = main_sim.find_robot_index(robot)

    camera_params, record_step_indices = view_trajectory(
        main_sim, robot_idx, input_sequence, task)

    if args.save_obj_dir and input_sequence is not None:
        import export_mesh

        if record_step_indices:
            print("Saving .obj files for {} steps".format(
                len(record_step_indices)))

        os.makedirs(args.save_obj_dir, exist_ok=True)

        # Save the props/terrain once
        obj_file_name = os.path.join(args.save_obj_dir, 'terrain.obj')
        mtl_file_name = os.path.join(args.save_obj_dir, 'terrain.mtl')
        with open(obj_file_name, 'w') as obj_file, \
             open(mtl_file_name, 'w') as mtl_file:
            dumper = export_mesh.ObjDumper(obj_file, mtl_file)
            obj_file.write("mtllib {}\n".format(
                os.path.split(mtl_file_name)[-1]))
            for prop_idx in range(main_sim.get_prop_count()):
                export_mesh.dump_prop(prop_idx, main_sim, dumper)
            dumper.finish()

        # Save the robot once per step
        def save_obj_callback(step_idx):
            if record_step_indices:
                if step_idx not in record_step_indices:
                    return
            else:
                if step_idx % 128 != 0:
                    return

            obj_file_name = os.path.join(args.save_obj_dir,
                                         'robot_{:04}.obj'.format(step_idx))
            # Use one .mtl file for all steps
            mtl_file_name = os.path.join(args.save_obj_dir, 'robot.mtl')
            with open(obj_file_name, 'w') as obj_file, \
                 open(mtl_file_name, 'w') as mtl_file:
                dumper = export_mesh.ObjDumper(obj_file, mtl_file)
                obj_file.write("mtllib {}\n".format(
                    os.path.split(mtl_file_name)[-1]))
                export_mesh.dump_robot(robot_idx, main_sim, dumper)
                dumper.finish()

        run_trajectory(main_sim, robot_idx, input_sequence, task,
                       save_obj_callback)

    if args.save_video_file and input_sequence is not None:
        import cv2

        if record_step_indices:
            print("Saving video for {} steps".format(
                len(record_step_indices)))

        viewer = rd.GLFWViewer()
        # Copy camera parameters from the interactive viewer
        viewer.camera_params = camera_params

        tracker = CameraTracker(viewer, main_sim, robot_idx)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(args.save_video_file, fourcc, 60.0,
                                 viewer.get_framebuffer_size())
        writer.set(cv2.VIDEOWRITER_PROP_QUALITY, 100)

        def write_frame_callback(step_idx):
            tracker.update(task.time_step)

            # 240 steps/second / 4 = 60 fps
            if step_idx % 4 == 0:
                # Flip vertically, convert RGBA to BGR
                frame = viewer.render_array(main_sim)[::-1, :, 2::-1]
                writer.write(frame)

        run_trajectory(main_sim, robot_idx, input_sequence, task,
                       write_frame_callback)

        writer.release()
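# Example invocation (hedged: the script and task names are placeholders; see
# the argparse setup above for the full option list):
#   python viewer.py FlatTerrainTask data/designs/grammar_apr30.dot 0 7 1 \
#       -j 8 -o -e 3 --save_video_file out.mp4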
def search_algo(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    # initialize/load
    task_class = getattr(tasks, args.task)
    if args.no_noise:
        task = task_class(force_std=0.0, torque_std=0.0)
    else:
        task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # initialize preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))

    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = args.depth * 3

    global preprocessor
    # preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)
    preprocessor = Preprocessor(all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, seed=args.seed,
                          mpc_num_processes=args.mpc_num_processes)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = \
        preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize invalid_his
    invalid_his = dict()
    num_invalid_samples, num_valid_samples = 0, 0
    repeated_cnt = 0

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    states_set = set()

    # explored designs
    designs = []
    design_rewards = []
    design_opt_seeds = []

    # record prediction error
    prediction_error_sum = 0.0

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward', 'opt_seed']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.
        # record the count for invalid samples
        no_action_samples, step_exceeded_samples, self_collision_samples = 0, 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations / args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            selected_design, selected_reward = None, -np.inf
            selected_state_seq, selected_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if not has_nonterminals(state):
                            break

                    valid = env.is_valid(state)

                    t_sample += time.time() - t0

                    t0 = time.time()

                    if not valid:
                        # update the invalid sample's count
                        if no_action_flag:
                            no_action_samples += 1
                        elif has_nonterminals(state):
                            step_exceeded_samples += 1
                        else:
                            self_collision_samples += 1

                        # update the Vhat for invalid designs
                        update_Vhat(args, V_hat, state_seq, -2.0, invalid=True,
                                    invalid_cnt=invalid_his)
                        # update states pool
                        update_states_pool(states_pool, state_seq, states_set,
                                           V_hat)
                        num_invalid_samples += 1
                    else:
                        num_valid_samples += 1

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > selected_reward:
                    selected_design, selected_reward = state, predicted_value
                    selected_rule_seq, selected_state_seq = rule_seq, state_seq

            t0 = time.time()

            repeated = False
            if (hash(selected_design) in V_hat) and (
                    V_hat[hash(selected_design)] > -2.0 + 1e-3):
                repeated = True
                repeated_cnt += 1

            reward, best_seed = -np.inf, None
            for _ in range(args.num_eval):
                _, rew = env.get_reward(selected_design)
                if rew > reward:
                    reward, best_seed = rew, env.last_opt_seed

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(selected_rule_seq)
            design_rewards.append(reward)
            design_opt_seeds.append(best_seed)

            # update best design
            if reward > best_reward:
                best_design, best_reward = selected_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, selected_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(args, V_hat, selected_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, selected_state_seq, states_set,
                               V_hat)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                max_nodes = 0
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    # adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    adj_matrix, features, _ = preprocessor.preprocess(
                        robot_graph)
                    max_nodes = max(max_nodes, len(features))
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    # train_masks.append(masks)
                    train_reward.append(target_reward)

                for i in range(len(minibatch)):
                    train_adj_matrix[i], train_features[i], masks = \
                        preprocessor.pad_graph(train_adj_matrix[i],
                                               train_features[i], max_nodes)
                    train_masks.append(masks)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1) % args.log_interval == 0 or \
                    epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()
                # save explored design and its reward
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward', 'opt_seed']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({
                        'rule_seq': str(designs[i]),
                        'reward': design_rewards[i],
                        'opt_seed': design_opt_seeds[i]
                    })
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            prediction_error_sum += (selected_reward - reward)**2
            avg_prediction_error = prediction_error_sum / (epoch + 1)

            if repeated:
                print_white('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample,
                    num_samples, avg_loss, avg_prediction_error,
                    selected_reward, reward, avg_reward, best_reward))
            else:
                print_warning('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample,
                    num_samples, avg_loss, avg_prediction_error,
                    selected_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc,
                t_opt, avg_loss, selected_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                print_info('size of states_pool = {}'.format(len(states_pool)))
                print_info(
                    '#valid samples = {}, #invalid samples = {}, #valid / #invalid = {}'
                    .format(
                        num_valid_samples, num_invalid_samples,
                        num_valid_samples / num_invalid_samples
                        if num_invalid_samples > 0 else 10000.0))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #self_collision_samples = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            self_collision_samples))

                max_trials, cnt = 0, 0
                for key in invalid_his.keys():
                    if invalid_his[key] > max_trials:
                        if key not in V_hat:
                            max_trials = invalid_his[key]
                        elif V_hat[key] < -2.0 + 1e-3:
                            max_trials = invalid_his[key]
                    if invalid_his[key] >= args.max_trials:
                        if key in V_hat and V_hat[key] < -2.0 + 1e-3:
                            cnt += 1
                print_info(
                    'max invalid_trials = {}, #failed nodes = {}'.format(
                        max_trials, cnt))

                print_info('repeated rate = {}'.format(repeated_cnt /
                                                       (epoch + 1)))

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if not has_nonterminals(next_state):
                            valid = True
                            break

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')
        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')
        plt.show()
def main():
    sns.set_context('paper')

    parser = argparse.ArgumentParser(
        description="Create plots using multiple log directories.")
    parser.add_argument('log_dir', type=str, nargs='+',
                        help="Log directory containing meta.json")
    parser.add_argument('-t', '--task', type=str, nargs='+',
                        help="Task to include in plots")
    parser.add_argument('-a', '--algorithm', type=str, nargs='+',
                        help="Algorithm to include in plots")
    parser.add_argument('-i', '--iterations', type=int,
                        help="Maximum number of iterations to show")
    parser.add_argument('--servo_count', action='store_true',
                        help="Include servo count as an objective")
    parser.add_argument('--ind_rewards', action='store_true',
                        help="Include individual rewards in iterations plot")
    parser.add_argument('--estimator', type=str,
                        help="Estimator for aggregating multiple trials")
    subparsers = parser.add_subparsers(help='Plot type')
    parser_iterations = subparsers.add_parser('iterations')
    parser_iterations.set_defaults(func=plot_iterations)
    parser_pareto = subparsers.add_parser('pareto')
    parser_pareto.set_defaults(func=plot_pareto)
    args = parser.parse_args()

    # Store every log file's contents into one big pandas dataframe
    df = pd.DataFrame()
    for log_dir in args.log_dir:
        try:
            with open(os.path.join(log_dir, 'meta.json'), 'r') as json_file:
                metadata = json.load(json_file)
        except FileNotFoundError:
            print("Directory '{}' does not contain metadata file, skipping"
                  .format(log_dir), file=sys.stderr)
            continue

        # Load the .csv data
        csv_file_names = glob.glob(os.path.join(log_dir, '*.csv'))
        if len(csv_file_names) == 0:
            print("Directory '{}' does not contain any .csv files, skipping"
                  .format(log_dir), file=sys.stderr)
            continue

        for trial_num, csv_file_name in enumerate(csv_file_names):
            try:
                log_df = pd.read_csv(csv_file_name)
            except FileNotFoundError:
                print("File '{}' does not exist, skipping"
                      .format(csv_file_name), file=sys.stderr)
                continue

            if 'iteration' not in log_df.columns:
                log_df['iteration'] = log_df.index
            log_df.rename(columns={'result': 'reward'}, inplace=True)
            if 'task' not in log_df.columns:
                log_df['task'] = metadata.get('task')
            if 'grammar' not in log_df.columns:
                log_df['grammar'] = metadata.get(
                    'grammar', 'data/designs/grammar_apr30.dot')
            if 'algorithm' not in log_df.columns:
                log_df['algorithm'] = metadata.get('algorithm')
            log_df['trial'] = trial_num
            df = df.append(log_df, ignore_index=True, sort=True)

    # Filter data based on arguments
    if args.iterations:
        df = df[df['iteration'] < args.iterations]
    if args.task:
        df = df[df['task'].isin(args.task)]
    if args.algorithm:
        df = df[df['algorithm'].isin(args.algorithm)]

    try:
        # Expecting only one grammar
        grammar_file, = df['grammar'].unique()
    except ValueError:
        print("All runs must use the same grammar", file=sys.stderr)
        raise
    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # Compute a graph hash, servo count for each rule_seq
    rule_seq_hashes = {}
    rule_seq_servo_counts = {}
    for rule_seq_str in df['rule_seq'].unique():
        rule_seq = ast.literal_eval(rule_seq_str)
        graph = make_graph(rules, rule_seq)
        robot = build_normalized_robot(graph)
        rule_seq_hashes[rule_seq_str] = hash(graph)

        servo_count = 0
        for link in robot.links:
            if link.joint_type == rd.JointType.HINGE:
                # Only hinge joints have servos
                servo_count += 1
        rule_seq_servo_counts[rule_seq_str] = servo_count

    if args.servo_count:
        servo_count_df = pd.DataFrame({'rule_seq': df['rule_seq'].unique()})
        servo_count_df['task'] = 'ServoCount'
        servo_count_df['reward'] = \
            servo_count_df['rule_seq'].map(rule_seq_servo_counts)
        df = df.append(servo_count_df, ignore_index=True, sort=True)

    df['hash'] = df['rule_seq'].map(rule_seq_hashes)

    args.func(df, ind_rewards=args.ind_rewards, estimator=args.estimator)
def search_algo_1(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle=True,
                          preprocessor=preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = \
        preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    all_sample_designs = []

    # explored designs
    designs = []
    design_rewards = []

    # load previously explored designs
    if args.load_designs_path is not None:
        fp_csv = open(args.load_designs_path, newline='')
        reader = csv.DictReader(fp_csv)
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            reward = float(row['reward'])
            state = make_initial_graph()
            for i in range(len(rule_seq)):
                state = env.transite(state, rule_seq[i])
            designs.append(state)
            design_rewards.append(reward)
            if not np.isclose(V_hat[hash(state)], reward):
                print(rule_seq)
                print(V_hat[hash(state)], reward)
                print_error("Vhat and designs don't match")
        fp_csv.close()
        print_info('Loaded pretrained designs from {}'.format(
            args.load_designs_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.
        # record the count for invalid samples
        no_action_samples, step_exceeded_samples = 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations / args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            best_candidate_design, best_candidate_reward = None, -1.0
            best_candidate_state_seq, best_candidate_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    random_step_cnt, optimal_step_cnt = 0, 0
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        if step_type == 'random':
                            random_step_cnt += 1
                        elif step_type == 'optimal':
                            optimal_step_cnt += 1
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if env.is_valid(next_state):
                            valid = True
                            break

                    t_sample += time.time() - t0

                    t0 = time.time()

                    # update the invalid sample's count
                    if not valid:
                        if no_action_flag:
                            no_action_samples += 1
                        else:
                            step_exceeded_samples += 1

                    # update the Vhat for invalid designs
                    if not valid:
                        update_Vhat(V_hat, state_seq, 0.0)
                        # update states pool
                        update_states_pool(states_pool, state_seq)

                    # if valid but has been explored as a valid design before,
                    # then put in state pool but resample it
                    if valid and (hash(state) in V_hat) and \
                            (V_hat[hash(state)] > 1e-3):
                        update_Vhat(V_hat, state_seq, V_hat[hash(state)])
                        update_states_pool(states_pool, state_seq)
                        valid = False

                    # record the sampled design
                    all_sample_designs.append(rule_seq)

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > best_candidate_reward:
                    best_candidate_design, best_candidate_reward = \
                        state, predicted_value
                    best_candidate_rule_seq, best_candidate_state_seq = \
                        rule_seq, state_seq

            t0 = time.time()

            _, reward = env.get_reward(best_candidate_design)

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(best_candidate_rule_seq)
            design_rewards.append(reward)

            # update best design
            if reward > best_reward:
                best_design, best_reward = best_candidate_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, best_candidate_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(V_hat, best_candidate_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, best_candidate_state_seq)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    adj_matrix, features, masks = preprocessor.preprocess(
                        robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(target_reward)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1) % args.log_interval == 0 or \
                    epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()
                # save all_sampled_designs
                save_path = os.path.join(iter_save_dir, 'all_sampled_designs')
                fp = open(save_path, 'wb')
                pickle.dump(all_sample_designs, fp)
                fp.close()
                # save explored design and its reward
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({
                        'rule_seq': str(designs[i]),
                        'reward': design_rewards[i]
                    })
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: T_sample = {:.2f}, T_update = {:.2f}, T_mpc = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, eps_sample = {:.3f}, #samples = {}, training loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(
                epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample,
                num_samples, avg_loss, best_candidate_reward, reward,
                avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc,
                t_opt, avg_loss, best_candidate_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                invalid_cnt, valid_cnt = 0, 0
                for state in states_pool.pool:
                    if np.isclose(V_hat[hash(state)], 0.):
                        invalid_cnt += 1
                    else:
                        valid_cnt += 1
                print_info(
                    'states_pool size = {}, #valid = {}, #invalid = {}, #valid / #invalid = {}'
                    .format(len(states_pool), valid_cnt, invalid_cnt,
                            valid_cnt / invalid_cnt))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #no_action / #step_exceeded = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            no_action_samples / step_exceeded_samples))

            # evaluation
            if args.eval_interval > 0 and (
                    (epoch + 1) % args.eval_interval == 0
                    or epoch + 1 == args.num_iterations):
                print_info('-------- Doing evaluation --------')
                print_info('#states = {}'.format(len(states_pool)))
                loss_total = 0.
                for state in states_pool.pool:
                    value = predict(V, state)
                    loss_total += (V_hat[hash(state)] - value)**2
                print_info('Loss = {:.3f}'.format(loss_total /
                                                  len(states_pool)))
                fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'a')
                fp_eval.write('epoch = {}, loss = {:.3f}\n'.format(
                    epoch + 1, loss_total / len(states_pool)))
                fp_eval.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if env.is_valid(next_state):
                            valid = True
                            break

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')
        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')
        plt.show()
def load_partial_design_data(raw_dataset_path, grammar_file):
    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    preprocessor = Preprocessor(all_labels=all_labels)

    with open(raw_dataset_path, newline='') as log_file:
        reader = csv.DictReader(log_file)
        memory = dict()
        idx = 0
        for row in reader:
            if idx % 1000 == 0:
                print(f'processing idx = {idx}')
            idx += 1

            rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])

            # Build a robot from the rule sequence
            robot_graph = make_initial_graph()
            update_memory(memory, preprocessor, robot_graph, result)
            for r in rule_seq:
                matches = rd.find_matches(rules[r].lhs, robot_graph)
                # Always use the first match
                robot_graph = rd.apply_rule(rules[r], robot_graph, matches[0])
                update_memory(memory, preprocessor, robot_graph, result)

    initial_robot_graph = make_initial_graph()
    print('#hit on initial state: ',
          memory[hash(initial_robot_graph)]['hit'])

    all_link_features = []
    all_link_adj = []
    all_results = []
    max_nodes = 0
    for _, robot_hash_key in enumerate(memory):
        adj_matrix, link_features, result = \
            memory[robot_hash_key]['adj_matrix'], \
            memory[robot_hash_key]['link_features'], \
            memory[robot_hash_key]['V']
        all_link_features.append(link_features)
        all_link_adj.append(adj_matrix)
        all_results.append(result)
        max_nodes = max(max_nodes, adj_matrix.shape[0])

    all_adj_matrix_pad, all_link_features_pad, all_masks = [], [], []
    for adj_matrix, link_features in zip(all_link_adj, all_link_features):
        adj_matrix_pad, link_features_pad, masks = preprocessor.pad_graph(
            adj_matrix, link_features, max_nodes=max_nodes)
        all_adj_matrix_pad.append(adj_matrix_pad)
        all_link_features_pad.append(link_features_pad)
        all_masks.append(masks)

    return all_link_features_pad, all_adj_matrix_pad, all_masks, all_results
def search(args):
    # initialize the env
    max_nodes = args.depth * 2
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    env = RobotGrammarEnv(task, rules, seed=args.seed,
                          mpc_num_processes=args.mpc_num_processes)

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize Q function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = \
        preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    Q = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=len(rules)).to(device)

    # initialize the optimizer
    global optimizer
    optimizer = torch.optim.Adam(Q.parameters(), lr=args.lr)

    # initialize DQN
    memory = ReplayMemory(capacity=1000000)
    scores = deque(maxlen=100)

    data = []
    for epoch in range(args.num_iterations):
        done = False
        eps = args.eps_start + epoch / args.num_iterations * (
            args.eps_end - args.eps_start)
        # eps = 1.0
        while not done:
            state = env.reset()
            total_reward = 0.
            rule_seq = []
            state_seq = []
            for i in range(args.depth):
                action = select_action(env, Q, state, eps)
                if action is None:
                    break
                rule_seq.append(action)
                next_state, reward, done = env.step(action)
                state_seq.append((state, action, next_state, reward, done))
                total_reward += reward
                state = next_state
                if done:
                    break

        for i in range(len(state_seq)):
            memory.push(state_seq[i][0], state_seq[i][1], state_seq[i][2],
                        state_seq[i][3], state_seq[i][4])
            data.append((state_seq[i][0], state_seq[i][1], total_reward))

        scores.append(total_reward)

        loss = 0.0
        for i in range(len(state_seq)):
            loss += optimize(Q, Q, memory, args.batch_size)

        print('epoch ', epoch, ': reward = ', total_reward, ', eps = ', eps,
              ', Q loss = ', loss)

    # test
    cnt = 0
    for i in range(len(data)):
        if data[i][2] > 0.5:
            y_predict, _, _ = predict(Q, data[i][0])
            print('target = ', data[i][2], ', predicted = ',
                  y_predict[0][data[i][1]])
            cnt += 1
        if cnt == 5:
            break
    cnt = 0
    for i in range(len(data)):
        if data[i][2] < 0.5:
            y_predict, _, _ = predict(Q, data[i][0])
            print('target = ', data[i][2], ', predicted = ',
                  y_predict[0][data[i][1]])
            cnt += 1
        if cnt == 5:
            break
def main():
    signal.signal(signal.SIGUSR1, set_pdb_trace)

    parser = argparse.ArgumentParser(description="Robot design search demo.")
    parser.add_argument("task", type=str, help="Task (Python class name)")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument("-a", "--algorithm", choices=algorithms.keys(),
                        default="mcts",
                        help="Algorithm ({})".format(
                            "|".join(algorithms.keys())))
    parser.add_argument("-s", "--seed", type=int, default=None,
                        help="Random seed")
    parser.add_argument("-j", "--jobs", type=int, required=True,
                        help="Number of jobs/threads")
    parser.add_argument("-i", "--iterations", type=int, required=True,
                        help="Number of iterations")
    parser.add_argument("-d", "--depth", type=int, required=True,
                        help="Maximum tree depth")
    parser.add_argument("-l", "--log_dir", type=str, default='',
                        help="Log directory")
    parser.add_argument("-f", "--log_file", type=str,
                        help="Existing log file, for resuming a previous run")
    args = parser.parse_args()

    random.seed(args.seed)

    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    env = RobotDesignEnv(task, rules, args.seed, args.jobs, args.depth)
    search_alg = algorithms[args.algorithm](env, max_tries=1000)

    if args.log_file:
        # Resume an existing run
        log_path = args.log_file
    else:
        # Start a new run
        os.makedirs(args.log_dir, exist_ok=True)
        log_path = os.path.join(
            args.log_dir,
            f'mcts_{datetime.datetime.now():%Y%m%d_%H%M%S}.csv')
    print(f"Logging to '{log_path}'")

    fieldnames = ['iteration', 'rule_seq', 'opt_seed', 'result']

    # Read log file if it exists and build a cache of previous results
    result_cache = dict()
    try:
        with open(log_path) as log_file:
            reader = csv.DictReader(log_file, fieldnames=fieldnames)
            next(reader)  # Skip the header row
            for row in reader:
                result_cache[(tuple(ast.literal_eval(row['rule_seq'])),
                              int(row['opt_seed']))] = float(row['result'])
        log_file_exists = True
    except FileNotFoundError:
        log_file_exists = False
    env.result_cache = result_cache

    with open(log_path, 'a', newline='') as log_file:
        writer = csv.DictWriter(log_file, fieldnames=fieldnames)
        if not log_file_exists:
            writer.writeheader()
            log_file.flush()

        for i in range(args.iterations):
            states, actions, result = search_alg.run_iteration()

            if i >= len(env.result_cache):
                rule_seq = [rules.index(rule) for rule in actions]
                writer.writerow({'iteration': i, 'rule_seq': rule_seq,
                                 'opt_seed': env.latest_opt_seed,
                                 'result': result})
                log_file.flush()
            else:
                # Replaying existing log entries
                if i + 1 != env.result_cache_hit_count:
                    print("Failed to replay existing log entries, stopping")
                    sys.exit(1)
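# Example invocation (hedged: the script and task names are placeholders;
# -j, -i, and -d are required by the parser above):
#   python design_search.py FlatTerrainTask data/designs/grammar_apr30.dot \
#       -a mcts -s 42 -j 8 -i 2000 -d 40 -l logs/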
def main(): parser = argparse.ArgumentParser(description="Process robot design search results.") parser.add_argument("task", type=str, help="Task (Python class name)") parser.add_argument("-j", "--jobs", type=int, required=True, help="Number of jobs/threads") parser.add_argument("--grammar-file", type=str, default = './data/designs/grammar_apr30.dot', help="Grammar file (.dot)") parser.add_argument("-f", "--log_file", type=str, required=True, help="MCTS log file") parser.add_argument("-t", "--type", type=str) parser.add_argument("-d", "--save_image_dir", default = None, type=str) parser.add_argument("-i", "--iterations", type=int, nargs="+") parser.add_argument("-s", "--opt_seed", type=int) args = parser.parse_args() args.save_image_dir = os.path.join(os.path.dirname(args.log_file), args.type) os.makedirs(args.save_image_dir, exist_ok=True) os.system('cp {} {}/designs.csv'.format(args.log_file, args.save_image_dir)) task_class = getattr(tasks, args.task) task = task_class() graphs = rd.load_graphs(args.grammar_file) rules = [rd.create_rule_from_graph(g) for g in graphs] iteration_df = pd.read_csv(args.log_file, index_col=0) iteration_df['iteration'] = list(range(1, len(iteration_df) + 1)) if args.type == "iterations": os.makedirs(args.save_image_dir, exist_ok=True) mid_indices = np.arange(0, len(iteration_df) + 1, 1000) offset = 10 for mid_index in mid_indices: start_index = max(mid_index - offset, 0) end_index = min(mid_index + offset, len(iteration_df)) for index in range(start_index, end_index): rule_seq = ast.literal_eval(iteration_df['rule_seq'][index]) robot = make_robot_from_rule_sequence(rule_seq, rules) im_data = get_robot_image(robot, task)[::-1,:,:] im = Image.fromarray(im_data) im.save(os.path.join(args.save_image_dir, f"iteration_{index:05}.png")) if args.type == "iterations_top": block_size = 100 count = 2 total = ((len(iteration_df) - 1) // block_size + 1) * count for start_index in range(0, len(iteration_df), block_size): # sys.stdout.write('\r Finish {}/{}'.format(start_index // block_size * count, total)) # sys.stdout.flush() end_index = min(start_index + block_size, len(iteration_df)) block = iteration_df[start_index:end_index].copy() block = block.sort_values(by='reward', ascending=False).reset_index() for i in range(count): row = block.iloc[i] rule_seq = ast.literal_eval(row['rule_seq']) robot = make_robot_from_rule_sequence(rule_seq, rules) print('iteration = {}, reward = {}'.format(row['iteration'], row['reward'])) im_data = get_robot_image(robot, task)[::-1,:,:] im = Image.fromarray(im_data) block_index = start_index // block_size im.save(os.path.join(args.save_image_dir, f"iteration_{row['iteration']:05}_{row['reward']:.2f}.png")) if args.type == "all_top": # save all top K unique designs count = 50 sorted_df = iteration_df.sort_values(by='reward', ascending=False).reset_index() hash_values = set() j = -1 for i in range(count): while True: j = j + 1 row = sorted_df.iloc[j] rule_seq = ast.literal_eval(row['rule_seq']) robot = make_robot_from_rule_sequence(rule_seq, rules) robot_raw = make_robot_from_rule_sequence_raw(rule_seq, rules) hash_key = hash(robot_raw) if hash_key in hash_values: continue hash_values.add(hash_key) print('iteration = {}, reward = {}, hash = {}'.format(row['iteration'], row['reward'], hash_key)) im_data = get_robot_image(robot, task)[::-1,:,:] im = Image.fromarray(im_data) im.save(os.path.join(args.save_image_dir, f"rank_{i+1:03}_iteration_{row['iteration']:05}_{row['reward']:.2f}.png")) break elif args.type == "percentiles": 
os.makedirs(args.save_image_dir, exist_ok=True) percentiles = np.linspace(0.0, 1.0, 11) offset = 10 iteration_df.sort_values(by='reward') for percentile in percentiles: mid_index = int(round(percentile * (len(iteration_df) - 1))) start_index = max(mid_index - offset, 0) end_index = min(mid_index + offset, len(iteration_df)) for index in range(start_index, end_index): rule_seq = ast.literal_eval(iteration_df['rule_seq'][index]) robot = make_robot_from_rule_sequence(rule_seq, rules) im_data = get_robot_image(robot, task)[::-1,:,:] im = Image.fromarray(im_data) im.save(os.path.join(args.save_image_dir, f"sorted_{index:05}.png")) elif args.type == "terrain": # Take a screenshot of the terrain alone os.makedirs(args.save_image_dir, exist_ok=True) im_data = get_robot_image(None, task)[::-1,:,:] im = Image.fromarray(im_data) im.save(os.path.join(args.save_image_dir, f"terrain_{args.task}.png")) elif args.type == "simulate": results_df = pd.DataFrame(columns=['task', 'log_file', 'iteration', 'reward']) for iteration in args.iterations: row = iteration_df.ix[iteration] rule_seq = ast.literal_eval(row['rule_seq']) robot = make_robot_from_rule_sequence(rule_seq, rules) for i in range(10): _, result = simulate(robot, task, random.getrandbits(32), args.jobs) results_df = results_df.append({ 'task': args.task, 'log_file': args.log_file, 'iteration': iteration, 'reward': result}, ignore_index=True) with open('simulate_results.csv', 'a') as f: results_df.to_csv(f, header=(f.tell() == 0)) elif args.type == "video": os.makedirs(args.save_image_dir, exist_ok=True) row = iteration_df.ix[args.iterations[0]] rule_seq = ast.literal_eval(row['rule_seq']) if args.opt_seed is not None: opt_seed = args.opt_seed else: opt_seed = int(row['opt_seed']) robot = make_robot_from_rule_sequence(rule_seq, rules) save_robot_video_frames(robot, task, opt_seed, args.jobs, args.save_image_dir, frame_interval=4)
def main(): parser = argparse.ArgumentParser( description="Plot results of a search run.") parser.add_argument("task", type=str, help="Task (Python class name)") parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)") parser.add_argument('log_file', type=str, help="Log file (.csv)") parser.add_argument('--max_iters', type=int, help="Maximum number of iterations to show") subparsers = parser.add_subparsers(dest='subcommand', help="Plotting subcommand") iter_scatter_parser = subparsers.add_parser( 'iter_scatter', help="Scatter plot of results vs. iteration") iter_scatter_parser.add_argument('--image_count', type=int, default=0, help='Number of design images to show') iter_scatter_parser.add_argument( '--spacing', type=int, default=1000, help='Minimum spacing between chosen designs, in iterations') args = parser.parse_args() task_class = getattr(tasks, args.task) task = task_class() graphs = rd.load_graphs(args.grammar_file) rules = [rd.create_rule_from_graph(g) for g in graphs] log_df = pd.read_csv(args.log_file) if args.max_iters: log_df = log_df[log_df['iteration'] < args.max_iters] if args.subcommand == 'iter_scatter': if args.image_count > 0: grid = plt.GridSpec(2, args.image_count, wspace=0, hspace=0, height_ratios=(0.2, 0.8)) scatter_ax = plt.subplot(grid[1, :]) # Select the top `image_count` designs, spaced at least `spacing` apart best_indices = [] log_df_remaining = log_df for i in range(args.image_count): best_idx = log_df_remaining['result'].idxmax() best_indices.append(best_idx) log_df_remaining = log_df_remaining[ abs(log_df_remaining.index - best_idx) > args.spacing] best_indices.sort() print(best_indices) for j, best_idx in enumerate(best_indices): rule_seq = ast.literal_eval(log_df['rule_seq'][best_idx]) graph = make_graph(rules, rule_seq) robot = build_normalized_robot(graph) image = get_robot_image(robot, task) image_ax = plt.subplot(grid[0, j]) plt.axis('off') plt.imshow(image, origin='lower') patch = ConnectionPatch(xyA=(0.5 * image.shape[1], 0.0), xyB=(log_df['iteration'][best_idx], log_df['result'][best_idx]), coordsA='data', coordsB='data', axesA=image_ax, axesB=scatter_ax, color='darkgray') image_ax.add_artist(patch) else: fig, scatter_ax = plt.subplots() sns.scatterplot(x='iteration', y='result', data=log_df, ci=None, marker='.', linewidth=0, ax=scatter_ax) plt.tight_layout() plt.savefig('iter_scatter.pdf')
def main(log_file=None, grammar_file=None):
    parser = argparse.ArgumentParser(
        description="Example code for parsing a MCTS log file.")
    if not log_file or not grammar_file:
        parser.add_argument("log_file", type=str, help="Log file (.csv)")
        parser.add_argument("grammar_file", type=str,
                            help="Grammar file (.dot)")
        args = parser.parse_args()
    else:
        args = argparse.Namespace()
        args.grammar_file = grammar_file
        args.log_file = log_file

    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    with open(args.log_file, newline='') as log_file:
        reader = csv.DictReader(log_file)
        all_link_features = []
        all_link_adj = []
        all_results = []
        for row in reader:
            full_rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])

            for prefix_len in range(len(full_rule_seq) + 1):
                rule_seq = full_rule_seq[:prefix_len]
                all_results.append(result)

                # Build a robot from the rule sequence
                robot_graph = make_initial_graph()
                for r in rule_seq:
                    matches = rd.find_matches(rules[r].lhs, robot_graph)
                    # Always use the first match
                    robot_graph = rd.apply_rule(rules[r], robot_graph,
                                                matches[0])
                robot = build_normalized_robot(robot_graph)

                # Find the world position and rotation of links
                pos_rot = []
                for i, link in enumerate(robot.links):
                    if link.parent >= 0:
                        parent_pos, parent_rot = pos_rot[link.parent]
                        parent_link_length = robot.links[link.parent].length
                    else:
                        parent_pos, parent_rot = np.zeros(3), np.quaternion(
                            1, 0, 0, 0)
                        parent_link_length = 0

                    offset = np.array(
                        [parent_link_length * link.joint_pos, 0, 0])
                    rel_pos = quaternion.rotate_vectors(parent_rot, offset)
                    rel_rot = np_quaternion(link.joint_rot).conjugate()
                    pos = parent_pos + rel_pos
                    rot = parent_rot * rel_rot
                    pos_rot.append((pos, rot))

                # Generate adjacency matrix
                adj_matrix = np.zeros((len(robot.links), len(robot.links)))
                for i, link in enumerate(robot.links):
                    if link.parent >= 0:
                        adj_matrix[link.parent, i] += 1

                # Generate features for links
                # Note: we can work with either the graph or the robot
                # kinematic tree, but the kinematic tree provides more
                # information
                link_features = []
                for i, link in enumerate(robot.links):
                    world_pos, world_rot = pos_rot[i]
                    world_joint_axis = quaternion.rotate_vectors(
                        world_rot, link.joint_axis)
                    label_vec = np.zeros(len(all_labels))
                    label_vec[all_labels.index(link.label)] = 1
                    link_features.append(
                        np.array([
                            *featurize_link(link), *world_pos,
                            *quaternion_coords(world_rot), *world_joint_axis,
                            *label_vec
                        ]))
                link_features = np.array(link_features)

                all_link_features.append(link_features)
                all_link_adj.append(adj_matrix)

    return all_link_features, all_link_adj, all_results
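# Follow-up sketch (assumption: a caller batches the variable-size graphs
# returned above; the zero-padding mirrors what Preprocessor.pad_graph does
# elsewhere in this codebase, written out here with plain numpy).
def _pad_and_stack(all_link_features, all_link_adj):
    max_nodes = max(f.shape[0] for f in all_link_features)
    padded_features, padded_adj = [], []
    for feat, adj in zip(all_link_features, all_link_adj):
        pad = max_nodes - feat.shape[0]
        # pad rows of the feature matrix and both dims of the adjacency matrix
        padded_features.append(np.pad(feat, ((0, pad), (0, 0))))
        padded_adj.append(np.pad(adj, ((0, pad), (0, pad))))
    return np.stack(padded_features), np.stack(padded_adj)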
def search_algo_2(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle=True,
                          preprocessor=preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = \
        preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design
        best_design, best_reward = None, -np.inf

        # initialize the seen states pool
        states_pool = []

        # initialize visited states
        state_set = set()

        # TODO: load previously explored designs

        # explored designs
        designs = []
        design_rewards = []

        # reward history
        epoch_rew_his = []

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            t0 = time.time()

            # use e-greedy to sample a design within maximum #steps.
            if args.eps_schedule == 'linear-decay':
                # linear schedule
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                # exp schedule
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            done = False
            while not done:
                state = env.reset()
                rule_seq = []
                state_seq = [state]
                total_reward = 0.
                for _ in range(args.depth):
                    action = select_action(env, V, state, eps)
                    if action is None:
                        break
                    rule_seq.append(action)
                    next_state, reward, done = env.step(action)
                    total_reward += reward
                    state_seq.append(next_state)
                    state = next_state
                    if done:
                        break

            # save the design and the reward in the list
            designs.append(rule_seq)
            design_rewards.append(total_reward)

            # update best design
            if total_reward > best_reward:
                best_design, best_reward = rule_seq, total_reward

            # update state pool
            for ancestor in state_seq:
                state_hash_key = hash(ancestor)
                if not (state_hash_key in state_set):
                    state_set.add(state_hash_key)
                    states_pool.append(ancestor)

            t1 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.depth):
                minibatch = random.sample(
                    states_pool, min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    V_hat = compute_Vhat(robot_graph, env, V)
                    adj_matrix, features, masks = preprocessor.preprocess(
                        robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(V_hat)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t2 = time.time()

            # logging
            if (epoch + 1) % args.log_interval == 0 or \
                    epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(epoch - args.log_interval + 1, epoch + 1):
                    writer.writerow({'rule_seq': str(designs[i]),
                                     'reward': design_rewards[i]})
                fp_csv.close()

            epoch_rew_his.append(total_reward)

            t_end = time.time()
            avg_loss = total_loss / args.depth
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: Time = {:.2f}, T_sample = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, training loss = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(
                epoch, t_end - t_start, t1 - t0, t2 - t1, eps, avg_loss,
                total_reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, loss = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(
                eps, avg_loss, total_reward, avg_reward))
            fp_log.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(10):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                done = False
                while not done:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    total_reward = 0.
                    for _ in range(args.depth):
                        action = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state, reward, done = env.step(action)
                        total_reward += reward
                        state_seq.append(next_state)
                        state = next_state
                        if done:
                            break

                reward_sum += total_reward
                best_reward = max(best_reward, total_reward)
                print(f'design {epoch}: reward = {total_reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[1].plot(x, y1)
        plt.show()
parser.add_argument('--grammar-file', type=str,
                    default='../../data/designs/grammar_apr30.dot',
                    help="Grammar file (.dot)")
parser.add_argument('--index', type=int, default=None,
                    help='index of the designs to be shown at the end')
args = parser.parse_args()

fp = open(args.log_path, newline='')
reader = csv.DictReader(fp)

graphs = rd.load_graphs(args.grammar_file)
rules = [rd.create_rule_from_graph(g) for g in graphs]

# initialize the env
env = RobotGrammarEnv(None, rules)

design_cnt = dict()
memory = dict()
N = 0
best_reward = []
rewards = []
rule_seqs = []
opt_seeds = []
best_design = None
best_rule_seq = None
best_designs = []
for row in reader: