Example #1
def main():
    parser = argparse.ArgumentParser(
        description="Export a robot design as a mesh.")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument("rule_sequence", nargs="+", help="Rule sequence")
    parser.add_argument("--output_file",
                        type=str,
                        required=True,
                        help="Output file")
    args = parser.parse_args()

    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]
    graph = make_graph(rules, rule_sequence)
    robot = build_normalized_robot(graph)

    # Simulation is only used to get link/joint transforms
    sim = rd.BulletSimulation()
    sim.add_robot(robot, [0.0, 0.0, 0.0], rd.Quaterniond(1.0, 0.0, 0.0, 0.0))

    obj_file_name = args.output_file
    mtl_file_name = os.path.splitext(args.output_file)[0] + '.mtl'
    with open(obj_file_name, 'w') as obj_file, \
         open(mtl_file_name, 'w') as mtl_file:
        dumper = ObjDumper(obj_file, mtl_file)
        obj_file.write("mtllib {}\n".format(mtl_file_name))
        dump_sim(sim, dumper)
        dumper.finish()
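make_graph is used above but not defined in this example. A minimal sketch, assuming the same rd bindings and the make_initial_graph helper that appear in the other examples, and mirroring the loop in build_robot below:

def make_graph(rules, rule_sequence):
    # Start from the grammar's initial graph and apply each rule in order,
    # always taking the first match (as the other examples do).
    graph = make_initial_graph()
    for r in rule_sequence:
        matches = rd.find_matches(rules[r].lhs, graph)
        if matches:
            graph = rd.apply_rule(rules[r], graph, matches[0])
    return graph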
Example #2
def load_terminal_design_data(raw_dataset_path, grammar_file):
    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    preprocessor = Preprocessor(all_labels=all_labels)

    with open(raw_dataset_path, newline='') as log_file:
        reader = csv.DictReader(log_file)

        all_link_features = []
        all_link_adj = []
        all_results = []
        max_nodes = 0
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])

            all_results.append(result)

            # Build a robot from the rule sequence
            robot_graph = make_initial_graph()
            for r in rule_seq:
                matches = rd.find_matches(rules[r].lhs, robot_graph)
                # Always use the first match
                robot_graph = rd.apply_rule(rules[r], robot_graph, matches[0])

            adj_matrix, link_features, _ = preprocessor.preprocess(robot_graph)

            all_link_features.append(link_features)
            all_link_adj.append(adj_matrix)

            max_nodes = max(max_nodes, adj_matrix.shape[0])

        all_adj_matrix_pad, all_link_features_pad, all_masks = [], [], []
        for adj_matrix, link_features in zip(all_link_adj, all_link_features):
            adj_matrix_pad, link_features_pad, masks = preprocessor.pad_graph(
                adj_matrix, link_features, max_nodes=max_nodes)
            all_adj_matrix_pad.append(adj_matrix_pad)
            all_link_features_pad.append(link_features_pad)
            all_masks.append(masks)

    return all_link_features_pad, all_adj_matrix_pad, all_masks, all_results
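A hedged usage sketch (not part of the original file): the padded lists returned by load_terminal_design_data can be stacked into torch tensors for training, mirroring the tensor construction used in the search code later in this listing. The dataset path below is a placeholder; the grammar path is the one used elsewhere in this listing.

import numpy as np
import torch

features, adj, masks, results = load_terminal_design_data(
    'designs_log.csv',                       # placeholder path
    'data/designs/grammar_apr30.dot')
features_t = torch.tensor(np.stack(features), dtype=torch.float32)
adj_t = torch.tensor(np.stack(adj), dtype=torch.float32)
masks_t = torch.tensor(np.stack(masks), dtype=torch.float32)
results_t = torch.tensor(results, dtype=torch.float32)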
Example #3
def build_robot(args):
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]

    graph = make_initial_graph()
    for r in rule_sequence:
        matches = rd.find_matches(rules[r].lhs, graph)
        if matches:
            graph = rd.apply_rule(rules[r], graph, matches[0])

    robot = build_normalized_robot(graph)
    finalize_robot(robot)

    return robot
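A hedged usage sketch: build_robot only reads grammar_file and rule_sequence from its argument, so it can also be driven with a plain argparse.Namespace. The rule sequence below is illustrative only; the grammar path is the one used elsewhere in this listing.

import argparse

args = argparse.Namespace(grammar_file='data/designs/grammar_apr30.dot',
                          rule_sequence=['0', '7', '1'])  # illustrative sequence
robot = build_robot(args)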
Example #4
def main():
    parser = argparse.ArgumentParser(description="Robot design viewer.")
    parser.add_argument("task", type=str, help="Task (Python class name)")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument("rule_sequence",
                        nargs="+",
                        help="Rule sequence to apply")
    parser.add_argument("-o",
                        "--optim",
                        default=False,
                        action="store_true",
                        help="Optimize a trajectory")
    parser.add_argument("-s",
                        "--opt_seed",
                        type=int,
                        default=None,
                        help="Trajectory optimization seed")
    parser.add_argument("-e",
                        "--episodes",
                        type=int,
                        default=1,
                        help="Number of optimization episodes")
    parser.add_argument("-j",
                        "--jobs",
                        type=int,
                        required=True,
                        help="Number of jobs/threads")
    parser.add_argument("--input_sequence_file",
                        type=str,
                        help="File to save input sequence to (.csv)")
    parser.add_argument("--save_obj_dir",
                        type=str,
                        help="Directory to save .obj files to")
    parser.add_argument("--save_video_file",
                        type=str,
                        help="File to save video to (.mp4)")
    parser.add_argument("-l",
                        "--episode_len",
                        type=int,
                        default=128,
                        help="Length of episode")
    args = parser.parse_args()

    task_class = getattr(tasks, args.task)
    task = task_class(episode_len=args.episode_len)
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    rule_sequence = [int(s.strip(",")) for s in args.rule_sequence]
    if args.opt_seed is not None:
        opt_seed = args.opt_seed
    else:
        opt_seed = random.getrandbits(32)
        print("Using optimization seed:", opt_seed)

    graph = make_graph(rules, rule_sequence)
    robot = build_normalized_robot(graph)
    finalize_robot(robot)
    if args.optim:
        input_sequence, result = simulate(robot, task, opt_seed, args.jobs,
                                          args.episodes)
        print("Result:", result)
    else:
        input_sequence = None

    if args.input_sequence_file and input_sequence is not None:
        import csv
        with open(args.input_sequence_file, 'w', newline='') as input_seq_file:
            writer = csv.writer(input_seq_file)
            for col in input_sequence.T:
                writer.writerow(col)
        print("Saved input sequence to file:", args.input_sequence_file)

    robot_init_pos, has_self_collision = presimulate(robot)

    if has_self_collision:
        print("Warning: robot self-collides in initial configuration")

    main_sim = rd.BulletSimulation(task.time_step)
    task.add_terrain(main_sim)
    # Rotate 180 degrees around the y axis, so the base points to the right
    main_sim.add_robot(robot, robot_init_pos,
                       rd.Quaterniond(0.0, 0.0, 1.0, 0.0))
    robot_idx = main_sim.find_robot_index(robot)

    camera_params, record_step_indices = view_trajectory(
        main_sim, robot_idx, input_sequence, task)

    if args.save_obj_dir and input_sequence is not None:
        import export_mesh

        if record_step_indices:
            print("Saving .obj files for {} steps".format(
                len(record_step_indices)))

        os.makedirs(args.save_obj_dir, exist_ok=True)

        # Save the props/terrain once
        obj_file_name = os.path.join(args.save_obj_dir, 'terrain.obj')
        mtl_file_name = os.path.join(args.save_obj_dir, 'terrain.mtl')
        with open(obj_file_name, 'w') as obj_file, \
             open(mtl_file_name, 'w') as mtl_file:
            dumper = export_mesh.ObjDumper(obj_file, mtl_file)
            obj_file.write("mtllib {}\n".format(
                os.path.split(mtl_file_name)[-1]))
            for prop_idx in range(main_sim.get_prop_count()):
                export_mesh.dump_prop(prop_idx, main_sim, dumper)
            dumper.finish()

        # Save the robot once per step
        def save_obj_callback(step_idx):
            if record_step_indices:
                if step_idx not in record_step_indices:
                    return
            else:
                if step_idx % 128 != 0:
                    return

            obj_file_name = os.path.join(args.save_obj_dir,
                                         'robot_{:04}.obj'.format(step_idx))
            # Use one .mtl file for all steps
            mtl_file_name = os.path.join(args.save_obj_dir, 'robot.mtl')
            with open(obj_file_name, 'w') as obj_file, \
                 open(mtl_file_name, 'w') as mtl_file:
                dumper = export_mesh.ObjDumper(obj_file, mtl_file)
                obj_file.write("mtllib {}\n".format(
                    os.path.split(mtl_file_name)[-1]))
                export_mesh.dump_robot(robot_idx, main_sim, dumper)
                dumper.finish()

        run_trajectory(main_sim, robot_idx, input_sequence, task,
                       save_obj_callback)

    if args.save_video_file and input_sequence is not None:
        import cv2

        if record_step_indices:
            print("Saving video for {} steps".format(len(record_step_indices)))

        viewer = rd.GLFWViewer()

        # Copy camera parameters from the interactive viewer
        viewer.camera_params = camera_params

        tracker = CameraTracker(viewer, main_sim, robot_idx)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(args.save_video_file, fourcc, 60.0,
                                 viewer.get_framebuffer_size())
        writer.set(cv2.VIDEOWRITER_PROP_QUALITY, 100)

        def write_frame_callback(step_idx):
            tracker.update(task.time_step)

            # 240 steps/second / 4 = 60 fps
            if step_idx % 4 == 0:
                # Flip vertically, convert RGBA to BGR
                frame = viewer.render_array(main_sim)[::-1, :, 2::-1]
                writer.write(frame)

        run_trajectory(main_sim, robot_idx, input_sequence, task,
                       write_frame_callback)

        writer.release()
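A self-contained check (illustration only, plain numpy) of the frame conversion in write_frame_callback above: the slice [::-1, :, 2::-1] flips the rendered image vertically and reorders the RGBA channels to the BGR layout OpenCV expects.

import numpy as np

rgba = np.zeros((2, 3, 4), dtype=np.uint8)
rgba[..., 0] = 255   # pure red, RGBA layout
rgba[..., 3] = 255   # fully opaque
bgr = rgba[::-1, :, 2::-1]
assert bgr.shape == (2, 3, 3)              # alpha channel dropped
assert (bgr[0, 0] == [0, 0, 255]).all()    # red ends up in the last (R) slot of BGR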
Example #5
def search_algo(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    # initialize/load
    task_class = getattr(tasks, args.task)
    if args.no_noise:
        task = task_class(force_std=0.0, torque_std=0.0)
    else:
        task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # initialize preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))

    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = args.depth * 3

    global preprocessor
    # preprocessor = Preprocessor(max_nodes = max_nodes, all_labels = all_labels)
    preprocessor = Preprocessor(all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task,
                          rules,
                          seed=args.seed,
                          mpc_num_processes=args.mpc_num_processes)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(
        state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize invalid_his
    invalid_his = dict()
    num_invalid_samples, num_valid_samples = 0, 0
    repeated_cnt = 0

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    states_set = set()

    # explored designs
    designs = []
    design_rewards = []
    design_opt_seeds = []

    # record prediction error
    prediction_error_sum = 0.0

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward', 'opt_seed']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.

        # record the count for invalid samples
        no_action_samples, step_exceeded_samples, self_collision_samples = 0, 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations /
                        args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            selected_design, selected_reward = None, -np.inf
            selected_state_seq, selected_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if not has_nonterminals(state):
                            break

                    valid = env.is_valid(state)

                    t_sample += time.time() - t0

                    t0 = time.time()

                    if not valid:
                        # update the invalid sample's count
                        if no_action_flag:
                            no_action_samples += 1
                        elif has_nonterminals(state):
                            step_exceeded_samples += 1
                        else:
                            self_collision_samples += 1

                        # update the Vhat for invalid designs
                        update_Vhat(args,
                                    V_hat,
                                    state_seq,
                                    -2.0,
                                    invalid=True,
                                    invalid_cnt=invalid_his)
                        # update states pool
                        update_states_pool(states_pool, state_seq, states_set,
                                           V_hat)
                        num_invalid_samples += 1
                    else:
                        num_valid_samples += 1

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > selected_reward:
                    selected_design, selected_reward = state, predicted_value
                    selected_rule_seq, selected_state_seq = rule_seq, state_seq

            t0 = time.time()

            repeated = False
            if (hash(selected_design)
                    in V_hat) and (V_hat[hash(selected_design)] > -2.0 + 1e-3):
                repeated = True
                repeated_cnt += 1

            reward, best_seed = -np.inf, None

            for _ in range(args.num_eval):
                _, rew = env.get_reward(selected_design)
                if rew > reward:
                    reward, best_seed = rew, env.last_opt_seed

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(selected_rule_seq)
            design_rewards.append(reward)
            design_opt_seeds.append(best_seed)

            # update best design
            if reward > best_reward:
                best_design, best_reward = selected_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, selected_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(args, V_hat, selected_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, selected_state_seq, states_set,
                               V_hat)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                max_nodes = 0
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    # adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    adj_matrix, features, _ = preprocessor.preprocess(
                        robot_graph)
                    max_nodes = max(max_nodes, len(features))
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    # train_masks.append(masks)
                    train_reward.append(target_reward)
                for i in range(len(minibatch)):
                    train_adj_matrix[i], train_features[i], masks = \
                        preprocessor.pad_graph(train_adj_matrix[i], train_features[i], max_nodes)
                    train_masks.append(masks)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1
                ) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()

            # save explored design and its reward
            fp_csv = open(design_csv_path, 'a')
            fieldnames = ['rule_seq', 'reward', 'opt_seed']
            writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
            for i in range(last_checkpoint + 1, len(designs)):
                writer.writerow({
                    'rule_seq': str(designs[i]),
                    'reward': design_rewards[i],
                    'opt_seed': design_opt_seeds[i]
                })
            last_checkpoint = len(designs) - 1
            fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            prediction_error_sum += (selected_reward - reward)**2
            avg_prediction_error = prediction_error_sum / (epoch + 1)

            if repeated:
                print_white('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(\
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                    avg_loss, avg_prediction_error, selected_reward, reward, avg_reward, best_reward))
            else:
                print_warning('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(\
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                    avg_loss, avg_prediction_error, selected_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(\
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, selected_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                print_info('size of states_pool = {}'.format(len(states_pool)))
                print_info(
                    '#valid samples = {}, #invalid samples = {}, #valid / #invalid = {}'
                    .format(
                        num_valid_samples, num_invalid_samples,
                        num_valid_samples / num_invalid_samples
                        if num_invalid_samples > 0 else 10000.0))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #self_collision_samples = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            self_collision_samples))
                max_trials, cnt = 0, 0
                for key in invalid_his.keys():
                    if invalid_his[key] > max_trials:
                        if key not in V_hat:
                            max_trials = invalid_his[key]
                        elif V_hat[key] < -2.0 + 1e-3:
                            max_trials = invalid_his[key]
                    if invalid_his[key] >= args.max_trials:
                        if V_hat[key] < -2.0 + 1e-3:
                            cnt += 1

                print_info(
                    'max invalid_trials = {}, #failed nodes = {}'.format(
                        max_trials, cnt))
                print_info('repeated rate = {}'.format(repeated_cnt /
                                                       (epoch + 1)))

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        if not has_nonterminals(next_state):
                            valid = True
                            break
                        state = next_state

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')

        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')

        plt.show()
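The eps and eps_sample schedules in search_algo follow the same two formulas; a self-contained sketch (illustrative only) written as a plain function over the corresponding arguments:

import numpy as np

def eps_at(epoch, num_iterations, eps_start, eps_end, eps_decay, schedule):
    # Linear interpolation from eps_start to eps_end over the run,
    # or exponential decay toward eps_end, as in search_algo above.
    if schedule == 'linear-decay':
        return eps_start + epoch / num_iterations * (eps_end - eps_start)
    if schedule == 'exp-decay':
        return eps_end + (eps_start - eps_end) * np.exp(
            -1.0 * epoch / num_iterations / eps_decay)
    raise ValueError('unknown schedule: {}'.format(schedule))

# eps_at(0, 2000, 1.0, 0.1, 0.3, 'exp-decay') -> 1.0 (starts at eps_start)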
Example #6
def main():
    sns.set_context('paper')

    parser = argparse.ArgumentParser(
        description="Create plots using multiple log directories.")
    parser.add_argument('log_dir', type=str, nargs='+',
                        help="Log directory containing meta.json")
    parser.add_argument('-t', '--task', type=str, nargs='+',
                        help="Task to include in plots")
    parser.add_argument('-a', '--algorithm', type=str, nargs='+',
                        help="Algorithm to include in plots")
    parser.add_argument('-i', '--iterations', type=int,
                        help="Maximum number of iterations to show")
    parser.add_argument('--servo_count', action='store_true',
                        help="Include servo count as an objective")
    parser.add_argument('--ind_rewards', action='store_true',
                        help="Include individual rewards in iterations plot")
    parser.add_argument('--estimator', type=str,
                        help="Estimator for aggregating multiple trials")
    subparsers = parser.add_subparsers(help='Plot type')
    parser_iterations = subparsers.add_parser('iterations')
    parser_iterations.set_defaults(func=plot_iterations)
    parser_pareto = subparsers.add_parser('pareto')
    parser_pareto.set_defaults(func=plot_pareto)

    args = parser.parse_args()

    # Store every log file's contents into one big pandas dataframe
    df = pd.DataFrame()

    for log_dir in args.log_dir:
        try:
            with open(os.path.join(log_dir, 'meta.json'), 'r') as json_file:
                metadata = json.load(json_file)
        except FileNotFoundError:
            print("Directory '{}' does not contain metadata file, skipping".format(log_dir),
                  file=sys.stderr)
            continue

        # Load the .csv data
        csv_file_names = glob.glob(os.path.join(log_dir, '*.csv'))
        if len(csv_file_names) == 0:
            print("Directory '{}' does not contain any .csv files, skipping".format(log_dir), file=sys.stderr)
            continue

        for trial_num, csv_file_name in enumerate(csv_file_names):
            try:
                log_df = pd.read_csv(csv_file_name)
            except FileNotFoundError:
                print("File '{}' does not exist, skipping".format(csv_file_name), file=sys.stderr)
                continue

            if 'iteration' not in log_df.columns:
                log_df['iteration'] = log_df.index

            log_df.rename(columns={'result': 'reward'}, inplace=True)

            if 'task' not in log_df.columns:
                log_df['task'] = metadata.get('task')

            if 'grammar' not in log_df.columns:
                log_df['grammar'] = metadata.get(
                    'grammar', 'data/designs/grammar_apr30.dot')

            if 'algorithm' not in log_df.columns:
                log_df['algorithm'] = metadata.get('algorithm')

            log_df['trial'] = trial_num

            df = df.append(log_df, ignore_index=True, sort=True)

    # Filter data based on arguments
    if args.iterations:
        df = df[df['iteration'] < args.iterations]
    if args.task:
        df = df[df['task'].isin(args.task)]
    if args.algorithm:
        df = df[df['algorithm'].isin(args.algorithm)]

    try:
        # Expecting only one grammar
        grammar_file, = df['grammar'].unique()
    except ValueError:
        print("All runs must use the same grammar", file=sys.stderr)
        raise

    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # Compute a graph hash, servo count for each rule_seq
    rule_seq_hashes = {}
    rule_seq_servo_counts = {}
    for rule_seq_str in df['rule_seq'].unique():
        rule_seq = ast.literal_eval(rule_seq_str)
        graph = make_graph(rules, rule_seq)
        robot = build_normalized_robot(graph)

        rule_seq_hashes[rule_seq_str] = hash(graph)

        servo_count = 0
        for link in robot.links:
            if link.joint_type == rd.JointType.HINGE:
                # Only hinge joints have servos
                servo_count += 1
        rule_seq_servo_counts[rule_seq_str] = servo_count

    if args.servo_count:
        servo_count_df = pd.DataFrame({'rule_seq': df['rule_seq'].unique()})
        servo_count_df['task'] = 'ServoCount'
        servo_count_df['reward'] = \
            servo_count_df['rule_seq'].map(rule_seq_servo_counts)
        df = df.append(servo_count_df, ignore_index=True, sort=True)

    df['hash'] = df['rule_seq'].map(rule_seq_hashes)

    args.func(df, ind_rewards=args.ind_rewards, estimator=args.estimator)
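Note on the accumulation above: DataFrame.append was removed in pandas 2.0. A hedged sketch of an equivalent pattern with pd.concat (collect the per-file frames in a list, then concatenate once):

import pandas as pd

frames = []
# inside the loop, instead of df = df.append(log_df, ignore_index=True, sort=True):
#     frames.append(log_df)
df = pd.concat(frames, ignore_index=True, sort=True) if frames else pd.DataFrame()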
Example #7
def search_algo_1(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task,
                          rules,
                          enable_reward_oracle=True,
                          preprocessor=preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(
        state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    all_sample_designs = []

    # explored designs
    designs = []
    design_rewards = []

    # load previously explored designs
    if args.load_designs_path is not None:
        fp_csv = open(args.load_designs_path, newline='')
        reader = csv.DictReader(fp_csv)
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            reward = float(row['reward'])
            state = make_initial_graph()
            for i in range(len(rule_seq)):
                state = env.transite(state, rule_seq[i])
            designs.append(state)
            design_rewards.append(reward)
            if not np.isclose(V_hat[hash(state)], reward):
                print(rule_seq)
                print(V_hat[hash(state)], reward)
                print_error("Vhat and designs don't match")
        fp_csv.close()
        print_info('Loaded pretrained designs from {}'.format(
            args.load_designs_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.

        # record the count for invalid samples
        no_action_samples, step_exceeded_samples = 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations /
                        args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            best_candidate_design, best_candidate_reward = None, -1.0
            best_candidate_state_seq, best_candidate_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    random_step_cnt, optimal_step_cnt = 0, 0
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        if step_type == 'random':
                            random_step_cnt += 1
                        elif step_type == 'optimal':
                            optimal_step_cnt += 1
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if env.is_valid(next_state):
                            valid = True
                            break

                    t_sample += time.time() - t0

                    t0 = time.time()

                    # update the invalid sample's count
                    if not valid:
                        if no_action_flag:
                            no_action_samples += 1
                        else:
                            step_exceeded_samples += 1

                    # update the Vhat for invalid designs
                    if not valid:
                        update_Vhat(V_hat, state_seq, 0.0)
                        # update states pool
                        update_states_pool(states_pool, state_seq)

                    # if valid but has been explored as a valid design before, then put in state pool but resample it
                    if valid and (hash(state)
                                  in V_hat) and (V_hat[hash(state)] > 1e-3):
                        update_Vhat(V_hat, state_seq, V_hat[hash(state)])
                        update_states_pool(states_pool, state_seq)
                        valid = False

                    # record the sampled design
                    all_sample_designs.append(rule_seq)

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > best_candidate_reward:
                    best_candidate_design, best_candidate_reward = state, predicted_value
                    best_candidate_rule_seq, best_candidate_state_seq = rule_seq, state_seq

            t0 = time.time()

            _, reward = env.get_reward(best_candidate_design)

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(best_candidate_rule_seq)
            design_rewards.append(reward)

            # update best design
            if reward > best_reward:
                best_design, best_reward = best_candidate_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, best_candidate_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(V_hat, best_candidate_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, best_candidate_state_seq)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    adj_matrix, features, masks = preprocessor.preprocess(
                        robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(target_reward)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1
                ) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()
                # save all_sampled_designs
                save_path = os.path.join(iter_save_dir, 'all_sampled_designs')
                fp = open(save_path, 'wb')
                pickle.dump(all_sample_designs, fp)
                fp.close()
                # save explored design and its reward
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({
                        'rule_seq': str(designs[i]),
                        'reward': design_rewards[i]
                    })
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: T_sample = {:.2f}, T_update = {:.2f}, T_mpc = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, eps_sample = {:.3f}, #samples = {}, training loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(\
                epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                avg_loss, best_candidate_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(\
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, best_candidate_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                invalid_cnt, valid_cnt = 0, 0
                for state in states_pool.pool:
                    if np.isclose(V_hat[hash(state)], 0.):
                        invalid_cnt += 1
                    else:
                        valid_cnt += 1
                print_info(
                    'states_pool size = {}, #valid = {}, #invalid = {}, #valid / #invalid = {}'
                    .format(len(states_pool), valid_cnt, invalid_cnt,
                            valid_cnt / invalid_cnt))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #no_action / #step_exceeded = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            no_action_samples / step_exceeded_samples))

            # evaluation
            if args.eval_interval > 0 and (
                (epoch + 1) % args.eval_interval == 0
                    or epoch + 1 == args.num_iterations):
                print_info('-------- Doing evaluation --------')
                print_info('#states = {}'.format(len(states_pool)))
                loss_total = 0.
                for state in states_pool.pool:
                    value = predict(V, state)
                    loss_total += (V_hat[hash(state)] - value)**2
                print_info('Loss = {:.3f}'.format(loss_total /
                                                  len(states_pool)))
                fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'a')
                fp_eval.write('epoch = {}, loss = {:.3f}\n'.format(
                    epoch + 1, loss_total / len(states_pool)))
                fp_eval.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        if env.is_valid(next_state):
                            valid = True
                            break
                        state = next_state

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')

        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')

        plt.show()
Example #8
def load_partial_design_data(raw_dataset_path, grammar_file):
    graphs = rd.load_graphs(grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    preprocessor = Preprocessor(all_labels=all_labels)

    with open(raw_dataset_path, newline='') as log_file:
        reader = csv.DictReader(log_file)

        memory = dict()
        idx = 0
        for row in reader:
            if idx % 1000 == 0:
                print(f'processing idx = {idx}')
            idx += 1

            rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])

            # Build a robot from the rule sequence
            robot_graph = make_initial_graph()
            update_memory(memory, preprocessor, robot_graph, result)
            for r in rule_seq:
                matches = rd.find_matches(rules[r].lhs, robot_graph)
                # Always use the first match
                robot_graph = rd.apply_rule(rules[r], robot_graph, matches[0])
                update_memory(memory, preprocessor, robot_graph, result)

        initial_robot_graph = make_initial_graph()
        print('#hit on initial state: ',
              memory[hash(initial_robot_graph)]['hit'])

        all_link_features = []
        all_link_adj = []
        all_results = []
        max_nodes = 0
        for _, robot_hash_key in enumerate(memory):
            adj_matrix, link_features, result = \
                memory[robot_hash_key]['adj_matrix'], memory[robot_hash_key]['link_features'], memory[robot_hash_key]['V']

            all_link_features.append(link_features)
            all_link_adj.append(adj_matrix)
            all_results.append(result)

            max_nodes = max(max_nodes, adj_matrix.shape[0])

        all_adj_matrix_pad, all_link_features_pad, all_masks = [], [], []
        for adj_matrix, link_features in zip(all_link_adj, all_link_features):
            adj_matrix_pad, link_features_pad, masks = preprocessor.pad_graph(
                adj_matrix, link_features, max_nodes=max_nodes)
            all_adj_matrix_pad.append(adj_matrix_pad)
            all_link_features_pad.append(link_features_pad)
            all_masks.append(masks)

    return all_link_features_pad, all_adj_matrix_pad, all_masks, all_results
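update_memory is used above but not shown. A minimal sketch consistent with how the memory dict is read afterwards (keys 'adj_matrix', 'link_features', 'V', 'hit'); the running-average update for 'V' is an assumption, not the original implementation:

def update_memory(memory, preprocessor, robot_graph, result):
    key = hash(robot_graph)
    if key not in memory:
        adj_matrix, link_features, _ = preprocessor.preprocess(robot_graph)
        memory[key] = {'adj_matrix': adj_matrix,
                       'link_features': link_features,
                       'V': result,
                       'hit': 1}
    else:
        entry = memory[key]
        entry['hit'] += 1
        # assumed: keep a running average of the results seen for this graph
        entry['V'] += (result - entry['V']) / entry['hit']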
Example #9
def search(args):
    # initialize the env
    max_nodes = args.depth * 2
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    env = RobotGrammarEnv(task, rules, seed = args.seed, mpc_num_processes = args.mpc_num_processes)

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes = max_nodes, all_labels = all_labels)

    # initialize Q function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    Q = Net(max_nodes = max_nodes, num_channels = num_features, num_outputs = len(rules)).to(device)

    # initialize the optimizer
    global optimizer
    optimizer = torch.optim.Adam(Q.parameters(), lr = args.lr)

    # initialize DQN
    memory = ReplayMemory(capacity = 1000000)
    scores = deque(maxlen = 100)
    data = []

    for epoch in range(args.num_iterations):
        done = False
        eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
        # eps = 1.0
        while not done:
            state = env.reset()
            total_reward = 0.
            rule_seq = []
            state_seq = []
            for i in range(args.depth):
                action = select_action(env, Q, state, eps)
                rule_seq.append(action)
                if action is None:
                    break
                next_state, reward, done = env.step(action)
                state_seq.append((state, action, next_state, reward, done))
                total_reward += reward
                state = next_state
                if done:
                    break
        for i in range(len(state_seq)):
            memory.push(state_seq[i][0], state_seq[i][1], state_seq[i][2], state_seq[i][3], state_seq[i][4])
            data.append((state_seq[i][0], state_seq[i][1], total_reward))
        scores.append(total_reward)

        loss = 0.0
        for i in range(len(state_seq)):
            loss += optimize(Q, Q, memory, args.batch_size)
        print('epoch ', epoch, ': reward = ', total_reward, ', eps = ', eps, ', Q loss = ', loss)

    # test
    cnt = 0
    for i in range(len(data)):
        if data[i][2] > 0.5:
            y_predict, _, _ = predict(Q, data[i][0])
            print('target = ', data[i][2], ', predicted = ', y_predict[0][data[i][1]])
            cnt += 1
            if cnt == 5:
                break
    cnt = 0
    for i in range(len(data)):
        if data[i][2] < 0.5:
            y_predict, _, _ = predict(Q, data[i][0])
            print('target = ', data[i][2], ', predicted = ', y_predict[0][data[i][1]])
            cnt += 1
            if cnt == 5:
                break
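ReplayMemory is not defined in this example. A minimal deque-based sketch that matches the push(...) call above; the sample() method and its signature are assumptions:

import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, next_state, reward, done):
        # Store one transition; old entries are evicted once capacity is reached.
        self.memory.append((state, action, next_state, reward, done))

    def sample(self, batch_size):
        return random.sample(list(self.memory), min(batch_size, len(self.memory)))

    def __len__(self):
        return len(self.memory)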
Example #10
def main():
  signal.signal(signal.SIGUSR1, set_pdb_trace)

  parser = argparse.ArgumentParser(description="Robot design search demo.")
  parser.add_argument("task", type=str, help="Task (Python class name)")
  parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
  parser.add_argument("-a", "--algorithm", choices=algorithms.keys(),
                      default="mcts",
                      help="Algorithm ({})".format("|".join(algorithms.keys())))
  parser.add_argument("-s", "--seed", type=int, default=None,
                      help="Random seed")
  parser.add_argument("-j", "--jobs", type=int, required=True,
                      help="Number of jobs/threads")
  parser.add_argument("-i", "--iterations", type=int, required=True,
                      help="Number of iterations")
  parser.add_argument("-d", "--depth", type=int, required=True,
                      help="Maximum tree depth")
  parser.add_argument("-l", "--log_dir", type=str, default='',
                      help="Log directory")
  parser.add_argument("-f", "--log_file", type=str,
                      help="Existing log file, for resuming a previous run")
  args = parser.parse_args()

  random.seed(args.seed)

  task_class = getattr(tasks, args.task)
  task = task_class()
  graphs = rd.load_graphs(args.grammar_file)
  rules = [rd.create_rule_from_graph(g) for g in graphs]
  env = RobotDesignEnv(task, rules, args.seed, args.jobs, args.depth)
  search_alg = algorithms[args.algorithm](env, max_tries=1000)

  if args.log_file:
    # Resume an existing run
    log_path = args.log_file
  else:
    # Start a new run
    os.makedirs(args.log_dir, exist_ok=True)
    log_path = os.path.join(args.log_dir,
                            f'mcts_{datetime.datetime.now():%Y%m%d_%H%M%S}.csv')

  print(f"Logging to '{log_path}'")

  fieldnames = ['iteration', 'rule_seq', 'opt_seed', 'result']

  # Read log file if it exists and build a cache of previous results
  result_cache = dict()
  try:
    with open(log_path) as log_file:
      reader = csv.DictReader(log_file, fieldnames=fieldnames)
      next(reader) # Skip the header row
      for row in reader:
        result_cache[(tuple(ast.literal_eval(row['rule_seq'])),
                      int(row['opt_seed']))] = float(row['result'])
    log_file_exists = True
  except FileNotFoundError:
    log_file_exists = False
  env.result_cache = result_cache

  with open(log_path, 'a', newline='') as log_file:
    writer = csv.DictWriter(log_file, fieldnames=fieldnames)
    if not log_file_exists:
      writer.writeheader()
      log_file.flush()

    for i in range(args.iterations):
      states, actions, result = search_alg.run_iteration()

      if i >= len(env.result_cache):
        rule_seq = [rules.index(rule) for rule in actions]
        writer.writerow({'iteration': i, 'rule_seq': rule_seq,
                         'opt_seed': env.latest_opt_seed, 'result': result})
        log_file.flush()
      else:
        # Replaying existing log entries
        if i + 1 != env.result_cache_hit_count:
          print("Failed to replay existing log entries, stopping")
          sys.exit(1)
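The cache built above is keyed by each design's rule sequence and optimization seed. As a small illustration, a hypothetical helper (not part of the original code) that looks up a cached result using the same key format:
def lookup_cached_result(result_cache, rule_seq, opt_seed):
    # Keys match the format built above: (tuple of rule indices, int seed).
    # Returns None if the design/seed pair was never simulated.
    return result_cache.get((tuple(rule_seq), int(opt_seed)))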
Example #11
def main():
  parser = argparse.ArgumentParser(description="Process robot design search results.")
  parser.add_argument("task", type=str, help="Task (Python class name)")
  parser.add_argument("-j", "--jobs", type=int, required=True,
                      help="Number of jobs/threads")
  parser.add_argument("--grammar-file", type=str, default = './data/designs/grammar_apr30.dot', help="Grammar file (.dot)")
  parser.add_argument("-f", "--log_file", type=str, required=True,
                      help="MCTS log file")
  parser.add_argument("-t", "--type", type=str)
  parser.add_argument("-d", "--save_image_dir", default = None, type=str)
  parser.add_argument("-i", "--iterations", type=int, nargs="+")
  parser.add_argument("-s", "--opt_seed", type=int)
  args = parser.parse_args()
  
  args.save_image_dir = os.path.join(os.path.dirname(args.log_file), args.type)
  os.makedirs(args.save_image_dir, exist_ok=True)

  os.system('cp {} {}/designs.csv'.format(args.log_file, args.save_image_dir))

  task_class = getattr(tasks, args.task)
  task = task_class()
  graphs = rd.load_graphs(args.grammar_file)
  rules = [rd.create_rule_from_graph(g) for g in graphs]
  
  iteration_df = pd.read_csv(args.log_file, index_col=0)
  iteration_df['iteration'] = list(range(1, len(iteration_df) + 1))
  
  if args.type == "iterations":
    os.makedirs(args.save_image_dir, exist_ok=True)
    mid_indices = np.arange(0, len(iteration_df) + 1, 1000)
    offset = 10
    for mid_index in mid_indices:
      start_index = max(mid_index - offset, 0)
      end_index = min(mid_index + offset, len(iteration_df))
      for index in range(start_index, end_index):
        rule_seq = ast.literal_eval(iteration_df['rule_seq'][index])
        robot = make_robot_from_rule_sequence(rule_seq, rules)
        im_data = get_robot_image(robot, task)[::-1,:,:]
        im = Image.fromarray(im_data)
        im.save(os.path.join(args.save_image_dir,
                             f"iteration_{index:05}.png"))

  if args.type == "iterations_top":
    block_size = 100
    count = 2
    total = ((len(iteration_df) - 1) // block_size + 1) * count
    for start_index in range(0, len(iteration_df), block_size):
      # sys.stdout.write('\r Finish {}/{}'.format(start_index // block_size * count, total))
      # sys.stdout.flush()

      end_index = min(start_index + block_size, len(iteration_df))
      block = iteration_df[start_index:end_index].copy()
      block = block.sort_values(by='reward', ascending=False).reset_index()
      for i in range(count):
        row = block.iloc[i]
        rule_seq = ast.literal_eval(row['rule_seq'])
        robot = make_robot_from_rule_sequence(rule_seq, rules)
        print('iteration = {}, reward = {}'.format(row['iteration'], row['reward']))
        im_data = get_robot_image(robot, task)[::-1,:,:]
        im = Image.fromarray(im_data)
        block_index = start_index // block_size
        im.save(os.path.join(args.save_image_dir,
                             f"iteration_{row['iteration']:05}_{row['reward']:.2f}.png"))

  if args.type == "all_top": # save all top K unique designs
    count = 50
    sorted_df = iteration_df.sort_values(by='reward', ascending=False).reset_index()
    hash_values = set()
    j = -1
    for i in range(count):
      while True:
        j = j + 1
        row = sorted_df.iloc[j]
        rule_seq = ast.literal_eval(row['rule_seq'])
        robot = make_robot_from_rule_sequence(rule_seq, rules)
        robot_raw = make_robot_from_rule_sequence_raw(rule_seq, rules)
        hash_key = hash(robot_raw)
        if hash_key in hash_values:
          continue
        hash_values.add(hash_key)
        print('iteration = {}, reward = {}, hash = {}'.format(row['iteration'], row['reward'], hash_key))
        im_data = get_robot_image(robot, task)[::-1,:,:]
        im = Image.fromarray(im_data)
        im.save(os.path.join(args.save_image_dir,
                                f"rank_{i+1:03}_iteration_{row['iteration']:05}_{row['reward']:.2f}.png"))
        break

  elif args.type == "percentiles":
    os.makedirs(args.save_image_dir, exist_ok=True)
    percentiles = np.linspace(0.0, 1.0, 11)
    offset = 10
    # sort_values is not in-place; reassign so the percentile indexing below uses the sorted order
    iteration_df = iteration_df.sort_values(by='reward').reset_index(drop=True)
    for percentile in percentiles:
      mid_index = int(round(percentile * (len(iteration_df) - 1)))
      start_index = max(mid_index - offset, 0)
      end_index = min(mid_index + offset, len(iteration_df))
      for index in range(start_index, end_index):
        rule_seq = ast.literal_eval(iteration_df['rule_seq'][index])
        robot = make_robot_from_rule_sequence(rule_seq, rules)
        im_data = get_robot_image(robot, task)[::-1,:,:]
        im = Image.fromarray(im_data)
        im.save(os.path.join(args.save_image_dir,
                             f"sorted_{index:05}.png"))

  elif args.type == "terrain":
    # Take a screenshot of the terrain alone
    os.makedirs(args.save_image_dir, exist_ok=True)
    im_data = get_robot_image(None, task)[::-1,:,:]
    im = Image.fromarray(im_data)
    im.save(os.path.join(args.save_image_dir,
                         f"terrain_{args.task}.png"))

  elif args.type == "simulate":
    results_df = pd.DataFrame(columns=['task', 'log_file', 'iteration', 'reward'])
    for iteration in args.iterations:
      row = iteration_df.loc[iteration]
      rule_seq = ast.literal_eval(row['rule_seq'])
      robot = make_robot_from_rule_sequence(rule_seq, rules)
      for i in range(10):
        _, result = simulate(robot, task, random.getrandbits(32), args.jobs)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        results_df = pd.concat([results_df, pd.DataFrame([{
            'task': args.task, 'log_file': args.log_file,
            'iteration': iteration, 'reward': result}])], ignore_index=True)

    with open('simulate_results.csv', 'a') as f:
      results_df.to_csv(f, header=(f.tell() == 0))

  elif args.type == "video":
    os.makedirs(args.save_image_dir, exist_ok=True)
    row = iteration_df.loc[args.iterations[0]]
    rule_seq = ast.literal_eval(row['rule_seq'])
    if args.opt_seed is not None:
      opt_seed = args.opt_seed
    else:
      opt_seed = int(row['opt_seed'])
    robot = make_robot_from_rule_sequence(rule_seq, rules)
    save_robot_video_frames(robot, task, opt_seed, args.jobs,
                            args.save_image_dir, frame_interval=4)
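The `simulate` branch above appends ten repeated simulation rewards per requested iteration to `simulate_results.csv`. A hedged follow-up sketch for summarizing that file (the aggregation itself is an illustration, not part of the example):
import pandas as pd

# Read the results written by the 'simulate' branch above and summarize the
# repeated rewards per (task, log_file, iteration) design.
results = pd.read_csv('simulate_results.csv', index_col=0)
summary = results.groupby(['task', 'log_file', 'iteration'])['reward'].agg(
    ['mean', 'std', 'max'])
print(summary)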
Example #12
def main():
    parser = argparse.ArgumentParser(
        description="Plot results of a search run.")
    parser.add_argument("task", type=str, help="Task (Python class name)")
    parser.add_argument("grammar_file", type=str, help="Grammar file (.dot)")
    parser.add_argument('log_file', type=str, help="Log file (.csv)")
    parser.add_argument('--max_iters',
                        type=int,
                        help="Maximum number of iterations to show")
    subparsers = parser.add_subparsers(dest='subcommand',
                                       help="Plotting subcommand")

    iter_scatter_parser = subparsers.add_parser(
        'iter_scatter', help="Scatter plot of results vs. iteration")
    iter_scatter_parser.add_argument('--image_count',
                                     type=int,
                                     default=0,
                                     help='Number of design images to show')
    iter_scatter_parser.add_argument(
        '--spacing',
        type=int,
        default=1000,
        help='Minimum spacing between chosen designs, in iterations')

    args = parser.parse_args()

    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]
    log_df = pd.read_csv(args.log_file)
    if args.max_iters:
        log_df = log_df[log_df['iteration'] < args.max_iters]

    if args.subcommand == 'iter_scatter':
        if args.image_count > 0:
            grid = plt.GridSpec(2,
                                args.image_count,
                                wspace=0,
                                hspace=0,
                                height_ratios=(0.2, 0.8))
            scatter_ax = plt.subplot(grid[1, :])

            # Select the top `image_count` designs, spaced at least `spacing` apart
            best_indices = []
            log_df_remaining = log_df
            for i in range(args.image_count):
                best_idx = log_df_remaining['result'].idxmax()
                best_indices.append(best_idx)
                log_df_remaining = log_df_remaining[
                    abs(log_df_remaining.index - best_idx) > args.spacing]
            best_indices.sort()
            print(best_indices)

            for j, best_idx in enumerate(best_indices):
                rule_seq = ast.literal_eval(log_df['rule_seq'][best_idx])
                graph = make_graph(rules, rule_seq)
                robot = build_normalized_robot(graph)
                image = get_robot_image(robot, task)
                image_ax = plt.subplot(grid[0, j])
                plt.axis('off')
                plt.imshow(image, origin='lower')
                patch = ConnectionPatch(xyA=(0.5 * image.shape[1], 0.0),
                                        xyB=(log_df['iteration'][best_idx],
                                             log_df['result'][best_idx]),
                                        coordsA='data',
                                        coordsB='data',
                                        axesA=image_ax,
                                        axesB=scatter_ax,
                                        color='darkgray')
                image_ax.add_artist(patch)
        else:
            fig, scatter_ax = plt.subplots()

        sns.scatterplot(x='iteration',
                        y='result',
                        data=log_df,
                        ci=None,
                        marker='.',
                        linewidth=0,
                        ax=scatter_ax)
        plt.tight_layout()
        plt.savefig('iter_scatter.pdf')
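A hedged companion sketch for the `iter_scatter` branch above: overlaying the running best result on the scatter axis. This is an addition, not part of the original example; the lines would go just before the `plt.tight_layout()` call inside that branch.
# Overlay the running best result (cumulative maximum of 'result') on the
# scatter axis built above; place before plt.tight_layout()/plt.savefig().
best_so_far = log_df['result'].cummax()
scatter_ax.plot(log_df['iteration'], best_so_far,
                color='tab:red', linewidth=1, label='best so far')
scatter_ax.legend()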
Example #13
def main(log_file=None, grammar_file=None):
    parser = argparse.ArgumentParser(
        description="Example code for parsing a MCTS log file.")

    if not log_file or not grammar_file:
        parser.add_argument("log_file", type=str, help="Log file (.csv)")
        parser.add_argument("grammar_file",
                            type=str,
                            help="Grammar file (.dot)")
        args = parser.parse_args()
    else:
        args = argparse.Namespace()
        args.grammar_file = grammar_file
        args.log_file = log_file

    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.label)
    all_labels = sorted(list(all_labels))

    with open(args.log_file, newline='') as log_file:
        reader = csv.DictReader(log_file)

        all_link_features = []
        all_link_adj = []
        all_results = []
        for row in reader:
            full_rule_seq = ast.literal_eval(row['rule_seq'])
            result = float(row['result'])

            for prefix_len in range(len(full_rule_seq) + 1):
                rule_seq = full_rule_seq[:prefix_len]
                all_results.append(result)

                # Build a robot from the rule sequence
                robot_graph = make_initial_graph()
                for r in rule_seq:
                    matches = rd.find_matches(rules[r].lhs, robot_graph)
                    # Always use the first match
                    robot_graph = rd.apply_rule(rules[r], robot_graph,
                                                matches[0])
                robot = build_normalized_robot(robot_graph)

                # Find the world position and rotation of links
                pos_rot = []
                for i, link in enumerate(robot.links):
                    if link.parent >= 0:
                        parent_pos, parent_rot = pos_rot[link.parent]
                        parent_link_length = robot.links[link.parent].length
                    else:
                        parent_pos, parent_rot = np.zeros(3), np.quaternion(
                            1, 0, 0, 0)
                        parent_link_length = 0

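                    # The child link attaches a fraction joint_pos along the
                    # parent's local x-axis; rotate that offset by the parent's
                    # world rotation and compose the parent rotation with the
                    # (conjugated) joint rotation to get this link's world pose.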
                    offset = np.array(
                        [parent_link_length * link.joint_pos, 0, 0])
                    rel_pos = quaternion.rotate_vectors(parent_rot, offset)
                    rel_rot = np_quaternion(link.joint_rot).conjugate()
                    pos = parent_pos + rel_pos
                    rot = parent_rot * rel_rot
                    pos_rot.append((pos, rot))

                # Generate adjacency matrix
                adj_matrix = np.zeros((len(robot.links), len(robot.links)))
                for i, link in enumerate(robot.links):
                    if link.parent >= 0:
                        adj_matrix[link.parent, i] += 1

                # Generate features for links
                # Note: we can work with either the graph or the robot kinematic tree, but
                # the kinematic tree provides more information
                link_features = []
                for i, link in enumerate(robot.links):
                    world_pos, world_rot = pos_rot[i]
                    world_joint_axis = quaternion.rotate_vectors(
                        world_rot, link.joint_axis)
                    label_vec = np.zeros(len(all_labels))
                    label_vec[all_labels.index(link.label)] = 1

                    link_features.append(
                        np.array([
                            *featurize_link(link), *world_pos,
                            *quaternion_coords(world_rot), *world_joint_axis,
                            *label_vec
                        ]))
                link_features = np.array(link_features)

                all_link_features.append(link_features)
                all_link_adj.append(adj_matrix)

    return all_link_features, all_link_adj, all_results
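A hedged usage sketch for the parser above; the log file path is an assumption, and the grammar path matches the one used elsewhere in these examples.
# Parse a log and inspect the shapes of the first design's features/adjacency.
link_features, link_adj, results = main(
    log_file='logs/mcts_20200101_000000.csv',        # assumed path
    grammar_file='data/designs/grammar_apr30.dot')

print(len(results), 'design prefixes parsed')
print('first design:', link_features[0].shape, '(links x features),',
      link_adj[0].shape, '(adjacency)')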
Example #14
def search_algo_2(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: max_nodes is fixed at 80 to match the input size of the pretrained MPC GNN; use args.depth * 3 later for real MPC
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes = max_nodes, all_labels = all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle = True, preprocessor = preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes = max_nodes, num_channels = num_features, num_outputs = 1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(args.load_V_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr = args.lr)

        # initialize best design
        best_design, best_reward = None, -np.inf
        
        # initialize the seen states pool
        states_pool = []
        
        # initialize visited states
        state_set = set()

        # TODO: load previously explored designs
        
        # explored designs
        designs = []
        design_rewards = []
        
        # reward history
        epoch_rew_his = []

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            t0 = time.time()

            # use epsilon-greedy to sample a design within the maximum number of steps
            if args.eps_schedule == 'linear-decay':
                # linear schedule
                eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                # exp schedule
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_decay)

            done = False
            while not done:
                state = env.reset()
                rule_seq = []
                state_seq = [state]
                total_reward = 0.
                for _ in range(args.depth):
                    action = select_action(env, V, state, eps)
                    if action is None:
                        break
                    rule_seq.append(action)
                    next_state, reward, done = env.step(action)
                    total_reward += reward
                    state_seq.append(next_state)
                    state = next_state
                    if done:
                        break
            
            # save the design and the reward in the list
            designs.append(rule_seq)
            design_rewards.append(total_reward)

            # update best design
            if total_reward > best_reward:
                best_design, best_reward = rule_seq, total_reward
            
            # update state pool
            for ancestor in state_seq:
                state_hash_key = hash(ancestor)
                if not (state_hash_key in state_set):
                    state_set.add(state_hash_key)
                    states_pool.append(ancestor)

            t1 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.depth):
                minibatch = random.sample(states_pool, min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    V_hat = compute_Vhat(robot_graph, env, V)
                    adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(V_hat)
                
                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)
                
                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch, train_adj_matrix_torch, train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t2 = time.time()

            # logging
            if (epoch + 1) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir, '{}'.format(epoch + 1))
                os.makedirs(iter_save_dir, exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(max(0, epoch - args.log_interval + 1), epoch + 1):
                    writer.writerow({'rule_seq': str(designs[i]), 'reward': design_rewards[i]})
                fp_csv.close()

            epoch_rew_his.append(total_reward)

            t_end = time.time()
            avg_loss = total_loss / args.depth
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: Time = {:.2f}, T_sample = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, training loss = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(epoch, t_end - t_start, t1 - t0, t2 - t1, eps, avg_loss, total_reward, avg_reward, best_reward))
            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, loss = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(eps, avg_loss, total_reward, avg_reward))
            fp_log.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(10):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use epsilon-greedy to sample a design within the maximum number of steps
                done = False
                while not done:
                    state = env.reset() 
                    rule_seq = []
                    state_seq = [state]
                    total_reward = 0.
                    for _ in range(args.depth):
                        action = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state, reward, done = env.step(action)
                        total_reward += reward
                        state_seq.append(next_state)
                        state = next_state
                        if done:
                            break

                reward_sum += total_reward
                best_reward = max(best_reward, total_reward)
                print(f'design {epoch}: reward = {total_reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize = (10, 5))
        ax[0].plot(x, y0)
        ax[1].plot(x, y1)
        plt.show()
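A hedged usage sketch for `search_algo_2`: the function reads the attributes below from `args`; the concrete values, the task class name, and the save directory are assumptions. The save directory is created up front because the function writes `log.txt` and `designs.csv` into it without creating it.
import argparse
import os

args = argparse.Namespace(
    seed=0,
    task='FlatTerrainTask',                         # assumed task class name
    grammar_file='data/designs/grammar_apr30.dot',
    load_V_path=None,                               # or a path to a pretrained V model
    test=False,
    save_dir='results/search_algo_2',               # assumed output directory
    lr=1e-4,
    num_iterations=2000,
    eps_schedule='linear-decay',
    eps_start=1.0,
    eps_end=0.1,
    eps_decay=0.3,
    depth=40,
    batch_size=32,
    log_interval=100,
)
os.makedirs(args.save_dir, exist_ok=True)
search_algo_2(args)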
Example #15
    parser.add_argument('--log-path', type=str, required=True)
    parser.add_argument('--grammar-file',
                        type=str,
                        default='../../data/designs/grammar_apr30.dot',
                        help="Grammar file (.dot)")
    parser.add_argument('--index',
                        type=int,
                        default=None,
                        help='index of the designs to be shown at the end')

    args = parser.parse_args()

    fp = open(args.log_path, newline='')
    reader = csv.DictReader(fp)

    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # initialize the env
    env = RobotGrammarEnv(None, rules)

    design_cnt = dict()
    memory = dict()
    N = 0
    best_reward = []
    rewards = []
    rule_seqs = []
    opt_seeds = []
    best_design = None
    best_rule_seq = None
    best_designs = []