Example #1
File: run.py Project: sisl/MPHRL
def main():
    from config import n  # n(c, seed) derives the run's results-directory name
    # `c` (the config) and `seed` are module-level globals in run.py.
    # NOTE: RESULTS DIRECTORY SETUP
    if not os.path.exists('results'):
        os.mkdir('results')
    if os.path.exists(os.path.join('results', n(c, seed))):
        print("DELETING...")
        shutil.rmtree(os.path.join('results', n(c, seed)), ignore_errors=True)
    # Prune stale result directories that never produced TensorBoard event files.
    for name in os.listdir('results'):
        name_dir = os.path.join('results', name)
        if os.path.isdir(name_dir):
            events_exist = False
            for number in os.listdir(name_dir):
                if os.path.isdir(os.path.join(name_dir, number)):
                    for f in os.listdir(os.path.join(name_dir, number)):
                        if 'events' in f:
                            events_exist = True
            if not events_exist:
                shutil.rmtree(name_dir)
    os.mkdir(os.path.join('results', n(c, seed)))

    shutil.copyfile('./config.py', os.path.join('results', n(c, seed),
                                                'cfg.py'))

    # NOTE: ALGORITHM
    result, msg = run(np.random.randint(100))  # run() returns (algo, timesteps string)
    return 'Job Succeeded!', msg
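The setup above follows a common experiment-bookkeeping pattern: derive a run name from the config and seed, wipe any stale copy of that run, then snapshot config.py into the run directory for reproducibility. A minimal self-contained sketch of the pattern; make_run_name is a hypothetical stand-in for MPHRL's n(c, seed):

import os
import shutil

def make_run_name(cfg, seed):
    # Hypothetical stand-in for n(c, seed): any deterministic string
    # derived from the config and the seed works.
    return '%s_seed%s' % (cfg.get('algo', 'run'), seed)

def prepare_run_dir(cfg, seed):
    run_dir = os.path.join('results', make_run_name(cfg, seed))
    os.makedirs('results', exist_ok=True)
    shutil.rmtree(run_dir, ignore_errors=True)   # remove any stale copy of this run
    os.mkdir(run_dir)
    if os.path.exists('./config.py'):            # snapshot the config next to the results
        shutil.copyfile('./config.py', os.path.join(run_dir, 'cfg.py'))
    return run_dir

print(prepare_run_dir({'algo': 'mphrl'}, seed=0))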
Example #2
    def __init__(self, id=0, eval=False, dirname=None, real=False):
        # id 0 appears to mark the master process; worker actors get ids >= 1
        # (see the host/port lookup in the non-master branch below).
        self.pi_replay_buffer = None
        self.id = id
        self.tensorboard_keys = []
        self.dir_name = n() if self.is_master() else dirname
        print('self.dir_name', self.dir_name)
        self.fd = {}
        self.t = 0
        self.env = None
        self.actors = None
        self.effective_timesteps_so_far = 0
        self.real_timesteps_so_far = 0
        self.eval = eval
        self.real = real
        self.grasp_scale_obv_2d = None
        self.max_train_success_perc = 0

        # The master, non-eval process owns TensorBoard logging: keys ending in
        # '_tr' are read from tensor attributes; all others are fed through
        # scalar placeholders at summary time.
        if self.is_master() and not self.eval:
            self.delete_prev_directory()
            self.custom_init_op()
            self.summary_phs = {}
            for tb_key in self.tensorboard_keys:
                key = tb_key.split('/')[-1]
                if tb_key.endswith('_tr'):
                    tf.summary.scalar(tb_key[:-3], getattr(self, key))
                else:
                    self.summary_phs[tb_key] = tf.placeholder(tf.float64,
                                                              shape=(),
                                                              name=tb_key)
                    tf.summary.scalar(tb_key, self.summary_phs[tb_key])

            self.summary_op = tf.summary.merge_all()
            self.file_writer = tf.summary.FileWriter(
                logdir=os.path.join('results', self.dir_name),
                graph=tf.get_default_graph())
        else:
            self.custom_init_op(host=g.hosts[self.id - 1],
                                port=g.ports[self.id - 1])

        self.sess = tf.Session()
        self.init_op = tf.global_variables_initializer()
        if len(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=g.grasp_cnn)) > 0:
            if self.eval:
                self.grasp_saver = tf.train.Saver(var_list=[
                    var
                    for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                 scope=g.grasp_cnn)
                    if 'finger_4' not in var.name and 'log_std' not in var.name
                ])
            else:
                self.grasp_saver = tf.train.Saver(var_list=tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=g.grasp_cnn))
        self.saver = tf.train.Saver()
        self.sess.graph.finalize()

        if self.is_master():
            if self.eval:
                self.eval_op()
            else:
                self.master()
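The summary setup in __init__ is the standard TF 1.x recipe for logging values that are computed outside the graph: one scalar placeholder per key, a merged summary op, and a feed_dict at write time. The same pattern in isolation (TF 1.x API; the 'results/demo' log directory is arbitrary):

import tensorflow as tf  # TF 1.x, as in the snippet above

tf.reset_default_graph()
loss_ph = tf.placeholder(tf.float64, shape=(), name='loss')
tf.summary.scalar('loss', loss_ph)
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter(logdir='results/demo', graph=sess.graph)
    for step in range(3):
        # Python-side values enter the graph through the placeholder.
        s = sess.run(summary_op, feed_dict={loss_ph: 1.0 / (step + 1)})
        writer.add_summary(s, global_step=step)
    writer.close()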
Example #3
File: run.py Project: sisl/MPHRL
def run(run_id):
    starting_iter = 0
    task_id = c.task_id
    utils.modify_xml(task_id, seed)
    algo = algo_class(run_id, True, c.restore_model, task_id, seed)
    ray.init(num_cpus=c.num_cores, redirect_output=True)
    algo_threads = ray.remote(algo_class)  # wrap the algorithm class as a Ray actor class
    algo.actors = [
        algo_threads.remote(_ + np.random.randint(100) + run_id, False, False,
                            task_id, seed) for _ in range(c.num_cores)
    ]

    algo.save_model_op(first=True)
    print('initial eval reward', algo.evaluate_avg_r())

    for iter in range(c.num_batches * c.num_tasks):
        start = time.time()
        algo.env = gym.make('%s%s' % (c.task_id, c.env_name))

        # NOTE: GET EXPERIENCE
        algo.save_model_op()
        ray.get(
            [ac.restore_model_op.remote(algo.run_id) for ac in algo.actors])

        # Gather each actor's (experience, num_traj, cum_r, num_succ) tuple,
        # then transpose so ret[k] groups the k-th field across actors.
        ret = list(
            zip(*ray.get([
                actor.gen_experiences_op.remote()
                for actor in algo.actors[:c.num_cpus]
            ])))
        experience = list(itertools.chain(*ret[0]))
        num_traj = sum(ret[1])
        cum_r = sum(ret[2])
        num_succ = sum(ret[3])

        # NOTE: experience is the flat list of transition tuples from all
        # rollouts this batch; c.e is a small epsilon guarding the divisions.
        algo.avg_train_rewards = cum_r / (num_traj + c.e)
        algo.num_traj_per_cpu = num_traj / c.num_cpus
        algo.suc_train = num_succ / (num_traj + c.e)
        algo.num_timesteps = len(experience)
        algo.update_learning_rates_op(iter)

        # Test-time stats are proxied by the training stats in this setup.
        algo.suc_test = algo.suc_train
        algo.eval_rewards = algo.avg_train_rewards
        algo.suc_test_std = np.std(ret[3])

        if c.gpu:
            with tf.device('/gpu:0'):
                algo.learn(experience)
        else:
            algo.learn(experience)

        # NOTE: append this iteration's stats to the history lists
        algo.suc_perc_train_list.append(algo.suc_train)
        algo.eval_r_list.append(algo.eval_rewards)
        algo.suc_perc_test_list.append(algo.suc_test)

        # NOTE: keep only the last c.mov_avg entries for the moving averages
        algo.suc_perc_train_list = algo.suc_perc_train_list[-c.mov_avg:]
        algo.eval_r_list = algo.eval_r_list[-c.mov_avg:]
        algo.suc_perc_test_list = algo.suc_perc_test_list[-c.mov_avg:]

        algo.mov_suc_train = np.mean(algo.suc_perc_train_list)
        algo.moving_eval_rewards = np.mean(algo.eval_r_list)
        algo.mov_suc_test = np.mean(algo.suc_perc_test_list)
        algo.max_eval_suc_mov = max(algo.mov_suc_test, algo.max_eval_suc_mov)

        algo.seconds = time.time() - start
        print((iter, '%s S' % algo.seconds, algo.eval_rewards,
               algo.avg_train_rewards, task_id))
        print(n(c, seed))
        if algo.mov_suc_test > algo.max_reward + 0.03:
            algo.save_best_model_op()
            algo.max_reward = algo.mov_suc_test

        # NOTE: LIFELONG LEARNING
        if algo.mov_suc_test >= c.solved_threshold or algo.timesteps_so_far > \
                c.total_ts:
            task_id += 1
            algo.save_best_model_op()
            if task_id == c.num_tasks:
                algo.timesteps_used_list.append(algo.timesteps_so_far / 1e6)
                algo.timesteps_used_list.append(sum(algo.timesteps_used_list))
                algo.fd[algo.timesteps_used_text_ph] = str(
                    algo.timesteps_used_list)
                algo.fd[algo.ts_string_ph] = ' & '.join(
                    [str(round(num, 1)) for num in algo.timesteps_used_list])
                algo.summary()
                break
            algo.suc_perc_test_list = []
            algo.suc_perc_train_list = []
            algo.eval_r_list = []
            utils.modify_xml(task_id, seed)
            algo.reinit(task_id=task_id)
            ray.get([actor.reinit.remote(task_id) for actor in algo.actors])
            algo.iteration_solved = iter - starting_iter
            starting_iter = iter

        algo.memory = 0
        algo.summary()

    return utils.pickle_compatible(algo), algo.fd.get(algo.ts_string_ph) or 'None'
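Experience collection in run() is a plain Ray actor fan-out: ray.remote wraps the class into an actor class, one actor is spawned per core, all gen_experiences_op futures are fired, and a single ray.get plus zip(*...) transposes the per-actor tuples into per-field lists. A minimal sketch of that pattern; Worker and gen_experiences are hypothetical stand-ins for algo_class and gen_experiences_op:

import ray

ray.init(num_cpus=2)

@ray.remote
class Worker:
    def __init__(self, worker_id):
        self.worker_id = worker_id

    def gen_experiences(self):
        # Stand-in rollout: returns (experience_list, num_traj, cum_reward).
        return [(self.worker_id, 'obs', 'act', 'rew')], 1, 1.0

workers = [Worker.remote(i) for i in range(2)]
# Block once on all futures, then transpose, exactly as run() does above.
ret = list(zip(*ray.get([w.gen_experiences.remote() for w in workers])))
experience = [t for chunk in ret[0] for t in chunk]
num_traj, cum_r = sum(ret[1]), sum(ret[2])
ray.shutdown()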
Example #4
File: run.py Project: sisl/MPHRL

if __name__ == "__main__":
    from test_env import seed

    print('FINAL SEED', seed)
    tf.set_random_seed(seed=seed)
    np.random.seed(seed=seed)

    module = __import__("algorithms.%s" % c.algo, fromlist=[c.algo])
    algo_class = getattr(module, c.algo)
    try:
        subject, msg = main()
        content = n(c, seed) + '\n' + msg
    except:
        subject = 'Job Failed!'
        content = '%s\n%s' % (n(c, seed), traceback.format_exc())
        print(content)
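The __main__ block resolves the algorithm class by name: module algorithms.<c.algo> is expected to define a class of the same name. The equivalent lookup with importlib, the more idiomatic spelling of that __import__ call (load_algo_class is a hypothetical helper; the algorithms.<name> layout is taken from the snippet):

import importlib

def load_algo_class(algo_name):
    # Same effect as __import__("algorithms.%s" % algo_name, fromlist=[algo_name]):
    # import algorithms.<algo_name> and return the class named after it.
    module = importlib.import_module('algorithms.%s' % algo_name)
    return getattr(module, algo_name)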
Example #5
        algo_class = getattr(module, d.config.algo)
        algo_class(eval=True)
        with open('results.json', 'r') as f:
            j = json.load(f)
        subject = '%s: Job Succeeded! %s %s %s %s' % (
            g.ip, g.finger_closing_only, g.regrasp_only, g.pos_adjustment_only,
            g.ori_adjustment_only)
        content = str([
            'test_single_seen_ret', 'test_single_novel_ret',
            'test_multi_seen_ret', 'test_multi_novel_ret'
        ]) + '\n'
        content += str(j['mean']) + '\n'
        content += str(j['std']) + '\n' + g.path
    except:
        subject = '%s: Job Failed!' % g.ip
        content = '%s\n%s' % (n(), traceback.format_exc())
        print(content)

    with open('credential.json', mode='r') as f:
        login = json.load(f)

    success = False
    # Retry the notification email until it goes through.
    while not success:
        time.sleep(1)
        try:
            if g.statistics:
                print('Trying to send email')
                yag = yagmail.SMTP(login['from'], login['password'])
                yag.send(login["to"], subject=subject, contents=content)
                print("Email sent")
            success = True
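The while-loop above keeps retrying until yagmail's send succeeds. A bounded variant of the same idea using the same two yagmail calls; send_with_retry and its exponential backoff are illustrative additions, not part of the project:

import time
import yagmail

def send_with_retry(login, subject, content, max_tries=5):
    # `login` is the credential dict loaded from credential.json
    # ('from', 'password', 'to'), as in the snippet above.
    for attempt in range(max_tries):
        try:
            yag = yagmail.SMTP(login['from'], login['password'])
            yag.send(login['to'], subject=subject, contents=content)
            return True
        except Exception:
            time.sleep(2 ** attempt)  # back off before the next attempt
    return False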
Example #6
else:
    seed = 10
    print('NOISE: %s %s' % (c.Tnoise, c.Fnoise))
    print('SOLVED: %s' % c.solved_threshold)
    print('ABOVE TARGET: %s' % c.above_target)
    print('RUN: %s' % c.xml_name)
    print('pg_sub_lr %s' % c.pg_sub_lr)
    print('pg_master_ce_lr_source %s' % c.pg_master_ce_lr_source)
    print('REPEAT %s' % c.repeat)
    print('update_master_interval %s' % c.update_master_interval)
    print('reset %s' % c.reset)
    # NOTE: MASTER writes the shared seed to seed_json for the other processes
    with open(seed_json, 'w') as f:
        json.dump({'seed': seed}, f)
    print('MASTER SEED', seed, 'can leave now...')
    print(n(c, seed))

env_id = c.env_name.replace('-', '_')
# Register one gym environment id per task: '0<env_name>', '1<env_name>', ...
for i in range(c.num_tasks):
    id = '%s%s' % (i, c.env_name)
    if c.env_type == 'mujoco':
        register(id=id,
                 entry_point='test_env.envs.%s:%s' % (env_id, env_id),
                 max_episode_steps=c.max_num_timesteps,
                 reward_threshold=60000.0,
                 kwargs={
                     'paths': c.paths[i],
                     'seed': seed
                 })
    else:
        domain, task, _ = c.xml_name.split('-')
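The registration loop mints one environment id per task ('0<env_name>', '1<env_name>', ...), which is what lets run() switch tasks with a bare gym.make. A toy, self-contained version of per-task registration; DemoEnv stands in for the project's test_env.envs classes:

import gym
from gym import spaces
from gym.envs.registration import register

class DemoEnv(gym.Env):
    # Toy stand-in for the project's MuJoCo environments.
    def __init__(self, task=0, seed=0):
        self.task = task
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, True, {}

for i in range(3):
    register(id='%sDemo-v0' % i,      # one id per task, as in the loop above
             entry_point=DemoEnv,     # gym also accepts a class/callable here
             max_episode_steps=200,
             kwargs={'task': i, 'seed': 0})

env = gym.make('1Demo-v0')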