Example #1
import argparse
import glob
import os

import matplotlib.pyplot as plt

# load, plot and make_path are helpers defined elsewhere in this project.

def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--smooth', help='How many points to smooth', type=int, default=200)
    parser.add_argument('--font-size', help='Font size on plots', type=int, default=24)
    args = parser.parse_args()

    # Main plot for each experiment

    for experiment_folder in glob.iglob('./results/solarfox-ls-pcg-progressive-fixed/'):
        title = experiment_folder.split('/')[-2].replace('-', ' ').title()
        title = title.replace('Pcg', 'PCG').replace('Ls ', '')
        path = os.path.join(experiment_folder, 'plots/')
        data = []
        i = 0
        for experiment_log in glob.iglob(os.path.join(experiment_folder, 'logs/*.log')):
            i += 1
            experiment_data = load(experiment_log)
            experiment_title = title + " " + str(i)
            data.append(experiment_data)
            #plot(path, experiment_title, experiment_data, smooth=args.smooth, fontsize=args.font_size, multiple=False)
            plt.clf()
        make_path(path)
        title = "Progressive PCG in Solarfox"
        plot(path, title, data, smooth=args.smooth, fontsize=args.font_size, multiple=True, ymin_lim=None, ymax_lim=None)
        plt.clf()


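Every example on this page funnels its output directories through a small make_path helper. Its exact definition is project-specific and not shown here; a minimal sketch, assuming the usual baselines-style behavior of a safe recursive mkdir:

import os

def make_path(path):
    # Create the directory (and any parents) if it does not already exist.
    os.makedirs(path, exist_ok=True)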
Example #2
    # Most likely a @staticmethod of LevelSelector in the source project (the
    # decorator is not shown). BaseManager comes from multiprocessing.managers;
    # the selector classes and make_path are project helpers.
    def get_selector(selector_name, game, path, fixed=False):

        # Register classes for sharing across procs
        for c in [
                RandomSelector, RandomWithDifSelector, RandomPCGSelector,
                RandomWithDifPCGSelector, ProgressivePCGSelector,
                SequentialSelector
        ]:
            BaseManager.register(c.__name__, c)
        manager = BaseManager()
        manager.start()

        # Determine selector
        if selector_name is not None:
            make_path(path)
            path = os.path.realpath(path)
            if selector_name == "random-all":
                selector = manager.RandomSelector(path, game, [0, 1, 2, 3, 4])
            elif selector_name == "random-0123":
                selector = manager.RandomSelector(path, game, [0, 1, 2, 3])
            elif selector_name.startswith('random-'):
                difficulty = float(selector_name.split('random-')[1]) * 0.1
                selector = manager.RandomWithDifSelector(
                    path, game, difficulty)
            elif selector_name.startswith('seq-'):
                difficulty = float(selector_name.split('seq-')[1]) * 0.1
                selector = manager.SequentialSelector(path, game, difficulty)
            elif selector_name == "pcg-random":
                selector = manager.RandomPCGSelector(path, game)
            elif selector_name.startswith('pcg-random-'):
                difficulty = float(selector_name.split('pcg-random-')[1]) * 0.1
                selector = manager.RandomWithDifPCGSelector(path,
                                                            game,
                                                            difficulty,
                                                            fixed=fixed)
            elif selector_name == "pcg-progressive":
                selector = manager.ProgressivePCGSelector(path, game)
            elif selector_name == "pcg-progressive-fixed":
                selector = manager.ProgressivePCGSelector(path,
                                                          game,
                                                          upper_limit=False)
            else:
                raise Exception("Unknown level selector: " + selector_name)
        else:
            return None

        return selector
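The registration loop above is multiprocessing's BaseManager pattern: registering a class lets worker processes share one selector instance through proxies. A minimal, self-contained sketch of the same pattern (Counter is a stand-in class, not from the project):

from multiprocessing.managers import BaseManager

class Counter:
    def __init__(self):
        self.n = 0

    def increment(self):
        self.n += 1
        return self.n

if __name__ == '__main__':
    BaseManager.register('Counter', Counter)
    manager = BaseManager()
    manager.start()
    counter = manager.Counter()   # a proxy; calls are forwarded to the manager process
    print(counter.increment())    # prints 1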
Example #3
    # Same helper as example #2, extended with more selector types and a `max`
    # cap (note that the parameter name shadows the builtin max).
    def get_selector(selector_name, game, path, fixed=False, max=-1):
        # Register classes for sharing across procs
        for c in [RandomSelector, RandomWithDifSelector, SequentialHumanLevelSelector,
                  RandomPCGSelector, RandomWithDifPCGSelector, ProgressivePCGSelector,
                  SequentialSelector, ABTestSelector, MapEliteSelector]:
            BaseManager.register(c.__name__, c)
        manager = BaseManager()
        manager.start()

        # Determine selector
        if selector_name is not None:
            make_path(path)
            path = os.path.realpath(path)
            if selector_name == "ab-test":
                selector = manager.ABTestSelector(path, game, "levels_2", max=max)
            elif selector_name == "random-all":
                selector = manager.RandomSelector(path, game, [0, 1, 2, 3, 4], max=max)
            elif selector_name == "random-0123":
                selector = manager.RandomSelector(path, game, [0, 1, 2, 3], max=max)
            elif selector_name.startswith('random-'):
                difficulty = float(selector_name.split('random-')[1]) * 0.1
                selector = manager.RandomWithDifSelector(path, game, difficulty, max=max)
            elif selector_name.startswith('seq-human-'):
                level_id = int(selector_name.split('seq-human-')[1])
                selector = manager.SequentialHumanLevelSelector(path, game, level_id, max=max)
            elif selector_name.startswith('seq-'):
                difficulty = float(selector_name.split('seq-')[1]) * 0.1
                selector = manager.SequentialSelector(path, game, difficulty, max=max)
            elif selector_name == "pcg-random":
                selector = manager.RandomPCGSelector(path, game, max=max)
            elif selector_name.startswith('pcg-random-'):
                difficulty = float(selector_name.split('pcg-random-')[1]) * 0.1
                selector = manager.RandomWithDifPCGSelector(path, game, difficulty, fixed=fixed, max=max)
            elif selector_name == "pcg-progressive":
                selector = manager.ProgressivePCGSelector(path, game, max=max)
            elif selector_name == "pcg-progressive-fixed":
                selector = manager.ProgressivePCGSelector(path, game, upper_limit=False, max=max)
            elif selector_name == "map-elite":
                selector = manager.MapEliteSelector(path, game, max=max)
            else:
                raise Exception("Unknown level selector: " + selector_name)
        else:
            return None

        return selector
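The numeric suffix in a selector name encodes tenths of a difficulty, so 'pcg-random-5' maps to 0.5. A hypothetical call, mirroring how examples #7 and #10 invoke this helper (the path and cap are illustrative):

selector = LevelSelector.get_selector('pcg-random-5', 'zelda',
                                      './results/demo/levels/', max=100)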
Example #4
import argparse
import glob
import os

import matplotlib.pyplot as plt

# load, plot, plot_mixed and make_path are helpers defined elsewhere in this project.

def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--smooth', help='How many points to smooth', type=int, default=10)
    parser.add_argument('--font-size', help='Font size on plots', type=int, default=14)
    args = parser.parse_args()

    # Main plot for each experiment
    for experiment_folder in glob.iglob('./results/*/'):
        title = experiment_folder.split('/')[-2].replace('-', ' ').title()
        title = title.replace('Pcg', 'PCG').replace('Ls ', '')
        path = os.path.join(experiment_folder, 'plots/')
        make_path(path)
        data = []
        i = 0
        for experiment_log in glob.iglob(os.path.join(experiment_folder, 'logs/*.log')):
            i += 1
            experiment_data = load(experiment_log)
            experiment_title = title + " " + str(i)
            data.append(experiment_data)
            plot(path, experiment_title, experiment_data, smooth=args.smooth, fontsize=args.font_size, multiple=False)
            plt.clf()
        plot(path, title, data, smooth=args.smooth, fontsize=args.font_size, multiple=True)
        plt.clf()

    # Mixed plot for each experiment
    titles = []
    datasets = []
    for experiment_folder in glob.iglob('./results/*pcg-random*/'):
        title = experiment_folder.split('/')[-2].replace('-', ' ').title()
        title = title.replace('Pcg', 'PCG').replace('Ls ', '')
        data = []
        for experiment_log in glob.iglob(os.path.join(experiment_folder, 'logs/*.log')):
            experiment_data = load(experiment_log)
            data.append(experiment_data)
        datasets.append(data)
        titles.append(title)
    if len(titles) > 0:
        path = './plots/'
        make_path(path)
        plot_mixed(path, "PCG with Fixed Difficulty", titles, datasets, smooth=args.smooth, fontsize=args.font_size)
        plt.clf()
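The --smooth flag sets a window size that the project's plot helpers presumably use for a moving average; plot itself is not shown on this page, but here is a sketch under that moving-average assumption:

import numpy as np

def smooth(values, window):
    # Moving average over `window` points; returns the input unchanged when
    # the window is trivial or larger than the series.
    if window <= 1 or len(values) < window:
        return np.asarray(values)
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode='valid')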
Example #5
 def save(save_path):
     # sess, params and make_path come from the enclosing scope (a baselines-style a2c Model)
     ps = sess.run(params)
     make_path(osp.dirname(save_path))
     joblib.dump(ps, save_path)
Example #6
File: a2c.py Project: NeteaseFuxiRL/asf
 def save(save_path):
     ps = sess.run(params)
     # The original called make_path(save_path), which creates a directory at
     # the dump path itself; the parent directory is what needs to exist
     # (osp is os.path in baselines' a2c.py).
     make_path(osp.dirname(save_path))
     joblib.dump(ps, save_path)
Example #7
# Depends on numpy (np), tensorflow (tf), glob and several project helpers
# (make_path, LevelSelector, make_gvgai_env, Model, the policy classes and eval).
def test_on(game, level, selector, experiment_name, experiment_id, policy, num_envs=1, seed=0, runs=100, render=False, record_path=None, save_results=True, model_steps=-1):

    # Environment name
    env_id = "gvgai-" + game + "-lvl" + str(level) + "-v0"

    # Test name
    test_name = game
    if selector is not None:
        test_name += "-ls-" + selector
    else:
        test_name += "-lvl-" + str(level)

    print("Test name: " + test_name)
    print('Training name: ' + experiment_name)
    print("Training id: " + experiment_id)

    # Folders
    score_path = './results/' + experiment_name + '/eval/' + test_name + '/scores/'
    level_path = './results/' + experiment_name + '/eval/' + test_name + '/levels/'
    make_path(level_path)
    make_path(score_path)

    # Create file and override if necessary
    score_file = score_path + test_name + "_" + experiment_id + ".dat"
    with open(score_file, 'w+') as myfile:
        myfile.write('')

    # Level selector
    level_selector = LevelSelector.get_selector(selector, game, level_path, max=runs)

    env = make_gvgai_env(env_id, num_envs, seed, level_selector=level_selector)

    # Main plots per experiment
    mean_scores = []
    std_scores = []
    model_folder = './results/' + experiment_name + '/models/' + experiment_id + "/"

    # Find number of steps for last model
    if model_steps < 0:
        for model_meta_name in glob.iglob(model_folder + '*.meta'):
            print(model_meta_name)
            s = int(model_meta_name.split('.meta')[0].split('/')[-1].split("-")[1])
            print(s)
            if s > model_steps:
                model_steps = s

    print(model_steps)

    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise ValueError("Unknown policy: " + policy)

    tf.reset_default_graph()

    ob_space = env.observation_space
    ac_space = env.action_space
    print("creating model")
    model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space, nenvs=num_envs, nsteps=5)

    print("loading model")
    try:
        model.load(model_folder, model_steps)
    except Exception as e:
        print(e)
        env.close()
        return

    if record_path is not None:
        name = "{}/{}_{}_steps_{}m".format(record_path, experiment_name, policy, model_steps / 1000000)
    else:
        name = None

    print("evaluate")
    scores = eval(model, env, runs=runs, render=render, record_name=name, level_selector=level_selector)

    mean_score = np.mean(scores)
    std_score = np.std(scores)
    print("Testing on=" + test_name)
    print("Trained on=" + experiment_name)
    print("Model id=" + experiment_id)
    print("Steps trained=" + str(model_steps))
    print("Runs=" + str(runs))
    print("Mean score=" + str(mean_score))
    print("Std. dev.=" + str(std_score))
    print("All scores=" + str(scores))


    if save_results:
        print("saving results to " + score_file)
        # Save results
        with open(score_file, "a") as myfile:
            line = "Testing on=" + test_name + "\n"
            line += "Trained on=" + experiment_name + "\n"
            line += "Id=" + experiment_id + "\n"
            line += "Steps trained=" + str(model_steps) + "\n"
            line += "Runs=" + str(runs) + "\n"
            line += "Mean score=" + str(mean_score) + "\n"
            line += "Std. dev.=" + str(std_score) + "\n"
            line += "All scores=" + str(scores) + "\n"
            line += "\n"
            myfile.write(line)

    env.close()
    # The original returned [wins[0], mean_score, steps], but wins and steps
    # are undefined in this scope; return the values actually computed here.
    return [mean_score, std_score, model_steps]
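A hypothetical invocation, with argument values inferred from the naming conventions inside the function (none of these values come from this page):

test_on(game='zelda', level=0, selector=None,
        experiment_name='zelda-lvl-0-v0', experiment_id='run-1',
        policy='cnn', runs=100, render=False)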
Example #8
 def save(save_path):
     ps = sess.run(params)
     dirname = osp.dirname(save_path)
     if dirname:
         make_path(dirname)
     joblib.dump(ps, save_path)
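The load side of these checkpoints is a single joblib call; a sketch with a hypothetical path:

import joblib

# Restores the parameter list written by save() above; the caller would then
# feed these arrays back into the TensorFlow session.
ps = joblib.load('./checkpoints/model.pkl')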
Example #9
# Depends on numpy (np), tensorflow (tf) and the baselines logger, plus project
# helpers (set_global_seeds, make_path, Model, Runner).
def learn(policy,
          env,
          experiment_name,
          experiment_id,
          seed=None,
          nsteps=5,
          total_timesteps=int(80e6),
          vf_coef=0.5,
          ent_coef=0.01,
          max_grad_norm=0.5,
          lr=7e-4,
          lrschedule='linear',
          epsilon=1e-5,
          alpha=0.99,
          gamma=0.99,
          save_interval=25000,
          frame_skip=False,
          level_selector=None,
          render=False,
          diff=0):

    if seed is None:
        seed = random.randint(0, 10000000)

    tf.reset_default_graph()
    set_global_seeds(seed)

    # Check if experiment_id exists - then continue
    if sys.platform.startswith('win'):
        sep = '\\'
    else:
        sep = '/'
    model_path = '.' + sep + 'results' + sep + experiment_name + sep + 'models' + sep
    steps = 0
    resume = False
    for model_folder in glob.iglob(model_path + '*' + sep):

        # Experiment id
        id = model_folder.split(sep)[-2]

        if id != experiment_id:
            continue

        # Find number of steps for last model
        for model_meta_name in glob.iglob(model_folder + sep + '*.meta'):
            resume = True
            s = int(
                model_meta_name.split('.meta')[0].split(sep)[-1].split("-")[1])
            if s >= steps:
                steps = s
    if resume:
        print("Resuming from step {}".format(steps))

    # Log file path
    log_path = "./results/" + experiment_name + "/logs/"
    make_path(log_path)
    log_file = log_path + experiment_id + ".log"

    # Create log file
    if not resume:
        with open(log_file, "a") as myfile:
            line = "episodes; steps; frames; mean_score; std_score; min_score; max_score; difficulty; policy_loss; value_loss; episode_length;fps;\n"
            myfile.write(line)

    # Model folder path
    model_path = "./results/" + experiment_name + "/models/" + experiment_id + "/"
    make_path(model_path)

    # Create model
    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    #num_procs = len(env.remotes) # HACK
    model = Model(policy=policy,
                  ob_space=ob_space,
                  ac_space=ac_space,
                  nenvs=nenvs,
                  nsteps=nsteps,
                  ent_coef=ent_coef,
                  vf_coef=vf_coef,
                  max_grad_norm=max_grad_norm,
                  lr=lr,
                  alpha=alpha,
                  epsilon=epsilon,
                  total_timesteps=total_timesteps,
                  lrschedule=lrschedule)

    if resume:
        try:
            model.load(model_path, steps)
        except Exception as e:
            print(e)
            env.close()
            return

    # Create parallel runner
    runner = Runner(env,
                    model,
                    nsteps=nsteps,
                    gamma=gamma,
                    render=render,
                    record_name=None)

    # Training loop
    nbatch = nenvs * nsteps
    tstart = time.time()
    next_model_save = steps + save_interval
    start_update = steps // nbatch
    episodes = 0

    if not resume:
        model.save(model_path, 0)
    else:
        with open(log_file, "r") as myfile:
            last_line = None
            for line in myfile:
                if len(line.strip()) > 0 and '\x00' not in line:
                    last_line = line

            fields = last_line.split(';')
            episodes = int(fields[0])
            # The difficulty column is field 7 (see the log header above); the
            # original read fields[-1], which is just the newline after the
            # final semicolon. Difficulties are tenths, hence float().
            if level_selector is not None and fields[7] != '' and hasattr(
                    level_selector, 'difficulty'):
                print("Restoring difficulty to " + fields[7])
                level_selector.difficulty = float(fields[7])

    last_frames = 0
    for update in range(start_update, total_timesteps // nbatch + 1):
        obs, states, rewards, masks, actions, values = runner.run()

        policy_loss, value_loss, policy_entropy = model.train(
            obs, states, rewards, masks, actions, values)
        nseconds = time.time() - tstart
        f = 4 if frame_skip else 1
        fps = int((update * nbatch * f) / nseconds)
        steps = update * nbatch
        frames = steps * f  # Frames is the same as steps if no frame skipping - both are logged

        # If n final results were reported, save the average and std dev
        if len(runner.final_rewards) >= runner.nenv:

            episodes += runner.nenv

            # Extract and remove a number of final rewards equal to the number of workers
            final_rewards = runner.final_rewards[:runner.nenv]
            mean_score = np.mean(final_rewards)
            std_score = np.std(final_rewards)
            min_score = np.min(final_rewards)
            max_score = np.max(final_rewards)
            runner.final_rewards = runner.final_rewards[runner.nenv:]

            # Debug logging
            f = frames - last_frames
            frames_per_episode = f / runner.nenv
            last_frames = frames
            last_level_id = runner.last_level_id

            # Log using baselines logger
            logger.record_tabular("mean_score", mean_score)
            logger.record_tabular("std_score", std_score)
            logger.record_tabular("min_score", min_score)
            logger.record_tabular("max_score", max_score)
            logger.record_tabular("steps", steps)
            logger.record_tabular("frames", frames)
            logger.record_tabular("episodes", episodes)
            logger.record_tabular("fps", fps)
            if level_selector is not None:
                logger.record_tabular("difficulty",
                                      str(level_selector.get_info()))
            else:
                logger.record_tabular("difficulty", diff)
            logger.dump_tabular()  # here is where the print statement is done.

            # Log to file
            with open(log_file, "a") as myfile:
                dif = ""
                if level_selector is not None:
                    dif = str(level_selector.get_info())
                else:
                    dif = str(diff)
                line = ";".join(str(v) for v in [
                    episodes, steps, frames, mean_score, std_score, min_score,
                    max_score, dif, policy_loss, value_loss,
                    frames_per_episode, fps]) + ";\n"
                myfile.write(line)

        # Save model
        if steps >= next_model_save:
            model.save(model_path, next_model_save)
            next_model_save += save_interval

    # Save model in the end
    model.save(model_path, total_timesteps)

    env.close()
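Both learn() above and test_on() in example #7 scan *.meta files to recover the newest checkpoint step. The same pattern, factored into a small helper (a sketch, not project code):

import glob
import os

def latest_checkpoint_step(model_folder):
    # Return the largest global step among TensorFlow checkpoints named like
    # 'model-25000.meta', or -1 if none exist.
    steps = -1
    for meta in glob.iglob(os.path.join(model_folder, '*.meta')):
        name = os.path.basename(meta).split('.meta')[0]  # e.g. 'model-25000'
        steps = max(steps, int(name.split('-')[1]))
    return steps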
Example #10
# Depends on argparse, math and uuid, plus project helpers (make_path,
# LevelSelector, make_gvgai_env, the policy classes and learn from example #9).
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--policy',
                        help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn')
    parser.add_argument('--lrschedule',
                        help='Learning rate schedule',
                        choices=['constant', 'linear'],
                        default='constant')
    parser.add_argument(
        '--num-envs',
        help='Number of environments/workers to run in parallel (default=12)',
        type=int,
        default=12)
    parser.add_argument('--num-timesteps',
                        help='Number of timesteps to train the model',
                        type=int,
                        default=int(20e6))
    parser.add_argument('--game',
                        help='Game name (default=zelda)',
                        default='zelda')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--save-interval',
                        help='Model saving interval in steps',
                        type=int,
                        default=int(1e6))
    parser.add_argument('--level',
                        help='Level (integer) to train on',
                        type=int,
                        default=0)
    parser.add_argument('--resume',
                        help='The experiment id to resume',
                        default=None)
    parser.add_argument(
        '--repetitions',
        help='Number of repetitions to run sequentially (default=1)',
        type=int,
        default=1)
    parser.add_argument(
        '--selector',
        help='Level selector to use in training - will ignore the level '
             'argument if set (default: None)',
        choices=[None] + LevelSelector.available,
        default=None)
    parser.add_argument('--render',
                        action='store_true',
                        default=False,
                        help='Render screen (default: False)')
    parser.add_argument('--version', help='game version', type=int, default=0)

    args = parser.parse_args()

    # Gym environment name
    env_id = "gvgai-" + args.game + "-lvl" + str(args.level) + "-v" + str(
        args.version)
    difficulty = -1
    # Experiment name
    make_path("./results")
    experiment_name = args.game
    if args.selector is not None:
        experiment_name += "-ls-" + args.selector + "-v" + str(args.version)
    else:
        difficulty = math.ceil((args.level + args.version * 5) / 2)
        experiment_name += "-lvl-" + str(args.level) + "-v" + str(args.version)
    make_path("./results/" + experiment_name)

    for i in range(args.repetitions):

        print("Starting experiment " + str(i + 1) + " of " +
              str(args.repetitions))

        # Unique id for experiment
        if args.resume is None:
            experiment_id = str(uuid.uuid1())
        else:
            experiment_id = args.resume

        # Level selector
        level_path = './results/' + experiment_name + '/levels/' + experiment_id + '/'
        level_selector = LevelSelector.get_selector(args.selector, args.game,
                                                    level_path)

        # Make gym environment
        env = make_gvgai_env(env_id=env_id,
                             num_env=args.num_envs,
                             seed=args.seed,
                             level_selector=level_selector)

        # Atari
        #env_id = "BreakoutNoFrameskip-v4"
        #env = make_atari_env(env_id, args.num_envs, args.seed)

        # Select model
        if args.policy == 'cnn':
            policy_fn = CnnPolicy
        elif args.policy == 'lstm':
            policy_fn = LstmPolicy
        elif args.policy == 'lnlstm':
            policy_fn = LnLstmPolicy

        learn(policy=policy_fn,
              env=env,
              experiment_name=experiment_name,
              experiment_id=experiment_id,
              seed=args.seed,
              total_timesteps=args.num_timesteps,
              lrschedule=args.lrschedule,
              frame_skip=False,
              save_interval=args.save_interval,
              level_selector=level_selector,
              render=args.render,
              diff=difficulty)

        env.close()

        print("Experiment DONE")
Example #11
 def save(save_path):
     session_params = sess.run(params)
     make_path(os.path.dirname(save_path))
     joblib.dump(session_params, save_path)
Example #12
 def save(save_path):
     ps = sess.run(params)
     folder = "models/"
     make_path(folder)
     # assumes save_path is a bare file name, stored under models/
     joblib.dump(ps, folder + save_path)
Example #13
 def save(save_path, model_name):
     ps = sess.run(params)
     make_path(save_path)
     joblib.dump(ps, '{}/{}.model'.format(save_path, model_name))
Example #14
 def save(path, steps):
     make_path(path)
     self.saver.save(sess, path + 'model', global_step=steps)
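The restore counterpart to this tf.train.Saver call, under the same assumption that sess, path and self.saver come from the enclosing scope:

 def load(path):
     # Restore the newest checkpoint written by save() above.
     ckpt = tf.train.latest_checkpoint(path)
     if ckpt is not None:
         self.saver.restore(sess, ckpt)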
Example #15
File: a2c.py Project: IcarusTan/baselines
 def save(save_path):
     ps = sess.run(params)
     # As in example #6: create the parent directory, not a directory at the
     # dump path itself (osp is os.path in baselines' a2c.py).
     make_path(osp.dirname(save_path))
     joblib.dump(ps, save_path)
Example #16
 def save(save_path):
     parameters = sess.run(params)
     make_path(os.path.dirname(save_path))
     joblib.dump(parameters, save_path)