def main():
    create_log_dir(log_dir, __file__)

    train_env = make_vec_env(train_env_num)
    test_env = make_vec_env(test_env_num)

    agent_online = make_agent_online()
    agent_train = make_agent_train()
    agent_online.load_state_dict(agent_train.state_dict())

    trainer = OnPolicyTrainer(agent_online,
                              agent_train,
                              train_env,
                              **trainer_args,
                              test_env=test_env)
    trainer.train(**train_args)

    train_env.close()
    test_env.close()
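
The first example assumes module-level helpers (make_vec_env, make_agent_online, make_agent_train) and config dicts (trainer_args, train_args) that are not shown. A minimal sketch of what make_vec_env could look like, assuming Gym's built-in vector API; the env id is an illustrative placeholder, not taken from the example:

import gym

def make_vec_env(num_envs, env_id="CartPole-v1"):
    # gym.vector.make steps num_envs copies of the env in lockstep
    return gym.vector.make(env_id, num_envs=num_envs)
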
Example #2
def main(args):

    # I/O
    config_file = args.config_file
    config = imp.load_source('config', config_file)  # note: `imp` is deprecated; importlib is the modern replacement
    if args.name:
        config.name = args.name

    trainset = Dataset(config.train_dataset_path)

    network = Network()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model:
        network.restore_model(config.restore_model, config.restore_scopes)

    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_format, proc_func=proc_func)


    # Main Loop
    print('\nStart Training\nname: {}\n# epochs: {}\nepoch_size: {}\nbatch_size: {}\n'.format(
            config.name, config.num_epochs, config.epoch_size, config.batch_format['size']))
    global_step = 0
    start_time = time.time()
    for epoch in range(config.num_epochs):

        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(global_step, config)
            batch = trainset.pop_batch_queue()

            wl, sm, global_step = network.train(
                batch['mu'].reshape(config.batch_format['size'], -1),
                batch['conv_final'].reshape(config.batch_format['size'], -1),
                batch['label'],
                learning_rate,
                config.keep_prob)

            wl['lr'] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                utils.display_info(epoch, step, duration, wl)
                summary_writer.add_summary(sm, global_step=global_step)

        # Save the model
        network.save_model(log_dir, global_step)
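
The loop above delegates the schedule to utils.get_updated_learning_rate. A hedged sketch of what such a helper might do, assuming a piecewise-constant decay; the config attribute names (learning_rate, learning_rate_schedule) are assumptions, not taken from the example:

def get_updated_learning_rate(global_step, config):
    # Piecewise-constant decay: keep the rate of the last boundary passed.
    lr = config.learning_rate
    for boundary, rate in sorted(config.learning_rate_schedule.items()):
        if global_step >= boundary:
            lr = rate
    return lr
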
Example #3
def main(args):
    print('start main')
    test_1v1_target = 'cfp_fp,agedb_30'
    test_1v1_target = 'cfp_fp'  # overrides the line above; only cfp_fp is evaluated here
    test_lfw_openset_numTrials = 0

    # I/O
    config_file = args.config_file
    config = imp.load_source('config', config_file)
    if args.name:
        config.name = args.name

    t1 = time.time()
    read_imagelist_from_file = False
    imagelist_file_for_train = 'data/list_to_train_ms1m-retinaface-t1-img.txt'
    if read_imagelist_from_file:
        trainset = Dataset(imagelist_file_for_train)
        print('time', time.time() - t1)
    else:
        trainset = Dataset(config.train_dataset_path)
        print('time', time.time() - t1)
        # trainset.write_datalist_to_file(imagelist_file_for_train)
    trainset.set_base_seed(config.base_random_seed)

    network = Network()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model:
        print(config.restore_model)
        network.restore_model(config.restore_model, config.restore_scopes,
                              config.exclude_restore_scopes)

    test_images_lfw = None
    if test_lfw_openset_numTrials > 0 and args.dataset_path:
        lfw_paths = get_paths_all(os.path.expanduser(args.dataset_path))
        test_images_lfw = preprocess(lfw_paths, config, False)

    ver_list = []
    ver_name_list = []
    for name in test_1v1_target.split(','):
        path = os.path.join(config.test_data_dir_mx, name + ".bin")
        if os.path.exists(path):
            image_size = [112, 112]
            data_list, issame_list = verification.load_bin(path, image_size)
            data_list = data_list[0].asnumpy()
            images = preprocess(data_list, network.config, False)
            data_set = (images, issame_list)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_format, proc_func=proc_func)
    # batch = trainset.pop_batch_queue()

    # Main Loop
    print(
        '\nStart Training\nname: {}\n# epochs: {}\nepoch_size: {}\nbatch_size: {}\n'
        .format(config.name, config.num_epochs, config.epoch_size,
                config.batch_format['size']))
    global_step = 0
    network.save_model(log_dir, global_step)  # initial snapshot (saved again at the top of each epoch below)
    start_time = time.time()
    for epoch in range(config.num_epochs + 1):

        # Save the model
        network.save_model(log_dir, global_step)

        if epoch > 0:
            info_w = ''
            if test_lfw_openset_numTrials > 0 and args.dataset_path:
                mu, sigma_sq = network.extract_feature(test_images_lfw,
                                                       64,
                                                       verbose=True)
                quality_score = -np.mean(np.log(sigma_sq), axis=1)
                print('sigma_sq percentile [0, 10, 30, 50, 70, 90, 100]')
                print(
                    'sigma_sq ',
                    np.percentile(quality_score.ravel(),
                                  [0, 10, 30, 50, 70, 90, 100]))
                feat_pfe = np.concatenate([mu, sigma_sq], axis=1)
                info1 = openset_lfw(mu, utils.pair_cosin_score,
                                    test_lfw_openset_numTrials)
                info_w += info1 + '\n'
                print(info1)
                info2 = openset_lfw(feat_pfe, utils.nvm_MLS_score,
                                    test_lfw_openset_numTrials)
                print(info2)
                info_w += info2 + '\n'
                info3 = openset_lfw(feat_pfe, utils.nvm_MLS_score_attention,
                                    test_lfw_openset_numTrials)
                print(info3)
                info_w += info3 + '\n'
            info_ver = ''
            for i in range(len(ver_list)):
                print('---', ver_name_list[i], '---')
                info_ver_ = verification.eval_images(ver_list[i][0],
                                                     ver_list[i][1], network,
                                                     128, 10)
                print(info_ver_)
                info_ver += '---' + ver_name_list[i] + '\n'
                info_ver += info_ver_ + '\n'
            info_w += info_ver + '\n'
            with open(os.path.join(log_dir, 'training-log.txt'), 'a') as f:
                f.write(info_w)
        if epoch == config.num_epochs:
            break

        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(
                global_step, config)
            batch = trainset.pop_batch_queue()
            if len(batch['image']) > len(batch['label']):
                # the batch may contain two images per label (e.g. augmented
                # pairs); duplicate the labels to match
                batch['label'] = np.concatenate(
                    [batch['label'], batch['label']], axis=0)

            wl, global_step = network.train(batch['image'], batch['label'],
                                            learning_rate, config.keep_prob)

            wl['lr'] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                with open(os.path.join(log_dir, 'training-log.txt'), 'a') as f:
                    s = utils.display_info(epoch, step, duration, wl)
                    print(s)
                    f.write(s + '\n')
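
The evaluation block above collapses the predicted variances sigma_sq into one quality score per image. A tiny self-contained example of that formula, showing that lower variance yields a higher score:

import numpy as np

# Two embeddings: low variance (confident) vs. high variance (uncertain).
sigma_sq = np.array([[0.1, 0.2],
                     [1.0, 2.0]])
quality_score = -np.mean(np.log(sigma_sq), axis=1)
print(quality_score)  # ~[1.96, -0.35]: the confident row scores higher
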
Example #4
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)

    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ',  action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents
    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = [
            'first_0'
        ]  # SlimeVolley: opponent is the first, the second agent is the learnable one

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)

    load_model(model, args)

    for individual_model in model.agents.values():
        individual_model.policy.share_memory()
        individual_model.policy_old.share_memory()
        individual_model.value.share_memory()
        ShareParameters(individual_model.optimizer)

    path = 'model/' + args.env
    os.makedirs(path, exist_ok=True)

    if args.fictitious:
        path = path + '/fictitious_'

    processes = []
    for p in range(args.num_envs):
        process = Process(target=parallel_rollout,
                          args=(p, args.env, model, writer, max_eps,
                                max_timesteps, selfplay_interval,
                                args.render, path, args.against_baseline,
                                args.selfplay, args.fictitious, SEED))  # the args mix shared and per-process objects
        process.daemon = True  # all processes closed when the main stops
        processes.append(process)

    for p in processes:
        p.start()

    for p in processes:  # wait for all workers to finish
        p.join()

    env.close()
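
ShareParameters is not defined in the snippet. A hedged sketch of the common A3C-style idiom it likely implements: moving the optimizer's per-parameter state into shared memory so the forked workers accumulate into the same buffers, mirroring policy.share_memory(). The state keys below assume a torch Adam-style optimizer; this is a sketch, not the example's actual helper:

import torch

def ShareParameters(optimizer):
    # Put Adam's per-parameter statistics into shared memory.
    for group in optimizer.param_groups:
        for p in group['params']:
            state = optimizer.state[p]
            state['step'] = 0
            state['exp_avg'] = torch.zeros_like(p.data)
            state['exp_avg_sq'] = torch.zeros_like(p.data)
            state['exp_avg'].share_memory_()
            state['exp_avg_sq'].share_memory_()
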
Example #5
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    # env = make_env(args.env, SEED, obs_type=obs_type)
    VectorEnv = [
        DummyVectorEnv, SubprocVectorEnv
    ][1]  # https://github.com/thu-ml/tianshou/blob/master/tianshou/env/venvs.py
    envs = VectorEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ])

    envs.seed(np.random.randint(1000,
                                size=args.num_envs).tolist())  # random seeding

    state_spaces = envs.observation_spaces[0]  # same for all env instances, so just take one
    action_spaces = envs.action_spaces[0]      # same for all env instances, so just take one
    print('state_spaces: ', state_spaces, ',  action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    envs.reset()
    agents = envs.agents[0]  # same for all env instances, so just take one
    print('agents: ', agents)

    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = [
            'first_0'
        ]  # SlimeVolley: opponent is the first, the second agent is the learnable one

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)

    load_model(model, args)

    path = f'model/{args.env}/'
    os.makedirs(path, exist_ok=True)

    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test, args=args)

    envs.close()
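
The DummyVectorEnv/SubprocVectorEnv toggle above comes from tianshou. A minimal standalone version of the same pattern, with a toy Gym env standing in for the external make_env:

import gym
from tianshou.env import DummyVectorEnv, SubprocVectorEnv

num_envs = 4
VectorEnv = DummyVectorEnv  # swap in SubprocVectorEnv for real parallelism
envs = VectorEnv([lambda: gym.make("CartPole-v1") for _ in range(num_envs)])
envs.seed(list(range(num_envs)))  # one seed per env instance
obs = envs.reset()                # stacked observations, shape (num_envs, ...)
envs.close()
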
Example #6
def main(args):
    # NOTE: the top of this example was truncated in the source; `agent` and
    # `test_env` are assumed to have been constructed earlier in the function.

    # init demo buffer
    demo_data = BCDataSet(args.demo_file)
    demo_buffer = DataLoader(demo_data, args.batch_size, shuffle=True)

    # init trainer and train
    trainer = BehaviorCloningTrainer(agent, test_env, demo_buffer,
                                     args.log_dir)
    if args.load_checkpoint is not None:
        trainer.load(args.load_checkpoint)
    trainer.train(args.n_epoch, args.n_tests_per_epoch)
    test_env.close()


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', help='path to yaml config', type=str)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args_ = parse_args()
    with open(args_.config) as f:
        config = yaml.safe_load(f)

    # create log-dir and copy config into it
    create_log_dir(config['log_dir'])
    shutil.copyfile(args_.config, config['log_dir'] + 'config.yaml')

    main(config)
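
Note the mismatch: the __main__ block passes a plain dict, while the function body reads attributes (args.demo_file, args.batch_size, and so on). The truncated portion presumably bridged the two; one common idiom, with illustrative keys, is:

import argparse

config = {'demo_file': 'data/demos.pkl', 'batch_size': 256}  # illustrative
args = argparse.Namespace(**config)  # dict keys become attributes
print(args.batch_size)               # 256
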
Example #7
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(
        args.env, SEED, obs_type=obs_type
    )  # TODO used for providing spaces info, can also modify SubprocVecEnv wrapper
    # https://stable-baselines.readthedocs.io/en/master/guide/vec_envs.html?highlight=multiprocessing
    envs = SubprocVecEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ],
                         start_method='spawn')

    # envs.seed(np.random.randint(1000, size=args.num_envs).tolist())  # random seeding
    envs.seed(SEED)  # fix seeding
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ',  action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    agents = env.agents
    print('agents: ', agents)

    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = [
            'first_0'
        ]  # SlimeVolley: opponent is the first, the second agent is the learnable one

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)

    load_model(model, args)

    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)

    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)

    env.close()  # also close the helper env that only provided the spaces info
    envs.close()
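
The same subprocess pattern in a self-contained form. The snippet above targets the older stable-baselines (see the linked docs); this sketch assumes stable-baselines3, whose SubprocVecEnv API mirrors it. 'spawn' sidesteps fork-related issues with CUDA and threads:

import gym
from stable_baselines3.common.vec_env import SubprocVecEnv

def make_toy_env():
    return gym.make("CartPole-v1")

if __name__ == "__main__":
    envs = SubprocVecEnv([make_toy_env for _ in range(4)], start_method="spawn")
    envs.seed(721)   # one base seed; workers are offset internally
    obs = envs.reset()
    envs.close()
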
Example #8
def main():
    args = get_args()
    print_args(args)
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)

    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ',  action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents

    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = [
            'first_0'
        ]  # SlimeVolley: opponent is the first, the second agent is the learnable one
    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)
    data_path = f"data/{args.env}/"
    os.makedirs(data_path, exist_ok=True)

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
        path = path + 'cnn_'
    if args.selfplay:
        os.makedirs(path + 'selfplay/', exist_ok=True)
    load_model(model, args)

    if args.fictitious:
        path = path + 'fictitious_'

    eval_env = make_env(args.env, np.random.randint(0, 100), obs_type=obs_type)
    evaluater = Evaluater(eval_env, max_timesteps)

    parallel_rollout(env, model, writer, evaluater=evaluater, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)

    env.close()
Example #9
def main(args):
    config_file = args.config_file
    # I/O
    config = utils.import_file(config_file, "config")

    trainset = Dataset(config.train_dataset_path, config.mode)
    testset = Dataset(config.test_dataset_path, config.mode)

    network = AdvFaces()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)

    if config.restore_model:
        network.restore_model(config.restore_model, config.restore_scopes)
    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_size,
                               batch_format=config.batch_format,
                               proc_func=proc_func)

    #
    # Main Loop
    #
    print("\nStart Training\n# epochs: %d\nepoch_size: %d\nbatch_size: %d\n" %
          (config.num_epochs, config.epoch_size, config.batch_size))
    global_step = 0
    start_time = time.time()
    for epoch in range(config.num_epochs):

        if epoch == 0:
            print("Loading Test Set")
            originals = preprocess(testset.images, config, is_training=False)
            targets = preprocess(testset.targets, config, False)
            print('Done loading test set')
            test_images = np.squeeze(
                originals[np.where(testset.labels < 5)[0]])
            target_feats = network.aux_matcher_extract_feature(targets)
            output_dir = os.path.join(log_dir, "samples")
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            utils.save_manifold(test_images,
                                os.path.join(output_dir, "original.jpg"))
            print("Computing initial success rates..")
            success_rate(network, config, originals, targets, target_feats,
                         log_dir, global_step)
            print("testing.")
            test(
                network,
                config,
                test_images,
                targets,
                log_dir,
                global_step,
            )

        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(
                global_step, config)
            batch = trainset.pop_batch_queue()
            wl, sm, global_step = network.train(
                batch["images"],
                batch["targets"],
                batch["labels"],
                learning_rate,
                config.keep_prob,
                trainset.num_classes,
            )
            wl["lr"] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                utils.display_info(epoch, step, duration, wl)
                summary_writer.add_summary(sm, global_step=global_step)

        # Computing success rate
        success_rate(network, config, originals, targets, target_feats,
                     log_dir, global_step)

        # Testing
        test(
            network,
            config,
            test_images,
            targets,
            log_dir,
            global_step,
        )

        # Save the model
        network.save_model(log_dir, global_step)
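
Several examples above log metrics through the TF1 FileWriter/add_summary pair. Reduced to a self-contained sketch with illustrative names (TF1.x API):

import tensorflow as tf  # TF1.x, as in the examples above

graph = tf.Graph()
with graph.as_default():
    loss = tf.placeholder(tf.float32, name="loss")
    summary_op = tf.summary.scalar("loss", loss)

writer = tf.summary.FileWriter("logs/demo", graph)
with tf.Session(graph=graph) as sess:
    for step in range(3):
        sm = sess.run(summary_op, feed_dict={loss: 1.0 / (step + 1)})
        writer.add_summary(sm, global_step=step)  # one scalar point per step
writer.close()
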