Example #1
def worker_policy(args, manager, config):
    init_logging_handler(args.log_dir, '_policy')
    agent = GAN(None, args, manager, config, 0, pre=True)
    
    best = float('inf')
    for e in range(args.epoch):
        agent.imitating(e)
        best = agent.imit_test(e, best)
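
worker_policy logs to its own handler and returns nothing, which suggests it is meant to run as a child process. A minimal launch sketch, assuming args, manager, and config are constructed as in Example #6 and using the same spawn context shown there:

import multiprocessing

def launch_policy_worker(args, manager, config):
    # spawn context matches Example #6; safer than fork when CUDA is involved
    ctx = multiprocessing.get_context('spawn')
    p = ctx.Process(target=worker_policy, args=(args, manager, config))
    p.start()
    p.join()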
Example #2
def worker_user(args, manager, config):
    init_logging_handler(args.log_dir, '_user')
    env = UserNeural(args, manager, config, True)
    
    best = float('inf')
    for e in range(args.epoch):
        env.imitating(e)
        best = env.imit_test(e, best)
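
Examples #1 and #2 share the same best-loss loop: imitate for one epoch, then let imit_test evaluate and keep the lowest validation loss seen so far. A self-contained sketch of that pattern, with a dummy evaluation standing in for the real validation and checkpointing (neither is shown here):

import random

def imit_test_sketch(model, epoch, best):
    # evaluate after each imitation epoch; act only on improvement
    loss = random.random()  # stand-in for a real validation pass
    if loss < best:
        best = loss
        print('epoch {}: new best validation loss {:.4f}'.format(epoch, best))
    return best

best = float('inf')
for e in range(5):
    best = imit_test_sketch(None, e, best)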
Example #3
def worker_estimator(args, manager, config, make_env):
    init_logging_handler(args.log_dir, '_estimator')
    agent = DiaSeq(make_env, args, manager, config, args.process, pre_irl=True)
    agent.load(args.save_dir+'/best')
    
    best0, best1 = float('inf'), float('inf')
    for e in range(args.epoch):
        agent.train_disc(e, args.batchsz_traj)
        best0 = agent.test_disc(e, args.batchsz, best0)
Example #4
def worker_policy_usr(args, manager, config):
    init_logging_handler(args.log_dir, '_policy_usr')
    if args.config == 'multiwoz':
        print("MultiWoz Agent Usr")
        agent = Policy(None, args, manager, config, 0, 'usr', True)
    elif args.config == 'dstcsgds':
        print("DSTC Agent Usr")
        agent = DSTCPolicy(None, args, manager, config, 0, 'usr', True)
    else:
        raise NotImplementedError(
            'Policy usr of the dataset {} not implemented'.format(args.config))

    best = float('inf')
    for e in range(2):
        agent.imitating(e)
        best = agent.imit_test(e, best)
Example #5
def main(config):
    parser = get_parser()
    argv = sys.argv[1:]
    args, _ = parser.parse_known_args(argv)

    init_logging_handler(config.log_dir)
    logging.info(args)
    config = update_cfg(config, args)

    logging.info("Start initializing")
    irl_model = RewardModule(config).to(device=device)   # the reward model itself; wrapped by RewardEstimator below
    reward_agent = RewardEstimator(config=config, irl_model=irl_model)
    
    user_policy = ActorCriticDiscrete(config).to(device=device)
    user_policy = init_net(user_policy)
    user_ppo = PPO(config, user_policy)

    system_policy = ActorCriticContinuous(config).to(device=device)
    system_policy = init_net(system_policy)

    init_system_policy = ActorCriticContinuous(config).to(device=device)
    init_system_policy.load_state_dict(system_policy.state_dict())

    system_ppo = PPO(config, system_policy, init_policy=init_system_policy)

    # the ground-truth reward is randomly initialized and never updated afterwards
    # reward_true = RewardTruth(config).to(device=device)
    reward_true = RewardTruthSampled(config).to(device)
    reward_true = init_net(reward_true)
    logging.info("Finish building module: reward agent, user ppo, system ppo")

    main_agent = InteractAgent(config=config,
                               user_agent=user_ppo,
                               user_reward=reward_agent,
                               system_agent=system_ppo,
                               reward_groundtruth=reward_true
                               )
    
    for e_id in range(config.master_epochs):
        main_agent.master_train(e_id)
    logging.info("@@@@@@@@@@  Finished  @@@@@@@@@@@")
Example #6
if __name__ == '__main__':
    parser = get_parser()
    argv = sys.argv[1:]
    args, _ = parser.parse_known_args(argv)

    if args.config == 'multiwoz':
        print("MultiWoz Config")
        config = MultiWozConfig()
    elif args.config == 'dstcsgds':
        print("DSTC Config")
        config = DSTCSGDSConfig(args.data_dir)
    else:
        raise NotImplementedError(
            'Config of the dataset {} not implemented'.format(args.config))

    init_logging_handler(args.log_dir)
    logging.debug(str(args))

    try:
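        # `mp` is assumed to come from an elided `import multiprocessing as mp`;
        # the spawn start method is the CUDA-safe way to create child processes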
        mp = mp.get_context('spawn')
    except RuntimeError:
        pass

    if args.pretrain:
        logging.debug('pretrain')
        if args.config == 'dstcsgds':
            print("DSTC Manager")
            manager = DSTCDataManager(args.data_dir, config)
        elif args.config == 'multiwoz':
            print("MultiWoz Manager")
            manager = DataManager(args.data_dir, config)
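
The parser returned by get_parser is never shown; across these examples it must expose at least config, data_dir, log_dir, save_dir, epoch, batchsz, batchsz_traj, process, and pretrain. A minimal reconstruction for experimentation, with placeholder defaults rather than the originals:

import argparse

def get_parser_sketch():
    # hypothetical reconstruction covering only the attributes read above
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='multiwoz')  # 'multiwoz' or 'dstcsgds'
    parser.add_argument('--data_dir', type=str, default='data')
    parser.add_argument('--log_dir', type=str, default='log')
    parser.add_argument('--save_dir', type=str, default='model')
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--batchsz', type=int, default=32)
    parser.add_argument('--batchsz_traj', type=int, default=32)
    parser.add_argument('--process', type=int, default=4)
    parser.add_argument('--pretrain', action='store_true')
    return parser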
Example #7
import os
import sys

from fabric.api import *
from fabric.colors import green, red

# append the home path
PROJECT_HOME = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(PROJECT_HOME)

from settings import (hosts, user, key_filename, DirConfig as DC)
from utils import init_logging_handler, timestamp

env.hosts, env.user, env.key_filename = hosts, user, key_filename

logger = init_logging_handler()


def prepare_deploy():
    with lcd(DC.ANALYSIS_DIR):
        local("ipython analyze.py")


def zip_files():
    with lcd(DC.LOCAL_RELATIVE_DATA_DIR + 'data_' + timestamp()):
        local('tar -czf data.tar.gz ./')
        local('mv data.tar.gz ../../automatic_deploy/deploy_files')


def upload():
    # upload the zipped files
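    # Hypothetical completion; the original body is cut off here. Pushes the
    # archive produced by zip_files() to the remote hosts with Fabric 1's put().
    # DC.REMOTE_DEPLOY_DIR is an assumed attribute, not shown in this snippet.
    put('deploy_files/data.tar.gz', DC.REMOTE_DEPLOY_DIR)
    print(green('upload finished'))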