def __init__(self, is_train=False, dataset='Multiwoz'):
    """Set up the DQN-style policy: config, MultiWOZ vector, replay memory, networks.

    Args:
        is_train (bool): when True, enable logging and build the optimizer/loss.
        dataset (str): only 'Multiwoz' constructs the state/action vector here.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'config.json'), 'r') as cfg_file:
        cfg = json.load(cfg_file)

    self.save_dir = os.path.join(here, cfg['save_dir'])
    self.save_per_epoch = cfg['save_per_epoch']
    self.training_iter = cfg['training_iter']
    self.training_batch_iter = cfg['training_batch_iter']
    self.batch_size = cfg['batch_size']
    self.epsilon = cfg['epsilon_spec']['start']
    self.rule_bot = RuleBasedMultiwozBot()
    self.gamma = cfg['gamma']
    self.is_train = is_train
    if is_train:
        init_logging_handler(os.path.join(here, cfg['log_dir']))

    # Build the MultiWOZ state/action vectorizer (composite actions enabled).
    if dataset == 'Multiwoz':
        voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt')
        voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt')
        self.vector = MultiWozVector(voc_file, voc_opp_file,
                                     composite_actions=True,
                                     vocab_size=cfg['vocab_size'])

    # Experience replay buffer.
    self.memory = MemoryReplay(cfg['memory_size'])

    # Online network plus a deep-copied target network; online/eval handles
    # alias the target copy, matching the original wiring.
    self.net = EpsilonGreedyPolicy(self.vector.state_dim, cfg['hv_dim'],
                                   self.vector.da_dim,
                                   cfg['epsilon_spec']).to(device=DEVICE)
    self.target_net = copy.deepcopy(self.net)
    self.online_net = self.target_net
    self.eval_net = self.target_net

    if is_train:
        self.net_optim = optim.Adam(self.net.parameters(), lr=cfg['lr'])
        self.loss_fn = nn.MSELoss()
def generate_necessary_file(root_dir):
    """Build the MultiWOZ vector and the action<->index maps used by the policy.

    Args:
        root_dir (str): repository root containing data/ and convlab2/.

    Returns:
        tuple: (MultiWozVector, act->index dict, index->act dict).
    """
    sys_voc = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt')
    usr_voc = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt')
    vector = MultiWozVector(sys_voc, usr_voc)
    mapping_path = os.path.join(root_dir, 'convlab2/policy/act_500_list.txt')
    act2ind_dict, ind2act_dict = read_action_map(mapping_path)
    return vector, act2ind_dict, ind2act_dict
def __init__(self, is_train=False, dataset='Multiwoz'):
    """Set up the PG policy: config, MultiWOZ vector, policy network and optimizer.

    Args:
        is_train (bool): when True, enable logging, build the optimizer and
            warm-start from the bundled best model.
        dataset (str): only 'Multiwoz' constructs the state/action vector here.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    # Fix: resolve the config path relative to this module instead of a
    # hard-coded, user-specific absolute path ("/home/raliegh/..."), so the
    # module works on any machine — consistent with the sibling policies.
    with open(os.path.join(here, 'config.json'), 'r') as f:
        cfg = json.load(f)
    self.save_dir = os.path.join(here, cfg['save_dir'])
    self.save_per_epoch = cfg['save_per_epoch']
    self.update_round = cfg['update_round']
    self.optim_batchsz = cfg['batchsz']
    self.gamma = cfg['gamma']
    self.is_train = is_train
    if is_train:
        init_logging_handler(cfg['log_dir'])

    # Load vocabulary and build the state/action vectorizer.
    if dataset == 'Multiwoz':
        voc_file = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt')
        voc_opp_file = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt')
        self.vector = MultiWozVector(voc_file, voc_opp_file)

    self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'],
                                      self.vector.da_dim).to(device=DEVICE)
    if is_train:
        self.policy_optim = optim.RMSprop(self.policy.parameters(), lr=cfg['lr'])
        # Fix: warm-start from the model shipped under this module's save/
        # directory rather than an absolute home-directory path.
        self.load(os.path.join(here, 'save/best/best_pg_from_web.pol.mdl'))
def __init__(self, is_train=False, dataset='Multiwoz'):
    """Actor-critic setup: policy and value networks, each with its own optimizer.

    Args:
        is_train (bool): when True, enable logging and build both optimizers.
        dataset (str): only 'Multiwoz' constructs the state/action vector here.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'config.json'), 'r') as f:
        cfg = json.load(f)

    self.save_dir = os.path.join(here, cfg['save_dir'])
    self.save_per_epoch = cfg['save_per_epoch']
    self.update_round = cfg['update_round']
    self.optim_batchsz = cfg['batchsz']
    self.gamma = cfg['gamma']
    self.epsilon = cfg['epsilon']
    self.tau = cfg['tau']
    self.is_train = is_train
    if is_train:
        init_logging_handler(os.path.join(here, cfg['log_dir']))

    # Construct policy and value network over the MultiWOZ vector space.
    if dataset == 'Multiwoz':
        sys_voc = os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt')
        usr_voc = os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt')
        self.vector = MultiWozVector(sys_voc, usr_voc)

    self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'],
                                      self.vector.da_dim).to(device=DEVICE)
    self.value = Value(self.vector.state_dim, cfg['hv_dim']).to(device=DEVICE)
    if is_train:
        self.policy_optim = optim.RMSprop(self.policy.parameters(),
                                          lr=cfg['policy_lr'])
        self.value_optim = optim.Adam(self.value.parameters(),
                                      lr=cfg['value_lr'])
def get_vector(self):
    """Return a fresh MultiWozVector built from the repo's dialog-act vocab files."""
    # Climb five directory levels from this file to reach the project root.
    root = os.path.abspath(__file__)
    for _ in range(5):
        root = os.path.dirname(root)
    return MultiWozVector(os.path.join(root, 'data/multiwoz/sys_da_voc.txt'),
                          os.path.join(root, 'data/multiwoz/usr_da_voc.txt'))
def __init__(self, manager, cfg):
    """Prepare training data and a MultiDiscretePolicy with an Adam optimizer."""
    self._init_data(manager, cfg)
    vec = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))
    self.policy = MultiDiscretePolicy(vec.state_dim, cfg['h_dim'],
                                      vec.da_dim).to(device=DEVICE)
    self.policy.eval()
    self.policy_optim = torch.optim.Adam(self.policy.parameters(), lr=cfg['lr'])
def __init__(self, cfg, load_path):
    """Initialise the MGAIL trainer: data, MultiWOZ vector, BCE loss and policy.

    Args:
        cfg (dict): training configuration.
        load_path (str): optional checkpoint to restore the policy from.
    """
    self._init_data(cfg, load_path)
    self.vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))
    # Plain BCE here; the pos_weight-ed BCEWithLogitsLoss override from the
    # MLE_Trainer_Abstract is intentionally not used.
    self.criterion = nn.BCELoss()
    self.policy = MGAIL()
    if load_path:
        self.policy.load(load_path)
def __init__(self, train=True):
    """Configure the dueling-DQN agent: exploration schedule, networks, optimizer.

    Args:
        train (bool): training mode starts at epsilon_init; evaluation mode
            stays at epsilon_final.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'config.json'), 'r') as f:
        cfg = json.load(f)

    self.gamma = cfg['gamma']
    self.epsilon_init = cfg['epsilon_init']
    self.epsilon_final = cfg['epsilon_final']
    self.istrain = train
    # Fully exploratory when training, near-greedy when evaluating.
    self.epsilon = self.epsilon_init if self.istrain else self.epsilon_final
    self.epsilon_degrade_period = cfg['epsilon_degrade_period']
    self.tau = cfg['tau']
    self.action_number = cfg['action_number']  # total number of actions considered

    init_logging_handler(os.path.join(here, cfg['log_dir']))

    # Action index <-> dialog-act mapping (500-action list).
    _, self.ind2act_dict = read_action_map(
        os.path.join(root_dir, 'convlab2/policy/act_500_list.txt'))

    # State/action vectorizer for MultiWOZ 2.1.
    self.vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))

    # Online Q network to be trained, plus a target copy initialised to the
    # same weights.
    self.Q = DuelDQN(self.vector.state_dim, cfg['h_dim'],
                     self.action_number).to(device=DEVICE)
    self.target_Q = DuelDQN(self.vector.state_dim, cfg['h_dim'],
                            self.action_number).to(device=DEVICE)
    self.target_Q.load_state_dict(self.Q.state_dict())

    # Adam with step-decayed learning rate, floored at min_lr.
    self.optimizer = optim.Adam(self.Q.parameters(), lr=cfg['lr'],
                                weight_decay=cfg['weight_decay'])
    self.scheduler = StepLR(self.optimizer, step_size=cfg['lr_decay_step'],
                            gamma=cfg['lr_decay'])
    self.min_lr = cfg['min_lr']

    self.criterion = torch.nn.MSELoss()
def __init__(self, manager, cfg):
    """MLE trainer variant: pos_weight-ed BCE loss plus an RMSprop optimizer."""
    self._init_data(manager, cfg)
    vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))
    # Override the loss defined in MLE_Trainer_Abstract so positive dialog-act
    # labels are up-weighted by cfg['pos_weight'].
    weight = cfg['pos_weight'] * torch.ones(vector.da_dim).to(device=DEVICE)
    self.multi_entropy_loss = nn.BCEWithLogitsLoss(pos_weight=weight)
    self.policy = MultiDiscretePolicy(vector.state_dim, cfg['h_dim'],
                                      vector.da_dim).to(device=DEVICE)
    self.policy.eval()
    self.policy_optim = torch.optim.RMSprop(self.policy.parameters(),
                                            lr=cfg['lr'],
                                            weight_decay=cfg['weight_decay'])
def __init__(self, save_path):
    """Load preprocessed data if present; otherwise build it from the raw corpus.

    Args:
        save_path (str): directory name (relative to this file) holding the
            processed data cache.
    """
    # Climb five directory levels from this file to reach the project root.
    root_dir = os.path.abspath(__file__)
    for _ in range(5):
        root_dir = os.path.dirname(root_dir)
    self.vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))

    processed_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 save_path)
    if os.path.exists(processed_dir):
        print('Load processed data file')
        self._load_data(processed_dir)
    else:
        print('Start preprocessing the dataset')
        self._build_data(root_dir, processed_dir)
def __init__(self):
    """Load config and build a MultiDiscretePolicy over the MultiWOZ vector space."""
    here = os.path.dirname(os.path.abspath(__file__))
    # Climb five directory levels from this file to reach the project root.
    root_dir = os.path.abspath(__file__)
    for _ in range(5):
        root_dir = os.path.dirname(root_dir)
    with open(os.path.join(here, 'config.json'), 'r') as f:
        cfg = json.load(f)
    self.vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))
    self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'],
                                      self.vector.da_dim).to(device=DEVICE)
def __init__(
        self,
        archive_file=DEFAULT_ARCHIVE_FILE,
        model_file='https://tatk-data.s3-ap-northeast-1.amazonaws.com/mle_policy_multiwoz.zip'
):
    """Build the MLE policy network and restore weights from an archive/model file.

    Args:
        archive_file: local archive with pretrained weights.
        model_file: fallback URL to download the pretrained model from.
    """
    # Climb five directory levels from this file to reach the project root.
    root_dir = os.path.abspath(__file__)
    for _ in range(5):
        root_dir = os.path.dirname(root_dir)
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, 'config.json'), 'r') as f:
        cfg = json.load(f)
    self.vector = MultiWozVector(
        os.path.join(root_dir, 'data/multiwoz/sys_da_voc.txt'),
        os.path.join(root_dir, 'data/multiwoz/usr_da_voc.txt'))
    self.policy = MultiDiscretePolicy(self.vector.state_dim, cfg['h_dim'],
                                      self.vector.da_dim).to(device=DEVICE)
    self.load(archive_file, model_file, cfg['load'])