def main():
    start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)

    # RL configuration
    env = 'gpu'
    epoch_id = '29'
    # folder = '2019-06-20-09-19-39-sl_word'
    # simulator_folder = '2019-06-20-09-19-39-sl_word'
    folder = '2019-12-06-02-20-58-sl_word_dlg_noattn'
    simulator_folder = '2019-12-06-02-20-58-sl_word_dlg_noattn'
    exp_dir = os.path.join('config_log_model', folder, 'rl-' + start_time)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/negotiate/train.txt',
        val_path='../data/negotiate/val.txt',
        test_path='../data/negotiate/test.txt',
        selfplay_path='../data/negotiate/selfplay.txt',
        selfplay_eval_path='../data/negotiate/selfplay_eval.txt',
        sim_config_path=os.path.join('config_log_model', simulator_folder, 'config.json'),
        sim_model_path=os.path.join('config_log_model', simulator_folder, '{}-model'.format(epoch_id)),
        sv_config_path=os.path.join('config_log_model', folder, 'config.json'),
        sv_model_path=os.path.join('config_log_model', folder, '{}-model'.format(epoch_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best_model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best_model'),
        judger_model_path=os.path.join('../FB', 'sv_model.th'),
        judger_config_path=os.path.join('../FB', 'judger_config.json'),
        record_path=exp_dir,
        record_freq=100,
        use_gpu=(env == 'gpu'),
        nepoch=4,
        nepisode=0,
        sv_train_freq=4,
        eval_freq=0,
        max_words=100,
        rl_lr=0.1,
        momentum=0.1,
        nesterov=True,
        gamma=0.95,
        rl_clip=1.0,  # NOTE: rl_clip appears to be very important for stable RL training
        ref_text='../data/negotiate/train.txt',
        domain='object_division',
        max_nego_turn=50,
        random_seed=0,
    )

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus
    sv_config = Pack(json.load(open(rl_config.sv_config_path)))
    sim_config = Pack(json.load(open(rl_config.sim_config_path)))
    # TODO: revise use_gpu in the loaded configs
    sv_config['use_gpu'] = rl_config.use_gpu
    sim_config['use_gpu'] = rl_config.use_gpu
    corpus = DealCorpus(sv_config)

    # load models for the two agents
    # TARGET AGENT
    sys_model = HRED(corpus, sv_config)
    if sv_config.use_gpu:
        sys_model.cuda()  # TODO gpu -> cpu transfer
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path, map_location=lambda storage, location: storage))
    # we don't want to use Dropout during RL
    sys_model.eval()
    sys = RlAgent(sys_model, corpus, rl_config, name='System')

    # SIMULATOR: we keep usr frozen, i.e. we don't update its parameters
    usr_model = HRED(corpus, sim_config)
    if sim_config.use_gpu:
        usr_model.cuda()  # TODO gpu -> cpu transfer
    usr_model.load_state_dict(
        th.load(rl_config.sim_model_path, map_location=lambda storage, location: storage))
    usr_model.eval()
    usr_type = LstmAgent
    usr = usr_type(usr_model, corpus, rl_config, name='User')

    # load FB judger model
    judger_config = Pack(json.load(open(rl_config.judger_config_path)))
    judger_config['cuda'] = rl_config.use_gpu
    judger_config['data'] = '../data/negotiate'
    judger_device_id = FB_use_cuda(judger_config.cuda)
    judger_word_corpus = FbWordCorpus(judger_config.data,
                                      freq_cutoff=judger_config.unk_threshold,
                                      verbose=True)
    judger_model = FbDialogModel(judger_word_corpus.word_dict,
                                 judger_word_corpus.item_dict,
                                 judger_word_corpus.context_dict,
                                 judger_word_corpus.output_length,
                                 judger_config,
                                 judger_device_id)
    if judger_device_id is not None:
        judger_model.cuda(judger_device_id)
    judger_model.load_state_dict(
        th.load(rl_config.judger_model_path, map_location=lambda storage, location: storage))
    judger_model.eval()
    judger = Judger(judger_model, judger_device_id)

    # initialize communication dialogue between the two agents
    dialog = Dialog([sys, usr], judger, rl_config)
    ctx_gen = ContextGenerator(rl_config.selfplay_path)

    # simulation (evaluation) module
    dialog_eval = DialogEval([sys, usr], judger, rl_config)
    ctx_gen_eval = ContextGeneratorEval(rl_config.selfplay_eval_path)

    # start RL
    reinforce = Reinforce(dialog, ctx_gen, corpus, sv_config, sys_model, usr_model,
                          rl_config, dialog_eval, ctx_gen_eval)
    reinforce.run()

    # save sys model
    th.save(sys_model.state_dict(), rl_config.rl_model_path)

    end_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)
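
# The RL script above round-trips its Pack configs through json.dump() while also
# reading fields as attributes (e.g. rl_config.rl_config_path). A minimal sketch of a
# container that supports both, assuming Pack is essentially a dict subclass with
# attribute access (the actual Pack class in this repo may differ):
class PackSketch(dict):
    def __getattr__(self, name):
        # attribute reads fall back to dict lookup, so cfg.use_gpu == cfg['use_gpu']
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

# usage: cfg = PackSketch(use_gpu=True); cfg.use_gpu -> True; json.dump(cfg, f) still
# works because PackSketch remains a plain dict.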
logger.info('[START]\n{}\n{}'.format(start_time, '=' * 30))

# save configuration
with open(os.path.join(saved_path, 'config.json'), 'w') as f:
    json.dump(config, f, indent=4)  # sort_keys=True

corpus = DealCorpus(config)
train_dial, val_dial, test_dial = corpus.get_corpus()
train_data = DealDataLoaders('Train', train_dial, config)
val_data = DealDataLoaders('Val', val_dial, config)
test_data = DealDataLoaders('Test', test_dial, config)

evaluator = BleuEvaluator('Deal')

model = HRED(corpus, config)
if config.use_gpu:
    model.cuda()

best_epoch = None
if not config.forward_only:
    try:
        best_epoch = train(model, train_data, val_data, test_data, config,
                           evaluator, gen=generate)
    except KeyboardInterrupt:
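
# train() itself is not shown in this fragment, but the DealDataLoaders splits it
# receives are consumed with the epoch_init()/next_batch() protocol used in the
# debugging snippet below (next_batch() returns None once the split is exhausted).
# A minimal sketch of one pass over a split; step_fn is a hypothetical callback:
def iterate_epoch(data_loader, config, step_fn):
    data_loader.epoch_init(config)
    batch = data_loader.next_batch()
    while batch is not None:
        step_fn(batch)
        batch = data_loader.next_batch()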
# save configuration
with open(os.path.join(saved_path, 'config.json'), 'w') as f:
    json.dump(config, f, indent=4)  # sort_keys=True

corpus = DealCorpus(config)
train_dial, val_dial, test_dial = corpus.get_corpus()
train_data = DealDataLoaders('Train', train_dial, config)
val_data = DealDataLoaders('Val', val_dial, config)
test_data = DealDataLoaders('Test', test_dial, config)

evaluator = BleuEvaluator('Deal')

hmm = Hmm(corpus, config)
config.pretrain_folder = '2019-12-06-02-20-58-sl_word_dlg_noattn'
word = HRED(corpus, config)
hmm.cuda()
word.cuda()

from latent_dialog.enc2dec.decoders import TEACH_FORCE

# debug: compare the HMM model and the word-level HRED on each training batch,
# fetching the next batch at the end of the loop so no batch is skipped and the
# models are never called on None
train_data.epoch_init(config)
batch = train_data.next_batch()
while batch is not None:
    hmm_out = hmm(batch, TEACH_FORCE, get_marginals=True)
    word_out = word(batch, TEACH_FORCE, get_marginals=True)
    import pdb
    pdb.set_trace()
    batch = train_data.next_batch()

if config.use_gpu:
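
# Instead of inspecting hmm_out / word_out by hand in pdb on every batch, one could
# print a quick numeric summary. A minimal sketch, assuming both outputs are dict-like
# containers of tensors (the actual output structure of Hmm/HRED here may differ):
import torch as th

def summarize_marginals(hmm_out, word_out):
    shared_keys = set(hmm_out.keys()) & set(word_out.keys())
    for key in sorted(shared_keys):
        h, w = hmm_out[key], word_out[key]
        if th.is_tensor(h) and th.is_tensor(w) and h.shape == w.shape:
            # largest element-wise discrepancy between the two models' outputs
            diff = (h.float() - w.float()).abs().max().item()
            print('{}: max abs diff = {:.6f}'.format(key, diff))
        else:
            print('{}: hmm type={}, word type={}'.format(
                key, type(h).__name__, type(w).__name__))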