def __init__(self, model, args, name='Alice'):
    """Set up a non-human agent around ``model``.

    Args:
        model: the model this agent wraps.
        args: parsed options; ``args.domain`` names the domain to look up.
        name: display name of the agent.
    """
    super(LstmAgent, self).__init__()
    self.model, self.args, self.name = model, args, name
    # this agent is driven by the model, not by a person
    self.human = False
    domain_name = args.domain
    self.domain = domain.get_domain(domain_name)
def gather_information(name, url):
    """Run every lookup helper for ``url`` and pass the results to create_report.

    Args:
        name: label forwarded unchanged to ``create_report``.
        url: the URL to inspect; its domain is derived with ``get_domain``.
    """
    host = get_domain(url)
    # the helpers are called in a fixed order: address, nmap, robots, whois
    address = get_address(host)
    scan_result = get_nmap(host, '-F')
    robots_txt = get_robots(url)
    whois_record = get_whois(host)
    create_report(
        name,
        url,
        host,
        address,
        scan_result,
        robots_txt,
        whois_record,
    )
def __init__(self, agents, args):
    """Set up a dialog between exactly two agents.

    Args:
        agents: sequence of the two participating agents.
        args: parsed options; ``args.domain`` names the domain to look up.
    """
    # only two-party dialogs are supported for now
    assert len(agents) == 2
    self.agents, self.args = agents, args
    domain_name = args.domain
    self.domain = domain.get_domain(domain_name)
    # the container must exist before the metrics are registered
    self.metrics = MetricsContainer()
    self._register_metrics()
def main():
    """Compute and log the average rank of ground-truth sentences.

    For every dialogue in the dataset the agent is fed the context. For each
    turn flagged as "you", the rank of the ground-truth sentence is computed
    with ``compute_rank`` and the agent state is advanced using the
    ground-truth sentence. Other turns are simply read by the agent.

    The running and final averages are reported as ``nan`` while no turn has
    been scored, instead of raising ``ZeroDivisionError``.
    """
    parser = argparse.ArgumentParser(description='Negotiator')
    parser.add_argument('--dataset', type=str, default='./data/negotiate/val.txt',
                        help='location of the dataset')
    parser.add_argument('--model_file', type=str,
                        help='model file')
    parser.add_argument('--smart_ai', action='store_true', default=False,
                        help='to use rollouts')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--temperature', type=float, default=1.0,
                        help='temperature')
    parser.add_argument('--domain', type=str, default='object_division',
                        help='domain for the dialogue')
    parser.add_argument('--log_file', type=str, default='',
                        help='log file')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    model = utils.load_model(args.model_file)
    ai = LstmAgent(model, args)
    logger = DialogLogger(verbose=True, log_file=args.log_file)
    domain = get_domain(args.domain)

    score_func = rollout if args.smart_ai else likelihood

    dataset, sents = read_dataset(args.dataset)
    ranks, n, k = 0, 0, 0
    for ctx, dialog in dataset:
        start_time = time.time()
        ai.feed_context(ctx)
        for sent, you in dialog:
            if you:
                rank = compute_rank(sent, sents, ai, domain, args.temperature, score_func)
                # Compute lang_h for the groundtruth sentence
                enc = ai._encode(sent, ai.model.word_dict)
                _, ai.lang_h, lang_hs = ai.model.score_sent(
                    enc, ai.lang_h, ai.ctx_h, args.temperature)
                ai.lang_hs.append(lang_hs)
                ai.words.append(ai.model.word2var('YOU:'))
                ai.words.append(Variable(enc))
                ranks += rank
                n += 1
            else:
                ai.read(sent)
        k += 1
        time_elapsed = time.time() - start_time
        # n stays 0 until the first "you" turn has been scored
        avg_rank = 1. * ranks / n if n else float('nan')
        logger.dump('dialogue %d | avg rank %.3f | raw %d/%d | time %.3f' % (
            k, avg_rank, ranks, n, time_elapsed))

    # an empty dataset (or one without any "you" turn) leaves n at 0
    final_avg_rank = 1. * ranks / n if n else float('nan')
    logger.dump('final avg rank %.3f' % final_avg_rank)
def __init__(self, cfg):
    """Store the crawl configuration on the Spider class and crawl the base URL.

    Args:
        cfg: mapping providing the keys 'project', 'base_url', 'queue' and
            'crawled'.
    """
    # settings live on the class, not on the instance
    Spider.config = cfg
    Spider.project_name = cfg['project']
    Spider.base_url = cfg['base_url']
    Spider.domain_name = get_domain(Spider.base_url)
    Spider.queue_file = cfg['queue']
    Spider.crawled_file = cfg['crawled']

    self.boot()
    start_url = Spider.base_url
    self.crawl_page('Spider one', start_url)
def __init__(self, sess, model, use_rollouts=False):
    """Keep the session and model and set the rollout parameters.

    Args:
        sess: session object stored on the instance.
        model: model object stored on the instance.
        use_rollouts: flag stored on the instance; only used for generation.
    """
    self.sess, self.model = sess, model
    self.all_rewards = list()
    self.domain = domain.get_domain('object_division')
    # only used for generation
    self.use_rollouts = use_rollouts

    # params for rollouts
    self.ncandidate, self.nrollout, self.rollout_len = 10, 5, 100
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the prediction model on top of a saved language model.

    The language model is loaded from ``args.lang_model_file`` and switched
    to eval mode. Its ``cluster_model`` supplies the number of clusters, the
    cluster hidden size and the initial latent variables used below.

    Args:
        word_dict: dictionary whose length sizes the word embedding.
        item_dict: stored on the instance.
        context_dict: dictionary whose length is passed to the context encoder.
        count_dict: dictionary whose length sets the number of bottleneck shards.
        args: parsed options (sizes, dropout, init range, file paths).
    """
    super(LatentClusteringPredictionModel, self).__init__()

    self.lang_model = utils.load_model(args.lang_model_file)
    self.lang_model.eval()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    self.ctx_encoder = MlpContextEncoder(len(self.context_dict),
        domain.input_length(), args.nembed_ctx, args.nhid_ctx,
        args.dropout, args.init_range, False)

    self.word_embed = nn.Embedding(len(self.word_dict), args.nembed_word)

    self.encoder = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    # input is the concatenation of two nhid_lang-sized vectors and one
    # nhid_ctx-sized vector
    self.embed2hid = nn.Sequential(
        nn.Linear(args.nhid_lang + args.nhid_lang + args.nhid_ctx,
                  self.args.nhid_lang),
        nn.Tanh())

    # cluster count and output size come from the loaded cluster model's own
    # args, not from this run's args
    self.latent_bottleneck = ShardedLatentBottleneckModule(
        num_shards=len(count_dict),
        num_clusters=self.lang_model.cluster_model.args.num_clusters,
        input_size=args.nhid_lang,
        output_size=self.lang_model.cluster_model.args.nhid_cluster,
        args=args)

    # copy lat vars from the cluster model
    self.latent_bottleneck.latent_vars.weight.data.copy_(
        self.lang_model.cluster_model.latent_bottleneck.latent_vars.weight.
        data)

    self.memory = RecurrentUnit(
        input_size=args.nhid_lang,
        hidden_size=self.lang_model.cluster_model.args.nhid_cluster,
        args=args)

    self.dropout = nn.Dropout(args.dropout)

    self.kldiv = nn.KLDivLoss(reduction='sum')

    # init
    self.word_embed.weight.data.uniform_(-args.init_range, args.init_range)
    init_rnn(self.encoder, args.init_range)
    init_cont(self.embed2hid, args.init_range)
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the language model on top of a saved cluster model.

    The cluster model is loaded from ``args.cluster_model_file`` and switched
    to eval mode. Its ``args.nhid_cluster`` contributes to the input size of
    ``cond2input``.

    Args:
        word_dict: dictionary whose length sizes the word embedding and mask.
        item_dict: stored on the instance.
        context_dict: stored on the instance.
        count_dict: stored on the instance.
        args: parsed options (sizes, dropout, init range, file paths).
    """
    super(LatentClusteringLanguageModel, self).__init__()

    self.cluster_model = utils.load_model(args.cluster_model_file)
    self.cluster_model.eval()

    # NOTE(review): `domain` is not referenced again in this constructor;
    # confirm whether the lookup is kept only to validate args.domain
    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    self.word_embed = nn.Embedding(len(self.word_dict), args.nembed_word)

    self.encoder = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.hid2output = nn.Sequential(
        nn.Linear(args.nhid_lang, args.nembed_word),
        nn.Dropout(args.dropout))

    # input width is nhid_lang plus the loaded cluster model's nhid_cluster
    self.cond2input = nn.Linear(
        args.nhid_lang + self.cluster_model.args.nhid_cluster,
        args.nembed_word)

    self.decoder_reader = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.decoder_writer = nn.GRUCell(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    # tie the weights between reader and writer
    self.decoder_writer.weight_ih = self.decoder_reader.weight_ih_l0
    self.decoder_writer.weight_hh = self.decoder_reader.weight_hh_l0
    self.decoder_writer.bias_ih = self.decoder_reader.bias_ih_l0
    self.decoder_writer.bias_hh = self.decoder_reader.bias_hh_l0

    self.dropout = nn.Dropout(args.dropout)

    # mask built from the vocabulary indices of the four special tokens
    self.special_token_mask = make_mask(len(word_dict), [
        word_dict.get_idx(w) for w in ['<unk>', 'YOU:', 'THEM:', '<pad>']
    ])

    # init
    self.word_embed.weight.data.uniform_(-args.init_range, args.init_range)
    init_rnn(self.decoder_reader, args.init_range)
    init_linear(self.cond2input, args.init_range)
    init_cont(self.hid2output, args.init_range)
    init_rnn(self.encoder, args.init_range)
def main():
    """Compute and log the average rank of ground-truth sentences.

    For every dialogue in the dataset the agent is fed the context. For each
    turn flagged as "you", the rank of the ground-truth sentence is computed
    with ``compute_rank`` and the agent state is advanced using the
    ground-truth sentence. Other turns are simply read by the agent.

    The running and final averages are reported as ``nan`` while no turn has
    been scored, instead of raising ``ZeroDivisionError``.
    """
    parser = argparse.ArgumentParser(description='Negotiator')
    parser.add_argument('--dataset', type=str, default='./data/negotiate/val.txt',
                        help='location of the dataset')
    parser.add_argument('--model_file', type=str,
                        help='model file')
    parser.add_argument('--smart_ai', action='store_true', default=False,
                        help='to use rollouts')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed')
    parser.add_argument('--temperature', type=float, default=1.0,
                        help='temperature')
    parser.add_argument('--domain', type=str, default='object_division',
                        help='domain for the dialogue')
    parser.add_argument('--log_file', type=str, default='',
                        help='log file')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    model = utils.load_model(args.model_file)
    ai = LstmAgent(model, args)
    logger = DialogLogger(verbose=True, log_file=args.log_file)
    domain = get_domain(args.domain)

    score_func = rollout if args.smart_ai else likelihood

    dataset, sents = read_dataset(args.dataset)
    ranks, n, k = 0, 0, 0
    for ctx, dialog in dataset:
        start_time = time.time()
        # start new conversation
        ai.feed_context(ctx)
        for sent, you in dialog:
            if you:
                # if it is your turn to say, take the target word and compute its rank
                rank = compute_rank(sent, sents, ai, domain, args.temperature, score_func)
                # compute lang_h for the groundtruth sentence
                enc = ai._encode(sent, ai.model.word_dict)
                _, ai.lang_h, lang_hs = ai.model.score_sent(enc, ai.lang_h, ai.ctx_h, args.temperature)
                # save hidden states and the utterance
                ai.lang_hs.append(lang_hs)
                ai.words.append(ai.model.word2var('YOU:'))
                ai.words.append(Variable(enc))
                ranks += rank
                n += 1
            else:
                ai.read(sent)
        k += 1
        time_elapsed = time.time() - start_time
        # n stays 0 until the first "you" turn has been scored
        avg_rank = 1. * ranks / n if n else float('nan')
        logger.dump('dialogue %d | avg rank %.3f | raw %d/%d | time %.3f' % (
            k, avg_rank, ranks, n, time_elapsed))

    # an empty dataset (or one without any "you" turn) leaves n at 0
    final_avg_rank = 1. * ranks / n if n else float('nan')
    logger.dump('final avg rank %.3f' % final_avg_rank)
def __init__(self, agents, args, markable_detector, markable_detector_corpus):
    """Set up a two-agent dialog that also carries a markable detector.

    Args:
        agents: sequence of the two participating agents.
        args: parsed options; ``args.domain`` names the domain to look up.
        markable_detector: stored on the instance.
        markable_detector_corpus: stored on the instance.
    """
    # only two-party dialogs are supported for now
    assert len(agents) == 2
    self.agents, self.args = agents, args
    domain_name = args.domain
    self.domain = domain.get_domain(domain_name)
    # the container must exist before the metrics are registered
    self.metrics = MetricsContainer()
    self._register_metrics()

    self.markable_detector = markable_detector
    self.markable_detector_corpus = markable_detector_corpus
    # both selfplay stores start out empty
    self.selfplay_markables, self.selfplay_referents = {}, {}
def main():
    """Parse the command line and run an interactive chat with the model."""
    parser = argparse.ArgumentParser(description='chat utility')
    add = parser.add_argument
    add('--model_file', type=str, help='model file')
    add('--domain', type=str, default='object_division',
        help='domain for the dialogue')
    add('--context_file', type=str, default='', help='context file')
    add('--temperature', type=float, default=1.0, help='temperature')
    add('--num_types', type=int, default=3, help='number of object types')
    add('--num_objects', type=int, default=6, help='total number of objects')
    add('--max_score', type=int, default=10, help='max score per object')
    add('--score_threshold', type=int, default=6,
        help='successful dialog should have more than score_threshold in score')
    add('--seed', type=int, default=1, help='random seed')
    add('--smart_ai', action='store_true', default=False,
        help='make AI smart again')
    add('--ai_starts', action='store_true', default=False,
        help='allow AI to start the dialog')
    add('--ref_text', type=str, help='file with the reference text')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    human = HumanAgent(domain.get_domain(args.domain))

    # the rollout agent is used only when --smart_ai is given
    if args.smart_ai:
        ai_cls = LstmRolloutAgent
    else:
        ai_cls = LstmAgent
    ai = ai_cls(utils.load_model(args.model_file), args)

    # the first agent in the list opens the dialog
    if args.ai_starts:
        agents = [ai, human]
    else:
        agents = [human, ai]

    dialog = Dialog(agents, args)
    logger = DialogLogger(verbose=True)

    # contexts come from the given file, or are produced manually when no
    # file is given
    if args.context_file != '':
        ctx_gen = ContextGenerator(args.context_file)
    else:
        ctx_gen = ManualContextGenerator(
            args.num_types, args.num_objects, args.max_score)

    Chat(dialog, ctx_gen, logger).run()
def main():
    """Parse the command line and run an interactive chat with the model."""
    parser = argparse.ArgumentParser(description='chat utility')
    add = parser.add_argument
    add('--model_file', type=str, help='model file')
    add('--domain', type=str, default='object_division',
        help='domain for the dialogue')
    add('--context_file', type=str, default='', help='context file')
    add('--temperature', type=float, default=1.0, help='temperature')
    add('--num_types', type=int, default=3, help='number of object types')
    add('--num_objects', type=int, default=6, help='total number of objects')
    add('--max_score', type=int, default=10, help='max score per object')
    add('--score_threshold', type=int, default=6,
        help='successful dialog should have more than score_threshold in score')
    add('--seed', type=int, default=1, help='random seed')
    add('--smart_ai', action='store_true', default=False,
        help='make AI smart again')
    add('--ai_starts', action='store_true', default=False,
        help='allow AI to start the dialog')
    add('--ref_text', type=str, help='file with the reference text')
    args = parser.parse_args()

    utils.set_seed(args.seed)

    human = HumanAgent(domain.get_domain(args.domain))

    # the rollout agent is used only when --smart_ai is given
    if args.smart_ai:
        ai_cls = LstmRolloutAgent
    else:
        ai_cls = LstmAgent
    ai = ai_cls(utils.load_model(args.model_file), args)

    # the first agent in the list opens the dialog
    if args.ai_starts:
        agents = [ai, human]
    else:
        agents = [human, ai]

    dialog = Dialog(agents, args)
    logger = DialogLogger(verbose=True)

    # contexts come from the given file, or are produced manually when no
    # file is given
    if args.context_file != '':
        ctx_gen = ContextGenerator(args.context_file)
    else:
        ctx_gen = ManualContextGenerator(
            args.num_types, args.num_objects, args.max_score)

    Chat(dialog, ctx_gen, logger).run()
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or NaN when denominator is 0."""
    if not denominator:
        return float('nan')
    return float(numerator) / denominator


def main():
    """Report agreement, score and Pareto-optimality statistics for a log file.

    Dialogues in which either side has a pick of -1 are skipped, as are
    dialogues whose picks do not add up to the item counts. For the remaining
    (agreed) dialogues, both scores are accumulated and every candidate
    division from ``gen_choices`` is checked for a Pareto improvement.

    Ratios whose denominator is zero (empty log, or no agreed dialogue) are
    printed as ``nan`` instead of raising ``ZeroDivisionError``.
    """
    parser = argparse.ArgumentParser(
        description='A script to compute Pareto efficiency')
    parser.add_argument('--log_file', type=str, default='',
                        help='location of the log file')
    parser.add_argument('--domain', type=str, default='object_division',
                        help='domain for the dialogue')

    args = parser.parse_args()
    domain = get_domain(args.domain)

    dataset = parse_log(args.log_file, domain)

    avg_agree, avg_can_improve = 0, 0
    avg_score1, avg_score2 = 0, 0
    for cnts, vals1, picks1, vals2, picks2 in dataset:
        if np.min(picks1) == -1 or np.min(picks2) == -1:
            continue
        # the two sides agree only if their picks add up to every item count
        if not all(p1 + p2 == n for p1, p2, n in zip(picks1, picks2, cnts)):
            continue

        avg_agree += 1
        score1 = compute_score(vals1, picks1)
        score2 = compute_score(vals2, picks2)

        can_improve = False
        for cand1, cand2 in gen_choices(cnts):
            cand_score1 = compute_score(vals1, cand1)
            cand_score2 = compute_score(vals2, cand2)
            # a Pareto improvement: one side strictly better, the other no worse
            if (cand_score1 > score1 and cand_score2 >= score2) or \
                    (cand_score1 >= score1 and cand_score2 > score2):
                can_improve = True
                break  # one improving candidate is enough

        avg_score1 += score1
        avg_score2 += score2
        avg_can_improve += int(can_improve)

    total = len(dataset)
    print('pareto opt (%%)\t:\t%.2f' % (
        100. * (1 - _safe_ratio(avg_can_improve, avg_agree))))
    print('agree (%%)\t:\t%.2f' % (100. * _safe_ratio(avg_agree, total)))
    print('score (all)\t:\t%.2f vs. %.2f' % (
        _safe_ratio(avg_score1, total), _safe_ratio(avg_score2, total)))
    print('score (agreed)\t:\t%.2f vs. %.2f' % (
        _safe_ratio(avg_score1, avg_agree), _safe_ratio(avg_score2, avg_agree)))
def __init__(self, word_dict, output_length, args, device):
    """Build the selection model: word embedding, context encoder and selector.

    Args:
        word_dict: dictionary whose length sizes the word embedding.
        output_length: not referenced in this constructor.
        args: parsed options (sizes, dropout, init range, encoder switches).
        device: stored on the instance and forwarded to the context encoder.
    """
    super(SelectModel, self).__init__()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.args = args
    self.device = device
    self.num_ent = domain.num_ent()

    # embedding for words
    self.word_encoder = nn.Embedding(len(self.word_dict), args.nembed_word)

    # context encoder
    if args.rel_ctx_encoder:
        self.ctx_encoder = modules.RelationalContextEncoder(
            domain.num_ent(), domain.dim_ent(), args.rel_hidden,
            args.nembed_ctx, args.dropout, args.init_range, device)
    else:
        self.ctx_encoder = modules.MlpContextEncoder(
            domain.input_length(), args.nembed_ctx, args.dropout,
            args.init_range, device)

    self.dropout = nn.Dropout(args.dropout)

    # a bidirectional selection RNN
    # it will go through input words and generate by the reader hidden states
    # to produce a hidden representation
    self.sel_rnn = nn.GRU(input_size=args.nembed_word,
                          hidden_size=args.nhid_lang,
                          bias=True,
                          bidirectional=True)

    # input width: 2 * nhid_lang (bidirectional RNN) plus nembed_ctx
    self.sel_encoder = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_lang + args.nembed_ctx, args.nhid_sel),
        nn.Tanh())

    # selection decoder
    self.sel_decoder = nn.Linear(args.nhid_sel, self.num_ent)

    # with context_only set, the encoder is replaced by one that takes only
    # the nembed_ctx-sized input, and the decoder is re-created
    if self.args.context_only:
        self.sel_encoder = nn.Sequential(
            torch.nn.Linear(args.nembed_ctx, args.nhid_sel),
            nn.Tanh())
        self.sel_decoder = nn.Linear(args.nhid_sel, self.num_ent)

    self.init_weights()
def __init__(self, model, args, name='Alice', train=False):
    """Set up a non-human agent around ``model``, for training or evaluation.

    Args:
        model: the model this agent wraps.
        args: parsed options; reads ``domain``, ``rl_lr`` and ``momentum``.
        name: display name of the agent.
        train: when true the model is put in train mode and an RMSprop
            optimizer is created; otherwise the model is put in eval mode.
    """
    super(RnnAgent, self).__init__()
    self.model, self.args, self.name = model, args, name
    self.human = False
    self.domain = domain.get_domain(args.domain)
    self.train = train

    if not train:
        # evaluation only: no optimizer or reward bookkeeping is created
        self.model.eval()
        return

    self.model.train()
    self.opt = optim.RMSprop(
        self.model.parameters(),
        lr=args.rl_lr,
        momentum=self.args.momentum)
    self.all_rewards = []
    self.t = 0
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the selection model: three embeddings, a context encoder and a head.

    Args:
        word_dict: dictionary whose length sizes the word and speaker embeddings.
        item_dict: dictionary whose length is the selection head's output size.
        context_dict: dictionary whose length is passed to the context encoder.
        count_dict: stored on the instance.
        args: parsed options (sizes, dropout, init range, skip_values).
    """
    super(SelectionModel, self).__init__()

    # sizes fixed in code rather than read from args
    self.nhid_pos = 32
    self.nhid_speaker = 32
    self.len_cutoff = 10

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    self.word_encoder = nn.Embedding(len(self.word_dict), args.nembed_word)
    # position embedding has len_cutoff rows
    self.pos_encoder = nn.Embedding(self.len_cutoff, self.nhid_pos)
    # NOTE(review): the speaker embedding is sized by the full vocabulary;
    # presumably it is indexed with the speaker tokens' word ids - confirm
    self.speaker_encoder = nn.Embedding(len(self.word_dict), self.nhid_speaker)
    self.ctx_encoder = MlpContextEncoder(len(self.context_dict),
        domain.input_length(), args.nembed_ctx, args.nhid_ctx,
        args.dropout, args.init_range, args.skip_values)

    # value_size is the sum of the word, position and speaker embedding sizes
    self.sel_head = SelectionModule(
        query_size=args.nhid_ctx,
        value_size=args.nembed_word + self.nhid_pos + self.nhid_speaker,
        hidden_size=args.nhid_attn,
        selection_size=args.nhid_sel,
        num_heads=6,
        output_size=len(item_dict),
        args=args)

    self.dropout = nn.Dropout(args.dropout)

    # init embeddings
    self.word_encoder.weight.data.uniform_(-self.args.init_range, self.args.init_range)
    self.pos_encoder.weight.data.uniform_(-self.args.init_range, self.args.init_range)
    self.speaker_encoder.weight.data.uniform_(-self.args.init_range, self.args.init_range)
def print_items(sess, model, count, val, words):
    """Print the most probable selection among the choices the domain allows.

    The softmax output is evaluated once; each valid choice is scored as the
    product of its six per-position probabilities, and the words of the
    highest-scoring choice are printed on one line.
    """
    corpus = model.corpus
    division_domain = domain.get_domain('object_division')

    count_words = corpus.context_dict.i2w(count[0])
    value_words = corpus.context_dict.i2w(val[0])
    # context alternates count and value for each of the three entries
    ctx = []
    for k in range(3):
        ctx.append(str(count_words[k]))
        ctx.append(str(value_words[k]))

    # only choices generated by the domain are scored
    valid_choices = division_domain.generate_choices(ctx)
    candidates = [corpus.item_dict.w2i(c) for c in valid_choices]

    feed = {
        model.ctx_count: count,
        model.ctx_val: val,
        model.inpt: words,
        model.init_lang_state: model.lang_state_one,
    }
    softmax_out = sess.run(model.item_softmax, feed)
    per_position = np.array(softmax_out)[:, 0, :]

    scores = []
    for candidate in candidates:
        joint = 1.
        for pos in range(6):
            joint *= per_position[pos][candidate[pos]]
        scores.append(joint)

    winner = candidates[np.argmax(scores)]
    for item in corpus.item_dict.i2w(winner):
        print(item, end=" ")
    print("\n")
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the baseline clustering model.

    Creates the context encoder, word embedding, sentence encoder, latent
    bottleneck, a reader/writer decoder pair with shared weights, and a
    recurrent memory unit.

    Args:
        word_dict: dictionary whose length sizes the word embedding and mask.
        item_dict: stored on the instance.
        context_dict: dictionary whose length is passed to the context encoder.
        count_dict: dictionary whose length sets the number of bottleneck shards.
        args: parsed options (sizes, dropout, init range, cluster settings).
    """
    super(BaselineClusteringModel, self).__init__()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    # the context encoder's hidden size argument is nhid_lang here
    self.ctx_encoder = MlpContextEncoder(len(self.context_dict),
        domain.input_length(), args.nembed_ctx, args.nhid_lang,
        args.dropout, args.init_range, False)

    self.word_embed = nn.Embedding(len(self.word_dict), args.nembed_word)

    self.encoder = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.latent_bottleneck = ShardedLatentBottleneckModule(
        num_shards=len(count_dict),
        num_clusters=self.args.num_clusters,
        input_size=args.nhid_lang,
        output_size=self.args.nhid_cluster,
        args=args)

    self.dropout = nn.Dropout(args.dropout)

    self.decoder_reader = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.decoder_writer = nn.GRUCell(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.cond2input = nn.Linear(args.nhid_cluster, args.nembed_word)

    self.hid2output = nn.Sequential(
        nn.Linear(args.nhid_lang, args.nembed_word),
        nn.Dropout(args.dropout))

    self.memory = RecurrentUnit(input_size=args.nhid_lang,
        hidden_size=args.nhid_lang,
        args=args)

    # tie the weights between reader and writer
    self.decoder_writer.weight_ih = self.decoder_reader.weight_ih_l0
    self.decoder_writer.weight_hh = self.decoder_reader.weight_hh_l0
    self.decoder_writer.bias_ih = self.decoder_reader.bias_ih_l0
    self.decoder_writer.bias_hh = self.decoder_reader.bias_hh_l0

    # mask built from the vocabulary indices of the four special tokens
    self.special_token_mask = make_mask(len(word_dict), [
        word_dict.get_idx(w) for w in ['<unk>', 'YOU:', 'THEM:', '<pad>']
    ])

    # init
    self.word_embed.weight.data.uniform_(-args.init_range, args.init_range)
    init_rnn(self.encoder, args.init_range)
    init_rnn(self.decoder_reader, args.init_range)
    init_linear(self.cond2input, args.init_range)
    init_cont(self.hid2output, args.init_range)
def __init__(self, word_dict, item_dict, context_dict, output_length, args, device_id):
    """Build the dialog model: encoders, reader/writer, attention and selection.

    Args:
        word_dict: dictionary whose length sizes the word embedding and mask.
        item_dict: dictionary whose length is each selection decoder's output size.
        context_dict: dictionary whose length is passed to the context encoder.
        output_length: number of selection decoders to create.
        args: parsed options (sizes, dropout, init range, encoder switch).
        device_id: forwarded to the parent constructor and the context encoder.
    """
    super(DialogModel, self).__init__(device_id)

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.args = args

    # embedding for words
    self.word_encoder = nn.Embedding(len(self.word_dict), args.nembed_word)

    # context encoder
    ctx_encoder_ty = modules.RnnContextEncoder if args.rnn_ctx_encoder \
        else modules.MlpContextEncoder
    self.ctx_encoder = ctx_encoder_ty(len(self.context_dict),
        domain.input_length(), args.nembed_ctx, args.nhid_ctx,
        args.init_range, device_id)

    # a reader RNN, to encode words
    self.reader = nn.GRU(input_size=args.nhid_ctx + args.nembed_word,
                         hidden_size=args.nhid_lang,
                         bias=True)
    self.decoder = nn.Linear(args.nhid_lang, args.nembed_word)
    # a writer, a RNNCell that will be used to generate utterances
    self.writer = nn.GRUCell(input_size=args.nhid_ctx + args.nembed_word,
                             hidden_size=args.nhid_lang,
                             bias=True)

    # tie the weights of reader and writer
    self.writer.weight_ih = self.reader.weight_ih_l0
    self.writer.weight_hh = self.reader.weight_hh_l0
    self.writer.bias_ih = self.reader.bias_ih_l0
    self.writer.bias_hh = self.reader.bias_hh_l0

    self.dropout = nn.Dropout(args.dropout)

    # a bidirectional selection RNN
    # it will go through input words and generate by the reader hidden states
    # to produce a hidden representation
    self.sel_rnn = nn.GRU(input_size=args.nhid_lang + args.nembed_word,
                          hidden_size=args.nhid_attn,
                          bias=True,
                          bidirectional=True)

    # mask for disabling special tokens when generating sentences
    # (allocated here, filled in at the end of this constructor)
    self.special_token_mask = torch.FloatTensor(len(self.word_dict))

    # attention to combine selection hidden states
    self.attn = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn, args.nhid_attn),
        nn.Tanh(),
        torch.nn.Linear(args.nhid_attn, 1))

    # selection encoder, takes attention output and context hidden and combines them
    self.sel_encoder = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn + args.nhid_ctx, args.nhid_sel),
        nn.Tanh())
    # selection decoders, one per each item
    self.sel_decoders = nn.ModuleList()
    for i in range(output_length):
        self.sel_decoders.append(
            nn.Linear(args.nhid_sel, len(self.item_dict)))

    self.init_weights()

    # fill in the mask
    # words matching the domain's item pattern and the four special tokens
    # get -999, every other word gets 0.0
    for i in range(len(self.word_dict)):
        w = self.word_dict.get_word(i)
        special = domain.item_pattern.match(w) or w in ('<unk>', 'YOU:', 'THEM:', '<pad>')
        self.special_token_mask[i] = -999 if special else 0.0

    self.special_token_mask = self.to_device(self.special_token_mask)
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the latent clustering model.

    Creates the context encoder, word embedding, sentence encoder, a
    reader/writer decoder pair with shared weights, the latent bottleneck, a
    GRU-cell memory and the selection module.

    Args:
        word_dict: dictionary whose length sizes the word embedding.
        item_dict: dictionary whose length is the selection module's output size.
        context_dict: dictionary whose length is passed to the context encoder.
        count_dict: dictionary whose length sets the number of bottleneck shards.
        args: parsed options (sizes, dropout, init range, cluster settings).
    """
    super(LatentClusteringModel, self).__init__()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    self.ctx_encoder = MlpContextEncoder(len(self.context_dict),
        domain.input_length(), args.nembed_ctx, args.nhid_ctx,
        args.dropout, args.init_range, args.skip_values)

    self.word_embed = nn.Embedding(len(self.word_dict), args.nembed_word)

    self.hid2output = nn.Sequential(
        nn.Linear(args.nhid_lang, args.nembed_word),
        nn.Dropout(args.dropout))

    self.mem2input = nn.Linear(args.nhid_lang, args.nembed_word)

    self.encoder = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    # input is the concatenation of two nhid_lang-sized vectors and one
    # nhid_ctx-sized vector; output is nhid_cluster wide
    self.embed2hid = nn.Sequential(
        nn.Linear(args.nhid_lang + args.nhid_lang + args.nhid_ctx, args.nhid_cluster),
        nn.Tanh())

    self.decoder_reader = nn.GRU(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    self.decoder_writer = nn.GRUCell(input_size=args.nembed_word,
        hidden_size=args.nhid_lang,
        bias=True)

    # tie the weights between reader and writer
    self.decoder_writer.weight_ih = self.decoder_reader.weight_ih_l0
    self.decoder_writer.weight_hh = self.decoder_reader.weight_hh_l0
    self.decoder_writer.bias_ih = self.decoder_reader.bias_ih_l0
    self.decoder_writer.bias_hh = self.decoder_reader.bias_hh_l0

    self.latent_bottleneck = ShardedLatentBottleneckModule(
        num_shards=len(count_dict),
        num_clusters=args.num_clusters,
        input_size=args.nhid_lang,
        output_size=args.nhid_cluster,
        args=args)

    self.memory = nn.GRUCell(input_size=args.nhid_cluster,
        hidden_size=args.nhid_lang,
        bias=True)

    self.dropout = nn.Dropout(args.dropout)

    self.selection = SimpleSeparateSelectionModule(
        input_size=args.nhid_cluster,
        hidden_size=args.nhid_sel,
        output_size=len(item_dict),
        args=args)

    # init
    self.word_embed.weight.data.uniform_(-args.init_range, args.init_range)
    init_rnn(self.encoder, args.init_range)
    init_rnn(self.decoder_reader, args.init_range)
    init_rnn_cell(self.memory, args.init_range)
    init_linear(self.mem2input, args.init_range)
    init_cont(self.hid2output, args.init_range)
    init_cont(self.embed2hid, args.init_range)
def __init__(self, word_dict, args):
    """Build the reference model: embeddings, reader/writer and attention heads.

    With ``args.share_attn`` set, a single attention network ending in a
    scalar output is created. Otherwise ``self.attn`` is a shared first stage
    and three separate heads (``lang_attn``, ``sel_attn``, ``ref_attn``) are
    created on top of it.

    Args:
        word_dict: dictionary whose length sizes the word embedding and mask.
        args: parsed options (sizes, dropout, init range, encoder type,
            share_attn).
    """
    super(RnnReferenceModel, self).__init__()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.args = args
    self.num_ent = domain.num_ent()

    # define modules:
    self.word_embed = nn.Embedding(len(self.word_dict), args.nembed_word)

    # the context encoder class is looked up by name from args
    ctx_encoder_ty = models.get_ctx_encoder_type(args.ctx_encoder_type)
    self.ctx_encoder = ctx_encoder_ty(domain, args)

    self.reader = nn.GRU(input_size=args.nembed_word,
                         hidden_size=args.nhid_lang,
                         bias=True)

    self.writer = nn.GRUCell(input_size=args.nembed_word,
                             hidden_size=args.nhid_lang,
                             bias=True)

    self.hid2output = nn.Sequential(
        nn.Linear(args.nhid_lang + args.nembed_ctx, args.nembed_word),
        nn.Tanh(),
        nn.Dropout(args.dropout),
    )

    if args.share_attn:
        self.attn = nn.Sequential(
            nn.Linear(args.nhid_lang + args.nembed_ctx, args.nhid_attn),
            nn.Tanh(),
            nn.Dropout(args.dropout),
            torch.nn.Linear(args.nhid_attn, args.nhid_attn),
            nn.Tanh(),
            nn.Dropout(args.dropout),
            torch.nn.Linear(args.nhid_attn, 1))
    else:
        # shared first stage; the three heads below take its nhid_sel-sized output
        self.attn = nn.Sequential(
            nn.Linear(args.nhid_lang + args.nembed_ctx, args.nhid_sel),
            nn.Tanh(),
            nn.Dropout(args.dropout))
        self.lang_attn = nn.Sequential(
            torch.nn.Linear(args.nhid_sel, args.nhid_attn),
            nn.Tanh(),
            nn.Dropout(args.dropout),
            torch.nn.Linear(args.nhid_attn, 1))
        self.sel_attn = nn.Sequential(
            torch.nn.Linear(args.nhid_sel, args.nhid_sel),
            nn.Tanh(),
            nn.Dropout(args.dropout),
            torch.nn.Linear(args.nhid_sel, 1))
        self.ref_attn = nn.Sequential(
            torch.nn.Linear(args.nhid_sel, args.nhid_sel),
            nn.Tanh(),
            nn.Dropout(args.dropout),
            torch.nn.Linear(args.nhid_sel, 1))

    # tie the weights between reader and writer
    self.writer.weight_ih = self.reader.weight_ih_l0
    self.writer.weight_hh = self.reader.weight_hh_l0
    self.writer.bias_ih = self.reader.bias_ih_l0
    self.writer.bias_hh = self.reader.bias_hh_l0

    self.dropout = nn.Dropout(args.dropout)

    # mask for disabling special tokens when generating sentences
    self.special_token_mask = make_mask(len(word_dict), [
        word_dict.get_idx(w) for w in ['<unk>', 'YOU:', 'THEM:', '<pad>']
    ])

    # init
    self.word_embed.weight.data.uniform_(-args.init_range, args.init_range)
    init_rnn(self.reader, args.init_range)
    init_cont(self.hid2output, args.init_range)
    if args.share_attn:
        init_cont(self.attn, args.init_range)
    else:
        init_cont(self.attn, args.init_range)
        init_cont(self.lang_attn, args.init_range)
        init_cont(self.sel_attn, args.init_range)
        init_cont(self.ref_attn, args.init_range)
def main():
    """Parse training options, build corpus and model, train, and save the model."""
    parser = argparse.ArgumentParser(description='training script')

    # small wrappers so each option reads as (flag, default, help)
    def add_int(flag, default, help_text):
        parser.add_argument(flag, type=int, default=default, help=help_text)

    def add_float(flag, default, help_text):
        parser.add_argument(flag, type=float, default=default, help=help_text)

    def add_str(flag, default, help_text, **extra):
        parser.add_argument(flag, type=str, default=default, help=help_text, **extra)

    def add_switch(flag, help_text):
        parser.add_argument(flag, action='store_true', default=False, help=help_text)

    add_str('--data', 'data/negotiate', 'location of the data corpus')
    add_int('--nembed_word', 256, 'size of word embeddings')
    add_int('--nembed_ctx', 64, 'size of context embeddings')
    add_int('--nhid_lang', 256, 'size of the hidden state for the language module')
    add_int('--nhid_cluster', 256, 'size of the hidden state for the language module')
    add_int('--nhid_ctx', 64, 'size of the hidden state for the context module')
    add_int('--nhid_strat', 64, 'size of the hidden state for the strategy module')
    add_int('--nhid_attn', 64, 'size of the hidden state for the attention module')
    add_int('--nhid_sel', 64, 'size of the hidden state for the selection module')
    add_float('--lr', 20.0, 'initial learning rate')
    add_float('--min_lr', 1e-5, 'min threshold for learning rate annealing')
    add_float('--decay_rate', 9.0, 'decrease learning rate by this factor')
    add_int('--decay_every', 1, 'decrease learning rate after decay_every epochs')
    add_float('--momentum', 0.0, 'momentum for sgd')
    add_float('--clip', 0.2, 'gradient clipping')
    add_float('--dropout', 0.5, 'dropout rate in embedding layer')
    add_float('--init_range', 0.1, 'initialization range')
    add_int('--max_epoch', 30, 'max number of epochs')
    add_int('--num_clusters', 50, 'number of clusters')
    add_int('--bsz', 25, 'batch size')
    add_int('--unk_threshold', 20, 'minimum word frequency to be in dictionary')
    add_float('--temperature', 0.1, 'temperature')
    add_float('--partner_ctx_weight', 0.0, 'selection weight')
    add_float('--sel_weight', 0.6, 'selection weight')
    add_int('--seed', 1, 'random seed')
    add_switch('--cuda', 'use CUDA')
    add_str('--model_file', '', 'path to save the final model')
    add_str('--prediction_model_file', '', 'path to save the prediction model')
    add_str('--selection_model_file', '', 'path to save the selection model')
    add_str('--cluster_model_file', '', 'path to save the cluster model')
    add_str('--lang_model_file', '', 'path to save the language model')
    add_switch('--visual', 'plot graphs')
    add_switch('--skip_values', 'skip values in ctx encoder')
    add_str('--model_type', 'rnn_model', 'model type',
            choices=models.get_model_names())
    add_str('--domain', 'object_division', 'domain for the dialogue')
    add_switch('--clustering', 'use clustering')
    add_switch('--sep_sel', 'use separate classifiers for selection')

    args = parser.parse_args()

    utils.use_cuda(args.cuda)
    utils.set_seed(args.seed)

    domain = get_domain(args.domain)
    # the model class also provides its corpus and engine classes
    model_cls = models.get_model_type(args.model_type)

    corpus = model_cls.corpus_ty(
        domain,
        args.data,
        freq_cutoff=args.unk_threshold,
        verbose=True,
        sep_sel=args.sep_sel)

    model = model_cls(
        corpus.word_dict,
        corpus.item_dict_old,
        corpus.context_dict,
        corpus.count_dict,
        args)
    if args.cuda:
        model.cuda()

    engine = model_cls.engine_ty(model, args, verbose=True)
    # train() returns four values; none of them is used afterwards
    _train_loss, _valid_loss, _select_loss, _extra = engine.train(corpus)

    utils.save_model(engine.get_model(), args.model_file)
def main():
    """Train reference-resolution models as configured on the command line.

    After the options are parsed, one training run is performed per
    data-split index: indices 0-9 when ``--repeat_train`` is given,
    otherwise only index 1.  Each run builds its corpus from the
    ``*_reference_shift_<index>.txt`` files, trains with the engine routine
    selected by ``--optimizer`` and saves both the best model and its
    ``state_dict``.
    """
    parser = argparse.ArgumentParser(
        description='training script for reference resolution')

    def option(name, kind, default, help_text, **extra):
        # Typed option carrying a default value.
        parser.add_argument(
            name, type=kind, default=default, help=help_text, **extra)

    def switch(name, help_text):
        # Boolean flag that stays False unless given on the command line.
        parser.add_argument(
            name, action='store_true', default=False, help=help_text)

    def choice(name, allowed, default, help_text):
        # Untyped option restricted to a fixed set of values.
        parser.add_argument(
            name, choices=allowed, default=default, help=help_text)

    corpus_kinds = ['full', 'uncorrelated', 'success_only']

    # Data and architecture.
    option('--data', str, 'data/onecommon', 'location of the data corpus')
    option('--model_type', str, 'rnn_reference_model',
           'type of model to use', choices=models.get_model_names())
    option('--ctx_encoder_type', str, 'mlp_encoder',
           'type of context encoder to use',
           choices=models.get_ctx_encoder_names())
    switch('--attention', 'use attention')
    option('--nembed_word', int, 128, 'size of word embeddings')
    option('--nhid_rel', int, 64,
           'size of the hidden state for the language module')
    option('--nembed_ctx', int, 128, 'size of context embeddings')
    option('--nembed_cond', int, 128, 'size of condition embeddings')
    option('--nhid_lang', int, 128,
           'size of the hidden state for the language module')
    option('--nhid_strat', int, 128,
           'size of the hidden state for the strategy module')
    option('--nhid_attn', int, 64,
           'size of the hidden state for the attention module')
    option('--nhid_sel', int, 64,
           'size of the hidden state for the selection module')
    switch('--share_attn',
           'share attention modules for selection and language output')

    # Optimisation.
    choice('--optimizer', ['adam', 'rmsprop'], 'adam', 'optimizer to use')
    option('--lr', float, 0.001, 'initial learning rate')
    option('--min_lr', float, 1e-5,
           'min threshold for learning rate annealing')
    option('--decay_rate', float, 9.0,
           'decrease learning rate by this factor')
    option('--decay_every', int, 1,
           'decrease learning rate after decay_every epochs')
    option('--momentum', float, 0.0, 'momentum for sgd')
    option('--clip', float, 0.5, 'gradient clipping')
    option('--dropout', float, 0.5, 'dropout rate in embedding layer')
    option('--init_range', float, 0.01, 'initialization range')
    option('--max_epoch', int, 20, 'max number of epochs')
    option('--bsz', int, 16, 'batch size')
    option('--unk_threshold', int, 20,
           'minimum word frequency to be in dictionary')
    option('--temperature', float, 0.1, 'temperature')

    # Loss weights.
    option('--lang_weight', float, 1.0, 'language loss weight')
    option('--ref_weight', float, 1.0, 'reference loss weight')
    option('--num_ref_weight', float, 1.0, 'reference loss weight')
    option('--sel_weight', float, 1.0, 'selection loss weight')

    # Run control and input ablations.
    option('--seed', int, 1, 'random seed')
    switch('--cuda', 'use CUDA')
    option('--model_file', str, 'tmp.th', 'path to save the final model')
    option('--domain', str, 'one_common', 'domain for the dialogue')
    switch('--tensorboard_log', 'log training with tensorboard')
    switch('--repeat_train', 'repeat training n times')
    choice('--corpus_type', corpus_kinds, 'full',
           'type of training corpus to use')
    switch('--remove_location', 'remove locative information from input')
    switch('--remove_size', 'remove size information from input')
    switch('--remove_color', 'remove color information from input')
    switch('--remove_size_color',
           'remove size and color information from input')

    args = parser.parse_args()

    seeds = list(range(10)) if args.repeat_train else [1]

    for seed in seeds:
        utils.use_cuda(args.cuda)
        # NOTE: the RNG is seeded from --seed on every run; the loop variable
        # only selects the data split and the output file name.
        utils.set_seed(args.seed)

        domain = get_domain(args.domain)
        model_ty = models.get_model_type(args.model_type)

        split_files = {
            split: '{}_reference_shift_{}.txt'.format(split, seed)
            for split in ('train', 'valid', 'test')
        }
        corpus = model_ty.corpus_ty(
            domain,
            args.data,
            freq_cutoff=args.unk_threshold,
            verbose=True,
            **split_files)

        model = model_ty(corpus.word_dict, args)
        if args.cuda:
            model.cuda()

        engine = model_ty.engine_ty(model, args, verbose=True)
        if args.optimizer == 'adam':
            _, best_model = engine.train(corpus)
        elif args.optimizer == 'rmsprop':
            _, best_model = engine.train_scheduled(corpus)

        utils.save_model(best_model,
                         '{}_{}.th'.format(args.model_file, seed))
        utils.save_model(best_model.state_dict(),
                         'stdict_' + args.model_file)
def main():
    """Run reinforcement-learning self-play between two loaded agents.

    Parses the command-line options, loads the Alice and Bob models, wraps
    them in their agent types (Alice with ``train=True``, Bob with
    ``train=False``), runs ``Reinforce`` over the generated contexts and
    finally saves Alice's model to ``--output_model_file``.
    """
    parser = argparse.ArgumentParser(description='Reinforce')

    # Shared settings for on/off switches.
    flag = dict(action='store_true', default=False)

    # (option name, add_argument keyword arguments), in registration order.
    option_table = [
        ('--alice_model_file', dict(type=str, help='Alice model file')),
        ('--bob_model_file', dict(type=str, help='Bob model file')),
        ('--output_model_file', dict(type=str, help='output model file')),
        ('--context_file', dict(type=str, help='context file')),
        ('--temperature',
         dict(type=float, default=1.0, help='temperature')),
        ('--pred_temperature',
         dict(type=float, default=1.0, help='temperature')),
        ('--cuda', dict(flag, help='use CUDA')),
        ('--verbose', dict(flag, help='print out converations')),
        ('--seed', dict(type=int, default=1, help='random seed')),
        ('--score_threshold',
         dict(type=int, default=6,
              help='successful dialog should have more than '
                   'score_threshold in score')),
        ('--log_file',
         dict(type=str, default='',
              help='log successful dialogs to file for training')),
        ('--smart_bob', dict(flag, help='make Bob smart again')),
        ('--gamma', dict(type=float, default=0.99, help='discount factor')),
        ('--eps', dict(type=float, default=0.5, help='eps greedy')),
        ('--momentum',
         dict(type=float, default=0.1, help='momentum for sgd')),
        ('--lr', dict(type=float, default=0.1, help='learning rate')),
        ('--clip', dict(type=float, default=0.1, help='gradient clip')),
        ('--rl_lr',
         dict(type=float, default=0.002, help='RL learning rate')),
        ('--rl_clip',
         dict(type=float, default=2.0, help='RL gradient clip')),
        ('--ref_text',
         dict(type=str, help='file with the reference text')),
        ('--sv_train_freq',
         dict(type=int, default=-1, help='supervision train frequency')),
        ('--nepoch', dict(type=int, default=1, help='number of epochs')),
        ('--hierarchical', dict(flag, help='use hierarchical training')),
        ('--visual', dict(flag, help='plot graphs')),
        ('--domain',
         dict(type=str, default='object_division',
              help='domain for the dialogue')),
        ('--selection_model_file',
         dict(type=str, default='', help='path to save the final model')),
        ('--data',
         dict(type=str, default='data/negotiate',
              help='location of the data corpus')),
        ('--unk_threshold',
         dict(type=int, default=20,
              help='minimum word frequency to be in dictionary')),
        ('--bsz', dict(type=int, default=16, help='batch size')),
        ('--validate', dict(flag, help='plot graphs')),
        ('--scratch', dict(flag, help='erase prediciton weights')),
        ('--sep_sel',
         dict(flag, help='use separate classifiers for selection')),
    ]
    for option_name, settings in option_table:
        parser.add_argument(option_name, **settings)

    args = parser.parse_args()

    utils.use_cuda(args.cuda)
    utils.set_seed(args.seed)

    # Alice is the agent being trained.  The original source annotates the
    # loaded model as an RnnModel and the agent type as RnnRolloutAgent.
    alice_model = utils.load_model(args.alice_model_file)
    alice = get_agent_type(alice_model)(
        alice_model, args, name='Alice', train=True)
    alice.vis = args.visual

    # Bob is created with train=False.  The original source annotates the
    # loaded model as an RnnModel and the agent type as RnnAgent.
    bob_model = utils.load_model(args.bob_model_file)
    bob = get_agent_type(bob_model)(bob_model, args, name='Bob', train=False)

    dialog = Dialog([alice, bob], args)
    logger = DialogLogger(verbose=args.verbose, log_file=args.log_file)
    ctx_gen = ContextGenerator(args.context_file)

    # Corpus and engine are both taken from Alice's model class.
    domain = get_domain(args.domain)
    corpus = alice_model.corpus_ty(
        domain,
        args.data,
        freq_cutoff=args.unk_threshold,
        verbose=True,
        sep_sel=args.sep_sel)
    engine = alice_model.engine_ty(alice_model, args)

    Reinforce(dialog, ctx_gen, args, engine, corpus, logger).run()

    utils.save_model(alice.model, args.output_model_file)
def __init__(self, word_dict, item_dict, context_dict, output_length, args,
             device_id):
    """Assemble the sub-modules of the dialogue model.

    Args:
        word_dict: vocabulary; must support ``len()`` and ``get_word(i)``.
        item_dict: item vocabulary; its length is the output size of every
            selection decoder.
        context_dict: context vocabulary; its length is passed to the
            context encoder.
        output_length: number of selection decoders to create.
        args: hyper-parameter namespace (reads ``domain``, ``nembed_word``,
            ``rnn_ctx_encoder``, ``nembed_ctx``, ``nhid_ctx``,
            ``init_range``, ``nhid_lang``, ``dropout``, ``nhid_attn`` and
            ``nhid_sel``).
        device_id: forwarded to the base constructor and the context
            encoder.
    """
    super(DialogModel, self).__init__(device_id)

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.args = args

    vocab_size = len(self.word_dict)
    # Reader and writer consume the same concatenated input width.
    rnn_input_size = args.nhid_ctx + args.nembed_word

    # NOTE: sub-modules are created in a fixed order on purpose; it decides
    # the order in which they are registered on the model.
    self.word_encoder = nn.Embedding(vocab_size, args.nembed_word)

    if args.rnn_ctx_encoder:
        ctx_encoder_ty = modules.RnnContextEncoder
    else:
        ctx_encoder_ty = modules.MlpContextEncoder
    self.ctx_encoder = ctx_encoder_ty(
        len(self.context_dict), domain.input_length(), args.nembed_ctx,
        args.nhid_ctx, args.init_range, device_id)

    self.reader = nn.GRU(
        input_size=rnn_input_size,
        hidden_size=args.nhid_lang,
        bias=True)
    self.decoder = nn.Linear(args.nhid_lang, args.nembed_word)
    self.writer = nn.GRUCell(
        input_size=rnn_input_size,
        hidden_size=args.nhid_lang,
        bias=True)

    # Tie the weights of reader and writer: the cell reuses the parameters
    # of the reader's layer 0.
    for param_name in ('weight_ih', 'weight_hh', 'bias_ih', 'bias_hh'):
        setattr(self.writer, param_name,
                getattr(self.reader, param_name + '_l0'))

    self.dropout = nn.Dropout(args.dropout)

    self.sel_rnn = nn.GRU(
        input_size=args.nhid_lang + args.nembed_word,
        hidden_size=args.nhid_attn,
        bias=True,
        bidirectional=True)

    # Mask for disabling special tokens when generating sentences.  It is
    # allocated here and every entry is filled in further down.
    self.special_token_mask = torch.FloatTensor(vocab_size)

    self.sel_encoder = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn + args.nhid_ctx, args.nhid_sel),
        nn.Tanh(),
    )
    self.attn = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn, args.nhid_attn),
        nn.Tanh(),
        torch.nn.Linear(args.nhid_attn, 1),
    )

    # One linear classifier over the item vocabulary per output position.
    self.sel_decoders = nn.ModuleList()
    for _ in range(output_length):
        self.sel_decoders.append(
            nn.Linear(args.nhid_sel, len(self.item_dict)))

    self.init_weights()

    # Words matching the domain's item pattern, plus the markers below, get
    # -999 in the mask; every other word gets 0.0.
    marker_words = ('<unk>', 'YOU:', 'THEM:', '<pad>')
    for word_idx in range(vocab_size):
        word = self.word_dict.get_word(word_idx)
        if domain.item_pattern.match(word) or word in marker_words:
            self.special_token_mask[word_idx] = -999
        else:
            self.special_token_mask[word_idx] = 0.0

    self.special_token_mask = self.to_device(self.special_token_mask)
def _markable_eval_pass(model, dataset):
    """Accumulate loss and tag-accuracy counts for ``model`` over ``dataset``.

    Returns a ``(loss_sum, num_correct, num_total)`` tuple.  Gradient
    tracking is left to the caller, which wraps the call in
    ``torch.no_grad()``.
    """
    loss_sum, num_correct, num_total = 0, 0, 0
    for batch in tqdm(dataset):
        _ctx, words, markables, _scenario_ids, _agents, _chat_ids = batch
        loss_sum += model.neg_log_likelihood(words, markables).sum().item()
        _score, tag_seq = model(words)
        num_correct += (
            torch.Tensor(tag_seq).long() == markables).sum().item()
        num_total += len(tag_seq)
    return loss_sum, num_correct, num_total


def main():
    """Train and/or test the BiLSTM-CRF markable detector.

    One run is performed per data-split index: indices 0-9 with
    ``--repeat_train``, otherwise only index 1.  Each run either loads a
    previously saved model (``--test_only``) or trains one with Adam,
    keeping the copy that has the lowest validation loss.  The chosen model
    is then evaluated on the test split, and saved when it was trained in
    this run.
    """
    parser = argparse.ArgumentParser(
        description='training script for markable detection')
    parser.add_argument('--data', type=str, default='data/onecommon',
                        help='location of the data corpus')
    parser.add_argument('--nembed_word', type=int, default=128,
                        help='size of word embeddings')
    parser.add_argument('--nembed_ctx', type=int, default=128,
                        help='size of context embeddings')
    parser.add_argument(
        '--nhid_lang', type=int, default=128,
        help='size of the hidden state for the language module')
    parser.add_argument('--optimizer', choices=['adam', 'rmsprop'],
                        default='adam', help='optimizer to use')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='initial learning rate')
    parser.add_argument('--min_lr', type=float, default=1e-5,
                        help='min threshold for learning rate annealing')
    parser.add_argument('--decay_rate', type=float, default=9.0,
                        help='decrease learning rate by this factor')
    parser.add_argument(
        '--decay_every', type=int, default=1,
        help='decrease learning rate after decay_every epochs')
    parser.add_argument('--momentum', type=float, default=0.0,
                        help='momentum for sgd')
    parser.add_argument('--clip', type=float, default=0.5,
                        help='gradient clipping')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='dropout rate in embedding layer')
    parser.add_argument('--init_range', type=float, default=0.01,
                        help='initialization range')
    parser.add_argument('--max_epoch', type=int, default=10,
                        help='max number of epochs')
    parser.add_argument('--bsz', type=int, default=1, help='batch size')
    parser.add_argument('--unk_threshold', type=int, default=20,
                        help='minimum word frequency to be in dictionary')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='use CUDA')
    parser.add_argument('--model_file', type=str,
                        default='markable_detector',
                        help='path to save the final model')
    parser.add_argument('--domain', type=str, default='one_common',
                        help='domain for the dialogue')
    parser.add_argument('--tensorboard_log', action='store_true',
                        default=False, help='log training with tensorboard')
    parser.add_argument('--repeat_train', action='store_true', default=False,
                        help='repeat training n times')
    parser.add_argument('--test_only', action='store_true', default=False,
                        help='test only')
    parser.add_argument('--corpus_type',
                        choices=['full', 'uncorrelated', 'success_only'],
                        default='full',
                        help='type of training corpus to use')
    args = parser.parse_args()

    seeds = list(range(10)) if args.repeat_train else [1]

    for seed in seeds:
        utils.use_cuda(args.cuda)
        # NOTE: the RNG is seeded from --seed on every run; the loop variable
        # only selects the data split and the model file name.
        utils.set_seed(args.seed)

        domain = get_domain(args.domain)
        corpus = BiLSTM_CRF.corpus_ty(
            domain,
            args.data,
            train='train_markable_{}.txt'.format(seed),
            valid='valid_markable_{}.txt'.format(seed),
            test='test_markable_{}.txt'.format(seed),
            verbose=True)

        model_path = args.model_file + '_' + str(seed) + '.th'

        # Number of training epochs completed in this run.  It must be bound
        # even when no training happens (--test_only or --max_epoch 0),
        # because the final test report prints it.
        epoch = 0

        if args.test_only:
            best_model = utils.load_model(model_path)
            if args.cuda:
                best_model.cuda()
            else:
                best_model.to(torch.device("cpu"))
            best_model.eval()
        else:
            model = BiLSTM_CRF(len(corpus.word_dict), corpus.bio_dict,
                               args.nembed_word, args.nhid_lang)
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
            if args.cuda:
                model.cuda()

            best_model, best_valid_loss = copy.deepcopy(model), 1e100
            # The validation set is built once and reused every epoch.
            validset, _ = corpus.valid_dataset(args.bsz)

            for epoch in range(1, args.max_epoch + 1):
                # The training set is rebuilt at the start of every epoch.
                trainset, _ = corpus.train_dataset(args.bsz)

                # train pass
                model.train()
                for batch in tqdm(trainset):
                    model.zero_grad()
                    (_ctx, words, markables, _scenario_ids, _agents,
                     _chat_ids) = batch
                    words = Variable(words)
                    markables = Variable(markables)
                    loss = model.neg_log_likelihood(words, markables)
                    loss.sum().backward()
                    optimizer.step()

                # valid pass
                model.eval()
                with torch.no_grad():
                    valid_loss, correct, total = _markable_eval_pass(
                        model, validset)

                print("epoch {}".format(epoch))
                print("valid loss: {:.5f}".format(valid_loss))
                print("valid accuracy: {:.5f}".format(correct / total))

                if valid_loss < best_valid_loss:
                    print("update best model")
                    best_model = copy.deepcopy(model)
                    best_valid_loss = valid_loss

        # test pass
        testset, _ = corpus.test_dataset(args.bsz)
        best_model.eval()
        with torch.no_grad():
            test_loss, correct, total = _markable_eval_pass(
                best_model, testset)

        print("final test {}".format(epoch))
        print("test loss: {:.5f}".format(test_loss))
        print("test accuracy: {:.5f}".format(correct / total))

        if not args.test_only:
            utils.save_model(best_model, model_path)
            utils.save_model(best_model.state_dict(),
                             'stdict_' + args.model_file)
def main(): parser = argparse.ArgumentParser( description='testing script for reference resolution') parser.add_argument('--data', type=str, default='data/onecommon', help='location of the data corpus') parser.add_argument('--unk_threshold', type=int, default=10, help='minimum word frequency to be in dictionary') parser.add_argument('--model_file', type=str, required=True, help='pretrained model file') parser.add_argument('--seed', type=int, default=1, help='random seed') parser.add_argument('--hierarchical', action='store_true', default=False, help='use hierarchical model') parser.add_argument('--bsz', type=int, default=16, help='batch size') parser.add_argument('--cuda', action='store_true', default=False, help='use CUDA') parser.add_argument('--domain', type=str, default='one_common', help='domain for the dialogue') parser.add_argument('--vocab_corpus', choices=['full', 'uncorrelated', 'success_only'], default='full', help='vocabulary of the corpus to use') parser.add_argument('--corpus_type', choices=['full', 'uncorrelated', 'success_only'], default='full', help='type of test corpus to use') parser.add_argument('--bleu_n', type=int, default=0, help='test ngram bleu') parser.add_argument('--temperature', type=float, default=0.1, help='temperature') # for error analysis parser.add_argument('--transcript_file', type=str, default='final_transcripts.json', help='scenario file') parser.add_argument('--markable_file', type=str, default='markable_annotation.json', help='scenario file') parser.add_argument('--show_errors', action='store_true', default=False, help='show errors') # analysis parameters parser.add_argument('--fix_misspellings', action='store_true', default=False, help='fix misspellings') parser.add_argument('--shuffle_utterance', action='store_true', default=False, help='shuffle order of words in the utterance') parser.add_argument('--shuffle_word_types', type=str, nargs='*', default=[], help='shuffle specified class of words in the output') 
parser.add_argument('--drop_word_types', type=str, nargs='*', default=[], help='drop specified class of words in the output') parser.add_argument('--replace_word_types', type=str, nargs='*', default=[], help='replace specified class of words in the output') parser.add_argument('--repeat_test', action='store_true', default=False, help='repeat training n times') parser.add_argument('--test_ref_forward', action='store_true', default=False, help='test forward reference instead') args = parser.parse_args() if args.bleu_n > 0: # current support args.bsz = 1 if args.repeat_test: seeds = list(range(10)) else: seeds = [args.seed] repeat_results = defaultdict(list) model_referent_annotation = {} init2num_referents = defaultdict(Counter) for seed in seeds: device_id = utils.use_cuda(args.cuda) utils.set_seed(args.seed) domain = get_domain(args.domain) model = utils.load_model(args.model_file + '_' + str(seed) + '.th') if args.cuda: model.cuda() else: device = torch.device("cpu") model.to(device) model.eval() corpus = model.corpus_ty( domain, args.data, train='train_reference_shift_{}.txt'.format(seed), valid='valid_reference_shift_{}.txt'.format(seed), test='test_reference_shift_{}.txt'.format(seed), freq_cutoff=args.unk_threshold, verbose=True) with open(os.path.join(args.data, args.transcript_file), "r") as f: dialog_corpus = json.load(f) with open(os.path.join(args.data, args.markable_file), "r") as f: markable_annotation = json.load(f) with open( os.path.join(args.data, "aggregated_referent_annotation.json"), "r") as f: aggregated_referent_annotation = json.load(f) scenarios = { scenario['scenario_uuid']: scenario for scenario in dialog_corpus } crit = Criterion(model.word_dict, device_id=device_id) sel_crit = nn.CrossEntropyLoss() ref_crit = nn.BCEWithLogitsLoss() testset, testset_stats = corpus.test_dataset(args.bsz) test_lang_loss, test_select_loss, test_reference_loss, test_select_correct, test_select_total, test_reference_correct, test_reference_total, 
test_num_ref_correct, test_num_ref_total = 0, 0, 0, 0, 0, 0, 0, 0, 0 """ Variables to keep track of the results for analysis """ # num_referents --> count, count correct num_markables = 0 num_markables_counter = Counter() num_markables_correct = Counter() exact_match = 0 exact_match_counter = Counter() # location of markable --> count, count correct, count exact match location_counter = Counter() location_correct = Counter() location_exact_match = Counter() # information to compute correlation between selection and reference score select_correct = {} reference_correct = {} reference_total = {} # markable text --> count, count correct, count exact match text_counter = Counter() text_correct = Counter() text_exact_match = Counter() # init token --> count, count correct init_counter = Counter() init_correct = Counter() init_exact_match = Counter() # num ref confusion num_ref_confusion = np.zeros([8, 8], dtype=int) anaphora_list = [ "it", "that", "thats", "this", "its", "they", "their", "itself", "them", "those", "it's" ] total_anaphora = 0 correct_anaphora = 0 bleu_scores = [] for batch in testset: ctx, inpt, tgt, ref_inpt, ref_tgt, num_ref_tgt, sel_tgt, scenario_ids, real_ids, agents, chat_ids, sel_idx = batch ctx = Variable(ctx) inpt = Variable(inpt) if ref_inpt is not None: ref_inpt = Variable(ref_inpt) out, ref_out, num_ref_out, sel_out = model.forward( ctx, inpt, ref_inpt, sel_idx) tgt = Variable(tgt) sel_tgt = Variable(sel_tgt) lang_loss = crit(out, tgt) if ref_inpt is not None: ref_tgt = Variable(ref_tgt) ref_tgt = torch.transpose(ref_tgt, 0, 1).contiguous().float() ref_loss = ref_crit(ref_out, ref_tgt) t = Variable(torch.FloatTensor([0])) # threshold if model.args.num_ref_weight > 0: num_ref_pred = num_ref_out.max(dim=2)[1] ref_results = torch.zeros_like(ref_tgt) ref_correct = 0 for i in range(ref_out.size(0)): for j in range(ref_out.size(1)): ref_pred = torch.zeros_like(ref_tgt[i][j]) for ref_idx in range(ref_pred.size(0)): if ref_idx in ref_out[i][j].topk( 
num_ref_pred[i][j])[1]: ref_pred[ref_idx] = 1.0 ref_results[i][j] = ( ref_pred.long() == ref_tgt[i][j].long()) ref_correct += (ref_pred.long() == ref_tgt[i] [j].long()).sum().item() ref_total = ref_tgt.size(0) * ref_tgt.size( 1) * ref_tgt.size(2) else: ref_results = ((ref_out > 0).long() == ref_tgt.long()) ref_correct = ((ref_out > 0).long() == ref_tgt.long()).sum().item() ref_total = ref_tgt.size(0) * ref_tgt.size( 1) * ref_tgt.size(2) # compute more details of reference resolution for i in range(ref_tgt.size(0)): # markable idx for j in range(ref_tgt.size(1)): # batch idx chat_id = chat_ids[j] # add chat level details if not exists if chat_id not in reference_correct: reference_correct[chat_id] = ref_results[:, j, :].sum( ).item() if chat_id not in reference_total: reference_total[ chat_id] = ref_results[:, j, :].size( 0) * ref_results[:, j, :].size(1) if chat_id not in model_referent_annotation: model_referent_annotation[chat_id] = {} markables = [] # markables information from aggregated_referent_annotation for markable in markable_annotation[chat_id][ "markables"]: markable_id = markable["markable_id"] if markable_id in aggregated_referent_annotation[ chat_id] and markable["speaker"] == agents[ j]: if "unidentifiable" in aggregated_referent_annotation[ chat_id][ markable_id] and aggregated_referent_annotation[ chat_id][markable_id][ "unidentifiable"]: if markable_id not in model_referent_annotation[ chat_id] and markable[ "speaker"] == agents[j]: model_referent_annotation[chat_id][ markable_id] = { 'ambiguous': False, 'referents': [], 'unidentifiable': True } continue markables.append(markable) assert len(markables) == ref_tgt.size(0) if model.args.num_ref_weight > 0: ref_pred = torch.zeros_like(ref_tgt[i][j]) for ref_idx in range(ref_pred.size(0)): #if ref_idx in ref_out[i][j].topk(num_ref_tgt[i][j])[1]: if ref_idx in ref_out[i][j].topk( num_ref_pred[i][j])[1]: ref_pred[ref_idx] = 1.0 correct_result = (ref_pred.long() == ref_tgt[i] [j].long()).sum().item() 
exact_match_result = torch.equal( ref_pred.long(), ref_tgt[i][j].long()) num_referents = ref_tgt[i][j].long().sum().item() else: correct_result = ((ref_out > 0).long( )[i][j] == ref_tgt.long())[i][j].sum().item() exact_match_result = torch.equal( (ref_out > 0).long()[i][j], ref_tgt.long()[i][j]) num_referents = ref_tgt.long()[i][j].sum().item() ref_pred = (ref_out > 0).long()[i][j] """ Add information to variables """ num_markables += 1 num_markables_counter[num_referents] += 1 num_markables_correct[num_referents] += correct_result # compute exact match if exact_match_result: exact_match += 1 exact_match_counter[ref_tgt.long()[i] [j].sum().item()] += 1 location_exact_match[i] += 1 if num_referents == 1: # temporal condition text_exact_match[markables[i] ["text"].lower()] += 1 init_exact_match[markables[i]["text"].lower(). split(" ")[0]] += 1 location_correct[i] += correct_result location_counter[i] += 1 if num_referents == 1: # temporal condition text_counter[markables[i]["text"].lower()] += 1 text_correct[markables[i] ["text"].lower()] += correct_result init_counter[markables[i]["text"].lower().split( " ")[0]] += 1 init_correct[markables[i]["text"].lower().split( " ")[0]] += correct_result init2num_referents[markables[i]["text"].lower().split( " ")[0]][num_referents] += 1 # test anaphora if markables[i]["text"].lower() in anaphora_list: total_anaphora += 1 if exact_match_result: correct_anaphora += 1 # keep track of model predictions for later visualization chat = [ chat for chat in dialog_corpus if chat['uuid'] == chat_id ] chat = chat[0] if markables[i][ 'markable_id'] not in model_referent_annotation[ chat_id]: model_referent_annotation[chat_id][ markables[i]['markable_id']] = {} model_referent_annotation[chat_id][ markables[i]['markable_id']]['referents'] = [] model_referent_annotation[chat_id][markables[i][ 'markable_id']]['ambiguous'] = False model_referent_annotation[chat_id][markables[i][ 'markable_id']]['unidentifiable'] = False for ent, is_referent in 
zip( chat['scenario']['kbs'][agents[j]], ref_pred.long().tolist()): #for ent, is_referent in zip(chat['scenario']['kbs'][agents[j]], (ref_out > 0).long()[i][j].tolist()): if is_referent: model_referent_annotation[chat_id][ markables[i] ['markable_id']]['referents'].append( "agent_{}_{}".format( agents[j], ent['id'])) else: ref_loss = None ref_correct = 0 ref_total = 0 sel_loss = sel_crit(sel_out, sel_tgt) sel_correct = (sel_out.max(dim=1)[1] == sel_tgt).sum().item() sel_total = sel_out.size(0) for i in range(sel_tgt.size(0)): # batch idx chat_id = chat_ids[i] sel_resuts = (sel_out.max(dim=1)[1] == sel_tgt) if sel_resuts[i]: select_correct[chat_id] = 1 else: select_correct[chat_id] = 0 if model.args.num_ref_weight > 0 and num_ref_out is not None: num_ref_out = num_ref_out.view(-1, num_ref_out.size(2)) num_ref_tgt = torch.transpose(num_ref_tgt, 0, 1).contiguous() num_ref_tgt = num_ref_tgt.view(-1) num_ref_loss = sel_crit(num_ref_out, num_ref_tgt) num_ref_correct = (num_ref_out.max( dim=1)[1] == num_ref_tgt).sum().item() num_ref_total = num_ref_tgt.size(0) for mi in range(num_ref_out.size(0)): model_pred = num_ref_out[mi].max(dim=0)[1].item() ground_truth = num_ref_tgt[mi].item() num_ref_confusion[ground_truth][model_pred] += 1 else: num_ref_correct = 0 num_ref_total = 0 test_lang_loss += lang_loss.item() test_select_loss += sel_loss.item() if ref_loss: test_reference_loss += ref_loss.item() test_select_correct += sel_correct test_select_total += sel_total test_reference_correct += ref_correct test_reference_total += ref_total test_num_ref_correct += num_ref_correct test_num_ref_total += num_ref_total if args.bleu_n > 0: ctx_h = model.ctx_encoder(ctx.transpose(0, 1)) my_utterance = None idx = 0 while True: if inpt[idx] == model.word_dict.word2idx['YOU:']: start = idx my_utterance = model.read_and_write( inpt[:idx], ctx_h, 30, temperature=args.temperature) my_utterance = model.word_dict.i2w(my_utterance) #print(my_utterance) while not inpt[idx] in [ 
model.word_dict.word2idx[stop_token] for stop_token in data.STOP_TOKENS ]: idx += 1 end = idx golden_utterance = inpt[start:end] golden_utterance = model.word_dict.i2w( golden_utterance) bleu_scores.append(100 * sentence_bleu( [golden_utterance], my_utterance, weights=[ 1 for i in range(4) if args.bleu_n == i ], #weights=[1 / args.bleu_n] * args.bleu_n, smoothing_function=SmoothingFunction().method7)) if inpt[idx] == model.word_dict.word2idx['<selection>']: break idx += 1 # Main results: # Dividing by the number of words in the input, not the tokens modeled, # because the latter includes padding test_lang_loss /= testset_stats['nonpadn'] test_select_loss /= len(testset) test_select_accuracy = test_select_correct / test_select_total test_reference_accuracy = test_reference_correct / test_reference_total if test_num_ref_total > 0: test_num_ref_accuracy = test_num_ref_correct / test_num_ref_total else: test_num_ref_accuracy = 0 print('testlangloss %.8f | testlangppl %.8f' % (test_lang_loss, np.exp(test_lang_loss))) print('testselectloss %.8f | testselectaccuracy %.6f' % (test_select_loss, test_select_accuracy)) print('testreferenceloss %.8f | testreferenceaccuracy %.6f' % (test_reference_loss, test_reference_accuracy)) print('reference_exact_match %.6f' % (exact_match / num_markables)) for k in num_markables_counter.keys(): print('{}: {:.4f} {:.4f} (out of {})'.format( k, num_markables_correct[k] / (num_markables_counter[k] * 7), exact_match_counter[k] / num_markables_counter[k], num_markables_counter[k])) print('test anaphora: {} (out of {})'.format( correct_anaphora / total_anaphora, total_anaphora)) if args.bleu_n > 0: print('average bleu score {}'.format(np.mean(bleu_scores))) # reference/selection correlation reference_score = [] selection_score = [] for chat_id in reference_correct.keys(): reference_score.append(reference_correct[chat_id] / reference_total[chat_id]) selection_score.append(select_correct[chat_id]) plt.xlabel('reference score', fontsize=14) 
plt.ylabel('selection score', fontsize=14) sns.regplot(x=reference_score, y=selection_score) plt.savefig('reference_selection_{}.png'.format(seed), dpi=300) plt.clf() reference_score = np.array(reference_score) selection_score = np.array(selection_score) print("reference selection correlation: {}".format( np.corrcoef(reference_score, selection_score))) # keep track of results for this run repeat_results["test_lang_loss"].append(test_lang_loss) repeat_results["test_select_loss"].append(test_select_loss) repeat_results["test_select_accuracy"].append(test_select_accuracy) repeat_results["test_reference_loss"].append(test_reference_loss) repeat_results["test_reference_accuracy"].append( test_reference_accuracy) repeat_results["test_num_ref_accuracy"].append(test_num_ref_accuracy) repeat_results["correlation_score"].append( np.corrcoef(reference_score, selection_score)[0][1]) repeat_results["num_markables_counter"].append( copy.copy(num_markables_counter)) repeat_results["exact_match_counter"].append( copy.copy(exact_match_counter)) repeat_results["num_markables_correct"].append( copy.copy(num_markables_correct)) repeat_results["reference_exact_match"].append(exact_match / num_markables) repeat_results["test_perplexity"].append(np.exp(test_lang_loss)) repeat_results["location_counter"].append(copy.copy(location_counter)) repeat_results["location_correct"].append(copy.copy(location_correct)) repeat_results["location_exact_match"].append( copy.copy(location_exact_match)) repeat_results["init_counter"].append(copy.copy(init_counter)) repeat_results["init_correct"].append(copy.copy(init_correct)) repeat_results["init_exact_match"].append(copy.copy(init_exact_match)) print("=================================\n\n") print("repeat test lang loss %.8f" % np.mean(repeat_results["test_lang_loss"])) print("repeat test select loss %.8f" % np.mean(repeat_results["test_select_loss"])) print("repeat test select accuracy %.8f ( %.8f )" % (np.mean(repeat_results["test_select_accuracy"]), 
np.std(repeat_results["test_select_accuracy"]))) print("repeat test reference loss %.8f" % np.mean(repeat_results["test_reference_loss"])) print("repeat test reference accuracy %.8f ( %.8f )" % (np.mean(repeat_results["test_reference_accuracy"]), np.std(repeat_results["test_reference_accuracy"]))) print("repeat test num ref accuracy %.8f ( %.8f )" % (np.mean(repeat_results["test_num_ref_accuracy"]), np.std(repeat_results["test_reference_accuracy"]))) print("repeat correlation score %.8f ( %.8f )" % (np.mean(repeat_results["correlation_score"]), np.std(repeat_results["correlation_score"]))) print("repeat correlation score %.8f ( %.8f )" % (np.mean(repeat_results["correlation_score"]), np.std(repeat_results["correlation_score"]))) print("repeat reference exact match %.8f ( %.8f )" % (np.mean(repeat_results["reference_exact_match"]), np.std(repeat_results["reference_exact_match"]))) print("repeat test perplexity %.8f ( %.8f )" % (np.mean(repeat_results["test_perplexity"]), np.std(repeat_results["test_perplexity"]))) for k in num_markables_counter.keys(): print("repeat accuracy and exact match:") num_markables = [] exact_match = [] exact_match_rate = [] num_markables_correct = [] for seed in range(len(seeds)): num_markables.append( repeat_results["num_markables_counter"][seed][k]) exact_match.append(repeat_results["exact_match_counter"][seed][k]) exact_match_rate.append( repeat_results["exact_match_counter"][seed][k] / repeat_results["num_markables_counter"][seed][k]) num_markables_correct.append( repeat_results["num_markables_correct"][seed][k] / (repeat_results["num_markables_counter"][seed][k] * 7)) print('{}: {:.5f} (std {}) {:.5f} (std {}) (count {})'.format( k, np.mean(num_markables_correct), np.std(num_markables_correct), np.mean(exact_match_rate), np.std(exact_match_rate), np.mean(num_markables))) dump_json(model_referent_annotation, "{}_referent_annotation.json".format(args.model_file)) print("exact match at each location:") markable_location_plot = [] 
exact_match_rate_plot = [] accuracy_plot = [] for loc in range(12): accuracy = [] exact_match_rate = [] total_count = 0 for seed in range(len(seeds)): if repeat_results["location_counter"][seed][loc] > 0: exact_match_rate.append( repeat_results["location_exact_match"][seed][loc] / repeat_results["location_counter"][seed][loc]) total_count += repeat_results["location_counter"][seed][loc] markable_location_plot.append(loc + 1) exact_match_rate_plot.append( repeat_results["location_exact_match"][seed][loc] / repeat_results["location_counter"][seed][loc]) accuracy_plot.append( repeat_results["location_correct"][seed][loc] / (7 * repeat_results["location_counter"][seed][loc])) if len(exact_match_rate) > 0: print('Loc @ {}: {:.5f} (std {:.5f}) (valid runs: {}, total: {})'. format(loc + 1, np.mean(exact_match_rate), np.std(exact_match_rate), len(exact_match_rate), total_count)) plt.xlabel('markable location', fontsize=14) plt.ylabel('exact match rate', fontsize=14) sns.lineplot(x=markable_location_plot, y=exact_match_rate_plot) plt.savefig('location_exact_match_rate.png', dpi=300) plt.clf() plt.xlabel('markable location', fontsize=14) plt.ylabel('accuracy', fontsize=14) sns.lineplot(x=markable_location_plot, y=accuracy_plot) plt.savefig('location_accuracy.png', dpi=300) plt.clf() plt.xlabel('markable position', fontsize=14) plt.ylabel('percentage', fontsize=14) sns.lineplot(x=markable_location_plot, y=accuracy_plot, legend="brief", label="accuracy") sns.lineplot(x=markable_location_plot, y=exact_match_rate_plot, legend="brief", label="exact match") plt.savefig('location_results.png', dpi=300) plt.clf() print("compute results based on initial token:") #for tok in model.word_dict.w2i.keys(): definite_toks = ["the"] indefinite_toks = ["a", "an"] definite_accuracies = [] indefinite_accuracies = [] other_accuracies = [] definite_exact_matches = [] indefinite_exact_matches = [] other_exact_matches = [] definite_counts = [] indefinite_counts = [] other_counts = [] for seed in 
range(len(seeds)): num_correct = 0 num_exact_match = 0 num_total = 0 for tok in definite_toks: num_total += repeat_results["init_counter"][seed][tok] num_correct += repeat_results["init_correct"][seed][tok] num_exact_match += repeat_results["init_exact_match"][seed][tok] definite_accuracies.append(num_correct / (7 * num_total)) definite_exact_matches.append(num_exact_match / num_total) definite_counts.append(num_total) num_correct = 0 num_exact_match = 0 num_total = 0 for tok in indefinite_toks: num_total += repeat_results["init_counter"][seed][tok] num_correct += repeat_results["init_correct"][seed][tok] num_exact_match += repeat_results["init_exact_match"][seed][tok] indefinite_accuracies.append(num_correct / (7 * num_total)) indefinite_exact_matches.append(num_exact_match / num_total) indefinite_counts.append(num_total) num_correct = 0 num_exact_match = 0 num_total = 0 for tok in repeat_results["init_counter"][seed].keys(): if tok not in definite_toks + indefinite_toks: num_total += repeat_results["init_counter"][seed][tok] num_correct += repeat_results["init_correct"][seed][tok] num_exact_match += repeat_results["init_exact_match"][seed][ tok] other_accuracies.append(num_correct / (7 * num_total)) other_exact_matches.append(num_exact_match / num_total) other_counts.append(num_total) print( "definite: accuracies {} (std {}), exact match rate {} (std {}), total count {} (std {})" .format(np.mean(definite_accuracies), np.std(definite_accuracies), np.mean(definite_exact_matches), np.std(definite_exact_matches), np.mean(definite_counts), np.std(definite_counts))) print( "indefinite: accuracies {} (std {}), exact match rate {} (std {}), total count {} (std {})" .format(np.mean(indefinite_accuracies), np.std(indefinite_accuracies), np.mean(indefinite_exact_matches), np.std(indefinite_exact_matches), np.mean(indefinite_counts), np.std(indefinite_counts))) print( "other: accuracies {} (std {}), exact match rate {} (std {}), total count {} (std {})" 
.format(np.mean(other_accuracies), np.std(other_accuracies), np.mean(other_exact_matches), np.std(other_exact_matches), np.mean(other_counts), np.std(other_counts))) valid_markables = 0 for chat_id in model_referent_annotation.keys(): for markable_id in model_referent_annotation[chat_id].keys(): if 'unidentifiable' in aggregated_referent_annotation[chat_id][ markable_id] and aggregated_referent_annotation[chat_id][ markable_id]['unidentifiable']: continue valid_markables += 1 print("model valid markables: {}".format(valid_markables)) valid_markables = 0 for chat_id in aggregated_referent_annotation.keys(): for markable_id in aggregated_referent_annotation[chat_id].keys(): if 'unidentifiable' in aggregated_referent_annotation[chat_id][ markable_id] and aggregated_referent_annotation[chat_id][ markable_id]['unidentifiable']: continue valid_markables += 1 print("aggregated valid markables: {}".format(valid_markables))
def __init__(self, word_dict, item_dict, context_dict, count_dict, args):
    """Build the layers of the variational RNN dialogue model.

    Args:
        word_dict: Vocabulary. Its length sizes the word embedding and the
            special-token mask; ``get_idx`` is used to look up the special
            tokens.
        item_dict: Its length is the output width of every selection
            decoder.
        context_dict: Its length is forwarded to the context encoder.
        count_dict: Stored on the instance; not otherwise used here.
        args: Hyper-parameter namespace. Reads ``domain``, ``nembed_word``,
            ``nembed_ctx``, ``nhid_ctx``, ``nhid_lang``, ``nhid_attn``,
            ``nhid_sel``, ``dropout`` and ``init_range``.
    """
    super(RnnVariationalModel, self).__init__()

    domain = get_domain(args.domain)

    self.word_dict = word_dict
    self.item_dict = item_dict
    self.context_dict = context_dict
    self.count_dict = count_dict
    self.args = args

    # NOTE: sub-modules are created in the same order as before so that
    # parameter initialisation under a fixed random seed is unchanged.

    # Word embedding followed by dropout.
    self.word_encoder = nn.Embedding(len(self.word_dict), args.nembed_word)
    self.word_encoder_dropout = nn.Dropout(args.dropout)

    # Context encoder followed by dropout.
    ctx_encoder_ty = MlpContextEncoder
    self.ctx_encoder = nn.Sequential(
        ctx_encoder_ty(len(self.context_dict), domain.input_length(),
                       args.nembed_ctx, args.nhid_ctx, args.dropout,
                       args.init_range),
        nn.Dropout(args.dropout))

    # Encoder GRU; its hidden size is twice nhid_lang.
    self.reader = nn.GRU(args.nhid_ctx + args.nembed_word,
                         args.nhid_lang * 2,
                         bias=True)
    self.reader_dropout = nn.Dropout(args.dropout)

    # Projects an nhid_lang-sized hidden state to word-embedding size.
    self.decoder = nn.Sequential(
        nn.Linear(args.nhid_lang, args.nembed_word),
        nn.Dropout(args.dropout))

    self.writer = nn.GRUCell(input_size=args.nhid_ctx + args.nembed_word,
                             hidden_size=args.nhid_lang,
                             bias=True)

    # Tie the weights of reader and writer.
    # NOTE(review): reader is built with hidden size 2 * nhid_lang while
    # writer is built with nhid_lang, so after tying the writer holds
    # parameters with the reader's shapes -- confirm this is intended.
    self.writer.weight_ih = self.reader.weight_ih_l0
    self.writer.weight_hh = self.reader.weight_hh_l0
    self.writer.bias_ih = self.reader.bias_ih_l0
    self.writer.bias_hh = self.reader.bias_hh_l0

    # Bidirectional GRU used by the selection head.
    self.sel_rnn = nn.GRU(input_size=args.nhid_lang + args.nembed_word,
                          hidden_size=args.nhid_attn,
                          bias=True,
                          bidirectional=True)
    self.sel_dropout = nn.Dropout(args.dropout)

    # 2 * nhid_attn matches the bidirectional sel_rnn output width.
    self.sel_encoder = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn + args.nhid_ctx, args.nhid_sel),
        nn.Tanh(),
        nn.Dropout(args.dropout))

    # Maps each 2 * nhid_attn vector to a single scalar.
    self.attn = nn.Sequential(
        torch.nn.Linear(2 * args.nhid_attn, args.nhid_attn),
        nn.Tanh(),
        torch.nn.Linear(args.nhid_attn, 1))

    # One linear decoder over the item vocabulary per selection slot.
    self.sel_decoders = nn.ModuleList([
        nn.Linear(args.nhid_sel, len(self.item_dict))
        for _ in range(domain.selection_length())
    ])

    self.init_weights()

    # Mask for disabling special tokens when generating sentences.
    # Built once here; the former uninitialised FloatTensor placeholder was
    # always overwritten by this assignment and has been dropped.
    self.special_token_mask = make_mask(len(word_dict), [
        word_dict.get_idx(w) for w in ['<unk>', 'YOU:', 'THEM:', '<pad>']
    ])