def __init__(self, opt, encoder_name='encoder_cand'):
    """
    Build the token embedding, the autoregressive encoder and the LM head.

    :param opt: option mapping; the transformer hyper-parameters are read
        from it by key.
    :param encoder_name: attribute name under which the encoder is attached
        to this module.
    """
    super().__init__()
    n_positions = get_n_positions_from_options(opt)
    dictionary = DictionaryAgent(opt)
    vocab_size = len(dictionary)
    null_idx = dictionary[dictionary.null_token]
    emb_size = opt['embedding_size']

    # Embedding table with the null token as padding index; weights are drawn
    # from N(0, emb_size ** -0.5) and the padding row is zeroed afterwards.
    embedding = torch.nn.Embedding(vocab_size, emb_size, null_idx)
    torch.nn.init.normal_(embedding.weight, mean=0, std=emb_size ** -0.5)
    torch.nn.init.constant_(embedding.weight[null_idx], 0)

    self.opt = opt
    self.vocab_size = vocab_size

    encoder = TransformerAREncoder(
        n_heads=opt['n_heads'],
        n_layers=opt['n_layers'],
        embedding_size=emb_size,
        ffn_size=opt['ffn_size'],
        embedding=embedding,
        dropout=opt['dropout'],
        attention_dropout=opt['attention_dropout'],
        relu_dropout=opt['relu_dropout'],
        padding_idx=null_idx,
        learn_positional_embeddings=opt['learn_positional_embeddings'],
        embeddings_scale=opt['embeddings_scale'],
        n_positions=n_positions,
        n_segments=opt.get('n_segments', 2),
        activation=opt['activation'],
        variant=opt['variant'],
        output_scaling=opt['output_scaling'],
    )
    self.encoder_name = encoder_name
    setattr(self, encoder_name, encoder)
    self.cls = LMPredictionHead(opt, vocab_size)
def test_gpt2_bpe_tokenize(self):
    """
    Tokenize a sample with the gpt2 tokenizer and map the tokens back to text.
    """
    sample_text = u'Hello, ParlAI! 😀'
    expected_tokens = [
        'Hello',
        ',',
        r'\xc4\xa0Par',
        'l',
        'AI',
        '!',
        r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
        r'\xc4\xa2',
    ]
    agent = DictionaryAgent(Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'}))
    self.assertEqual(agent.gpt2_tokenize(sample_text), expected_tokens)
    # A fresh list is handed to vec2txt so the expected list is not shared.
    self.assertEqual(agent.vec2txt(list(expected_tokens)), sample_text)
def build_dict(opt, skip_if_built=False):
    """
    Build (or load) the dictionary described by ``opt`` and return it.

    :param opt: options mapping, or a ParlaiParser (deprecated) whose
        arguments are parsed first.
    :param skip_if_built: when true and the dict file already exists, return
        ``None`` without constructing a dictionary agent.
    :return: the dictionary agent, or ``None`` when ``dict_file`` is unset or
        the build was skipped.
    """
    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: should be passed opt not Parser ]')
        opt = opt.parse_args()

    if not opt.get('dict_file'):
        print(
            'Tried to build dictionary but `--dict-file` is not set. Set '
            'this param so the dictionary can be saved.'
        )
        return

    print('[ setting up dictionary. ]')
    dict_path = opt['dict_file']

    if skip_if_built and os.path.isfile(dict_path):
        # Dictionary already built, skip all loading or setup
        print("[ dictionary already built .]")
        return None

    # Custom dictionary class when requested, default agent otherwise.
    custom_class = opt.get('dict_class')
    dictionary = str2class(custom_class)(opt) if custom_class else DictionaryAgent(opt)

    if os.path.isfile(dict_path):
        # Dictionary already built, return loaded dictionary agent
        print("[ dictionary already built .]")
        return dictionary

    # Single-threaded, unbatched, image-free pass over the data.
    ordered_opt = copy.deepcopy(opt)
    ordered_opt['numthreads'] = 1
    ordered_opt['batchsize'] = 1
    ordered_opt['image_mode'] = 'none'
    if ordered_opt['task'] == 'pytorch_teacher':
        build_task = ordered_opt.get('pytorch_buildteacher', '')
        if build_task != '':
            ordered_opt['task'] = build_task

    datatypes = ['train:ordered:stream']
    for flag, datatype in (
        ('dict_include_valid', 'valid:stream'),
        ('dict_include_test', 'test:stream'),
    ):
        if opt.get(flag):
            datatypes.append(datatype)

    # The example counter is shared across all datatypes.
    seen = 0
    for datatype in datatypes:
        ordered_opt['datatype'] = datatype
        world_dict = create_task(ordered_opt, dictionary)
        # pass examples to dictionary
        while not world_dict.epoch_done():
            seen += 1
            max_exs = opt['dict_maxexs']
            if max_exs > 0 and seen > max_exs:
                # don't wait too long...
                print('Processed {} exs, moving on.'.format(max_exs))
                break
            world_dict.parley()

    dictionary.save(dict_path, sort=True)
    print('[ dictionary built with {} tokens ]'.format(len(dictionary)))
    return dictionary
def test_basic_parse(self):
    """
    Check that a short sentence is added to the dictionary and parsed back.

    The parse is checked with the default vector type, ``list`` and ``tuple``.
    """
    parser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(parser)
    dictionary = DictionaryAgent(parser.parse_args([]))
    base_size = len(dictionary)

    dictionary.observe({'text': 'hello world'})
    dictionary.act()
    assert len(dictionary) - base_size == 2

    for parse_kwargs in ({}, {'vec_type': list}, {'vec_type': tuple}):
        vec = dictionary.parse('hello world', **parse_kwargs)
        assert len(vec) == 2
        assert vec[0] == base_size
        assert vec[1] == base_size + 1
def test_add_special_tokens(self):
    """
    Add a list of special tokens and check phrases round-trip unchanged.
    """
    special_toks_lst = ['MY', 'NAME', 'IS', 'EMILY']

    # create Dictionary Agent
    parser = ParlaiParser()
    parser.set_params(
        dict_tokenizer='bytelevelbpe',
        bpe_vocab=DEFAULT_BYTELEVEL_BPE_VOCAB,
        bpe_merge=DEFAULT_BYTELEVEL_BPE_MERGE,
        hf_skip_special_tokens=False,
    )
    agent = DictionaryAgent(parser.parse_args([]))
    agent.add_additional_special_tokens(special_toks_lst)
    self.assertEqual(agent.additional_special_tokens, special_toks_lst)

    for phrase in ('Hi what is up EMILY', 'What IS your NAME', 'That is MY dog'):
        round_tripped = agent.vec2txt(agent.txt2vec(phrase))
        self.assertEqual(phrase, round_tripped)
def test_swap_is_not_persisted_in_class(self):
    """
    Check that swapping components on a wrapped class leaves the base class alone.
    """
    opt = self._opt()
    dictionary = DictionaryAgent(opt)
    CustomFFN = type('CustomFFN', (TransformerFFN,), {})

    # Wrapped model class whose encoder layers use the custom feed-forward.
    custom_layer = TransformerEncoderLayer.with_components(feedforward=CustomFFN)
    custom_encoder = TransformerEncoder.with_components(layer=custom_layer)
    wrapped_class = TransformerGeneratorModel.with_components(encoder=custom_encoder)

    model = wrapped_class(opt=opt, dictionary=dictionary)
    model_ffn = (
        model.swappables.encoder.swappables.layer.swappables.feedforward
    )  # type: ignore
    assert model_ffn == CustomFFN

    # A model built from the unwrapped class must not see the swap.
    another_model = TransformerGeneratorModel(opt, dictionary)
    assert another_model.swappables != model.swappables
    assert issubclass(
        another_model.swappables.encoder, TransformerEncoder
    )  # type: ignore

    # Swap the wrapped class back to the stock feed-forward.
    stock_layer = TransformerEncoderLayer.with_components(feedforward=TransformerFFN)
    wrapped_class.swap_components(
        encoder=TransformerEncoder.with_components(layer=stock_layer)
    )
    one_more_model = wrapped_class(opt=opt, dictionary=dictionary)
    swapped_ffn = (
        one_more_model.swappables.encoder.swappables.layer.swappables.feedforward
    )  # type: ignore
    assert swapped_ffn == TransformerFFN
def replace30percent(self, turns):
    """
    Replace roughly 30% of the words in each turn with random dictionary words.

    ``self.opt['dict_file']`` is set to ``model_file + '.dict'`` and the
    dictionary is loaded from those options. Turns with more than two words
    always get at least three replacements.

    :param turns: iterable of whitespace-separated strings.
    :return: list of modified turn strings, in input order.
    """
    self.opt['dict_file'] = self.opt['model_file'] + '.dict'
    dictionary = DictionaryAgent(self.opt)
    vocab_size = len(dictionary)

    modified_turns = []
    for turn in turns:
        words = turn.split()
        n_words = len(words)
        n_replace = math.ceil(n_words * 0.3)
        if n_words > 2 and n_replace in (1, 2):
            n_replace = 3

        positions = sorted(
            np.random.choice(n_words, n_replace, replace=False).tolist()
        )
        originals = [words[pos] for pos in positions]

        # Draw one random word per position; a draw equal to the word being
        # replaced is discarded and retried.
        substitutes = []
        while len(substitutes) < n_replace:
            drawn = np.random.choice(vocab_size, 1, replace=False).tolist()
            candidate = dictionary.ind2tok[drawn[0]]
            if candidate != originals[len(substitutes)]:
                substitutes.append(candidate)

        for pos, new_word in zip(positions, substitutes):
            words[pos] = new_word
        modified_turns.append(' '.join(words))
    return modified_turns
def build_dict(opt):
    """
    Build the dictionary from the ordered training set and save it.

    Returns early when ``dict_file`` is missing from ``opt`` or the file
    already exists. A ``dict_class`` option of the form ``module:ClassName``
    selects a custom dictionary class.
    """
    if 'dict_file' not in opt:
        return
    print('[ setting up dictionary. ]')
    dict_path = opt['dict_file']
    if os.path.isfile(dict_path):
        # Dictionary already built
        print("[ dictionary already built .]")
        return

    if 'dict_class' not in opt:
        # Default dictionary class
        dictionary = DictionaryAgent(opt)
    else:
        # Custom dictionary class
        parts = opt['dict_class'].split(':')
        dict_class = getattr(importlib.import_module(parts[0]), parts[1])
        dictionary = dict_class(opt)

    # we use train set to build dictionary
    ordered_opt = copy.deepcopy(opt)
    ordered_opt['datatype'] = 'train:ordered'
    ordered_opt['numthreads'] = 1
    ordered_opt['batchsize'] = 1
    world_dict = create_task(ordered_opt, dictionary)

    # pass examples to dictionary
    for seen, _ in enumerate(world_dict, start=1):
        if seen > opt['dict_maxexs'] and opt['dict_maxexs'] > 0:
            # don't wait too long...
            print('Processed {} exs, moving on.'.format(opt['dict_maxexs']))
            break
        world_dict.parley()

    print('[ dictionary built. ]')
    dictionary.save(dict_path, sort=True)
def test_gpt2_bpe_tokenize(self):
    """
    Tokenize a sample with the gpt2 tokenizer and decode its indices back.
    """
    # The sample ends with the grinning face emoji.
    sample_text = u'Hello, ParlAI! \U0001f600'
    expected_tokens = [
        'Hello',
        ',',
        r'\xc4\xa0Par',
        'l',
        'AI',
        '!',
        r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
        r'\xc4\xa2',
    ]
    with testing_utils.capture_output():
        agent = DictionaryAgent(
            Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'})
        )

    self.assertEqual(agent.gpt2_tokenize(sample_text), expected_tokens)
    # vec2txt is fed a lazy stream of indices.
    indices = (agent.tok2ind[tok] for tok in expected_tokens)
    self.assertEqual(agent.vec2txt(indices), sample_text)
def test_tokenize_prefix_space(self):
    """
    Tests a bytelevel bpe tokenizer inside ParlAI.

    The expected token sequence is 'Ġ' followed by BYTELEVEL_BPE_RESULT.
    """
    parser = ParlaiParser()
    parser.set_params(
        dict_tokenizer='bytelevelbpe',
        bpe_vocab=DEFAULT_BYTELEVEL_BPE_VOCAB,
        bpe_merge=DEFAULT_BYTELEVEL_BPE_MERGE,
    )
    agent = DictionaryAgent(parser.parse_args([]))

    # The sample ends with the grinning face emoji.
    sample_text = u'Hello, ParlAI! \U0001f600'

    self.assertEqual(
        agent.bytelevelbpe_tokenize(sample_text),
        ['Ġ'] + BYTELEVEL_BPE_RESULT,
    )
    self.assertEqual(
        agent.vec2txt([agent.tok2ind[w] for w in ['Ġ'] + BYTELEVEL_BPE_RESULT]),
        sample_text,
    )
    self.assertEqual(
        agent.txt2vec(sample_text),
        [agent.tok2ind[w] for w in ['Ġ'] + BYTELEVEL_BPE_RESULT],
    )
def __init__(self, opt, num_features, padding_idx=0, start_idx=1,
             end_idx=2, longest_label=1):
    """
    Build the decoder, encoder and (optionally) the ranker.

    :param opt: option mapping; model hyper-parameters are read by key.
    :param num_features: passed as the first argument to Decoder and Encoder.
    :param padding_idx: index stored as ``NULL_IDX`` and used for padding.
    :param start_idx: index stored in the registered ``START`` buffer.
    :param end_idx: index stored as ``END_IDX``.
    :param longest_label: initial value of ``self.longest_label``.
    """
    super().__init__()
    self.opt = opt

    self.rank = opt['rank_candidates']
    self.attn_type = opt['attention']

    self.NULL_IDX = padding_idx
    self.END_IDX = end_idx
    self.register_buffer('START', torch.LongTensor([start_idx]))
    self.longest_label = longest_label

    rnn_class = Seq2seq.RNN_OPTS[opt['rnn_class']]
    self.decoder = Decoder(
        num_features, padding_idx=self.NULL_IDX, rnn_class=rnn_class,
        emb_size=opt['embeddingsize'], hidden_size=opt['hiddensize'],
        num_layers=opt['numlayers'], dropout=opt['dropout'],
        share_output=opt['lookuptable'] in ['dec_out', 'all'],
        attn_type=opt['attention'], attn_length=opt['attention_length'],
        attn_time=opt.get('attention_time'),
        bidir_input=opt['bidirectional'],
        numsoftmax=opt.get('numsoftmax', 1))

    # The encoder reuses the decoder's lookup table and/or RNN when the
    # options ask for sharing.
    shared_lt = (self.decoder.lt
                 if opt['lookuptable'] in ['enc_dec', 'all'] else None)
    shared_rnn = self.decoder.rnn if opt['decoder'] == 'shared' else None
    self.encoder = Encoder(
        num_features, padding_idx=self.NULL_IDX, rnn_class=rnn_class,
        emb_size=opt['embeddingsize'], hidden_size=opt['hiddensize'],
        num_layers=opt['numlayers'], dropout=opt['dropout'],
        bidirectional=opt['bidirectional'],
        shared_lt=shared_lt, shared_rnn=shared_rnn)

    if self.rank:
        self.ranker = Ranker(
            self.decoder,
            padding_idx=self.NULL_IDX,
            attn_type=opt['attention'])

    self.beam_log_freq = opt.get('beam_log_freq', 0.0)
    if self.beam_log_freq > 0.0:
        self.dict = DictionaryAgent(opt)
        self.beam_dump_filecnt = 0
        self.beam_dump_path = opt['model_file'] + '.beam_dump'
        # exist_ok avoids the check-then-create race when several workers
        # construct the model at the same time.
        os.makedirs(self.beam_dump_path, exist_ok=True)
def __init__(self, opt, shared=None):
    """
    Set up the dictionary, CUDA device selection and history options.

    :param opt: option mapping; reads ``no_cuda``, ``gpu``, ``truncate``,
        ``history_dialog`` and ``history_replies``.
    :param shared: optional mapping with ``opt`` and ``dict`` entries copied
        from an already-initialised instance.
    """
    super().__init__(opt, shared)
    if not shared:
        # Need to set up the model from scratch
        self.dict = DictionaryAgent(opt)
    else:
        # ... copy initialized data from shared table
        self.opt = shared['opt']
        self.dict = shared['dict']

    self.use_cuda = not opt['no_cuda'] and torch.cuda.is_available()
    if self.use_cuda:
        if not shared:
            print('[ Using CUDA ]')
        # torch.cuda.device(...) only builds a context manager and selects
        # nothing unless used in a `with`; set_device actually switches.
        torch.cuda.set_device(opt['gpu'])

    self.NULL_IDX = self.dict[self.dict.null_token]
    self.END_IDX = self.dict[self.dict.end_token]
    self.START_IDX = self.dict[self.dict.start_token]

    self.history = {}
    self.truncate = opt['truncate']
    self.history_dialog = opt['history_dialog']
    self.history_replies = opt['history_replies']
def build_dict(opt):
    """
    Build the dictionary from the ordered training set and save it.

    Nothing is built when ``dict_file`` is unset or the file already exists.
    A ``dict_class`` option selects a custom dictionary class via str2class.
    """
    dict_path = opt.get('dict_file')
    if not dict_path:
        print(
            'Tried to build dictionary but `--dict-file` is not set. Set '
            'this param so the dictionary can be saved.'
        )
        return

    print('[ setting up dictionary. ]')
    if os.path.isfile(dict_path):
        # Dictionary already built
        print("[ dictionary already built .]")
        return

    custom_class = opt.get('dict_class')
    if custom_class:
        # Custom dictionary class
        dictionary = str2class(custom_class)(opt)
    else:
        # Default dictionary class
        dictionary = DictionaryAgent(opt)

    # we use train set to build dictionary
    ordered_opt = copy.deepcopy(opt)
    for key, value in (
        ('datatype', 'train:ordered'),
        ('numthreads', 1),
        ('batchsize', 1),
    ):
        ordered_opt[key] = value
    world_dict = create_task(ordered_opt, dictionary)

    # pass examples to dictionary
    processed = 0
    for _ in world_dict:
        processed += 1
        if opt['dict_maxexs'] < processed and opt['dict_maxexs'] > 0:
            # don't wait too long...
            print('Processed {} exs, moving on.'.format(opt['dict_maxexs']))
            break
        world_dict.parley()

    print('[ dictionary built. ]')
    dictionary.save(dict_path, sort=True)
def __init__(self, opt, shared=None):
    """
    Initialise the agent: length penalty, dictionary and empty history.

    ``shared`` is accepted but not used here.
    """
    super().__init__(opt)
    self.id = 'IRBaselineAgent'

    penalty = opt['length_penalty']
    self.length_penalty = float(penalty)

    self.dictionary = DictionaryAgent(opt)
    self.opt = opt

    # Conversation state starts empty, with the episode marked finished.
    self.history = list()
    self.episodeDone = True
def __init__(self, opt, shared=None):
    """
    Initialize NER dictionary agent.

    A second dictionary for labels is built from a deep copy of ``opt`` whose
    ``dict_file`` gets the ``.labels.dict`` suffix; the caller's ``opt`` is
    left untouched.
    """
    labels_opt = copy.deepcopy(opt)
    labels_opt['dict_file'] += '.labels.dict'
    self.labels_dict = DictionaryAgent(labels_opt, shared)
    self.char_dict = get_char_dict()
    super().__init__(opt, shared)
def __init__(self, opt, shared=None):
    """
    Read the datatype and epoch options, load the data, build a dictionary.

    ``shared`` is accepted but not used here.
    """
    self.opt = opt

    datatype = opt.get('datatype')
    self.datatype = datatype
    self.training = datatype.startswith('train')
    self.num_epochs = opt.get('num_epochs', 0)

    self.image_loader = ImageLoader(opt)

    paths = _path(opt)
    data_path, self.image_path = paths
    is_unittest = opt.get('unittest', False)
    self._setup_data(data_path, is_unittest)

    self.dict_agent = DictionaryAgent(opt)
def test_byte_level_bpe_tokenize(self):
    """
    Tests a bytelevel bpe tokenizer inside ParlAI.

    The tokenize / vec2txt / txt2vec round trip is checked before and after
    saving and reloading the dictionary, followed by the special-token ids.
    """
    parser = ParlaiParser()
    parser.set_params(
        dict_tokenizer='bytelevelbpe',
        bpe_vocab=DEFAULT_BYTELEVEL_BPE_VOCAB,
        bpe_merge=DEFAULT_BYTELEVEL_BPE_MERGE,
        bpe_add_prefix_space=False,
    )
    opt = parser.parse_args([], print_args=False)
    agent = DictionaryAgent(opt)

    # The sample ends with the grinning face emoji.
    sample_text = u'Hello, ParlAI! \U0001f600'

    def check_round_trip():
        self.assertEqual(
            agent.bytelevelbpe_tokenize(sample_text), BYTELEVEL_BPE_RESULT
        )
        self.assertEqual(
            agent.vec2txt([agent.tok2ind[w] for w in BYTELEVEL_BPE_RESULT]),
            sample_text,
        )
        self.assertEqual(
            agent.txt2vec(sample_text),
            [agent.tok2ind[w] for w in BYTELEVEL_BPE_RESULT],
        )

    check_round_trip()

    vocab_size = agent.byte_level_bpe.tokenizer.get_vocab_size()
    with testing_utils.tempdir() as tmpdir:
        path = os.path.join(tmpdir, 'dict-checkpoint')
        agent.save(filename=path)
        agent.load(filename=path)

    # Test loading / saving
    self.assertEqual(vocab_size, agent.byte_level_bpe.tokenizer.get_vocab_size())
    check_round_trip()

    # Test special token ids are mapped correctly:
    # 4 special tokens are added in ParlAI dict in the beginning and at the
    # end for Hugging Face; null token would be 0 in ParlAI dict and
    # original_vocab in Hugging Face
    special_tokens = ("__null__", "__start__", "__end__", "__unk__")
    for expected_id, token in enumerate(special_tokens):
        assert agent.txt2vec(token) == [expected_id]
def __init__(self, opt, shared=None):
    """
    Build the ConvS2S agent: dictionary, conv encoder/decoder and optimizers.

    :param opt: option mapping; reads ``cuda``, ``gpu``, ``kernel_size``,
        ``embedding_size``, ``num_encoder_layers``, ``num_decoder_layers``,
        ``learning_rate`` and optionally ``model_file``.
    :param shared: when set, the model set-up below is skipped.
    """
    super().__init__(opt, shared)
    if opt['cuda']:
        print('[ Using CUDA ]')
        torch.cuda.set_device(opt['gpu'])
    # NOTE(review): the extent of this branch is assumed to run through the
    # model loading below, since everything after it needs self.dict, which
    # is only created here -- confirm against the intended structure.
    if not shared:
        self.dict = DictionaryAgent(opt)
        self.id = 'ConvS2S'
        self.EOS = self.dict.end_token
        self.SOS = self.dict.start_token
        self.use_cuda = opt['cuda']
        # End/start markers parsed into index tensors.
        self.EOS_TENSOR = torch.LongTensor(self.dict.parse(self.EOS))
        self.SOS_TENSOR = torch.LongTensor(self.dict.parse(self.SOS))
        self.kernel_size = opt['kernel_size']
        self.embedding_size = opt['embedding_size']
        self.num_enc_layers = opt['num_encoder_layers']
        self.num_dec_layers = opt['num_decoder_layers']
        self.longest_label = 2
        # Padding amounts derived from the kernel size: half the kernel
        # span for the encoder, the full span for the decoder.
        self.encoder_pad = (self.kernel_size - 1) // 2
        self.decoder_pad = self.kernel_size - 1
        self.criterion = nn.NLLLoss()
        self.embeder = layers.WordEmbeddingGenerator(self.dict.tok2ind, embedding_dim=self.embedding_size)
        self.encoder = layers.EncoderStack(self.embedding_size, 2*self.embedding_size, self.kernel_size, self.encoder_pad, self.num_enc_layers)
        self.decoder = layers.DecoderStack(self.embedding_size, 2 * self.embedding_size, self.kernel_size, self.decoder_pad, self.num_dec_layers)
        self.h2o = layers.HiddenToProb(self.embedding_size, len(self.dict))
        lr = opt['learning_rate']
        # One Adam optimizer per sub-module, all with the same learning rate.
        self.optims = {
            'embeds': optim.Adam(self.embeder.parameters(), lr=lr),
            'encoder': optim.Adam(self.encoder.parameters(), lr=lr),
            'decoder': optim.Adam(self.decoder.parameters(), lr=lr),
            'd2o': optim.Adam(self.h2o.parameters(), lr=lr),
        }
        if self.use_cuda:
            self.cuda()
        # Restore saved parameters when a model file exists on disk.
        if 'model_file' in opt and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' + opt['model_file'])
            self.load(opt['model_file'])
    self.episode_done = True
def __init__(self, opt, shared=None):
    """
    Build the MemNN agent: dictionary, model, optional decoder, optimizers.

    :param opt: option mapping. ``opt['cuda']`` is written by this
        constructor from ``no_cuda`` and CUDA availability.
    :param shared: when set, only ``answers`` is taken from it and no model
        is built.
    :raises NotImplementedError: for an unsupported ``output`` or
        ``optimizer`` option.
    """
    opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
    if opt['cuda']:
        print('[ Using CUDA ]')
        # torch.cuda.device(...) only builds a context manager and selects
        # nothing unless used in a `with`; set_device actually switches.
        torch.cuda.set_device(opt['gpu'])

    if not shared:
        self.opt = opt
        self.id = 'MemNN'
        self.dict = DictionaryAgent(opt)
        self.answers = [None] * opt['batchsize']

        self.model = MemNN(opt, len(self.dict))
        self.mem_size = opt['mem_size']
        self.loss_fn = CrossEntropyLoss()

        self.decoder = None
        self.longest_label = 1
        # Start/end markers parsed into index tensors.
        self.END = self.dict.end_token
        self.END_TENSOR = torch.LongTensor(self.dict.parse(self.END))
        self.START = self.dict.start_token
        self.START_TENSOR = torch.LongTensor(self.dict.parse(self.START))
        if opt['output'] == 'generate' or opt['output'] == 'g':
            self.decoder = Decoder(opt['embedding_size'], opt['embedding_size'],
                                   opt['rnn_layers'], opt, self.dict)
        elif opt['output'] != 'rank' and opt['output'] != 'r':
            raise NotImplementedError('Output type not supported.')

        # Only trainable model parameters are handed to the optimizer.
        optim_params = [p for p in self.model.parameters() if p.requires_grad]
        lr = opt['learning_rate']
        if opt['optimizer'] == 'sgd':
            self.optimizers = {'memnn': optim.SGD(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.SGD(self.decoder.parameters(), lr=lr)
        elif opt['optimizer'] == 'adam':
            self.optimizers = {'memnn': optim.Adam(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.Adam(self.decoder.parameters(), lr=lr)
        else:
            raise NotImplementedError('Optimizer not supported.')

        if opt['cuda']:
            self.model.share_memory()
            if self.decoder is not None:
                self.decoder.cuda()

        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' + opt['model_file'])
            self.load(opt['model_file'])
    else:
        self.answers = shared['answers']

    self.history = {}
    self.episode_done = True
    self.last_cands, self.last_cands_list = None, None
    super().__init__(opt, shared)
def __init__(self, opt, shared=None):
    """
    Build the TensorFlow learning-to-rank graph, optimizer and saver.

    :raises NotImplementedError: when ``shared`` is set; shared instances
        are not supported.
    """
    # initialize defaults first
    super().__init__(opt, shared)
    self.id = 'LearningToRank'
    if shared:
        raise NotImplementedError

    # Not a shared instance from here on, so do the full initialization.
    self.sess = tf.Session()
    self.dict = DictionaryAgent(opt)
    self.EOS = self.dict.end_token
    self.observation = {'text': self.EOS, 'episode_done': True}

    self.learning_to_rank_config = {
        'max_context_turns': 10,
        'max_sequence_length': 60,
        'embedding_size': 256,
        'vocab_size': len(self.dict),
        'rnn_cell': 'GRUCell',
        'dropout_prob': 0.3,
        'mlp_sizes': [16],
        'l2_coef': 1e-5,
        'lr': 0.0001,
        'optimizer': 'AdamOptimizer',
        'answer_candidates_number': 20
    }
    config = self.learning_to_rank_config

    self.X, self.pred, self.y = create_model_personachat(**config)
    self.batch_sample_weight = tf.placeholder(
        tf.float32, [None, 1], name='sample_weight')

    # Define loss and optimizer
    self.loss_op = get_loss_function(
        self.pred, self.y, self.batch_sample_weight,
        l2_coef=config['l2_coef'])

    self.global_step = tf.Variable(0, trainable=False)
    self.sess.run(tf.assign(self.global_step, 0))
    self.learning_rate = tf.train.cosine_decay(
        config['lr'], self.global_step, 2000000, alpha=0.001)

    optimizer_class = getattr(tf.train, config['optimizer'])
    self.optimizer = optimizer_class(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.minimize(self.loss_op, self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
    self.sess.run(tf.global_variables_initializer())

    self.episode_done = True
def get_dictionary(PATH: str) -> DictionaryAgent:
    """
    Load a dictionary.

    :param PATH: location handed to ``DictionaryAgent.load``
    :return: the loaded dictionary
    """
    loaded = DictionaryAgent(opt=Opt())
    loaded.load(PATH)
    return loaded
def __init__(self, **kwargs):
    """
    Create the dictionary and pass its special tokens to the parent class.

    The unk, pad, cls and sep tokens are taken from the dictionary's unk,
    null, start and end tokens; ``kwargs`` are forwarded unchanged.
    """
    self.dict = DictionaryAgent(load_poly_encoder_opt())
    special_tokens = {
        'unk_token': self.dict.unk_token,
        'pad_token': self.dict.null_token,
        'cls_token': self.dict.start_token,
        'sep_token': self.dict.end_token,
    }
    super().__init__(**special_tokens, **kwargs)
def __init__(self, opt, shared=None):
    """
    Reuse the shared search engine when available, otherwise build one.

    :param opt: option mapping; reads ``f1_overlap_threshold``.
    :param shared: optional mapping holding a ``search_engine`` entry.
    """
    super().__init__(opt)
    if not shared:
        engine = OverlapSearchEngine(
            opt, DictionaryAgent(opt), shared=shared
        )  # type: ignore
    else:
        engine = shared['search_engine']
    self.search_engine = engine
    self.threshold = opt['f1_overlap_threshold']
    self.dummy = torch.zeros(1, 1, dtype=torch.long)
def __init__(self, opt, shared=None):
    """
    Build the Fairseq conv model, criterion and trainer for a fresh instance.

    :param opt: option mapping. When ``model_file`` points to an existing
        file, the stored options override it via ``_override_opt``.
    :param shared: when set, model construction is skipped and only
        ``reset()`` runs.
    """
    # initialize defaults first
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full
        # initialization. if shared is set, only set up shared members.
        saved_state = None
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            # load model parameters if available
            print('Loading existing model params from ' + opt['model_file'])
            new_opt, saved_state = self.load(opt['model_file'])
            # override options with stored ones
            opt = self._override_opt(new_opt)

        self.args = OptWrapper(opt)
        self.parlai_dict = DictionaryAgent(opt)
        self.fairseq_dict = _make_fairseq_dict(self.parlai_dict)
        self.id = 'Fairseq'
        # A non-positive truncate option disables truncation.
        self.truncate = opt['truncate'] if opt['truncate'] > 0 else None

        self.EOS = self.fairseq_dict[self.fairseq_dict.eos()]
        self.EOS_TENSOR = (torch.LongTensor(1, 1).fill_(
            self.fairseq_dict.eos()))
        self.NULL_IDX = self.fairseq_dict.pad()

        # NOTE(review): eval() runs the layer/attention option strings as
        # Python code. These values can come from the command line or a
        # loaded model file; do not feed them untrusted input.
        encoder = fconv.FConvEncoder(self.fairseq_dict,
                                     embed_dim=self.args.encoder_embed_dim,
                                     convolutions=eval(
                                         self.args.encoder_layers),
                                     dropout=self.args.dropout,
                                     max_positions=self.args.max_positions)
        decoder = fconv.FConvDecoder(
            self.fairseq_dict,
            embed_dim=self.args.decoder_embed_dim,
            convolutions=eval(self.args.decoder_layers),
            out_embed_dim=self.args.decoder_out_embed_dim,
            attention=eval(self.args.decoder_attention),
            dropout=self.args.dropout,
            max_positions=self.args.max_positions)
        self.model = fconv.FConvModel(encoder, decoder)

        # from fairseq's build_criterion()
        if self.args.label_smoothing > 0:
            self.criterion = criterions.LabelSmoothedCrossEntropyCriterion(
                self.args.label_smoothing, self.NULL_IDX)
        else:
            self.criterion = criterions.CrossEntropyCriterion(
                self.args, self.fairseq_dict)

        self.trainer = MultiprocessingTrainer(self.args, self.model,
                                              self.criterion)
        # Restore the state returned by load() above, if any.
        if saved_state is not None:
            self.set_states(saved_state)
    self.reset()
def __init__(self, opt, shared=None):
    """
    Initialize agent.

    :param opt: option mapping; reads ``length_penalty`` and, optionally,
        ``label_candidates_file``.
    :param shared: accepted but not used here.

    When ``label_candidates_file`` is set, ``self.label_candidates`` holds
    the file's content split on newlines; otherwise the attribute is not
    created.
    """
    super().__init__(opt)
    self.id = 'IRBaselineAgent'
    self.length_penalty = float(opt['length_penalty'])
    self.dictionary = DictionaryAgent(opt)
    self.opt = opt
    self.history = []
    self.episodeDone = True
    candidates_path = opt.get('label_candidates_file')
    if candidates_path:
        # Context manager closes the handle, which was previously leaked.
        with open(candidates_path) as f:
            self.label_candidates = f.read().split('\n')
def __init__(self, opt, shared=None):
    """
    Build the HCIAE agent: dictionary, model, decoder and optimizers.

    :param opt: option mapping. ``opt['cuda']`` is written by this
        constructor from ``no_cuda`` and CUDA availability.
    :param shared: when set, only ``answers`` is taken from it and no model
        is built.
    :raises NotImplementedError: for an unsupported ``optimizer`` option.
    """
    super().__init__(opt, shared)
    opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
    if opt['cuda']:
        print('[Using CUDA]')
        # torch.cuda.device(...) only builds a context manager and selects
        # nothing unless used in a `with`; set_device actually switches.
        torch.cuda.set_device(opt['gpu'])

    if not shared:
        self.opt = opt
        self.id = 'HCIAE'
        self.dict = DictionaryAgent(opt)
        self.answers = [None] * opt['batchsize']

        # Start/end markers parsed into index tensors.
        self.END = self.dict.end_token
        self.END_TENSOR = torch.LongTensor(self.dict.parse(self.END))
        self.START = self.dict.start_token
        self.START_TENSOR = torch.LongTensor(self.dict.parse(self.START))
        self.mem_size = 10
        self.longest_label = 1
        self.writer = SummaryWriter()
        self.writer_idx = 0

        lr = opt['learning_rate']
        self.loss_fn = CrossEntropyLoss()
        self.model = HCIAE(opt, self.dict)
        self.decoder = Decoder(opt['hidden_size'], opt['hidden_size'],
                               opt['rnn_layers'], opt, self.dict)

        # Only trainable model parameters are handed to the optimizer.
        optim_params = [p for p in self.model.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizers = {'hciae': optim.SGD(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.SGD(self.decoder.parameters(), lr=lr)
        elif opt['optimizer'] == 'adam':
            self.optimizers = {'hciae': optim.Adam(optim_params, lr=lr)}
            if self.decoder is not None:
                self.optimizers['decoder'] = optim.Adam(self.decoder.parameters(), lr=lr)
        else:
            raise NotImplementedError('Optimizer not supported.')

        if opt['cuda']:
            self.decoder.cuda()

        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            # NOTE(review): the message is printed but nothing is loaded
            # here -- confirm whether a load call is missing.
            print('Loading existing model parameters from ' + opt['model_file'])
    else:
        self.answers = shared['answers']

    self.episode_done = True
    self.img_feature = None
    self.last_cands, self.last_cands_list = None, None
def __init__(self, opt, shared=None):
    """
    Set up model if shared params not set, otherwise no work to do.

    Shared instances take ``dict`` and ``model`` from ``shared``. A fresh
    instance builds the dictionary and the Starspace model, then either
    loads ``model_file`` or initialises embeddings.
    """
    super().__init__(opt, shared)
    opt = self.opt
    self.reset_metrics()
    self.id = 'Starspace'
    self.NULL_IDX = 0
    self.cands = torch.LongTensor(1, 1, 1)
    self.ys_cache = []
    self.ys_cache_sz = opt['cache_size']
    truncate = opt['truncate']
    self.truncate = truncate if truncate > 0 else None
    self.history = {}
    self.debugMode = False

    if not shared:
        print("[ creating StarspaceAgent ]")
        # this is not a shared instance of this class, so do full init
        model_file = opt.get('model_file')
        if model_file and (
            os.path.isfile(opt.get('model_file') + '.dict')
            or (opt['dict_file'] is None)
        ):
            # set default dict-file if not set
            opt['dict_file'] = opt['model_file'] + '.dict'
        # load dictionary and basic tokens & vectors
        self.dict = DictionaryAgent(opt)

        self.model = Starspace(opt, len(self.dict), self.dict)
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            self.load(opt['model_file'])
        else:
            self._init_embeddings()
        self.model.share_memory()
    else:
        torch.set_num_threads(1)
        # set up shared properties
        self.dict = shared['dict']
        self.model = shared['model']

    # set up modules
    self.criterion = torch.nn.CosineEmbeddingLoss(
        margin=opt['margin'], size_average=False
    )
    self.reset()

    self.fixedCands = False
    self.fixedX = None
    if self.opt.get('fixed_candidates_file'):
        self.fixedCands_txt = load_cands(self.opt.get('fixed_candidates_file'))
        # Each candidate is parsed and given a leading dimension of size 1.
        self.fixedCands = [
            torch.LongTensor(self.parse(cand)).unsqueeze(0)
            for cand in self.fixedCands_txt
        ]
        print("[loaded candidates]")
def test_gpt2_bpe_tokenize(self):
    """
    Tokenize a sample with the gpt2 tokenizer and decode its indices back.
    """
    # The sample ends with the grinning face emoji.
    sample_text = u'Hello, ParlAI! \U0001f600'
    agent = DictionaryAgent(
        Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'})
    )

    tokens = agent.gpt2_tokenize(sample_text)
    self.assertEqual(tokens, GPT2_BPE_RESULT)

    # vec2txt is fed a lazy stream of indices.
    decoded = agent.vec2txt(agent.tok2ind[w] for w in GPT2_BPE_RESULT)
    self.assertEqual(decoded, sample_text)
def test_gpt2_bpe_tokenize(self):
    """
    Tokenize a sample with the gpt2 tokenizer and decode its indices back.

    The datapath is taken from the default ParlaiParser arguments.
    """
    default_args = ParlaiParser().parse_args([], print_args=False)
    agent = DictionaryAgent(
        Opt({'dict_tokenizer': 'gpt2', 'datapath': default_args['datapath']})
    )

    # The sample ends with the grinning face emoji.
    sample_text = u'Hello, ParlAI! \U0001f600'

    tokens = agent.gpt2_tokenize(sample_text)
    self.assertEqual(tokens, GPT2_BPE_RESULT)

    # vec2txt is fed a lazy stream of indices.
    decoded = agent.vec2txt(agent.tok2ind[w] for w in GPT2_BPE_RESULT)
    self.assertEqual(decoded, sample_text)
def __init__(self, opt, shared=None):
    """
    Build the RNN agent: dictionary, embedding, recurrent core, projection
    and one SGD optimizer per sub-module.

    All set-up is skipped for shared instantiations, which only get
    ``episode_done``.
    """
    super().__init__(opt, shared)
    if not shared:
        # don't enter this branch for shared instantiations
        opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
        if opt['cuda']:
            print('[ Using CUDA ]')
            torch.cuda.set_device(opt['gpu'])

        self.id = 'RNN'
        self.dict = DictionaryAgent(opt)
        self.observation = {}
        self.rnn_type = opt['rnntype']
        self.hidden_size = opt['hiddensize']
        self.num_layers = opt['numlayers']
        self.learning_rate = opt['learningrate']
        self.use_cuda = opt.get('cuda', False)
        self.path = opt.get('model_file', None)

        vocab_size = len(self.dict)
        hidden = self.hidden_size
        n_layers = self.num_layers
        drop_p = opt['dropout']

        self.embedding = nn.Embedding(
            vocab_size, hidden, padding_idx=0, scale_grad_by_freq=True
        )
        # Only 'GRU' and 'LSTM' create self.rnn; other values leave it unset.
        if self.rnn_type == 'GRU':
            self.rnn = nn.GRU(hidden, hidden, n_layers, dropout=drop_p)
        elif self.rnn_type == 'LSTM':
            self.rnn = nn.LSTM(hidden, hidden, n_layers, dropout=drop_p)
        self.dropout = nn.Dropout(drop_p)
        self.projection = nn.Linear(hidden, vocab_size)
        self.softmax = nn.LogSoftmax()
        self.loss = nn.NLLLoss()

        step_size = self.learning_rate
        self.optims = {
            'embedding': optim.SGD(self.embedding.parameters(), lr=step_size),
            'rnn': optim.SGD(self.rnn.parameters(), lr=step_size),
            'projection': optim.SGD(self.projection.parameters(), lr=step_size),
        }

        if self.use_cuda:
            self.cuda()
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' + opt['model_file'])
            self.load(opt['model_file'])

    self.episode_done = True