def add_args(parser):
    """Register base LSTM options plus the language-embedding size flag.

    Args:
        parser: an ``argparse.ArgumentParser`` (or fairseq argument group)
            to which the options are added.
    """
    # Inherit every option of the plain LSTM model first.
    LSTMModel.add_args(parser)
    parser.add_argument(
        '--lang-embedding-size',
        type=int,
        default=32,
        help='language embedding dimension',
    )
def test_assert_jit_vs_nonjit_(self):
    """Verify a TorchScript-compiled LSTMModel matches the eager model.

    Builds one model, scripts it, then feeds both the same randomly sized
    batches and asserts the two output tuples are element-wise equal.
    """
    task, parser = get_dummy_task_and_parser()
    LSTMModel.add_args(parser)
    args = parser.parse_args([])
    args.criterion = ""
    model = LSTMModel.build_model(args, task)
    model.eval()
    scripted_model = torch.jit.script(model)
    scripted_model.eval()
    vocab_size = len(task.source_dictionary)
    num_iters = 100  # renamed from `iter`, which shadowed the builtin
    # Inject random input and check output
    seq_len_tensor = torch.randint(1, 10, (num_iters,))
    num_samples_tensor = torch.randint(1, 10, (num_iters,))
    for i in range(num_iters):
        seq_len = seq_len_tensor[i]
        num_samples = num_samples_tensor[i]
        # Build the tensors directly; the original wrapped them in 1-tuples
        # via accidental trailing commas and then indexed [0].
        src_tokens = torch.randint(0, vocab_size, (num_samples, seq_len))
        src_lengths = torch.randint(1, seq_len + 1, (num_samples,))
        src_lengths, _ = torch.sort(src_lengths, descending=True)
        # Force the first sample to have seq_len
        src_lengths[0] = seq_len
        prev_output_tokens = torch.randint(0, vocab_size, (num_samples, 1))
        result = model(src_tokens, src_lengths, prev_output_tokens, None)
        scripted_result = scripted_model(
            src_tokens, src_lengths, prev_output_tokens, None
        )
        self.assertTensorEqual(result[0], scripted_result[0])
        self.assertTensorEqual(result[1], scripted_result[1])
def test_jit_and_export_lstm(self):
    """Script an LSTM model and check it survives a save/load round trip."""
    dummy_task, arg_parser = get_dummy_task_and_parser()
    LSTMModel.add_args(arg_parser)
    parsed_args = arg_parser.parse_args([])
    parsed_args.criterion = ""
    lstm = LSTMModel.build_model(parsed_args, dummy_task)
    self._test_save_and_load(torch.jit.script(lstm))
def add_args(parser):
    """Register base LSTM options plus this model's extra flags.

    Adds the language-embedding size, an optional path to a pretrained
    encoder checkpoint, and a switch to freeze that encoder.
    """
    # Start from the standard LSTM option set.
    LSTMModel.add_args(parser)
    parser.add_argument(
        '--lang-embedding-size',
        type=int,
        default=32,
        help='language embedding dimension',
    )
    parser.add_argument(
        '--encoder-model-path',
        type=str,
        default=None,
        help='path to pretrained model path',
    )
    parser.add_argument('--fix-encoder', action='store_true')
def add_args(parser):
    """Register base LSTM options plus multilingual parameter-sharing flags."""
    LSTMModel.add_args(parser)
    # Boolean switches controlling which components are shared across
    # languages; all default to off.
    sharing_flags = (
        ('--share-dictionaries', 'share word dictionaries across languages'),
        ('--share-encoder-embeddings', 'share encoder embeddings across languages'),
        ('--share-decoder-embeddings', 'share decoder embeddings across languages'),
        ('--share-encoders', 'share encoders across languages'),
        ('--share-decoders', 'share decoders across languages'),
    )
    for flag, help_text in sharing_flags:
        parser.add_argument(flag, action='store_true', help=help_text)
    parser.add_argument(
        '--lang-embedding-size',
        type=int,
        default=32,
        help='size of the language embedding',
    )
# Standalone script: loads a pretrained fairseq LSTM seq2seq model and defines
# a helper that scores token sequences with it.
from fairseq.models.lstm import LSTMModel
import pickle
import numpy as np
import os

# Pin this process to GPU 1; must be set before CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print("loading seq2seq model...")
checkpoint_path = "/local/ssd_1/stc/nlpcc_2017/"
seq2seq = LSTMModel.from_pretrained(checkpoint_path, checkpoint_file='checkpoint_best.pt', data_name_or_path=checkpoint_path, beam=2)
seq2seq.cuda()
seq2seq.eval()
criterion = seq2seq.task.build_criterion(seq2seq.args)
# NOTE(review): presumably makes the criterion return a per-token distribution
# instead of a scalar loss — confirm against the criterion implementation.
criterion.ret_dist = True
print("Done")


def seq2seq_model(inputs, inputs_idx, sources, sequence_length, id2sen):
    # NOTE(review): this function looks truncated in this chunk — `probs` is
    # never filled and nothing is returned in view; `id2sen` is also unused
    # here. Verify against the full file before relying on this docstring.
    sequence_length = sequence_length - 1
    probs = []
    output_batch = []
    for i in range(len(inputs)):
        target_sentence = sources[i]
        # Conditional LM scores of `inputs[i]` given the source sentence.
        output = seq2seq.get_clm(target_sentence, inputs[i], criterion)
        output_batch.append(output.cpu().data.numpy())
        prob = 1
        # Multiply the probability the model assigns to each next token.
        for j in range(sequence_length[i] - 1):
            prob *= output[j][inputs_idx[i][j + 1]]
# "europarl_fairseq_conv_es-en", "europarl_fairseq_es-en_large", # "europarl_fairseq_50k_transxs_es-en", ] summary = "" for fname in models: for bpe_size in [64, 32000]: path = f"/home/scarrion/datasets/scielo/constrained/datasets/bpe.{bpe_size}/{fname}/" if "lstm" in path: architecture = "LSTM" model = LSTMModel.from_pretrained(os.path.join(path, "checkpoints"), checkpoint_file='checkpoint_best.pt', data_name_or_path=os.path.join(path, "data-bin"), bpe='fastbpe', bpe_codes=os.path.join(path, f"tok/bpe.{bpe_size}/codes.en") ) elif "conv" in path: architecture = "CNN" model = FConvModel.from_pretrained(os.path.join(path, "checkpoints"), checkpoint_file='checkpoint_best.pt', data_name_or_path=os.path.join(path, "data-bin"), bpe='fastbpe', bpe_codes=os.path.join(path, f"tok/bpe.{bpe_size}/codes.en") ) else: architecture = "Transformer" model = TransformerModel.from_pretrained(os.path.join(path, "checkpoints"), checkpoint_file='checkpoint_best.pt', data_name_or_path=os.path.join(path, "data-bin"),
# Standalone script: loads a pretrained STC LSTM model and pickles its
# target-side vocabulary (token -> index mapping) to disk.
import os

# Pin this process to GPU 0; must be set before CUDA is initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from fairseq.models.lstm import LSTMModel
import jieba
import pickle as pkl

checkpoint_path = "/local/ssd_1/stc/stc_clm/"
stc = LSTMModel.from_pretrained(checkpoint_path,
                                checkpoint_file='checkpoint_best.pt',
                                data_name_or_path=checkpoint_path + 'stc_ori',
                                beam=5)
stc.eval()
# Fix: use a context manager so the file handle is closed even if pickling
# raises (the original paired bare open()/close() calls).
with open("/local/ssd_1/chengzhang/SA_dialog/dialogue/datas/stc_dict.pkl", 'wb') as f:
    pkl.dump(stc.tgt_dict.indices, f)
#print(type(stc.tgt_dict.indices), len(stc.tgt_dict.indices), stc.tgt_dict.indices)
#input_sent =
#input_sent = ' '.join(jieba.cut(''.join(input_sent.split()), cut_all=False))
#target_sent =
#target_sent = ' '.join(jieba.cut(''.join(target_sent.split()), cut_all=False))
#criterion = stc.task.build_criterion(stc.args)
#criterion.ret_dist = True
#loss = stc.get_clm(input_sent, target_sent, criterion)
#print(stc.translate(input_sent))
#print(loss)
# target_sent_id = [trg_dict[x] if x in trg_dict.indices else trg_dict.unk_index for x in target_sent.split()]