Example 1
def main():
    data_path = './data/chatbot.txt'
    voc, pairs = loadPrepareData(data_path)

    # Drop sentence pairs that contain rare words
    MIN_COUNT = Config.MIN_COUNT
    pairs = trimRareWords(voc, pairs, MIN_COUNT)

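    # Pre-sample all training batches up front, one batch per training step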
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(Config.batch_size)])
                        for _ in range(Config.total_step)]

    # Word embeddings
    embedding = nn.Embedding(voc.num_words, Config.hidden_size)

    # Build the encoder and decoder
    encoder = EncoderRNN(Config.hidden_size, embedding, Config.encoder_n_layers, Config.dropout)
    decoder = LuongAttnDecoderRNN(Config.attn_model, embedding, Config.hidden_size, voc.num_words, Config.decoder_n_layers, Config.dropout)

    # Set up the optimizers
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=Config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=Config.learning_rate * Config.decoder_learning_ratio)

    start_iteration = 1
    save_every = 4000   # save a checkpoint every this many steps

    for iteration in range(start_iteration, Config.total_step + 1):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        start_time = time.time()
        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, Config.batch_size, Config.clip)

        time_str = datetime.datetime.now().isoformat()
        log_str = "time: {}, Iteration: {}; Percent complete: {:.1f}%; loss: {:.4f}, spend_time: {:6f}".format(time_str, iteration, iteration / Config.total_step * 100, loss, time.time() - start_time)
        rainbow(log_str)

        # Save checkpoint
        if iteration % save_every == 0:
            save_path = './save_model/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)

            torch.save({
                'iteration': iteration,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(save_path, '{}_{}_model.tar'.format(iteration, 'checkpoint')))
Example 2
class RNNDotJudgeNet(nn.Module):
    def __init__(self, word_vec_dim, bidir=True, rnn_cell='LSTM'):
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = word_vec_dim
        self.encoder = EncoderRNN(self.word_vec_dim,
                                  self.word_vec_dim,
                                  bidir=bidir,
                                  rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks, keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs, candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candidates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]

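        # a zero vector serves as a separator between keywords and candidates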
        sep = torch.zeros(batch_size, 1, self.word_vec_dim)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        lengths = [query_string_transposed.shape[0]]

        encoder_outputs, encoder_states = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # (n_layers=1, batch_size, hidden_state_size)

        encoder_hidden = torch.sum(encoder_states[0],
                                   dim=0).view(batch_size,
                                               self.hidden_state_size, 1)
        products = torch.bmm(Cs,
                             encoder_hidden)  # (batch_size, n_candidates, 1)

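        # The "bad" logit is the exact negative of the "good" logit, so a
        # softmax over the last dimension reduces to sigmoid(2 * product).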
        rest = -1 * products
        result = torch.cat([products, rest], dim=-1)

        return result
Example 3
def train(args):
    input_lang, output_lang, pairs = prepareData(args)
    print(random.choice(pairs))

    model = {}
    model['hidden_size'] = 1000
    model['dropout'] = 0.1
    model['input_lang'] = input_lang
    model['output_lang'] = output_lang
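    # +2 presumably reserves room for the SOS/EOS markers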
    model['max_length'] = max(input_lang.max_length,
                              output_lang.max_length) + 2
    print('Max length: {}'.format(model['max_length']))

    encoder1 = EncoderRNN(input_lang.n_words,
                          model['hidden_size']).to(getDevice())
    encoder1.train()
    attn_decoder1 = AttnDecoderRNN(model['hidden_size'],
                                   output_lang.n_words,
                                   dropout_p=model['dropout'],
                                   max_length=model['max_length']).to(
                                       getDevice())
    attn_decoder1.train()

    n_iters = 30000
    training_pairs = [
        tensorsFromPair(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]
    trainIters(training_pairs,
               encoder1,
               attn_decoder1,
               n_iters,
               print_every=1000,
               optim=args.optim,
               learning_rate=args.learning_rate,
               max_length=model['max_length'])

    print('saving models...')
    model['encoder_state'] = encoder1.state_dict()
    model['decoder_state'] = attn_decoder1.state_dict()
    torch.save(
        model,
        "data/{}_model_checkpoint.pth".format(args.phase.split('_')[-1]))
Example 4
def inference(args):
    model = torch.load("data/sc_question_model_checkpoint.pth")
    model['encoder'] = EncoderRNN(model['input_lang'].n_words,
                                  model['hidden_size']).to(getDevice())
    model['encoder'].load_state_dict(model['encoder_state'])
    model['encoder'].eval()
    model['decoder'] = AttnDecoderRNN(model['hidden_size'],
                                      model['output_lang'].n_words,
                                      dropout_p=model['dropout'],
                                      max_length=model['max_length']).to(
                                          getDevice())
    model['decoder'].load_state_dict(model['decoder_state'])
    model['decoder'].eval()

    with open('../executor/parse_results/sc_validation.json') as f:
        anns = json.load(f)

    out = {}
    for ann in tqdm(anns):
        v = {}
        v['scene_index'] = ann['scene_index']
        v['video_filename'] = ann['video_filename']
        v['questions'] = []

        for ann_q in ann['questions']:
            if ann_q['question_type'] == 'descriptive':
                continue

            q_program_pred, _ = evaluate(model['encoder'],
                                         model['decoder'],
                                         normalizeString(ann_q['question']),
                                         model['input_lang'],
                                         model['output_lang'],
                                         max_length=model['max_length'])
            if q_program_pred[-1] == '<EOS>':
                q_program_pred = q_program_pred[:-1]

            q = {}
            q['question_program'] = q_program_pred
            q['question'] = ann_q['question']
            q['question_type'] = '{}_single_choice'.format(
                ann_q['question_type'])
            q['question_subtype'] = ann_q['program'][-1]
            q['program_gt'] = ann_q['program']
            q['answer'] = ann_q['answer']

            v['questions'].append(q)

        out[v['scene_index']] = v

    out_path = '../executor/parse_results/sc_val_reproduced.json'
    print('Writing output to {}'.format(out_path))
    with open(out_path, 'w') as fout:
        json.dump(out, fout, indent=4)
Example 5
def evaluate(vocab: Vocabulary, corpus_filename: str, encoder: EncoderRNN,
             decoder: AttnDecoderRNN, max_src_length: int,
             max_tgt_length: int):

    device: torch.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    encoder.to(device)
    decoder.to(device)

    encoder.eval()
    decoder.eval()

    with torch.no_grad():

        corpus = Corpus(
            filename=corpus_filename,
            max_src_length=max_src_length,  # decoder.max_src_length,
            vocab=vocab,
            device=device)

        for batch in torch.utils.data.DataLoader(dataset=corpus, batch_size=1):

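            # (batch_size=1, seq_len) -> (seq_len, batch_size=1)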
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)

            encoder_outputs = encoder.encode_sequence(input_tensor)

            decoder_output = decoder.decode_sequence(
                encoder_outputs=encoder_outputs,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)
            _, top_i = decoder_output.topk(k=1)

            predictions = top_i.squeeze(dim=2).squeeze(dim=1).tolist()

            predicted_string = "".join(
                [corpus.characters[i].string for i in predictions])

            print(predicted_string)
Example 6
def run_training(*,
                 config: argparse.Namespace) -> None:

    import pickle

    vocab: Vocabulary = pickle.load(open(config.vocab, "rb"))

    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    training_corpus = Corpus(vocab=vocab,
                             filename=config.corpus,
                             max_src_length=config.max_src_length,
                             device=device)

    if config.continue_training:
        encoder1 = torch.load(config.encoder, map_location=device)
        attn_decoder1 = torch.load(config.decoder, map_location=device)
    else:
        encoder1: EncoderRNN = EncoderRNN(input_size=len(training_corpus.characters),
                                          embedding_size=config.encoder_embedding_size,
                                          hidden_size=config.encoder_hidden_size,
                                          num_hidden_layers=config.encoder_hidden_layers).to(device=device)

        attn_decoder1 = AttnDecoderRNN(embedding_size=config.decoder_embedding_size,
                                       decoder_hidden_size=config.decoder_hidden_size,
                                       encoder_hidden_size=config.encoder_hidden_size,
                                       num_hidden_layers=config.decoder_hidden_layers,
                                       output_size=len(training_corpus.characters),
                                       dropout_p=config.decoder_dropout,
                                       max_src_length=training_corpus.word_tensor_length).to(device=device)

    train_iters(corpus=training_corpus,
                encoder=encoder1,
                decoder=attn_decoder1,
                device=device,
                n_iters=config.num_epochs,
                batch_size=config.batch_size,
                print_every=config.print_every,
                learning_rate=config.learning_rate,
                teacher_forcing_ratio=config.teacher_forcing_ratio)

    print(f"Saving encoder to {config.encoder}...")
    torch.save(encoder1.to(device=torch.device("cpu")), config.encoder)

    print(f"Saving decoder to {config.decoder}...")
    torch.save(attn_decoder1.to(device=torch.device("cpu")), config.decoder)
Example 7
def main():
    parser = ArgumentParser("Train Seq2Seq Attention Model")
    parser.add_argument("-f",
                        "--text_dir",
                        help="Path to all the full text documents",
                        required=True)
    parser.add_argument("-s",
                        "--summary_dir",
                        help="Path to all the summary documents",
                        required=False)
    parser.add_argument("-o",
                        "--output_dir",
                        help="Path to save the model",
                        required=True)
    parser.add_argument("--hidden_units",
                        help="Number of hidden units",
                        type=int,
                        default=256)
    parser.add_argument("--dropout",
                        help="Dropout value in Attention Decoder",
                        type=float,
                        default=0.1)
    parser.add_argument(
        "--trim_dataset",
        help="Trim the dataset to a small number for testing purposes",
        required=False,
        type=int)
    parser.add_argument("--debug",
                        help="Train the model in debug mode",
                        action="store_true",
                        required=False)
    parser.add_argument("--print_every",
                        help="Print every n iterations",
                        default=1000,
                        type=int,
                        required=False)
    parser.add_argument("--save_every",
                        help="Save model every n epochs",
                        default=5000,
                        required=False,
                        type=int)
    parser.add_argument("-lr",
                        "--learning_rate",
                        help="Learning rate",
                        default=0.001,
                        type=float)
    parser.add_argument("-n",
                        "--n_epochs",
                        help="Number of epochs to train for",
                        default=500000,
                        type=int)

    args = parser.parse_args()
    data = DataLoader(args.text_dir, args.summary_dir)
    full_text_lang, summary_text_lang, pairs = data.load(
        trim=args.trim_dataset)

    LOGGER.info('Creating models...')
    encoder = EncoderRNN(full_text_lang.n_words, args.hidden_units).to(device)
    attention_decoder = AttentionDecoderRNN(args.hidden_units,
                                            summary_text_lang.n_words,
                                            args.dropout).to(device)

    train(lang_1=full_text_lang,
          lang_2=summary_text_lang,
          pairs=pairs,
          encoder=encoder,
          decoder=attention_decoder,
          output_dir=args.output_dir,
          n_epochs=args.n_epochs,
          learning_rate=args.learning_rate,
          print_every=args.print_every,
          save_every=args.save_every,
          debug=args.debug)
Example 8
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from preprocess import get_dataset
from utils.transformer import *
import argparse
from seq2seq import EncoderRNN, DecoderRNN, Linear
device = 'cpu'
# ***********************
# ******* IMPORTANT *******
# ***********************
# NOTE: the sizes of 1 or 2 used below are for testing purposes only.
# Please do not hardcode any values in your seq2seq.py file.

torch.manual_seed(0)
encoder = EncoderRNN(hidden_size=1, input_size=1, batch_size=1)
decoder = DecoderRNN(hidden_size=1, output_size=2, batch_size=1)
dense = Linear(bidirectional=False, hidden_size_encoder=1, hidden_size_decoder=1)
dense2 = Linear(bidirectional=False, hidden_size_encoder=2, hidden_size_decoder=1)

layers = [encoder, decoder, dense]

layers_individual = []


# Collect every named submodule of each layer, skipping the root module
# (whose name is the empty string).
for layer in layers:
    for name, module in layer.named_modules():
        if name == '':
            continue
        layers_individual.append([name, module])
Example 9
def train_iters(*,
                corpus: Corpus,
                encoder: EncoderRNN,
                decoder: AttnDecoderRNN,
                device: torch.device,
                n_iters: int,
                batch_size: int,
                teacher_forcing_ratio: float,
                print_every: int = 1000,
                learning_rate: float = 0.01
                ) -> None:
    data = torch.utils.data.DataLoader(dataset=corpus, batch_size=batch_size)

    start: float = time.time()
    plot_losses: List[float] = []
    print_loss_total: float = 0  # Reset every print_every
    plot_loss_total: float = 0  # Reset every plot_every

    encoder_optimizer: Optimizer = SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer: Optimizer = SGD(decoder.parameters(), lr=learning_rate)

    criterion: nn.NLLLoss = nn.NLLLoss(reduction='mean')  # optionally: ignore_index=corpus.characters.pad_int

    for iteration in range(1, n_iters + 1):  # type: int

        for batch in data:
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)
            target_tensor: torch.Tensor = batch["labels"].permute(1, 0)

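            # the last batch may be smaller if the dataset size is not a
            # multiple of batch_size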
            actual_batch_size: int = min(batch_size, input_tensor.shape[1])

            verify_shape(tensor=input_tensor, expected=[corpus.word_tensor_length, actual_batch_size])
            verify_shape(tensor=target_tensor, expected=[corpus.label_tensor_length, actual_batch_size])

            # print(f"input_tensor.shape={input_tensor.shape}\t\ttarget_tensor.shape={target_tensor.shape}")
            # sys.exit()

            loss: float = train(input_tensor=input_tensor,
                                target_tensor=target_tensor,
                                encoder=encoder,
                                decoder=decoder,
                                encoder_optimizer=encoder_optimizer,
                                decoder_optimizer=decoder_optimizer,
                                criterion=criterion,
                                device=device,
                                max_src_length=corpus.word_tensor_length,
                                max_tgt_length=corpus.label_tensor_length,
                                batch_size=actual_batch_size,
                                start_of_sequence_symbol=corpus.characters.start_of_sequence.integer,
                                teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss_total += loss
            plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg: float = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(since=start, percent=iteration / n_iters),
                                         iteration, iteration / n_iters * 100, print_loss_avg))
            sys.stdout.flush()
Example 10
def train(*,
          input_tensor: torch.Tensor,  # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    loss: torch.Tensor = torch.tensor(0, dtype=torch.float, device=device)  # shape: [] meaning this is a scalar

    encoder_outputs = encoder.encode_sequence(input_tensor)

    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)

    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden, expected=[decoder.gru.num_layers, batch_size, decoder.gru.hidden_size])

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    decoder_output = decoder.decode_sequence(encoder_outputs=encoder_outputs,
                                             start_symbol=start_of_sequence_symbol,
                                             max_length=max_tgt_length,
                                             target_tensor=target_tensor if use_teacher_forcing else None)
    # print(f"input_tensor.shape={input_tensor.shape}\tdecoder_output.shape={decoder_output.shape}\ttarget_tensor.shape={target_tensor.shape}\tmax_tgt_length={max_tgt_length}")

    # The loss function expects predictions of shape (N, C), where N is the
    # number of predictions and C is the number of possible categories.
    predictions = decoder_output.reshape(-1, decoder.output_size)  # [seq_len*batch_size, output_size]
    labels = target_tensor.reshape(-1)  # [seq_len*batch_size]
    loss += criterion(predictions, labels)
    # print(f"\t{decoder_output.view(-1,decoder_output.shape[-1]).shape}")
    # print(target_tensor.reshape(-1))
    #    print(f"\t{target_tensor.view(-1)}")
    # sys.exit()
    # loss += criterion(decoder_output.view(1,1,-1), target_tensor.view(-1))
    # loss += criterion(decoder_output.squeeze(dim=1), target_tensor.squeeze(dim=1))
    # for index, decoder_output in enumerate(start=1,
    #                                        iterable=decoder.decode_sequence(encoder_outputs=encoder_outputs,
    #                                               start_of_sequence_symbol=start_of_sequence_symbol,
    #                                               max_length=max_tgt_length,
    #                                               target_tensor=target_tensor if use_teacher_forcing else None)):
    #
    #     loss += criterion(decoder_output, target_tensor[index])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()
Example 11
checkpoint = torch.load(loadFilename)
# If loading on CPU a checkpoint that was trained on GPU:
# checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
encoder_sd = checkpoint['en']
decoder_sd = checkpoint['de']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize the word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if model_checkpoint:
    embedding.load_state_dict(embedding_sd)
# Initialize the encoder and decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
if model_checkpoint:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Move the models to the appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')


class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
Example 12
def second_aux(sent):
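    # Return the second auxiliary ("do"/"does"/"don't"/"doesn't") in sent,
    # or None if the sentence contains fewer than two auxiliaries.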
    seen_aux = 0
    
    for word in sent:
        if seen_aux:
            if word in ["do", "does", "don't", "doesn't"]:
                return word
        else:
            if word in ["do", "does", "don't", "doesn't"]:
                seen_aux = 1



# Where the actual running of the code happens
hidden_size = int(sys.argv[6]) # Default 128
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, recurrent_unit)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, recurrent_unit, attn=attention, n_layers=1, dropout_p=0.1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()


counter = 0
direcs_to_process = 1

lines = open(testFile, encoding='utf-8').read().strip().split('\n')
test_pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

length_sorted_pairs_dict = {}
for i in range(30):
Example 13
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))

    encoder_sd = checkpoint['encoder']
    decoder_sd = checkpoint['decoder']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']

    embedding = nn.Embedding(voc.num_words, Config.hidden_size)
    embedding.load_state_dict(embedding_sd)

    encoder = EncoderRNN(Config.hidden_size, embedding,
                         Config.encoder_n_layers, Config.dropout)
    decoder = LuongAttnDecoderRNN(Config.attn_model, embedding,
                                  Config.hidden_size, voc.num_words,
                                  Config.decoder_n_layers, Config.dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    encoder = encoder.to(Config.device)
    decoder = decoder.to(Config.device)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    # Initialize search module
    searcher = GreedySearchDecoder(encoder, decoder)
Example 14
    # If the checkpoint was saved on a GPU but we are now training or testing
    # on a CPU, load it with map_location instead:
    # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']

print('Building encoder and decoder ...')
# Initialize the word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize the encoder and decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                              voc.num_words, decoder_n_layers, dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Move the models to the appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

######################################################################
# Put the models in training mode, which enables dropout
encoder.train()
decoder.train()
Example 15
###############################
# Creating the dataset object #
###############################
# Create training data object
bidirectional = config.getboolean("bidirectional")
trainset, source_vocab, target_vocab = get_dataset(
    types="train",
    batch_size=int(config["batch_size"]),
    shuffle=True,
    num_workers=int(config["num_workers"]),
    pin_memory=False,
    drop_last=True)
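# vocab size + 2: presumably two extra slots reserved for special tokens (e.g. SOS/EOS)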
encoder1 = EncoderRNN(int(config["hidden_size_encoder"]),
                      len(source_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_encoder"]),
                      bidirectional=bidirectional).to(device)
bridge = Linear(bidirectional, int(config["hidden_size_encoder"]),
                int(config["hidden_size_decoder"])).to(device)
decoder1 = DecoderRNN(int(config["hidden_size_decoder"]),
                      len(target_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_decoder"])).to(device)
trainIters(trainset,
           encoder1,
           decoder1,
           bridge,
           num_epochs=int(config["num_epoch"]),
           batch_size=int(config["batch_size"]),
           print_every=10,
Example 16
            bleu_per_sentence[dutch] = [bleu, eng, output]
            for n in range(1, N + 1):
                total_clipped_counts[n] += ngrams_clipped_counts[n]
                total_counts[n] += ngrams_counts[n]
            bar.update(i)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(bleu_per_sentence)
    print("bleu on corpus:",
          computeBlue(total_clipped_counts, total_counts, bp, N))


if __name__ == "__main__":
    input_lang = Lang(nld_data)
    output_lang = Lang(eng_data)

    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
    encoder1.load_state_dict(
        torch.load('models_project6/encoder.pt',
                   map_location=lambda storage, loc: storage))
    attn_decoder1 = AttnDecoderRNN(hidden_size,
                                   output_lang.n_words,
                                   1,
                                   dropout_p=0.1)
    attn_decoder1.load_state_dict(
        torch.load('models_project6/decoder.pt',
                   map_location=lambda storage, loc: storage))

    readTrainData("data/dutch-sentences.txt")
    # evaluateAndShowAttention("zij vertrekken morgenochtend uit japan")
Example 17
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
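        # drop the last 6 characters, presumably the trailing " <EOS>" marker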
        score, b2, b3, b4 = BLEU_score(pair[1], output_sentence[:-6])
        print(score)
        print(b2)
        print(b3)
        print(b4)
        total += score
        print('')
    print('Avg. score is:')
    print(total / 50)


hidden_size = 2046
encoder1 = EncoderRNN(2046, hidden_size)
attn_decoder1 = DecoderRNN(hidden_size, caption_list.n_words)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

encoder1.load_state_dict(torch.load('encoder.pt'))
attn_decoder1.load_state_dict(torch.load('decoder.pt'))

######################################################################
#

evaluateRandomly(encoder1, attn_decoder1)

print('Done')
Example 18
# Build the vocabulary
vocab = build_vocab(cleaned_news, cleaned_summaries, min_freq=3)

# Build the datasets (as tensors)
news_dataset = build_dataset(vocab, cleaned_news, config['max_len_news'], type='news')
summaries_dataset = build_dataset(vocab, cleaned_summaries, config['max_len_summaries'], type='summaries')
# Combine into a single dataset
dataset = TensorDataset(news_dataset, summaries_dataset)

# Load the pretrained word2vec embeddings (trained on Sogou news); dimension 300
pre_embeddings = get_pretrained_embedding(config['pretrained_vector_path'], vocab, vector_dim=300).to(device)

# Build the models; the hidden state size matches the embedding size (300)
vocab_size = len(vocab)
# The encoder is a single-layer bidirectional GRU
encoder = EncoderRNN(vocab_size, 300, 300, n_layers=1, pre_embeddings=pre_embeddings)
# The decoder is a two-layer unidirectional GRU
decoder = DecoderRNN(vocab_size, 300, 300, n_layers=2, pre_embeddings=pre_embeddings)

# Move to CUDA for training
encoder.to(device)
decoder.to(device)

# Train the model
training(encoder, decoder, dataset, vocab, config['lr'], config['batch_size'], config['epochs'])
Example 19
    def build_model(self):
        class MLP(nn.Module):
            def __init__(self, dims):
                super(MLP, self).__init__()
                self.hidden = nn.ModuleList()
                for k in range(len(dims) - 1):
                    self.hidden.append(nn.Linear(dims[k], dims[k + 1]))

            def forward(self, x):
                for layer in self.hidden[:-1]:
                    x = F.relu(layer(x))
                output = self.hidden[-1](x.float())
                return output

        # A2V
        aud_input_MLP = MLP([self.feat_dim, self.hidden_dim, self.hidden_dim])
        phn_encoder = EncoderRNN(self.hidden_dim,
                                 self.seq_len,
                                 self.hidden_dim,
                                 input_dropout_p=self.dropout_rate,
                                 dropout_p=self.dropout_rate,
                                 n_layers=self.enc_num_layers,
                                 bidirectional=True,
                                 rnn_cell='gru',
                                 variable_lengths=True)
        spk_encoder = EncoderRNN(self.hidden_dim,
                                 self.seq_len,
                                 self.hidden_dim,
                                 input_dropout_p=self.dropout_rate,
                                 dropout_p=self.dropout_rate,
                                 n_layers=self.enc_num_layers,
                                 bidirectional=True,
                                 rnn_cell='gru',
                                 variable_lengths=True)
        aud_decoder = DecoderRNN(self.hidden_dim * 4,
                                 self.seq_len,
                                 self.hidden_dim * 4,
                                 n_layers=self.dec_num_layers,
                                 rnn_cell='gru',
                                 bidirectional=True,
                                 input_dropout_p=self.dropout_rate,
                                 dropout_p=self.dropout_rate)
        aud_output_MLP = MLP(
            [self.hidden_dim * 4, self.hidden_dim, self.feat_dim])

        # a2v = A2VwD(input_MLP, phn_encoder, spk_encoder, decoder, output_MLP, self.dec_num_layers)

        # T2V
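        # Per-unit feature dimension; assumed: 27 = 26 letters + 1 extra
        # symbol for character units, 60 phone classes otherwise.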
        if self.unit_type == 'char':
            txt_feat_dim = 27
        else:
            txt_feat_dim = 60
        txt_input_MLP = MLP([txt_feat_dim, self.hidden_dim])
        txt_encoder = EncoderRNN(self.hidden_dim,
                                 self.seq_len,
                                 self.hidden_dim,
                                 n_layers=1,
                                 bidirectional=True,
                                 rnn_cell='gru',
                                 variable_lengths=True)
        txt_decoder = DecoderRNN(self.hidden_dim * 2,
                                 self.seq_len,
                                 self.hidden_dim * 2,
                                 n_layers=1,
                                 rnn_cell='gru',
                                 bidirectional=True)
        txt_output_MLP = MLP([self.hidden_dim * 2, txt_feat_dim])

        # t2v = A2V(txt_input_MLP, txt_encoder, txt_decoder, txt_output_MLP, 1)

        # size of discriminator input = num_directions * p_hidden_dim
        # discriminator = FCDiscriminator(2*self.hidden_dim, self.hidden_dim, self.D_num_layers)

        # the whole model
        if self.weight_x == 0.:
            if_x = False
        else:
            if_x = True
        self.model = Model(aud_input_MLP, phn_encoder, spk_encoder,
                           aud_decoder, aud_output_MLP, self.dec_num_layers,
                           txt_input_MLP, txt_encoder, txt_decoder,
                           txt_output_MLP, 1, if_x, self.neg_num)

        self.model.to(device)
Example 20
class RNNJudgeNet(nn.Module):
    """
    keys: (n_keys, word_vec_dim)
    candidates: (n_candidates, word_vec_dim)
    query = [keys; 0; candidates]: (n_keys + 1 + n_candidates, word_vec_dim),
    where 0 is used to separate keys and candidates
    result = GRU-Encoder-Decoder-with-Attention(query): (n_candidates, 2),
    which gives the probability that the i-th candidate is good
    """
    def __init__(
        self,
        word_vec_dim,
        hidden_state_size,
        bidir=True,
        rnn_cell='LSTM',
    ):
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = hidden_state_size
        self.encoder = EncoderRNN(self.word_vec_dim,
                                  self.hidden_state_size,
                                  bidir=bidir,
                                  rnn_cell=rnn_cell)
        self.decoder = AttnDecoderRNN(self.word_vec_dim,
                                      self.hidden_state_size,
                                      2,
                                      rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)
        self.decoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks, keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs, candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candidates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]

        sep = torch.zeros(batch_size, 1, self.word_vec_dim)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        lengths = [query_string_transposed.shape[0]]  # (n_keys + 1 + n_candidates)

        encoder_outputs, encoder_hidden = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # (n_layers=1, batch_size, hidden_state_size)

        decoder_hidden = encoder_hidden

        answers = []
        for i in range(n_candidates):
            # logger.debug(f"decoder_hidden: {decoder_hidden[:, :, 0:10]}")
            decoder_input = Cs[:, i].unsqueeze(0)  # (1, batch_size, word_vector_dim)
            # (1, batch_size, hidden_state_size); the "batch" here is not the
            # same batch dimension as above
            output, decoder_hidden, _ = self.decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)
            # (1, batch_size, 2)
            # (n_layers=1, batch_size, hidden_state_size)
            answers.append(output)

        probs = torch.cat(answers, dim=0)  # (n_candidates, batch_size, 2)
        probs = probs.transpose(0, 1)  # (batch_size, n_candidates, 2)
        # probs = torch.softmax(probs, dim=-1)

        return probs
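
A minimal usage sketch for RNNJudgeNet (shapes only; EncoderRNN, AttnDecoderRNN
and util are assumed to come from the surrounding project, and every size below
is illustrative):

net = RNNJudgeNet(word_vec_dim=300, hidden_state_size=256)
Ks = torch.randn(4, 3, 300)  # (batch_size=4, n_keys=3, word_vec_dim)
Cs = torch.randn(4, 5, 300)  # (batch_size=4, n_candidates=5, word_vec_dim)
probs = net(Ks, Cs)          # (4, 5, 2): good / bad scores per candidate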
Example 21
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (Batch_no: %d %d%%) %.4f' % (timeSince(
                start, iter / 5), n_iters / 5, iter / 5 * 100, print_loss_avg))
            loss_all += print_loss_avg

            plot_loss_avg = plot_loss_total / plot_every
            plot_loss_total = 0


hidden_size = 2046
encoder1 = EncoderRNN(2046, hidden_size)
attn_decoder1 = DecoderRNN(hidden_size, caption_list.n_words)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

# encoder1.load_state_dict(torch.load('encoder.pt'))
# attn_decoder1.load_state_dict(torch.load('decoder.pt'))


def evaluate(encoder, decoder, vid_ID, max_length=MAX_LENGTH):
    input_variable, empty = variableFromId(vid_ID)
    input_length = len(input_variable)
    encoder_hidden = encoder.initHidden()
Example 22
def trainIters(learning_rate=0.001):
    epochs = 1
    plot_train_losses = []
    plot_val_losses = []
    plot_loss_total = 0  # Reset every plot_every
    hidden_size = 256
    print('------- Hypers --------\n'
          '- epochs: %i\n'
          '- learning rate: %g\n'
          '- hidden size: %i\n'
          '----------------'
          '' % (epochs, learning_rate, hidden_size))

    # set model
    vocab_size_encoder = get_vocab_size(CodeEncoder())
    vocab_size_decoder = get_vocab_size(CommentEncoder())
    print(vocab_size_encoder)
    print(vocab_size_decoder)
    print('----------------')
    # leave load_model() commented out when training from scratch;
    # uncomment it to resume from a saved checkpoint
    # encoder, decoder = load_model()
    encoder = EncoderRNN(vocab_size_encoder, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, vocab_size_decoder,
                             dropout_p=0.1).to(device)

    # set training hypers
    criterion = nn.NLLLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # set data
    dataLoaders = createLoaders(extras=extras, debug=True)

    # used for initial input of decoder
    # with open('dicts/comment_dict.pkl', 'rb') as pfile:
    # 	SOS_token = pickle.load(pfile)['<SOS>']
    # since we already prepend <SOS> to the comment, don't think need this in decoder model anymore
    SOS_token = None

    # iteration
    counts = []
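    # sentinel start value; the first validation loss below it saves the model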
    best_val_loss = 100
    for eps in range(1, epochs + 1):
        print('Epoch Number', eps)
        for count, (inputs, targets) in enumerate(dataLoaders['train'], 0):
            inputs = torch.LongTensor(inputs[0])
            targets = torch.LongTensor(targets[0])
            inputs, targets = inputs.to(device), targets.to(device)

            loss = train(inputs,
                         targets,
                         encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         criterion,
                         SOS_token=SOS_token)
            plot_loss_total += loss
            # if count != 0 and count % 10 == 0:
            print(count, loss)

        counts.append(eps)
        plot_loss_avg = plot_loss_total / len(dataLoaders['train'])
        plot_train_losses.append(plot_loss_avg)
        val_loss = validate_model(encoder,
                                  decoder,
                                  criterion,
                                  dataLoaders['valid'],
                                  SOS_token=SOS_token,
                                  device=device)
        if val_loss < best_val_loss:
            save_model(encoder, decoder)
            best_val_loss = val_loss
        plot_val_losses.append(val_loss)
        plot_loss_total = 0
        save_loss(plot_train_losses, plot_val_losses)
    showPlot(counts, plot_train_losses, plot_val_losses)