Example n. 1
0
def run_training(*,
                 config: argparse.Namespace) -> None:
    """Train an encoder/attention-decoder pair on a character corpus.

    Expects ``config`` to provide: ``vocab`` (pickled Vocabulary path),
    ``corpus``, ``max_src_length``, ``continue_training``, ``encoder`` and
    ``decoder`` (model checkpoint paths), plus the model and training
    hyperparameters consumed below.  On completion, both models are moved
    to CPU and saved to ``config.encoder`` / ``config.decoder``.
    """
    import pickle

    # NOTE(review): unpickling an arbitrary file is unsafe for untrusted
    # input; acceptable only because the vocab file is produced by this
    # project.  Use a context manager so the handle is always closed
    # (previously the file object was leaked).
    with open(config.vocab, "rb") as vocab_file:
        vocab: Vocabulary = pickle.load(vocab_file)

    device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    training_corpus = Corpus(vocab=vocab,
                             filename=config.corpus,
                             max_src_length=config.max_src_length,
                             device=device)

    if config.continue_training:
        # Resume from previously saved full-model checkpoints.
        encoder1 = torch.load(config.encoder, map_location=device)
        attn_decoder1 = torch.load(config.decoder, map_location=device)
    else:
        encoder1: EncoderRNN = EncoderRNN(input_size=len(training_corpus.characters),
                                          embedding_size=config.encoder_embedding_size,
                                          hidden_size=config.encoder_hidden_size,
                                          num_hidden_layers=config.encoder_hidden_layers).to(device=device)

        attn_decoder1 = AttnDecoderRNN(embedding_size=config.decoder_embedding_size,
                                       decoder_hidden_size=config.decoder_hidden_size,
                                       encoder_hidden_size=config.encoder_hidden_size,
                                       num_hidden_layers=config.decoder_hidden_layers,
                                       output_size=len(training_corpus.characters),
                                       dropout_p=config.decoder_dropout,
                                       max_src_length=training_corpus.word_tensor_length).to(device=device)

    train_iters(corpus=training_corpus,
                encoder=encoder1,
                decoder=attn_decoder1,
                device=device,
                n_iters=config.num_epochs,
                batch_size=config.batch_size,
                print_every=config.print_every,
                learning_rate=config.learning_rate,
                teacher_forcing_ratio=config.teacher_forcing_ratio)

    print(f"Saving encoder to {config.encoder}...")
    torch.save(encoder1.to(device=torch.device("cpu")), config.encoder)

    print(f"Saving decoder to {config.decoder}...")
    torch.save(attn_decoder1.to(device=torch.device("cpu")), config.decoder)
Example n. 2
0
def train(args):
    """Build an attention seq2seq model, train it, and save a checkpoint."""
    input_lang, output_lang, pairs = prepareData(args)
    print(random.choice(pairs))

    # Checkpoint dict: hyperparameters plus the vocabularies, so inference
    # can rebuild identical models later.
    model = {
        'hidden_size': 1000,
        'dropout': 0.1,
        'input_lang': input_lang,
        'output_lang': output_lang,
        'max_length': max(input_lang.max_length, output_lang.max_length) + 2,
    }
    print('Max length: {}'.format(model['max_length']))

    encoder = EncoderRNN(input_lang.n_words, model['hidden_size']).to(getDevice())
    encoder.train()
    decoder = AttnDecoderRNN(model['hidden_size'],
                             output_lang.n_words,
                             dropout_p=model['dropout'],
                             max_length=model['max_length']).to(getDevice())
    decoder.train()

    n_iters = 30000
    # One randomly sampled (source, target) tensor pair per iteration.
    training_pairs = [tensorsFromPair(input_lang, output_lang, random.choice(pairs))
                      for _ in range(n_iters)]
    trainIters(training_pairs,
               encoder,
               decoder,
               n_iters,
               print_every=1000,
               optim=args.optim,
               learning_rate=args.learning_rate,
               max_length=model['max_length'])

    print('saving models...')
    model['encoder_state'] = encoder.state_dict()
    model['decoder_state'] = decoder.state_dict()
    torch.save(
        model,
        "data/{}_model_checkpoint.pth".format(args.phase.split('_')[-1]))
Example n. 3
0
def inference(args):
    """Parse single-choice validation questions into programs with a trained model."""
    model = torch.load("data/sc_question_model_checkpoint.pth")

    # Rebuild both halves of the seq2seq model from the checkpointed
    # hyperparameters and restore their weights.
    encoder = EncoderRNN(model['input_lang'].n_words,
                         model['hidden_size']).to(getDevice())
    encoder.load_state_dict(model['encoder_state'])
    encoder.eval()
    model['encoder'] = encoder

    decoder = AttnDecoderRNN(model['hidden_size'],
                             model['output_lang'].n_words,
                             dropout_p=model['dropout'],
                             max_length=model['max_length']).to(getDevice())
    decoder.load_state_dict(model['decoder_state'])
    decoder.eval()
    model['decoder'] = decoder

    with open('../executor/parse_results/sc_validation.json') as f:
        anns = json.load(f)

    out = {}
    for ann in tqdm(anns):
        entry = {
            'scene_index': ann['scene_index'],
            'video_filename': ann['video_filename'],
            'questions': [],
        }

        for ann_q in ann['questions']:
            # Descriptive questions are out of scope for this pass.
            if ann_q['question_type'] == 'descriptive':
                continue

            program_pred, _ = evaluate(model['encoder'],
                                       model['decoder'],
                                       normalizeString(ann_q['question']),
                                       model['input_lang'],
                                       model['output_lang'],
                                       max_length=model['max_length'])
            # Drop the trailing end-of-sequence marker, if present.
            if program_pred[-1] == '<EOS>':
                program_pred = program_pred[:-1]

            entry['questions'].append({
                'question_program': program_pred,
                'question': ann_q['question'],
                'question_type': '{}_single_choice'.format(
                    ann_q['question_type']),
                'question_subtype': ann_q['program'][-1],
                'program_gt': ann_q['program'],
                'answer': ann_q['answer'],
            })

        out[entry['scene_index']] = entry

    out_path = '../executor/parse_results/sc_val_reproduced.json'
    print('Writing output to {}'.format(out_path))
    with open(out_path, 'w') as fout:
        json.dump(out, fout, indent=4)
Example n. 4
0
 def __init__(
     self,
     word_vec_dim,
     hidden_state_size,
     bidir=True,
     rnn_cell='LSTM',
 ):
     """Construct the encoder/decoder pair and initialize their weights.

     :param word_vec_dim: dimensionality of the input word vectors
     :param hidden_state_size: RNN hidden-state size used by both modules
     :param bidir: whether the encoder RNN is bidirectional
     :param rnn_cell: RNN cell type forwarded to both modules
     """
     super().__init__()
     self.trainable = True
     self.word_vec_dim = word_vec_dim
     self.hidden_state_size = hidden_state_size

     # Encoder over the query sequence; decoder emits a 2-way score per step.
     self.encoder = EncoderRNN(self.word_vec_dim, self.hidden_state_size,
                               bidir=bidir, rnn_cell=rnn_cell)
     self.decoder = AttnDecoderRNN(self.word_vec_dim, self.hidden_state_size,
                                   2, rnn_cell=rnn_cell)

     # Apply custom weight initialization after both modules exist
     # (same construction/initialization order as before).
     self.encoder.apply(util.weight_init)
     self.decoder.apply(util.weight_init)
Example n. 5
0
def evaluate(vocab: Vocabulary, corpus_filename: str, encoder: EncoderRNN,
             decoder: AttnDecoderRNN, max_src_length: int,
             max_tgt_length: int):
    """Greedy-decode every item in a corpus and print each predicted string."""
    device: torch.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # Move both models to the target device and switch off training behavior
    # (dropout etc.) before decoding.
    for module in (encoder, decoder):
        module.to(device)
        module.eval()

    with torch.no_grad():
        corpus = Corpus(filename=corpus_filename,
                        max_src_length=max_src_length,
                        vocab=vocab,
                        device=device)

        loader = torch.utils.data.DataLoader(dataset=corpus, batch_size=1)
        for batch in loader:
            # DataLoader yields batch-first tensors; the models expect
            # sequence-first, hence the permute.
            source: torch.Tensor = batch["data"].permute(1, 0)

            encoded = encoder.encode_sequence(source)
            decoded = decoder.decode_sequence(
                encoder_outputs=encoded,
                start_symbol=corpus.characters.start_of_sequence.integer,
                max_length=max_tgt_length)

            # Greedy choice at every step, then flatten to a list of ints.
            _, best = decoded.topk(k=1)
            predicted_ints = best.squeeze(dim=2).squeeze(dim=1).tolist()

            print("".join(corpus.characters[i].string for i in predicted_ints))
Example n. 6
0
            bleu_per_sentence[dutch] = [bleu, eng, output]
            for n in range(1, N + 1):
                total_clipped_counts[n] += ngrams_clipped_counts[n]
                total_counts[n] += ngrams_counts[n]
            bar.update(i)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(bleu_per_sentence)
    print("bleu on corpus:",
          computeBlue(total_clipped_counts, total_counts, bp, N))


if __name__ == "__main__":
    # Build source (Dutch) and target (English) vocabularies.
    input_lang = Lang(nld_data)
    output_lang = Lang(eng_data)

    hidden_size = 256
    # Restore trained weights onto CPU regardless of where they were saved:
    # the map_location lambda keeps CUDA-saved tensors loadable without a GPU.
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
    encoder1.load_state_dict(
        torch.load('models_project6/encoder.pt',
                   map_location=lambda storage, loc: storage))
    attn_decoder1 = AttnDecoderRNN(hidden_size,
                                   output_lang.n_words,
                                   1,
                                   dropout_p=0.1)
    attn_decoder1.load_state_dict(
        torch.load('models_project6/decoder.pt',
                   map_location=lambda storage, loc: storage))

    # NOTE(review): encoder1/attn_decoder1 appear to be read as module
    # globals by evaluation helpers (see the commented call below) — confirm
    # before renaming or scoping them.
    readTrainData("data/dutch-sentences.txt")
    # evaluateAndShowAttention("zij vertrekken morgenochtend uit japan")
Example n. 7
0
def train_iters(*,
                corpus: Corpus,
                encoder: EncoderRNN,
                decoder: AttnDecoderRNN,
                device: torch.device,
                n_iters: int,
                batch_size: int,
                teacher_forcing_ratio: float,
                print_every: int = 1000,
                learning_rate: float = 0.01
                ) -> None:
    """Run ``n_iters`` epochs of SGD training over ``corpus``.

    Each "iteration" is one full pass over the DataLoader; every batch's loss
    is accumulated and a progress line is printed every ``print_every``
    iterations.  (Dead commented-out code and the unused plotting
    accumulators from the original were removed.)
    """
    data = torch.utils.data.DataLoader(dataset=corpus, batch_size=batch_size)

    start: float = time.time()
    print_loss_total: float = 0.0  # reset every print_every iterations

    encoder_optimizer: Optimizer = SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer: Optimizer = SGD(decoder.parameters(), lr=learning_rate)

    # NOTE(review): padding positions are currently *not* excluded from the
    # loss (ignore_index was left commented out) — confirm that is intended.
    criterion: nn.NLLLoss = nn.NLLLoss(reduction='mean')

    for iteration in range(1, n_iters + 1):  # type: int
        for batch in data:
            # DataLoader yields batch-first tensors; the models expect
            # sequence-first, hence the permutes.
            input_tensor: torch.Tensor = batch["data"].permute(1, 0)
            target_tensor: torch.Tensor = batch["labels"].permute(1, 0)

            # The final batch of an epoch may be smaller than batch_size.
            actual_batch_size: int = min(batch_size, input_tensor.shape[1])

            verify_shape(tensor=input_tensor, expected=[corpus.word_tensor_length, actual_batch_size])
            verify_shape(tensor=target_tensor, expected=[corpus.label_tensor_length, actual_batch_size])

            loss: float = train(input_tensor=input_tensor,
                                target_tensor=target_tensor,
                                encoder=encoder,
                                decoder=decoder,
                                encoder_optimizer=encoder_optimizer,
                                decoder_optimizer=decoder_optimizer,
                                criterion=criterion,
                                device=device,
                                max_src_length=corpus.word_tensor_length,
                                max_tgt_length=corpus.label_tensor_length,
                                batch_size=actual_batch_size,
                                start_of_sequence_symbol=corpus.characters.start_of_sequence.integer,
                                teacher_forcing_ratio=teacher_forcing_ratio)

            print_loss_total += loss

        if iteration % print_every == 0:
            # NOTE(review): this divides by print_every, but each iteration
            # adds one loss per *batch* — the printed value is an interval
            # sum scaled by 1/print_every, not a per-batch mean.  Preserved
            # as-is to keep logging output comparable across runs.
            print_loss_avg: float = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (time_since(since=start, percent=iteration / n_iters),
                                         iteration, iteration / n_iters * 100, print_loss_avg))
            sys.stdout.flush()
Example n. 8
0
def train(*,
          input_tensor: torch.Tensor,  # shape: [src_seq_len, batch_size]
          target_tensor: torch.Tensor,  # shape: [tgt_seq_len, batch_size]
          encoder: EncoderRNN,
          decoder: AttnDecoderRNN,
          encoder_optimizer: Optimizer,
          decoder_optimizer: Optimizer,
          criterion: nn.Module,
          device: torch.device,
          max_src_length: int,
          max_tgt_length: int,
          batch_size: int,
          start_of_sequence_symbol: int,
          teacher_forcing_ratio: float) -> float:
    """Run one optimization step over a single batch.

    Encodes the batch, decodes (optionally teacher-forced), computes the
    NLL loss over every time step, back-propagates, and steps both
    optimizers.  Returns the batch loss as a Python float.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_outputs = encoder.encode_sequence(input_tensor)

    decoder_input = target_tensor[0].unsqueeze(dim=0)
    decoder_hidden = decoder.init_hidden(batch_size=batch_size, device=device)

    # Shape sanity checks; verify_shape raises on mismatch.
    verify_shape(tensor=decoder_input, expected=[1, batch_size])
    verify_shape(tensor=target_tensor, expected=[max_tgt_length, batch_size])
    verify_shape(tensor=decoder_hidden, expected=[decoder.gru.num_layers, batch_size, decoder.gru.hidden_size])

    # Teacher forcing: with this probability, feed the gold target symbols
    # to the decoder instead of its own predictions.
    use_teacher_forcing: bool = random.random() < teacher_forcing_ratio

    decoder_output = decoder.decode_sequence(encoder_outputs=encoder_outputs,
                                             start_symbol=start_of_sequence_symbol,
                                             max_length=max_tgt_length,
                                             target_tensor=target_tensor if use_teacher_forcing else None)

    # NLLLoss expects predictions of shape [N, C] and labels of shape [N]:
    # flatten [seq_len, batch_size, output_size] -> [seq_len*batch_size, output_size]
    # and     [seq_len, batch_size]              -> [seq_len*batch_size].
    predictions = decoder_output.reshape(-1, decoder.output_size)
    labels = target_tensor.reshape(-1)
    loss: torch.Tensor = criterion(predictions, labels)  # scalar tensor

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()
Example n. 9
0
class RNNJudgeNet(nn.Module):
    """
    keys: (n_keys, word_vec_dim)
    candidates: (n_candidates, word_vec_dim)
    query = [keys; 0; candidates]: (n_keys + 1 + n_candidates, word_vec_dim),
    where 0 is used to separate keys and candidates
    result = GRU-Encoder-Decoder-with-Attention(query): (n_candidates, 2),
    which indicates the possibility of the ith candidate being good
    """
    def __init__(
        self,
        word_vec_dim,
        hidden_state_size,
        bidir=True,
        rnn_cell='LSTM',
    ):
        """Construct the encoder/decoder pair and apply custom weight init."""
        super().__init__()
        self.trainable = True
        self.word_vec_dim = word_vec_dim
        self.hidden_state_size = hidden_state_size
        self.encoder = EncoderRNN(self.word_vec_dim,
                                  self.hidden_state_size,
                                  bidir=bidir,
                                  rnn_cell=rnn_cell)
        # Output size 2: a good/bad score pair per decoded candidate.
        self.decoder = AttnDecoderRNN(self.word_vec_dim,
                                      self.hidden_state_size,
                                      2,
                                      rnn_cell=rnn_cell)
        self.encoder.apply(util.weight_init)
        self.decoder.apply(util.weight_init)

    def forward(self, Ks: torch.Tensor, Cs: torch.Tensor, *args):
        """
        :param Ks, keywords used to expand: (batch_size, n_keys, word_vector_dim)
        :param Cs, candidates searched by Ks: (batch_size, n_candidates, word_vector_dim)
        :return: probs as good / bad candidates: (batch_size, n_candidates, 2)
        """
        batch_size = Ks.shape[0]
        n_candidates = Cs.shape[1]

        # Zero vector separating keys from candidates in the query sequence.
        # NOTE(review): created on the default device — may need
        # .to(Ks.device) if inputs live on GPU; confirm with callers.
        sep = torch.zeros(batch_size, 1, self.word_vec_dim)
        query_string = torch.cat(
            [Ks, sep, Cs],
            dim=1)  # (batch_size, n_keys + 1 + n_candidates, word_vector_dim)
        query_string_transposed = query_string.transpose(
            0, 1)  # (n_keys + 1 + n_candidates, batch_size, word_vector_dim)
        # Single full-sequence length entry for the encoder's packing.
        lengths = [query_string_transposed.shape[0]
                   ]  # (n_keys + 1 + n_candidates)

        encoder_outputs, encoder_hidden = self.encoder(
            query_string_transposed,
            torch.tensor(lengths).long().cpu())
        # (n_keys + 1 + n_candidates, batch_size, hidden_state_size)
        # (n_layers=1, batch_size, hidden_state_size)

        # Decoder starts from the encoder's final hidden state and carries
        # its hidden state across candidates.
        decoder_hidden = encoder_hidden

        answers = []
        for i in range(n_candidates):
            # logger.debug(f"decoder_hidden: {decoder_hidden[:, :, 0:10]}")
            # Feed the i-th candidate vector as the decoder input.
            decoder_input = Cs[:, i].unsqueeze(
                0)  # TODO (new dim=1,a candidate=1, word_vector_dim)
            # (1, batch_size, hidden_state_size) — note: "batch" here no
            # longer refers to the earlier batch dimension
            output, decoder_hidden, _ = self.decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)
            # (1, batch_size, 2)
            # (n_layers=1, batch_size, hidden_state_size)
            answers.append(output)

        probs = torch.cat(answers, dim=0)  # (n_candidates, batch_size, 2)
        probs = probs.transpose(0, 1)  # (batch_size, n_candidates, 2)
        # probs = torch.softmax(probs, dim=-1)

        return probs
Example n. 10
0
def trainIters(learning_rate=0.001):
    """Train the code-to-comment seq2seq model, validating after each epoch.

    Checkpoints the model whenever validation loss improves and records the
    train/validation loss curves.
    """
    epochs = 1
    plot_train_losses = []
    plot_val_losses = []
    plot_loss_total = 0  # running sum of batch losses within an epoch
    hidden_size = 256
    print('------- Hypers --------\n'
          '- epochs: %i\n'
          '- learning rate: %g\n'
          '- hidden size: %i\n'
          '----------------'
          '' % (epochs, learning_rate, hidden_size))

    # set model
    vocab_size_encoder = get_vocab_size(CodeEncoder())
    vocab_size_decoder = get_vocab_size(CommentEncoder())
    print(vocab_size_encoder)
    print(vocab_size_decoder)
    print('----------------')
    # To resume from a saved checkpoint, use load_model() instead of
    # constructing fresh models:
    # encoder, decoder = load_model()
    encoder = EncoderRNN(vocab_size_encoder, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, vocab_size_decoder,
                             dropout_p=0.1).to(device)

    # set training hypers
    criterion = nn.NLLLoss()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # set data
    dataLoaders = createLoaders(extras=extras, debug=True)

    # <SOS> is already prepended to each comment, so the decoder needs no
    # explicit start token.
    SOS_token = None

    counts = []
    # Was hard-coded to 100: the model would never be saved if the first
    # validation loss happened to be >= 100.
    best_val_loss = float('inf')
    for eps in range(1, epochs + 1):
        print('Epoch Number', eps)
        for count, (inputs, targets) in enumerate(dataLoaders['train'], 0):
            inputs = torch.LongTensor(inputs[0])
            targets = torch.LongTensor(targets[0])
            inputs, targets = inputs.to(device), targets.to(device)

            loss = train(inputs,
                         targets,
                         encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         criterion,
                         SOS_token=SOS_token)
            plot_loss_total += loss
            print(count, loss)

        counts.append(eps)
        plot_loss_avg = plot_loss_total / len(dataLoaders['train'])
        plot_train_losses.append(plot_loss_avg)
        val_loss = validate_model(encoder,
                                  decoder,
                                  criterion,
                                  dataLoaders['valid'],
                                  SOS_token=SOS_token,
                                  device=device)
        # Checkpoint only on improvement.
        if val_loss < best_val_loss:
            save_model(encoder, decoder)
            best_val_loss = val_loss
        plot_val_losses.append(val_loss)
        plot_loss_total = 0
        save_loss(plot_train_losses, plot_val_losses)
    showPlot(counts, plot_train_losses, plot_val_losses)