Exemple #1
0
def evaluate(input_sentences, output_sentences, input_vocab, output_vocab,
             input_reverse, output_reverse, hy, writer):
    """Score every saved per-epoch checkpoint and return the accuracies.

    For each epoch in ``1..hy.num_epochs`` the encoder/decoder weights are
    loaded from ``saved_runs/{encoder,decoder}_<epoch>_weights.pt`` and
    passed to ``compute_model_accuracy``.  The best accuracy is printed as
    a percentage.

    Args:
        input_sentences, output_sentences: parallel sentences forwarded to
            ``NMTDataset``.
        input_vocab, output_vocab: vocabularies; their number of keys sets
            the encoder/decoder vocabulary sizes.
        input_reverse, output_reverse: forwarded to ``NMTDataset``.
        hy: hyper-parameter object providing ``batch_size``,
            ``embedding_size``, ``hidden_size``, ``rnn_layers``,
            ``bidirectional`` and ``num_epochs``.
        writer: forwarded to ``compute_model_accuracy``.

    Returns:
        List with one accuracy per evaluated epoch, in epoch order.
    """
    dataset = NMTDataset(input_sentences, output_sentences, input_vocab,
                         output_vocab, input_reverse, output_reverse)
    # NOTE(review): drop_last=True skips the final partial batch, so the
    # reported accuracy may not cover every sample -- confirm this is
    # required by the models (e.g. a fixed batch size).
    loader = DataLoader(dataset,
                        batch_size=hy.batch_size,
                        shuffle=True,
                        drop_last=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    input_vocab_size = len(input_vocab.keys())
    output_vocab_size = len(output_vocab.keys())

    encoder = EncoderRNN(input_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    decoder = DecoderRNN(output_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)

    accuracies = []

    for epoch in range(1, hy.num_epochs + 1):
        # map_location remaps tensors saved on a GPU so the checkpoints can
        # also be loaded when this process fell back to the CPU above.
        encoder.load_state_dict(
            torch.load("saved_runs/encoder_{}_weights.pt".format(epoch),
                       map_location=device))
        decoder.load_state_dict(
            torch.load("saved_runs/decoder_{}_weights.pt".format(epoch),
                       map_location=device))
        accuracy = compute_model_accuracy(encoder, decoder, loader, device,
                                          epoch, writer)
        accuracies.append(accuracy)

    print("=" * 80)
    print("Final Accuracy = {:.1f}".format(100. * np.max(accuracies)))
    print("=" * 80)

    return accuracies
Exemple #2
0
    def build_model(self):
        """Assemble the embedding, the two feature encoders and the decoder.

        One encoder/optimizer pair is built for each of the 'v' and 's'
        inputs (the encoders do not use the embedding).  When
        ``self.modality`` is 'ss-vv', each pair is restored from the
        checkpoint stored under the same key in ``self.pretrained_modality``.
        The decoder weights are always restored from
        ``self.pretrained_modality['t']``.
        """
        self.embedding = (
            nn.Embedding.from_pretrained(self.embedding_wts)
            if self.use_embeddings else
            nn.Embedding(self.vocab.n_words, self.embedding_dim))

        self.encoders = []
        self.encoder_optimizers = []
        for key in ('v', 's'):
            enc = EncoderRNN(self.enc_input_dim[key], self.hidden_size,
                             self.enc_n_layers, self.dropout, self.unit, key)
            enc = enc.to(self.device)
            enc_opt = optim.Adam(enc.parameters(), lr=self.lr)

            if self.modality == 'ss-vv':
                state = torch.load(self.pretrained_modality[key],
                                   map_location=self.device)
                enc.load_state_dict(state['en'])
                enc_opt.load_state_dict(state['en_op'])

            self.encoders.append(enc)
            self.encoder_optimizers.append(enc_opt)

        decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                             self.hidden_size, self.vocab.n_words, self.unit,
                             self.dec_n_layers, self.dropout, self.embedding)
        self.decoder = decoder.to(self.device)
        text_state = torch.load(self.pretrained_modality['t'],
                                map_location=self.device)
        self.decoder.load_state_dict(text_state['de'])

        # The projection width follows the first encoder's project_factor.
        self.project_factor = self.encoders[0].project_factor
        projected_size = self.hidden_size * self.project_factor
        self.latent2hidden = nn.Linear(self.latent_dim,
                                       projected_size).to(self.device)
        self.epoch = 0
Exemple #3
0
    def build_model(self):
        """Create the encoder (no embedding layer) and its Adam optimizer."""
        encoder = EncoderRNN(self.enc_input_dim, self.hidden_size,
                             self.enc_n_layers, self.dropout, self.unit,
                             self.modality)
        self.encoder = encoder.to(self.device)

        self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.lr)

        # Starts at 0 here so that resuming training can overwrite it later.
        self.epoch = 0
Exemple #4
0
def main():
    """Load a trained encoder/generator pair and run prediction on a file.

    Command-line options:
        --test_query_file/-i  file with the queries to answer (required).
        --load_path/-p        directory holding ``args.pkl`` and the saved
                              model (required).
        --load_epoch/-e       epoch of the checkpoint to reload (required).
        --output_file/-o      forwarded to ``predict``.
        --dec_algorithm/-algo parsed (default 'greedy') but not used here.

    Raises:
        RuntimeError: if ``args.pkl`` is missing from the load path.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--test_query_file', '-i', type=str, required=True)
    argparser.add_argument('--load_path', '-p', type=str, required=True)
    # TODO: load epoch -> load best model
    argparser.add_argument('--load_epoch', '-e', type=int, required=True)

    argparser.add_argument('--output_file', '-o', type=str)
    argparser.add_argument('--dec_algorithm',
                           '-algo',
                           type=str,
                           default='greedy')

    new_args = argparser.parse_args()

    arg_file = os.path.join(new_args.load_path, 'args.pkl')
    if not os.path.exists(arg_file):
        raise RuntimeError('No default arguments file to load')
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # --load_path at directories produced by a trusted training run.
    with open(arg_file, 'rb') as f:
        args = pickle.load(f)

    # Always bind the flag.  Assigning it only inside ``if args.use_cuda:``
    # left the name unbound (UnboundLocalError below) when CUDA was off.
    use_cuda = bool(args.use_cuda)

    vocab, rev_vocab = load_vocab(args.vocab_file,
                                  max_vocab=args.max_vocab_size)
    vocab_size = len(vocab)

    word_embeddings = nn.Embedding(vocab_size,
                                   args.emb_dim,
                                   padding_idx=SYM_PAD)
    E = EncoderRNN(vocab_size,
                   args.emb_dim,
                   args.hidden_dim,
                   args.n_layers,
                   bidirectional=True,
                   variable_lengths=True)
    # The generator receives twice the encoder hidden size.
    G = Generator(vocab_size, args.response_max_len, args.emb_dim,
                  2 * args.hidden_dim, args.n_layers)

    if use_cuda:
        word_embeddings.cuda()
        E.cuda()
        G.cuda()

    reload_model(new_args.load_path, new_args.load_epoch, word_embeddings, E,
                 G)

    predict(new_args.test_query_file, args.response_max_len, vocab, rev_vocab,
            word_embeddings, E, G, new_args.output_file)
Exemple #5
0
def parrot_initialization_rgc(dataset, emb_path, dc=None, encoder=None, dddqn=None):
  '''
  Trains the rgc to repeat the input

  The function is interactive: it asks on stdin whether to load a
  pretrained RNN cell, whether to load a pretrained RGC encoder/DDDQN and
  whether to train. An empty answer is treated like 'y'.

  Args:
    dataset: forwarded to DataContainer when dc is None.
    emb_path: forwarded to DataContainer when dc is None.
    dc: optional prepared DataContainer; built and prepared here if None.
    encoder: optional EncoderRNN; EncoderRNN(num_units=256) is created if None.
    dddqn: optional DDDQN; built from the data container's vocabulary if None.

  Returns:
    The tuple (encoder, dddqn, dc).
  '''
  # TODO save optimizer
  if dc is None:
    dc = DataContainer(dataset, emb_path)
    dc.prepare_data()
  x_batch, y_parrot_batch, sl_batch = u.to_batch(dc.x, dc.y_parrot_padded, dc.sl, batch_size=dc.batch_size)

  # initialize rnn cell of the encoder and the dddqn
  rep = input('Load RNN cell pretrained for the encoder & dddqn? (y or n): ')
  if encoder is None:
    encoder = EncoderRNN(num_units=256)
  if rep == 'y' or rep == '':
    encoder.load(name='EncoderRNN-0')
  else:
    # no saved cell requested: run the pretraining search instead
    choose_best_rnn_pretrained(encoder, encoder.encoder_cell, dc, search_size=1, multiprocessed=False)
  # we do not need to train the dddqn rnn layer since we already trained the encoder rnn layer
  # we just have to initialize the dddqn rnn layer weights with the ones from the encoder
  if dddqn is None:
    dddqn = DDDQN(dc.word2idx, dc.idx2word, dc.idx2emb)
  u.init_rnn_layer(dddqn.lstm)
  u.update_layer(dddqn.lstm, encoder.encoder_cell)

  # define the loss function used to pretrain the rgc
  def get_loss(encoder, dddqn, epoch, x, y, sl, sos, max_steps, verbose=True):
    # Returns the cross-entropy of one batch; when verbose, also logs the
    # word-level and sentence-level accuracies for that batch.
    preds, logits, _, _, _ = pu.full_encoder_dddqn_pass(x, sl, encoder, dddqn, sos, max_steps, training=True)
    logits = tf.nn.softmax(logits)  # normalize logits between 0 & 1 to allow training through cross-entropy
    sl = [end_idx + 1 for end_idx in sl]  # sl = [len(sequence)-1, ...] => +1 to get the len
    loss = u.cross_entropy_cost(logits, y, sequence_lengths=sl)
    if verbose:
      acc_words, acc_sentences = u.get_acc_word_seq(logits, y, sl)
      logging.info('Epoch {} -> loss = {} | acc_words = {} | acc_sentences = {}'.format(epoch, loss, acc_words, acc_sentences))
    return loss

  rep = input('Load pretrained RGC-ENCODER-DDDQN? (y or n): ')
  if rep == 'y' or rep == '':
    encoder.load('RGC/Encoder')
    dddqn.load('RGC/DDDQN')

  rep = input('Train RGC-ENCODER-DDDQN? (y or n): ')
  if rep == 'y' or rep == '':
    optimizer = tf.train.AdamOptimizer()
    # training loop over epoch and batchs
    for epoch in range(300):
      # only the first batch of each epoch logs its loss and accuracies
      verbose = True
      for x, y, sl in zip(x_batch, y_parrot_batch, sl_batch):
        sos = dc.get_sos_batch_size(len(x))
        optimizer.minimize(lambda: get_loss(encoder, dddqn, epoch, x, y, sl, sos, dc.max_tokens, verbose=verbose))
        verbose = False
      # checkpoint both models after every epoch, before the accuracy check
      encoder.save(name='RGC/Encoder')
      dddqn.save(name='RGC/DDDQN')
      acc = pu.get_acc_full_dataset(dc, encoder, dddqn)
      logging.info('Validation accuracy = {}'.format(acc))
      # early stop once the accuracy exceeds 0.95
      if acc > 0.95:
        logging.info('Stopping criteria on validation accuracy raised')
        break

  return encoder, dddqn, dc
    def __init__(self, input_size, output_size, hidden_size, learning_rate,
                 teacher_forcing_ratio, device):
        """Build the encoder, attention decoder, their optimizers and loss.

        Args:
            input_size: first argument of ``EncoderRNN``.
            output_size: second argument of ``AttnDecoderRNN``.
            hidden_size: hidden size shared by encoder and decoder.
            learning_rate: learning rate of both SGD optimizers.
            teacher_forcing_ratio: stored on the instance.
            device: stored on the instance; the sub-modules are not moved
                to it here.
        """
        super(Seq2Seq, self).__init__()

        encoder = EncoderRNN(input_size, hidden_size)
        decoder = AttnDecoderRNN(hidden_size, output_size)
        self.encoder = encoder
        self.decoder = decoder

        # One plain SGD optimizer per sub-module, same learning rate.
        self.encoder_optimizer = optim.SGD(encoder.parameters(),
                                           lr=learning_rate)
        self.decoder_optimizer = optim.SGD(decoder.parameters(),
                                           lr=learning_rate)

        self.criterion = nn.NLLLoss()

        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.device = device
 def init_network(self):
     """Create the encoder and attention decoder, then hand over to train.

     Both networks use a hidden size of 256; the decoder uses a dropout
     probability of 0.1.
     """
     print("Initializing Network...")
     n_hidden = 256
     source_vocab_size = self.src.n_words
     target_vocab_size = self.target.n_words
     self.encoder = EncoderRNN(source_vocab_size, n_hidden)
     self.attn_decoder = AttnDecoderRNN(n_hidden,
                                        target_vocab_size,
                                        dropout_p=0.1)
     # NOTE(review): self.next(...) looks like a workflow-step transition
     # to self.train -- confirm against the enclosing class.
     self.next(self.train)
Exemple #8
0
 def __init__(self,
              dataset,
              emb_path,
              name='RGC',
              dc=None,
              bbc=None,
              split_size=0.5):
     """Set up the data container, encoder, DDDQN and classifier.

     Args:
         dataset: stored on the instance.
         emb_path: stored on the instance.
         name: stored on the instance.
         dc: forwarded to ``self.get_dc`` together with ``split_size``.
         bbc: classifier to use; when None a ``BlackBoxClassifier`` is
             built from ``self.dc`` with ``prepare_data=True``.
         split_size: forwarded to ``self.get_dc``.
     """
     self.name = name
     self.dataset = dataset
     self.emb_path = emb_path
     # get_dc is expected to populate self.dc, which is read below.
     self.get_dc(dc, split_size)

     self.encoder = EncoderRNN(num_units=256)
     container = self.dc
     self.dddqn = DDDQN(container.word2idx,
                        container.idx2word,
                        container.idx2emb,
                        max_tokens=container.max_tokens)

     if bbc is None:
         bbc = BlackBoxClassifier(dc=self.dc, prepare_data=True)
     self.bbc = bbc
Exemple #9
0
def trainDemo(lang, dataSet, nlVocab, codeVocab, train_variables):
    """Build an encoder/attention-decoder pair and train it.

    The encoder is sized by ``codeVocab.n_words`` and the decoder by
    ``nlVocab.n_words``.  Training runs ``trainIters`` with 2000000 as its
    sixth argument and ``print_every=5000``.
    """
    print("Training...")
    hidden = setting.HIDDDEN_SIAZE
    encoder = EncoderRNN(codeVocab.n_words, hidden)
    decoder = AttnDecoderRNN(hidden, nlVocab.n_words, 1, dropout_p=0.1)

    if setting.USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    trainIters(lang, dataSet, train_variables, encoder, decoder, 2000000,
               print_every=5000)
Exemple #10
0
def initialize_models(voc):
    """Create the shared embedding, the encoder and the Luong decoder.

    When ``c.LOAD_FILENAME`` is truthy, the weights are restored from
    ``embedding_sd``, ``encoder_sd`` and ``decoder_sd``.  Those names are
    not defined in this function and must exist at module scope.

    Returns:
        The tuple ``(embedding, encoder, decoder)``; encoder and decoder
        have been moved to ``c.DEVICE``.
    """
    print('Building encoder and decoder ...')

    # Word embeddings shared by the encoder and the decoder.
    embedding = nn.Embedding(voc.num_words, c.HIDDEN_SIZE)
    if c.LOAD_FILENAME:
        embedding.load_state_dict(embedding_sd)

    encoder = EncoderRNN(c.HIDDEN_SIZE, embedding, c.ENCODER_N_LAYERS,
                         c.DROPOUT)
    decoder = LuongAttnDecoderRNN(c.ATTN_MODEL, embedding, c.HIDDEN_SIZE,
                                  voc.num_words, c.DECODER_N_LAYERS, c.DROPOUT)
    if c.LOAD_FILENAME:
        for model, state in ((encoder, encoder_sd), (decoder, decoder_sd)):
            model.load_state_dict(state)

    encoder = encoder.to(c.DEVICE)
    decoder = decoder.to(c.DEVICE)
    print('Models built and ready to go!')
    return embedding, encoder, decoder
Exemple #11
0
    def build_model(self):
        """Create the embedding, encoder, decoder, optimizers and projection.

        In text mode (``self.modality == 't'``) the encoder takes
        ``self.embedding_dim`` as its first argument and also receives the
        embedding; otherwise it takes ``self.enc_input_dim`` and no
        embedding.  The decoder optimizer's learning rate is
        ``self.lr * self.dec_learning_ratio``.
        """
        if self.use_embeddings:
            embedding = nn.Embedding.from_pretrained(self.embedding_wts)
        else:
            embedding = nn.Embedding(self.vocab.n_words, self.embedding_dim)
        self.embedding = embedding

        if self.modality == 't':  # the embedding is only needed in t2t mode
            encoder_args = (self.embedding_dim, self.hidden_size,
                            self.enc_n_layers, self.dropout, self.unit,
                            self.modality, self.embedding)
        else:
            encoder_args = (self.enc_input_dim, self.hidden_size,
                            self.enc_n_layers, self.dropout, self.unit,
                            self.modality)
        self.encoder = EncoderRNN(
            *encoder_args, fusion_or_unimodal=True).to(self.device)

        self.decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                                  self.hidden_size, self.vocab.n_words,
                                  self.unit, self.dec_n_layers, self.dropout,
                                  self.embedding).to(self.device)

        decoder_lr = self.lr * self.dec_learning_ratio
        self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.lr)
        self.decoder_optimizer = optim.Adam(self.decoder.parameters(),
                                            lr=decoder_lr)

        # Starts at 0 here so that resuming training can overwrite it later.
        self.epoch = 0
        self.project_factor = self.encoder.project_factor
        projected_size = self.hidden_size * self.project_factor
        self.latent2hidden = nn.Linear(self.latent_dim,
                                       projected_size).to(self.device)
def eval():
    """Reload the saved languages and models, then evaluate interactively.

    Loads the vocabularies and test set from ``model/``, rebuilds the
    encoder and attention decoder, restores their weights, runs a random
    evaluation plus one attention visualisation, and finally translates
    sentences typed by the user until Ctrl+C is pressed.
    """
    # Alternatively, these could be imported from train.py.
    input_lang = torch.load('model/input_lang')
    output_lang = torch.load('model/output_lang')
    eval_set = torch.load('model/test_set')

    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, MAX_LENGTH, dropout_p=0.1).to(device)

    # map_location lets weights saved on a GPU load on the current device.
    encoder1.load_state_dict(torch.load('model/encoder', map_location=device))
    attn_decoder1.load_state_dict(torch.load('model/decoder', map_location=device))

    evaluateRandomly(encoder1, attn_decoder1, eval_set, input_lang, output_lang)
    evaluateAndShowAttention(encoder1, attn_decoder1, input_lang, output_lang, random.choice(eval_set)[0])

    try:
        while True:
            in_text = input("input " + input_lang.name + ":  press ctrl+c to exit\n")
            # Same argument layout as the call above; the former
            # (in_text, False) call did not match it.
            # NOTE(review): if the helper takes an optional trailing flag,
            # pass False again here.
            evaluateAndShowAttention(encoder1, attn_decoder1, input_lang, output_lang, in_text)
    except KeyboardInterrupt:
        pass
Exemple #13
0
def choose_coders(dc, attention, search_size=8):
  '''
  Trains search_size coders and return the best one

  Args:
    dc: data container providing word2idx, idx2word, idx2emb and max_tokens.
    attention: forwarded to DecoderRNN.
    search_size: number of processes launched for each pretraining search.

  Returns:
    The tuple (encoder, decoder), each reloaded from its best pretraining run.
  '''
  encoder = EncoderRNN()
  decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb, max_tokens=dc.max_tokens, attention=attention)

  logging.info('Choosing coders...')
  logger = logging.getLogger()
  # silence the root logger during the search, and always restore it so a
  # failing launch cannot leave logging disabled for the rest of the program
  logger.disabled = True
  try:
    results_encoder = u.multiples_launch(pretrain_rnn_layer, [encoder, encoder.encoder_cell, dc], num_process=search_size)
    results_decoder = u.multiples_launch(pretrain_rnn_layer, [decoder, decoder.decoder_cell, dc], num_process=search_size)
  finally:
    logger.disabled = False

  # each result is indexed as (accuracy, run id); best accuracy first
  results_encoder.sort(key=lambda x: x[0], reverse=True)
  results_decoder.sort(key=lambda x: x[0], reverse=True)
  logging.info('Accuracy of the best encoder = {}'.format(results_encoder[0][0]))
  encoder.load(name='{}-{}'.format(encoder.name, results_encoder[0][1]))
  logging.info('Accuracy of the best decoder = {}'.format(results_decoder[0][0]))
  decoder.load(name='{}-{}'.format(decoder.name, results_decoder[0][1]), only_lstm=True)
  return encoder, decoder
Exemple #14
0
    def init_model(self,
                   wd,
                   hidden_size,
                   e_layers,
                   d_layers,
                   base_rnn,
                   pretrained_embeddings=None,
                   dropout_p=0.1):
        """Build the encoder and the 3-way classification MLP.

        Args:
            wd: word dictionary providing ``n_words`` and ``word2index``.
            hidden_size: encoder hidden size; the MLP input is 8x this.
            e_layers: number of encoder layers.
            d_layers: accepted but not used in this method.
            base_rnn: forwarded to ``EncoderRNN``.
            pretrained_embeddings: if exactly ``True``, GloVe embeddings
                are loaded; any other value is forwarded unchanged.
            dropout_p: dropout probability inside the MLP.

        Returns:
            ``self``.
        """
        self.base_rnn = base_rnn
        self.wd = wd
        self.dropout_p = dropout_p

        if pretrained_embeddings is True:
            print("Loading GloVe Embeddings ...")
            pretrained_embeddings = load_glove_embeddings(
                wd.word2index, hidden_size)

        self.encoder = EncoderRNN(wd.n_words,
                                  hidden_size,
                                  n_layers=e_layers,
                                  base_rnn=base_rnn,
                                  pretrained_embeddings=pretrained_embeddings)

        width = int(hidden_size)
        layers = [
            torch.nn.Linear(int(hidden_size * 8), width),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_p),
            torch.nn.Linear(width, 3),
            torch.nn.Softmax(dim=1),
        ]
        self.mlp = torch.nn.Sequential(*layers)

        self.parameter_list = [
            self.encoder.parameters(),
            self.mlp.parameters(),
        ]

        if USE_CUDA:
            self.encoder = self.encoder.cuda()
            self.mlp = self.mlp.cuda()

        return self
Exemple #15
0
    def __init__(self, **kwargs):
        """Prepare the data pipeline and build the encoder/decoder models.

        Recognised keyword arguments and their defaults:
            pretrained_embedding (False), pretrained_embedding_file (None),
            pretrained_enc_dec (False), pretrained_enc_file (None),
            pretrained_dec_file (None), model_name ('cb_model'),
            attention_type ('dot'), hidden_size (500), enc_nr_layers (2),
            dec_nr_layers (2), dropout (0.1), batch_size (64), clip (50.0),
            teacher_forcing_ratio (1.0), lr (0.0001),
            decoder_learning_ratio (5.0), nr_iterations (4000),
            print_every (1).
        """
        # Data pipeline: preprocess, clean, trim rare words, wrap in a loader.
        preprocessor = DataPreprocessor()
        formatted_file = preprocessor.write_to_file()
        cleaner = DataCleaner(formatted_file)
        cleaner.clean_data_pipeline().trim_rare_words()
        self.data_loader = DataLoader(cleaner.vocabulary, cleaner.pairs)
        self.dp = preprocessor
        self.dc = cleaner

        option = kwargs.get
        use_saved_embedding = option('pretrained_embedding', False)
        embedding_state = option('pretrained_embedding_file', None)
        use_saved_enc_dec = option('pretrained_enc_dec', False)
        encoder_state = option('pretrained_enc_file', None)
        decoder_state = option('pretrained_dec_file', None)

        self.model_name = option('model_name', 'cb_model')
        attention_type = option('attention_type', 'dot')
        self.hidden_size = option('hidden_size', 500)
        self.encoder_nr_layers = option('enc_nr_layers', 2)
        self.decoder_nr_layers = option('dec_nr_layers', 2)
        dropout_rate = option('dropout', 0.1)
        self.batch_size = option('batch_size', 64)
        self.clip = option('clip', 50.0)
        self.teacher_forcing_ratio = option('teacher_forcing_ratio', 1.0)
        self.learning_rate = option('lr', 0.0001)
        self.decoder_learning_ratio = option('decoder_learning_ratio', 5.0)
        self.nr_iteration = option('nr_iterations', 4000)
        self.print_every = option('print_every', 1)
        self.save_every = 500

        vocabulary_size = self.dc.vocabulary.num_words
        self.embedding = nn.Embedding(vocabulary_size, self.hidden_size)
        if use_saved_embedding:
            self.embedding.load_state_dict(embedding_state)

        # Encoder and decoder share the same embedding layer.
        enc = EncoderRNN(self.hidden_size, self.embedding,
                         self.encoder_nr_layers, dropout_rate)
        dec = DecoderRNN(attention_type, self.embedding, self.hidden_size,
                         self.dc.vocabulary.num_words, self.decoder_nr_layers,
                         dropout_rate)
        if use_saved_enc_dec:
            enc.load_state_dict(encoder_state)
            dec.load_state_dict(decoder_state)

        enc = enc.to(device)
        dec = dec.to(device)
        self.encoder = enc
        self.decoder = dec

        decoder_lr = self.learning_rate * self.decoder_learning_ratio
        self.encoder_optimizer = optim.Adam(enc.parameters(), lr=self.learning_rate)
        self.decoder_optimizer = optim.Adam(dec.parameters(), lr=decoder_lr)
Exemple #16
0
def main():
    """Build the chatbot models and optimizers, then start training.

    Every setting is read from the module-level ``params`` object.  The
    decoder optimizer's learning rate is
    ``params.learning_rate * params.decoder_learning_ratio``.
    """
    voc, pairs = loadPreparedData()
    print('Building encoder and decoder ...')

    # Word embeddings shared by both models.
    embedding = nn.Embedding(voc.num_words, params.embedding_size)

    encoder = EncoderRNN(embedding, params.hidden_size,
                         params.encoder_n_layers, params.dropout)
    decoder = LuongAttnDecoderRNN(params.attn_model, embedding,
                                  params.hidden_size, voc.num_words,
                                  params.decoder_n_layers, params.dropout)

    encoder = encoder.to(params.device)
    decoder = decoder.to(params.device)
    print('Models built and ready to go!')

    # Put both models in train mode so dropout is active.
    for model in (encoder, decoder):
        model.train()

    print('Building optimizers ...')
    decoder_lr = params.learning_rate * params.decoder_learning_ratio
    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=params.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=decoder_lr)

    print("Starting Training!")
    trainIters(voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, params.encoder_n_layers,
               params.decoder_n_layers, params.save_dir, params.n_iteration,
               params.batch_size, params.print_every, params.save_every,
               params.clip, params.corpus_name, load_filename=None)
Exemple #17
0
def get_data():
    """Prepare the data container, models, optimizer and batches.

    The dataset and embedding locations are read from the ``INPUT`` and
    ``EMB`` environment variables.  The batched training splits are
    flattened and concatenated with the matching test splits before being
    re-batched.

    Returns:
        The tuple ``(dc, x_a, sl_a, y_parrot_a, sos, encoder, decoder,
        optimizer, x_batch, y_parrot_batch, sl_batch)``.
    """

    def _flatten(batches):
        # Collapse a list of batches into one flat list of samples.
        return [sample for batch in batches for sample in batch]

    dc = DataContainer(os.environ['INPUT'], os.environ['EMB'])
    dc.prepare_data()

    x_a = _flatten(dc.x_train) + dc.x_te
    sl_a = _flatten(dc.sl_train) + dc.sl_te
    y_parrot_a = _flatten(dc.y_parrot_padded_batch) + dc.y_p_p_te

    sos = dc.get_sos_batch_size(len(x_a))
    encoder = EncoderRNN()
    decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                         max_tokens=dc.max_tokens, attention=False)
    optimizer = tf.train.AdamOptimizer()

    size = dc.batch_size
    x_batch = u.create_batch(x_a, batch_size=size)
    y_parrot_batch = u.create_batch(y_parrot_a, batch_size=size)
    sl_batch = u.create_batch(sl_a, batch_size=size)

    return (dc, x_a, sl_a, y_parrot_a, sos, encoder, decoder, optimizer,
            x_batch, y_parrot_batch, sl_batch)
Exemple #18
0
def parrot_initialization_encoder_decoder(dataset, emb_path, attention):
  '''
  Trains the encoder-decoder to reproduce the input

  If the directory models/Encoder-Decoder exists, the user is asked whether
  to load the previously trained pair (an empty answer counts as yes).
  Otherwise, or when the user declines, choose_coders picks the pair.
  Training then runs for at most 300 epochs and stops early when
  decoder.parrot_stopping becomes true.

  Returns the tuple (encoder, decoder, dc).
  '''
  dc = DataContainer(dataset, emb_path)
  dc.prepare_data()

  x_batch, y_parrot_batch, sl_batch = u.to_batch(dc.x, dc.y_parrot_padded, dc.sl, batch_size=dc.batch_size)

  def batch_loss(enc, dec, epoch, x, y, sl, sos):
    # encode the batch, then let the decoder compute its own loss
    output, cell_state = enc.forward(x, sl)
    return dec.get_loss(epoch, sos, (cell_state, output), y, sl, x, enc.outputs)

  encoder = decoder = None
  if os.path.isdir('models/Encoder-Decoder'):
    answer = input('Load previously trained Encoder-Decoder? (y or n): ')
    if answer in ('y', ''):
      encoder = EncoderRNN()
      decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb, max_tokens=dc.max_tokens, attention=attention)
      encoder.load(name='Encoder-Decoder/Encoder')
      decoder.load(name='Encoder-Decoder/Decoder')
      sos = dc.get_sos_batch_size(len(dc.x))
      see_parrot_results(encoder, decoder, 'final', dc.x, dc.y_parrot_padded, dc.sl, sos, greedy=True)
  if encoder is None:
    # nothing was loaded: pretrain candidates and keep the best pair
    encoder, decoder = choose_coders(dc, attention, search_size=5)

  optimizer = tf.train.AdamOptimizer()

  for epoch in range(300):
    for x, y, sl in zip(x_batch, y_parrot_batch, sl_batch):
      sos = dc.get_sos_batch_size(len(x))
      optimizer.minimize(lambda: batch_loss(encoder, decoder, epoch, x, y, sl, sos))
    if epoch % 30 == 0:
      # to reduce training time, compute global accuracy only every 30 epochs
      sos = dc.get_sos_batch_size(len(dc.x))
      see_parrot_results(encoder, decoder, epoch, dc.x, dc.y_parrot_padded, dc.sl, sos, greedy=True)
    encoder.save(name='Encoder-Decoder/Encoder')
    decoder.save(name='Encoder-Decoder/Decoder')
    if decoder.parrot_stopping:
      break
    # batches are deliberately not shuffled between epochs: shuffling made
    # the training really noisy

  return encoder, decoder, dc
Exemple #19
0
    def __init__(self, hidden_size, cond_embed_size, output_size, target_path,
                 criterion, epoch, train_or_not, lr, input_embed_size,
                 teacher_forcing_ratio, ratio_kind):
        """Store the hyper-parameters and build the CVAE with its optimizer.

        Every argument is kept on the instance (``lr`` as
        ``learning_rate``).  The weight file name is derived from
        ``self.get_bleuname()`` by removing '.csv' and wrapping the rest in
        'CVAE_' and '.pt'.  The optimizer is SGD with momentum 0.9 over the
        CVAE parameters.
        """
        # Plain settings first; get_bleuname() is only called afterwards.
        self.hidden_size = hidden_size
        self.cond_embed_size = cond_embed_size
        self.output_size = output_size
        self.target_path = target_path
        self.criterion = criterion
        self.train_or_not = train_or_not
        self.epoch = epoch
        self.learning_rate = lr
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.input_embed_size = input_embed_size
        self.ratio_kind = ratio_kind

        bleu_file = self.get_bleuname()
        stem = bleu_file.replace('.csv', '')
        self.weight_name = 'CVAE_' + stem + '.pt'

        # Helper objects and sub-networks.
        self.C2D = Char2Dict(cond_embed_size)
        self.DataLoader = Data(target_path)
        encoder = EncoderRNN(input_embed_size, hidden_size, cond_embed_size)
        self.Encoder = encoder.to(device)
        decoder = DecoderRNN(input_embed_size, hidden_size, output_size)
        self.Decoder = decoder.to(device)

        cvae_settings = dict(
            encoder=self.Encoder,
            decoder=self.Decoder,
            hidden_size=self.hidden_size,
            cond_embed_size=self.cond_embed_size,
            C2D=self.C2D,
            Train=self.train_or_not,
            output_size=self.output_size,
            teacher_forcing_ratio=self.teacher_forcing_ratio,
            input_embed_size=self.input_embed_size,
        )
        self.CVAE = CVAE(**cvae_settings)
        self.CVAE_optimizer = optim.SGD(self.CVAE.parameters(),
                                        lr=self.learning_rate,
                                        momentum=0.9)
Exemple #20
0
    torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
    encoder_opt.step()
    decoder_opt.step()

    return loss.data[0].item() / target_length


input_lang, output_lang, pairs = etl.prepare_data(args.language)

# Hyper-parameters
attn_model = 'general'
hidden_size = 500
n_layers = 2
dropout_p = 0.05
learning_rate = 0.0001

# Build the encoder first, then the attention decoder
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers)
decoder = AttentionDecoderRNN(
    attn_model, hidden_size, output_lang.n_words, n_layers,
    dropout_p=dropout_p)

# Both models are moved to the GPU unconditionally
encoder.cuda()
decoder.cuda()

# One Adam optimizer per model, sharing the learning rate, plus the loss
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()
Exemple #21
0
class FeatureAutoEncoderNetwork(Sequence2SequenceNetwork):
    """Autoencoder for video and speech feature sequences.

    This autoencoder is to be used only for video and speech vectors; use
    the base Sequence2SequenceNetwork class for autoencoding text.
    """

    def build_model(self):
        """Create the encoder (no embedding layer) and its Adam optimizer."""
        self.encoder = EncoderRNN(self.enc_input_dim, self.hidden_size,
                                  self.enc_n_layers, self.dropout, self.unit,
                                  self.modality).to(self.device)

        self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.lr)

        self.epoch = 0  # define here to add resume training feature

    def load_pretrained_model(self):
        """Restore epoch, encoder and optimizer from ``load_model_name``.

        Does nothing when ``self.load_model_name`` is falsy.
        """
        if self.load_model_name:
            checkpoint = torch.load(self.load_model_name,
                                    map_location=self.device)
            print('Loaded {}'.format(self.load_model_name))
            self.epoch = checkpoint['epoch']
            self.encoder.load_state_dict(checkpoint['en'])
            self.encoder_optimizer.load_state_dict(checkpoint['en_op'])

    def train_model(self):
        """Train from ``self.epoch`` to ``self.n_epochs``.

        After each epoch the model is saved when it improved.  For the
        'tt' modality the score comes from ``self.evaluate_all()`` and
        higher is better; for every other modality the score is the average
        reconstruction loss, so lower is better.  When ``self.use_scheduler``
        is set, the learning rate is halved (never to or below
        ``self.lr_lower_bound``) once the epoch counter since the last save
        exceeds 3.
        """
        higher_is_better = self.modality == 'tt'
        # 1e-200 keeps the historical threshold for evaluation scores; a
        # loss has to start from +inf so the first epoch always counts.
        best_score = 1e-200 if higher_is_better else float('inf')
        print_loss_total = 0  # Reset every epoch

        saving_skipped = 0
        for epoch in range(self.epoch, self.n_epochs):
            random.shuffle(self.pairs)
            for start in range(0, self.n_iters, self.batch_size):
                training_batch = batch2TrainData(
                    self.vocab, self.pairs[start:start + self.batch_size],
                    self.modality)

                # train() reshapes with self.batch_size, so an incomplete
                # batch cannot be used.
                if len(training_batch[1]) < self.batch_size:
                    print('skipped a batch..')
                    continue

                # Extract fields from batch
                input_variable, lengths, target_variable, \
                    tar_lengths = training_batch

                # Run a training iteration with the current batch
                loss = self.train(input_variable, lengths, target_variable,
                                  start)
                self.writer.add_scalar('{}loss'.format(self.data_dir), loss,
                                       start)

                print_loss_total += loss

            print_loss_avg = print_loss_total * self.batch_size / self.n_iters
            print_loss_total = 0
            print('Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, self.n_epochs,
                                                       print_loss_avg))

            if higher_is_better:
                # evaluate and save the model
                curr_score = self.evaluate_all()
                improved = curr_score > best_score
            else:  # ss, vv: the score is a loss, so smaller is better
                curr_score = print_loss_avg
                improved = curr_score < best_score

            if improved:
                saving_skipped = 0
                best_score = curr_score
                self.save_model(epoch)

            saving_skipped += 1

            if self.use_scheduler and saving_skipped > 3:
                saving_skipped = 0
                new_lr = self.lr * 0.5
                print('Entered the dungeon...')
                if new_lr > self.lr_lower_bound:  # lower bound on lr
                    self.lr = new_lr
                    # The optimizer keeps its own copy of the learning
                    # rate; without this the decay would have no effect.
                    for group in self.encoder_optimizer.param_groups:
                        group['lr'] = new_lr
                    print('lr decreased to => {}'.format(self.lr))

    def train(self, input_variable, lengths, target_variable, iter):
        """Run one optimisation step and return the batch loss as a float.

        Args:
            input_variable: input batch; its first dimension is used as
                the sequence length.
            lengths: sequence lengths passed to the encoder.
            target_variable: reconstruction target.
            iter: batch offset supplied by the caller; not used here.
        """
        input_variable = input_variable.to(self.device)
        lengths = lengths.to(self.device)
        target_variable = target_variable.to(self.device)

        # backward() accumulates into .grad, so clear what the previous
        # step left behind before computing new gradients.
        self.encoder_optimizer.zero_grad()

        # Forward pass through encoder
        _, encoder_hidden = self.encoder(input_variable, lengths)
        if self.unit == 'gru':
            latent = encoder_hidden
        else:
            (latent, cell_state) = encoder_hidden
        # reconstruct input from latent vector
        seq_len = input_variable.shape[0]
        # NOTE(review): this projection is re-created with fresh weights on
        # every call and is not part of encoder_optimizer, so it is never
        # trained -- confirm whether that is intended.
        self.latent2output = nn.Linear(self.latent_dim, self.enc_input_dim *
                                       seq_len).to(self.device)
        output = self.latent2output(latent)
        output = output.view(seq_len, self.batch_size, self.enc_input_dim)
        reconstructed_input = output

        loss = self.mean_square_error(reconstructed_input, target_variable)
        loss.backward()
        # Clip gradients: gradients are modified in place
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), self.clip)
        self.encoder_optimizer.step()
        return loss.item()

    def mean_square_error(self, inp, target):
        """Return the MSE between ``inp`` and ``target``.

        Both tensors have their first two dimensions swapped before the
        loss is computed.
        """
        criterion = nn.MSELoss()
        inp = (inp.permute(1, 0, 2))
        target = (target.permute(1, 0, 2))
        return criterion(inp, target)

    def save_model(self, epoch):
        """Save epoch, encoder and optimizer state under ``self.save_dir``.

        The file name is built by appending
        ``<model_code>-<modality>-<langs>-<epoch>.pth`` directly to
        ``self.save_dir`` (no path separator is inserted).
        """
        directory = self.save_dir
        if not os.path.exists(directory):
            os.makedirs(directory)
        torch.save(
            {
                'epoch': epoch,
                'en': self.encoder.state_dict(),
                'en_op': self.encoder_optimizer.state_dict()
            }, '{}{}-{}-{}-{}.pth'.format(directory, self.model_code,
                                          self.modality, self.langs, epoch))
Exemple #22
0
def adversarial():
    """Adversarially train a pretrained conversation model (GAN-style).

    Steps performed:

    1. Parse the adversarial-specific command-line flags.
    2. Restore the pickled arguments of the pretrained run stored in
       ``<load_path>/args.pkl`` and overlay the new flags on top of them.
    3. Build the word embeddings, encoder ``E``, generator ``G`` and a newly
       constructed discriminator ``D``; reload the pretrained weights of the
       embeddings, ``E`` and ``G``.
    4. For every epoch, iterate over the training batches, updating ``D``
       every ``training_ratio`` steps and ``G`` on every step, and save a
       checkpoint once the batch generator is exhausted.

    Raises:
        RuntimeError: if ``args.pkl`` is missing from ``--load_path``.
    """
    # use the project's named logger
    logger = logging.getLogger("lan2720")

    argparser = argparse.ArgumentParser(add_help=False)
    argparser.add_argument('--load_path', '-p', type=str, required=True)
    # TODO: load best
    argparser.add_argument('--load_epoch', '-e', type=int, required=True)

    argparser.add_argument('--filter_num', type=int, required=True)
    argparser.add_argument('--filter_sizes', type=str, required=True)

    argparser.add_argument('--training_ratio', type=int, default=2)
    argparser.add_argument('--g_learning_rate', '-glr', type=float, default=0.001)
    argparser.add_argument('--d_learning_rate', '-dlr', type=float, default=0.001)
    argparser.add_argument('--batch_size', '-b', type=int, default=168)

    # new arguments used in adversarial
    new_args = argparser.parse_args()

    # load default arguments
    default_arg_file = os.path.join(new_args.load_path, 'args.pkl')
    if not os.path.exists(default_arg_file):
        raise RuntimeError('No default argument file in %s' % new_args.load_path)
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # point --load_path at experiment directories you trust.
    with open(default_arg_file, 'rb') as f:
        args = pickle.load(f)

    # override the pretrained settings with the adversarial ones
    args.mode = 'adversarial'
    args.print_every = 1
    args.g_learning_rate = new_args.g_learning_rate
    args.d_learning_rate = new_args.d_learning_rate
    args.batch_size = new_args.batch_size

    # add new arguments
    args.load_path = new_args.load_path
    args.load_epoch = new_args.load_epoch
    args.filter_num = new_args.filter_num
    args.filter_sizes = new_args.filter_sizes
    args.training_ratio = new_args.training_ratio

    # set up the output directory (one timestamped folder per run)
    exp_dirname = os.path.join(args.exp_dir, args.mode, time.strftime("%Y-%m-%d-%H-%M-%S"))
    os.makedirs(exp_dirname)

    # set up the logger
    tqdm_logging.config(logger, os.path.join(exp_dirname, 'adversarial.log'),
                        mode='w', silent=False, debug=True)

    # load vocabulary
    vocab, rev_vocab = load_vocab(args.vocab_file, max_vocab=args.max_vocab_size)

    vocab_size = len(vocab)

    word_embeddings = nn.Embedding(vocab_size, args.emb_dim, padding_idx=SYM_PAD)
    E = EncoderRNN(vocab_size, args.emb_dim, args.hidden_dim, args.n_layers, args.dropout_rate, bidirectional=True, variable_lengths=True)
    G = Generator(vocab_size, args.response_max_len, args.emb_dim, 2*args.hidden_dim, args.n_layers, dropout_p=args.dropout_rate)
    # NOTE(security): eval() runs whatever expression is passed through
    # --filter_sizes; ast.literal_eval would be the safer parser if the flag
    # only ever carries a literal list/tuple of ints -- confirm and replace.
    D = Discriminator(args.emb_dim, args.filter_num, eval(args.filter_sizes))

    if args.use_cuda:
        word_embeddings.cuda()
        E.cuda()
        G.cuda()
        D.cuda()

    # define optimizer: only G.rnn's parameters are handed to the generator
    # optimizer
    opt_G = torch.optim.Adam(G.rnn.parameters(), lr=args.g_learning_rate)
    opt_D = torch.optim.Adam(D.parameters(), lr=args.d_learning_rate)

    logger.info('----------------------------------')
    logger.info('Adversarial a neural conversation model')
    logger.info('----------------------------------')

    logger.info('Args:')
    logger.info(str(args))

    logger.info('Vocabulary from ' + args.vocab_file)
    logger.info('vocabulary size: %d' % vocab_size)
    logger.info('Loading text data from ' + args.train_query_file + ' and ' + args.train_response_file)

    # restore the pretrained embeddings, encoder and generator (D is not
    # reloaded here)
    reload_model(args.load_path, args.load_epoch, word_embeddings, E, G)

    # dump args
    with open(os.path.join(exp_dirname, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # TODO: num_epoch is old one
    for e in range(args.num_epoch):
        train_data_generator = batcher(args.batch_size, args.train_query_file, args.train_response_file)
        logger.info("Epoch: %d/%d" % (e, args.num_epoch))
        step = 0
        cur_time = time.time()
        while True:
            try:
                # builtin next() works with both Python 2 and Python 3
                # iterators, unlike the Python-2-only ``.next()`` method
                post_sentences, response_sentences = next(train_data_generator)
            except StopIteration:
                # end of epoch: save model
                # TODO: run evaluation on the validation set here
                save_model(exp_dirname, e, word_embeddings, E, G, D)
                break

            # prepare data
            post_ids = [sentence2id(sent, vocab) for sent in post_sentences]
            response_ids = [sentence2id(sent, vocab) for sent in response_sentences]
            posts_var, posts_length = padding_inputs(post_ids, None)
            responses_var, responses_length = padding_inputs(response_ids, args.response_max_len)
            # sort by post length (descending) and apply the same permutation
            # to the responses so that pairs stay aligned
            posts_length, perms_idx = posts_length.sort(0, descending=True)
            posts_var = posts_var[perms_idx]
            responses_var = responses_var[perms_idx]
            responses_length = responses_length[perms_idx]

            if args.use_cuda:
                posts_var = posts_var.cuda()
                responses_var = responses_var.cuda()

            embedded_post = word_embeddings(posts_var)
            real_responses = word_embeddings(responses_var)

            # forward
            _, dec_init_state = E(embedded_post, input_lengths=posts_length.numpy())
            fake_responses = G(dec_init_state, word_embeddings) # [B, T, emb_size]

            prob_real = D(embedded_post, real_responses)
            prob_fake = D(embedded_post, fake_responses)

            # loss
            D_loss = - torch.mean(torch.log(prob_real) + torch.log(1. - prob_fake))
            G_loss = torch.mean(torch.log(1. - prob_fake))

            # the discriminator is only updated every `training_ratio` steps
            if step % args.training_ratio == 0:
                opt_D.zero_grad()
                # the graph is kept because G_loss is back-propagated through
                # the same forward pass below
                D_loss.backward(retain_graph=True)
                opt_D.step()

            # NOTE(review): on recent PyTorch releases, back-propagating
            # G_loss through D after opt_D.step() has modified D's weights in
            # place may raise an error -- confirm against the target version.
            opt_G.zero_grad()
            G_loss.backward()
            opt_G.step()

            if step % args.print_every == 0:
                # reshape(-1)[0] extracts the scalar whether the loss is a
                # 0-dim or a 1-element array (plain [0] fails on 0-dim)
                logger.info('Step %5d: D accuracy=%.2f (0.5 for D to converge) D score=%.2f (-1.38 for G to converge) (%.1f iters/sec)' % (
                    step,
                    prob_real.cpu().data.numpy().mean(),
                    -D_loss.cpu().data.numpy().reshape(-1)[0],
                    args.print_every/(time.time()-cur_time)))
                cur_time = time.time()
            step = step + 1
Exemple #23
0
# Script: rebuild a Spanish-English encoder/decoder pair and restore its
# saved parameters from ./data.
# args = parser.parse_args()
language = 'spa-eng'
helpers.validate_language_params(language)

input_lang, output_lang, pairs = etl.prepare_data(language)

# Hyper-parameters
attn_model = 'general'
hidden_size = 500
n_layers = 2
dropout_p = 0.05
teacher_forcing_ratio = .5
clip = 5.
criterion = nn.NLLLoss()

# Initialize models
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers)
decoder = AttentionDecoderRNN(attn_model,
                              hidden_size,
                              output_lang.n_words,
                              n_layers,
                              dropout_p=dropout_p)

# NOTE(review): a learning rate of 1 is unusually large for Adam -- confirm
# that these optimizers are actually used for training.
learning_rate = 1
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

# Load model parameters (file names are keyed by the language pair)
encoder.load_state_dict(torch.load(
    './data/encoder_params_{}'.format(language)))
decoder.load_state_dict(torch.load(
    './data/decoder_params_{}'.format(language)))
Exemple #24
0
        pair = random.choice(pairs)
        print(pair)
        print('>', pair[2])
        print('=', pair[0])
        output_words, attentions = evaluate(encoder, decoder, pair[2])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


# Script: build the vocabulary, train an attention seq2seq model, save both
# networks, reload them and evaluate on CPU.
voc_path = abs_file_path + "/data/data_clean.txt"
voc = Voc("total")
voc.initVoc(voc_path)
pairs = prepareData(abs_file_path)
print(len(pairs))

hidden_size = 256
encoder1 = EncoderRNN(voc.num_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, voc.num_words,
                               dropout=0.1).to(device)
# presumably 75000 is the number of training iterations -- verify against
# trainIters
trainIters(encoder1, attn_decoder1, 75000)

# torch.save is given the module objects themselves here, not their
# state_dicts.
encoder_save_path = "encoder3.pth"
decoder_save_path = "decoder3.pth"
torch.save(encoder1, current_dir + '/' + encoder_save_path)
torch.save(attn_decoder1, current_dir + "/" + decoder_save_path)
# Reload the files just written and evaluate the reloaded copies on CPU.
model1 = torch.load(current_dir + "/" + encoder_save_path)
model2 = torch.load(current_dir + "/" + decoder_save_path)
evaluateRandomly(model1.to(torch.device("cpu")),
                 model2.to(torch.device("cpu")))
Exemple #25
0
# Script: restore a trained encoder/decoder from a checkpoint and wrap them
# in a greedy-search decoder.
voc, _ = loadPrepareData(corpus_name, datafile, MAX_LENGTH)

# Unpack the checkpoint; the optimizer states are read but not used further
# in this snippet.
checkpoint = torch.load(ckpt)
encoder_sd = checkpoint['en']
decoder_sd = checkpoint['de']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
# Replace the freshly built vocabulary's attributes with the saved ones.
voc.__dict__ = checkpoint['voc_dict']

# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
embedding.load_state_dict(embedding_sd)

# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                              voc.num_words, decoder_n_layers, dropout)
encoder.load_state_dict(encoder_sd)
decoder.load_state_dict(decoder_sd)

# Move both models to the target device
encoder = encoder.to(device)
decoder = decoder.to(device)

# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)
Exemple #26
0
# Script: either reload previously saved networks or build new ones, train,
# then save the encoder, decoder and combiner.
if model == True:
    # Reload the networks saved under a hard-coded timestamp.
    day = 19
    hour = "01"
    nowTime = '2018-12-'+str(day)+'-'+str(hour)
    encoder_save_path = "model/combineEncoder+" +nowTime+ "+.pth"
    decoder_save_path = "model/combineDecoder+" +nowTime+  "+.pth"
    combiner_save_path = "model/combineCombiner+" +nowTime+ "+.pth"
    # NOTE(review): gcn_save_path is defined but nothing is loaded from it in
    # this branch, and GcnEncoder is not assigned here although both GCN and
    # GcnEncoder are passed to trainIters below -- confirm they are defined
    # elsewhere.
    gcn_save_path= "model/combinegcn+" +nowTime+ "+.pth"
    encoder1 = torch.load(current_dir + "/" + encoder_save_path)
    attn_decoder1 = torch.load(current_dir + "/" + decoder_save_path)
    CombineEncoder = torch.load(current_dir + "/" + combiner_save_path)
else:



    # Build all networks from scratch.
    encoder1 = EncoderRNN(voc.num_words, hidden_size,embedding=embedding_layer, embedded=True).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, voc.num_words).to(device)
    CombineEncoder = doubleCombineEncoderRNN(hidden_size, hidden_size).to(device)
    # NOTE(review): this rebinds the name GCN from the class to an instance.
    GCN = GCN(voc.num_words, hidden_size, hidden=[12897], dropouts=[0.5]).to(device)
    GcnEncoder = EncoderRNN(hidden_size, hidden_size,embedding=embedding_layer, embedded=True).to(device)
trainIters(encoder1, attn_decoder1, CombineEncoder, GCN, GcnEncoder ,trainpairs,10)



# NOTE(review): nowTime is only assigned in the reload branch above; unless it
# is defined elsewhere in the file, the else path fails here -- confirm.
encoder_save_path = "model/GCNcombineEncoder+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth"
decoder_save_path = "model/GCNcombineDecoder+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth"
combiner_save_path = "model/GCNcombineCombiner+" +nowTime+ "hidden"+str(hidden_size)+ "+.pth"
# Only the encoder, decoder and combiner are saved; GCN and GcnEncoder are not.
torch.save(encoder1, current_dir + '/' + encoder_save_path)
torch.save(attn_decoder1, current_dir + "/" + decoder_save_path)

torch.save(CombineEncoder, current_dir + "/" + combiner_save_path)
Exemple #27
0
    # checkpoint=torch.load(loadFilename,map_location=torch.device('cpu')
    encoder_sd=checkpoint['en']
    decoder_sd=checkpoint['de']
    encoder_optimizer_sd=checkpoint['en_opt']
    decoder_optimizer_sd=checkpoint['de_opt']
    embedding_sd=checkpoint['embedding']
    voc.__dict__=checkpoint['voc_dict']


# Script: build the embedding, encoder and decoder (restoring saved weights
# when a checkpoint file name is given) and switch them to training mode.
print('Building encoder and decoder...')
# Initialize the embedding
embedding=nn.Embedding(voc.num_words,hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize the encoder and decoder
encoder=EncoderRNN(hidden_size,embedding,encoder_n_layers,dropout)
decoder=LuongAttnDecoderRNN(attn_model,embedding,hidden_size,voc.num_words,decoder_n_layers,dropout)

# Restore the saved weights when resuming from a checkpoint
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

# Move the models to the selected device
encoder=encoder.to(device)
decoder=decoder.to(device)

print("Models built and ready to go !")


# Put both models in training mode
encoder.train()
decoder.train()
from evaluation import *
from lang import Lang

# Script: pick a device, evaluate previously stored models if they could be
# loaded, then build and train a new encoder/decoder pair.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Using CUDA.")
else:
    print("CUDA not available. Switching to CPU.")

encoder_file_location = 'data/encoder.dictionary'
decoder_file_location = 'data/decoder.dictionary'

# Load model from files
encoder = loadObjectFromFile(encoder_file_location)
decoder = loadObjectFromFile(decoder_file_location)
# Evaluation only runs when both loaded objects are truthy.
if encoder and decoder:
    evaluateRandomly(encoder, decoder)
    print(evaluate(encoder, decoder, 'c est un jeune directeur plein')[0])

# Build the language objects and sentence pairs
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))
print(input_lang)

# Create and train new model
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                               dropout_p=0.1).to(device)
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)
# it makes it easier to run multiple experiments) we can actually
# initialize a network and start training.
#
# Remember that the input sentences were heavily filtered. For this small
# dataset we can use relatively small networks of 256 hidden nodes and a
# single GRU layer. After about 40 minutes on a MacBook CPU we'll get some
# reasonable results.
#
# .. Note::
#    If you run this notebook you can train, interrupt the kernel,
#    evaluate, and continue training later. Comment out the lines where the
#    encoder and decoder are initialized and run ``trainIters`` again.
#

# Build the encoder and the attention decoder with 256 hidden units.
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

# "-t" anywhere on the command line switches training on.
TRAIN = "-t" in sys.argv

# With exactly two command-line arguments, the second one replaces the
# default iteration count.
TRAIN_ITER = int(sys.argv[2]) if len(sys.argv) == 3 else 7500

# Move both networks to the GPU when CUDA is in use.
if use_cuda:
    encoder1, attn_decoder1 = encoder1.cuda(), attn_decoder1.cuda()

if os.path.exists("encoder.pt") and os.path.exists("decoder.pt") and not TRAIN:
Exemple #30
0
# Script: create the training/validation data loaders and build the encoder
# and attention decoder from the parsed options.
# Training batches are shuffled; validation batches keep dataset order.
train_iter = DataLoader(dataset=train_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=4,
                        collate_fn=collate_fn)
valid_iter = DataLoader(dataset=valid_dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        num_workers=4,
                        collate_fn=collate_fn)


### Initialize model
encoder = EncoderRNN(embedding=src_embedding,
                     rnn_type=opts.rnn_type,
                     hidden_size=opts.hidden_size,
                     num_layers=opts.num_layers,
                     dropout=opts.dropout,
                     bidirectional=opts.bidirectional)

# The decoder receives the encoder object itself plus the target-side and
# external-feature embeddings.
decoder = LuongAttnDecoderRNN(encoder, embedding=tgt_embedding,
                              attention=opts.attention,
                              tie_embeddings=opts.tie_embeddings,
                              dropout=opts.dropout,
                              tie_ext_feature=opts.tie_ext_feature,
                              ext_rate_embedding=ext_rate_emebdding,
                              ext_appcate_embedding=ext_appcate_embedding, ext_seqlen_embedding=ext_seqlen_embedding, ext_senti_embedding=ext_senti_embedding
                              )

print("emb start")
if opts.pretrained_embeddings:
    if opts.use_keyword: