Example #1
def load_model(saved_vae, stored_info, device, cache_path=str(Path('../tmp')), seed=None):
    stored_info = stored_info.split(os.sep)[-1]
    cache_file = os.path.join(cache_path, stored_info)

    start_load = time.time()
    print(f"Fetching cached info at {cache_file}")
    with open(cache_file, "rb") as f:
        dataset, z_size, condition_size, condition_on, decoder_hidden_size, encoder_hidden_size, n_encoder_layers = pickle.load(f)
    end_load = time.time()
    print(f"Cache {cache_file} loaded (load time: {end_load - start_load:.2f}s)")

    if os.path.exists(saved_vae):
        print(f"Found saved model {saved_vae}")
        start_load_model = time.time()

        e = model.EncoderRNN(dataset.input_side.n_words, encoder_hidden_size, z_size, n_encoder_layers, bidirectional=True)
        d = model.DecoderRNN(z_size, dataset.trn_split.n_conditions, condition_size, decoder_hidden_size, dataset.input_side.n_words, 1, word_dropout=0)
        vae = model.VAE(e, d).to(device)
        vae.load_state_dict(torch.load(saved_vae, map_location=lambda storage, loc: storage))
        vae.eval()
        print(f"Trained for {vae.steps_seen} steps (load time: {time.time() - start_load_model:.2f}s)")

        print("Setting new random seed")
        if seed is None:
            # TODO: torch.manual_seed(1999) in model.py is affecting this
            new_seed = int(time.time())
            new_seed = abs(new_seed) % 4294967295 # must be between 0 and 4294967295
        else:
            new_seed = seed
        torch.manual_seed(new_seed)

        random_state = np.random.RandomState(new_seed)
        #random_state.shuffle(dataset.trn_pairs)
    else:
        # Without a saved model, vae and random_state would be unbound below.
        raise FileNotFoundError(f"No saved model found at {saved_vae}")

    return vae, dataset, z_size, random_state
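A call sketch for the loader above; the checkpoint and cache paths below are placeholders, and the cache file named by stored_info must exist under cache_path:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# "checkpoints/vae.pt" and "info.pkl" are hypothetical paths, for illustration only.
vae, dataset, z_size, rng = load_model("checkpoints/vae.pt", "info.pkl", device, seed=42)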
Example #2
    def __init__(self, input_size, embedding_size, hidden_size, vocab_size,
                 num_layer):
        super(Model, self).__init__()
        self.encoder = model.EncoderCNN(input_size, embedding_size)
        self.decoder = model.DecoderRNN(embedding_size, hidden_size,
                                        vocab_size, num_layer)
        self.criterion = nn.CrossEntropyLoss()
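The wrapper above only wires the modules together. A plausible forward method, assuming the common captioning interface where EncoderCNN maps images to feature vectors and DecoderRNN maps features plus captions to per-token vocabulary scores (a sketch, not part of the original example):

    def forward(self, images, captions, targets):
        # Encode the images, decode to (N, vocab_size) scores, score against targets.
        features = self.encoder(images)
        outputs = self.decoder(features, captions)
        return self.criterion(outputs, targets)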
Example #3
def main(args):
  transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225])
  ])

  with open(args.vocab_path, "rb") as f1, \
    open(args.batched_file_path, "rb") as f2:
      vocab = pickle.load(f1)
      batched_val_set = pickle.load(f2)

  coco_caps = COCO(args.caption_path)
  batched_val_loader = get_loader(args.image_dir,
                                  args.caption_path,
                                  batched_val_set,
                                  vocab,
                                  transform,
                                  shuffle=True,
                                  num_workers=3)

  encoder = model.EncoderCNN()
  decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1)
  if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()

  checkpoint = torch.load(args.load_checkpoint)
  decoder.load_state_dict(checkpoint["state_dict"])
  checkpoint = None
  torch.cuda.empty_cache()

  for i, (images, captions, lengths, ids) in enumerate(batched_val_loader):
    if i == args.num_runs:
      break
    print("\nactual captions for batch " + str(i) + " are: ")
    annIds = coco_caps.getAnnIds(imgIds=ids)
    anns = coco_caps.loadAnns(annIds)
    for ann in anns:
      print(ann["caption"])
    images = to_var(images, volatile=True)
    captions = to_var(captions, volatile=True)
    features = encoder(images)
    results = decoder.sample(features, args.beam_size)
    print("predicted captions are: ")
    for result in results:
      candidate = [vocab(i) for i in result[1][:-1]]
      references = [nltk.tokenize.word_tokenize(ann["caption"].lower()) for ann in anns]
      score = bleu_score.sentence_bleu(references, candidate)
      print("probability: %5.4f, BLEU score: %5.4f, caption: %s" %(result[0], score, caption_id_to_string(result[1], vocab)))
Example #4
def define_simple_decoder(hidden_size, input_vocab_len, output_vocab_len,
                          max_length):
    """ Provides a simple decoder instance
        NOTE: Not all the function arguments are needed - you need to figure out which arguments to use

    :return: a simple decoder instance
    """

    # Write your implementation here
    decoder = model.DecoderRNN(hidden_size, output_vocab_len)
    # End of implementation

    return decoder
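A call sketch under the signature above; the sizes are hypothetical, and per the docstring only the arguments the chosen DecoderRNN needs are actually consumed:

decoder = define_simple_decoder(hidden_size=256, input_vocab_len=4345,
                                output_vocab_len=2803, max_length=10)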
Example #5
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        super(NMT, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        src_vocab_size = len(self.vocab.src.word2id)
        tgt_vocab_size = len(self.vocab.tgt.word2id)

        self.encoder = model.EncoderRNN(vocab_size=src_vocab_size,
                                        embed_size=self.embed_size,
                                        hidden_size=self.hidden_size)
        self.decoder = model.DecoderRNN(embed_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        output_size=tgt_vocab_size)
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()

        self.criterion = torch.nn.CrossEntropyLoss().cuda()
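The unconditional .cuda() calls above fail on a CPU-only machine. A device-agnostic variant of the same moves (a sketch, not the original code):

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)
        self.criterion = torch.nn.CrossEntropyLoss().to(device)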
Example #6
imgh = args.imh
imgw = args.imw

embed_dim = args.embed_size
hidden_dim = args.nhid
attention_dim = args.attention_dim

transform = transforms.Compose([transforms.Resize((imgh, imgw)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5),
                                                     (0.5, 0.5, 0.5))])
fine_tune_encoder = False
encoder = model.EncoderCNN().to(device)
encoder.fine_tune(fine_tune_encoder)
decoder = model.DecoderRNN(ntokens, embed_dim, hidden_dim, idx2word, word2idx).to(device)

loss_fn = nn.CrossEntropyLoss().to(device)

decoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                 lr=decoder_lr)
encoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
                                 lr=encoder_lr) if fine_tune_encoder else None

# def prepare_sequence(seq, to_ix):
#     idxs = [to_ix[w] for w in seq]
#     return t.tensor(idxs, dtype=t.long, device = device)

def batchify(data, bs):
    shuffle(data)
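The example is cut off inside batchify. A common completion for this shuffle-then-chunk pattern is sketched below; it is an assumption, since the original body is truncated:

def batchify(data, bs):
    # Shuffle in place, then split into consecutive chunks of size bs.
    shuffle(data)
    return [data[i:i + bs] for i in range(0, len(data), bs)]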
Example #7
                                           shuffle=True,
                                           collate_fn=dataload.collate_fn,
                                           **kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         collate_fn=dataload.collate_fn,
                                         **kwargs)

## Load the proper neural network model.
if args.model == 'Pretrained':

    model.encoder = model.EncoderCNN(args.embed_dim)
    model.decoder = model.DecoderRNN(embed_size=args.embed_dim,
                                     hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size,
                                     num_layers=1,
                                     max_seq_length=10)

else:
    raise Exception('Unknown model {}'.format(args.model))

## The loss function: cross-entropy.

criterion = functional.cross_entropy

## Activate CUDA if specified and available.
if args.cuda:
    model.encoder.cuda()
    model.decoder.cuda()
Example #8
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    useCuda = not args.disable_cuda

    with open(args.vocab_path,
              'rb') as f1, open(args.batched_train_path,
                                'rb') as f2, open(args.batched_val_path,
                                                  'rb') as f3:
        vocab = pickle.load(f1)
        batched_train_set = pickle.load(f2)
        batched_val_set = pickle.load(f3)

    batched_train_loader = get_loader(args.train_image_dir,
                                      args.train_caption_path,
                                      batched_train_set,
                                      vocab,
                                      transform,
                                      shuffle=True,
                                      num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir,
                                    args.val_caption_path,
                                    batched_val_set,
                                    vocab,
                                    transform,
                                    shuffle=True,
                                    num_workers=1)
    random_val_loader = get_loader(args.val_image_dir,
                                   args.val_caption_path,
                                   batched_val_set,
                                   vocab,
                                   transform,
                                   shuffle=True,
                                   num_workers=1)

    encoder_cnn = model.EncoderCNN(args.is_normalized, useCuda=useCuda)
    decoder_rnn = model.DecoderRNN(args.embedding_dim,
                                   args.hidden_size,
                                   len(vocab),
                                   args.batch_size,
                                   dropout=args.dropout,
                                   useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    #loss_function = nn.CrossEntropyLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.encoder_lr)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1)

    output_train_file = open(args.output_train_name, 'w')
    output_val_file = open(args.output_val_name, 'w')
    start_epoch = 0

    save_name = file_namer.make_checkpoint_name(
        args.batch_size, args.min_occurrences, args.num_epochs, args.dropout,
        args.decoder_lr, args.encoder_lr, args.embedding_dim,
        args.hidden_size, args.grad_clip, args.is_normalized
    ) if args.load_checkpoint == "" else args.load_checkpoint
    checkpoint_name = file_namer.get_checkpoint(save_name)
    if checkpoint_name is not None:
        print("loading from checkpoint " + checkpoint_name)
        checkpoint = torch.load(checkpoint_name) if useCuda else torch.load(
            checkpoint_name, map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.load_checkpoint = checkpoint_name
        checkpoint = None
        torch.cuda.empty_cache()
    else:
        print("No existing checkpoints, starting from scratch")
        args.load_checkpoint = "No checkpoint found"

    full_return_index = mp.Value('i', 0)
    full_return_value = mp.Value('d', 0.0)
    full_val_processes = None
    for epoch in range(start_epoch, args.num_epochs):
        val_processes = None
        return_index = mp.Value('i', 0)
        return_value = mp.Value('d', 0.0)
        train_progress_bar = tqdm(iterable=batched_train_loader,
                                  desc='Epoch [%i/%i] (Train)' %
                                  (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, _) in enumerate(train_progress_bar):
            train_sum_loss += trainer.train(encoder_cnn, decoder_rnn,
                                            loss_function, optimizer, images,
                                            captions, args.grad_clip, useCuda)
            train_progress_bar.set_postfix(loss=train_sum_loss /
                                           ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write(
                    "%d, %5.4f\n" %
                    (epoch * len(batched_train_loader) + i,
                     train_sum_loss / 100 if i > 0 else train_sum_loss))
                if i % 1000 == 0:
                    if val_processes is not None:
                        val_processes.join()
                        output_val_file.write(
                            "%d, %5.4f\n" %
                            (return_index.value, return_value.value))
                    val_processes = mp.Process(
                        target=validate,
                        args=(random_val_loader, encoder_cnn, decoder_rnn,
                              loss_function, useCuda,
                              epoch * len(batched_train_loader) + i,
                              return_index, return_value))
                    val_processes.start()
                train_sum_loss = 0

        if full_val_processes is not None:
            full_val_processes.join()
            #scheduler.step(full_return_value.value)
            output_val_file.write(
                "End of Epoch\n%d, %5.4f\n" %
                (full_return_index.value, full_return_value.value))
        full_val_processes = mp.Process(
            target=validate_full,
            args=(batched_val_loader, encoder_cnn, decoder_rnn, loss_function,
                  useCuda, epoch, args.num_epochs, len(batched_train_loader),
                  full_return_index, full_return_value))
        full_val_processes.start()
        torch.save({'epoch': epoch + 1,
                    'state_dict': decoder_rnn.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   file_namer.make_checkpoint_name(
                       args.batch_size, args.min_occurrences, epoch + 1,
                       args.dropout, args.decoder_lr, args.encoder_lr,
                       args.embedding_dim, args.hidden_size, args.grad_clip,
                       args.is_normalized))
    if full_val_processes is not None:
        full_val_processes.join()
        output_val_file.write(
            "End of Epoch\n%d, %5.4f\n" %
            (full_return_index.value, full_return_value.value))
        full_val_processes = None

    output_train_file.close()
    output_val_file.close()

    if args.plot:
        args.train_files.append(args.output_train_name)
        args.val_files.append(args.output_val_name)
        plot(args)
        args.png_files = [args.plot_name]
    if args.send_email:
        args.txt_files = [args.output_train_name, args.output_val_name]
        f = open('arguments.txt', 'w')
        # arguments we don't want sent in the email
        ignore_args = [
            'user', 'password', 'to', 'plot_name', 'train_image_dir',
            'val_image_dir', 'send_email', 'plot', 'train_caption_path',
            'val_caption_path', 'png_files', 'txt_files', 'disable_cuda',
            'body', 'output_train_name', 'output_val_name', 'show',
            'subject', 'max_batched_set_size'
        ]
        for arg in sorted(vars(args)):
            if arg not in ignore_args:
                f.write("%s: %s\n" % (arg, getattr(args, arg)))
        f.close()
        if not args.body:
            args.body = 'arguments.txt'
        else:
            args.txt_files.append('arguments.txt')
        send_email(args)
Example #9
with open('vocabSet.pkl', 'rb') as f:
    vocabularySet = pickle.load(f)

print("Loaded Vocabulary Set")

with open('vocabSet2.pkl', 'rb') as f:
    vocabularySet2 = pickle.load(f)

print("Loaded Reverse Vocabulary Set")

modelsPath = "LSTM4Models/"
imagesPath = "../data/val2014/"
captionsPath = "../data/annotations/captions_val.json"

cnnEn = model.EncoderCNN(wordEmbeddings).eval()
lstmDe = model.DecoderRNN(wordEmbeddings, lstmHiddenStates, len(vocabularySet),
                          lstmLayers)
cnnEn = cnnEn.to(device)
lstmDe = lstmDe.to(device)

valData = COCO(captionsPath)

# Exploiting Pycocotools to get insights about data
print("Total Annotations: " + str(len(valData.anns.keys())))
print("Total Images: " + str(len(valData.imgs.keys())))

# Visualise
print(valData.imgToAnns[393212])

for (i, key) in enumerate(valData.imgToAnns.keys()):
    origCaptionSet = []
    for rec in valData.imgToAnns[key]:
Example #10
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate,
                 num_layers,
                 bidirectional,
                 attention_type,
                 self_attention,
                 tau,
                 gamma1,
                 gamma2,
                 cost_fcn,
                 uniform_init,
                 embedding_file=None):

        super(NMT, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.bidirectional = bidirectional
        self.tau = tau
        self.gamma1 = gamma1
        self.gamma2 = gamma2
        self.cost_fcn = cost_fcn
        src_vocab_size = len(self.vocab.src.word2id)
        tgt_vocab_size = len(self.vocab.tgt.word2id)

        if embedding_file is not None:
            Glove = {}
            print("Loading the vectors.")
            with open(embedding_file) as f:
                # Skip the header line, then parse "word v1 v2 ..." rows.
                for i, line in enumerate(f):
                    if i == 0:
                        continue
                    word, vec = line.split(' ', 1)
                    Glove[word] = np.array(vec.split(), dtype=float)
            print("Done.")
            X_train = np.zeros((len(self.vocab.src.id2word), self.embed_size))

            for i in range(len(self.vocab.src.id2word)):
                if self.vocab.src.id2word[i] in Glove:
                    X_train[i] = Glove[self.vocab.src.id2word[i]]

            embeddings = np.asarray(X_train)
        else:
            embeddings = None

        self.encoder = model.EncoderRNN(vocab_size=src_vocab_size,
                                        embed_size=self.embed_size,
                                        hidden_size=hidden_size,
                                        dropout_rate=dropout_rate,
                                        num_layers=num_layers,
                                        bidirectional=bidirectional,
                                        embeddings=embeddings)
        self.decoder = model.DecoderRNN(embed_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        output_size=tgt_vocab_size,
                                        dropout_rate=dropout_rate,
                                        num_layers=num_layers,
                                        attention_type=attention_type,
                                        self_attention=self_attention,
                                        bidirectional=bidirectional)
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()

        # Initialize all parameter weights uniformly
        for param in list(self.encoder.parameters()) + list(
                self.decoder.parameters()):
            torch.nn.init.uniform_(param, a=-uniform_init, b=uniform_init)

        self.criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda()
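With reduction='none' the criterion returns one loss per token, which the caller presumably masks and reduces itself; a sketch of that pattern, where scores, targets, and pad_id are hypothetical names rather than attributes of this class:

        # scores: (batch * seq_len, vocab_size); targets: (batch * seq_len,)
        per_token = self.criterion(scores, targets)
        mask = (targets != pad_id).float()  # zero out padding positions
        loss = (per_token * mask).sum() / mask.sum()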
Example #11
num_epochs = 100
learning_rate = 1e-4
log_interval = 10  # the interval at which the model will be saved

root_dir='../Data/'
#---------------------------------------------------------------

# Dataset loader
train_loader = Dataset_CRNN(root_dir=root_dir)

# Define the CNN encoder
cnnEnc = m.initialize_model(model_name, cnn_encoding_length, feature_extract,
                            use_pretrained)  # to use a pretrained model
# cnnEnc = MyModel()  # to use your own model

# Define the RNN decoder
rnnDec = m.DecoderRNN(CNN_embed_dim=cnn_encoding_length, h_RNN_layers=3,
                      h_RNN=256, h_FC_dim=128, drop_p=0.3,
                      num_classes=num_classes)

# Params to update
crnn_params = list(cnnEnc.parameters()) + list(rnnDec.parameters())

# Specify the loss to use
loss_criterion = F.binary_cross_entropy  # functional form; F.BCELoss does not exist (nn.BCELoss is the module class)

# Define the optimizer
optimizer = torch.optim.Adam(crnn_params, lr=learning_rate)

# Specify the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the models to device
cnnEnc = cnnEnc.to(device)
Example #12
    print(relations)
    relation_count = len(
        relations)  # args.relation_tag_size  # data['relation_tag_size']
    noisy_count = args.noisy_tag_size  # data['noisy_tag_size']
    learning_rate = args.lr  # data['lr']
    l2 = args.l2  # data['l2']
    print("relation count: ", relation_count)
    print("Reading vector file......")
    vec_model = KeyedVectors.load_word2vec_format(args.datapath +
                                                  'vector2.txt',
                                                  binary=False)
    # vec_model = KeyedVectors.load_word2vec_format('/home/xiaoya/data/GoogleNews-vectors-negative300.bin.gz', binary=True)

    # load models
    encoder = model.EncoderRNN(args, wv).to(device)
    decoder = model.DecoderRNN(args, wv).to(device)
    RE_model = model.RE_RNN(args, wv, relation_count).to(device)

    criterion = nn.NLLLoss()  # CrossEntropyLoss()
    # criterion_RE = nn.BCELoss()
    # attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        RE_model = RE_model.cuda()
        criterion = criterion.cuda()
        # criterion_RE = criterion_RE.cuda()

    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=learning_rate,
                                   weight_decay=l2)  # SGD
Example #13
with open("../ImageCaptioner/data/vocab/vocab_occurrence_5.pkl", 'rb') as f1,\
    open("../ImageCaptioner/data/batched_data/val_batch_1.pkl", "rb") as f2:
    vocab = pickle.load(f1)
    batched_val_set = pickle.load(f2)
coco_caps = COCO("../ImageCaptioner/data/annotations/captions_val2014.json")
batched_val_loader = get_loader(
    "../ImageCaptioner/data/val2014",
    "../ImageCaptioner/data/annotations/captions_val2014.json",
    batched_val_set,
    vocab,
    transform,
    shuffle=True,
    num_workers=3)

encoder = model.EncoderCNN()
decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1)
if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()

checkpoint = torch.load(
    "noNorm/model_batch_100_dims_512x512_lr_0.0001/checkpoint_25.pt")
decoder.load_state_dict(checkpoint['state_dict'])
checkpoint = None
torch.cuda.empty_cache()

for i, (images, captions, lengths, ids) in enumerate(batched_val_loader):
    if i == 1:
        break
    print("actual captions are: ")
    annIds = coco_caps.getAnnIds(imgIds=ids)
Example #14
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    useCuda = not args.disable_cuda

    with open(args.vocab_path, 'rb') as vocab_path, \
        open(args.batched_train_path, 'rb') as batched_train_path, \
        open(args.batched_val_path, 'rb') as batched_val_path:
        vocab = pickle.load(vocab_path)
        batched_train_set = pickle.load(batched_train_path)
        batched_val_set = pickle.load(batched_val_path)

    batched_train_loader = get_loader(args.train_image_dir,
                                      args.train_caption_path,
                                      batched_train_set,
                                      vocab,
                                      transform,
                                      shuffle=True,
                                      num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir,
                                    args.val_caption_path,
                                    batched_val_set,
                                    vocab,
                                    transform,
                                    shuffle=True,
                                    num_workers=1)
    batched_val_loader_full = get_loader(args.val_image_dir,
                                         args.val_caption_path,
                                         batched_val_set,
                                         vocab,
                                         transform,
                                         shuffle=True,
                                         num_workers=1)

    encoder_cnn = model.EncoderCNN()
    decoder_rnn = model.DecoderRNN(512,
                                   196,
                                   args.embedding_dim,
                                   args.hidden_dim,
                                   len(vocab),
                                   args.num_layers,
                                   args.dropout,
                                   useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        encoder_cnn.cuda()
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.lr)

    output_train_file = open(
        args.output_dir + "/train_" + str(args.num_epochs) + ".txt", 'w')
    output_val_file = open(
        args.output_dir + "/val_" + str(args.num_epochs) + ".txt", 'w')
    start_epoch = 0

    if args.load_checkpoint is not None:
        checkpoint = torch.load(
            args.load_checkpoint) if useCuda else torch.load(
                args.load_checkpoint,
                map_location=lambda storage, loc: storage)
        print("loading from checkpoint " + str(args.load_checkpoint))
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        checkpoint = None
        torch.cuda.empty_cache()

    for epoch in range(start_epoch, args.num_epochs):
        progress_bar = tqdm(iterable=batched_train_loader,
                            desc='Epoch [%i/%i] (Train)' %
                            (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, lengths, ids) in enumerate(progress_bar, 1):
            loss = train(images, captions, encoder_cnn, decoder_rnn,
                         loss_function, optimizer, args.grad_clip, useCuda)
            train_sum_loss += loss.item()
            progress_bar.set_postfix(loss=train_sum_loss / ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write("%d, %5.4f\n" %
                                        (epoch * len(batched_train_loader) + i,
                                         train_sum_loss / 100))
                train_sum_loss = 0
                if i % 1000 == 0:
                    temp_loss = validate(batched_val_loader, encoder_cnn,
                                         decoder_rnn, loss_function, useCuda)
                    output_val_file.write(
                        "%d, %5.4f\n" %
                        (epoch * len(batched_train_loader) + i, temp_loss))
        # end of batch
        output_train_file.write(
            "%d, %5.4f\n" % ((epoch + 1) * len(batched_train_loader),
                             train_sum_loss / len(batched_train_loader) / 100))

        val_sum_loss = 0
        val_progress_bar = tqdm(iterable=batched_val_loader_full,
                                desc='Epoch [%i/%i] (Val)' %
                                (epoch, args.num_epochs))
        for i, (images, captions, lengths,
                ids) in enumerate(val_progress_bar, 1):
            loss = evaluate(images, captions, encoder_cnn, decoder_rnn,
                            loss_function, optimizer, useCuda)
            val_sum_loss += loss.item()
            val_progress_bar.set_postfix(loss=val_sum_loss / i)
        output_val_file.write("%d, %5.4f\n" %
                              ((epoch + 1) * len(batched_train_loader),
                               val_sum_loss / len(batched_val_loader_full)))

        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': decoder_rnn.state_dict(),
                'optimizer': optimizer.state_dict()
            }, args.output_dir + "/checkpoint_" + str(epoch + 1) + ".pt")

    output_train_file.close()
    output_val_file.close()
Example #15
        decoder = decoder.cuda()

    for dialog in validation_data:
        sample(my_lang, dialog, encoder, context, decoder)
        time.sleep(3)

    sys.exit(0)

learning_rate = args.lr
criterion = nn.NLLLoss()
if not args.restore:
    encoder = model.EncoderRNN(len(my_lang.word2index), args.encoder_hidden, \
            args.encoder_layer, args.dropout)
    context = model.ContextRNN(args.encoder_hidden * args.encoder_layer, args.context_hidden, \
            args.context_layer, args.dropout)
    decoder = model.DecoderRNN(args.context_hidden * args.context_layer, args.decoder_hidden, \
            len(my_lang.word2index), args.decoder_layer, args.dropout)
else:
    print("Load last model in %s" % (args.save))
    number = torch.load(os.path.join(args.save, 'checkpoint.pt'))
    encoder = torch.load(
        os.path.join(args.save, 'encoder' + str(number) + '.pt'))
    context = torch.load(
        os.path.join(args.save, 'context' + str(number) + '.pt'))
    decoder = torch.load(
        os.path.join(args.save, 'decoder' + str(number) + '.pt'))
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        context = context.cuda()
        decoder = decoder.cuda()

if torch.cuda.is_available():
Example #16
        decoder_input = Variable(torch.LongTensor([[ni]]))
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    return decoded_words


def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


hidden_dim = 256
embedding_dim = 100

encoder_1 = model.EncoderRNN(input_lang.n_words, embedding_dim, hidden_dim)
decoder_1 = model.DecoderRNN(output_lang.n_words, embedding_dim, hidden_dim)
#attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, 1, dropout_p=0.1)

if use_cuda:
    encoder_1 = encoder_1.cuda()
    decoder_1 = decoder_1.cuda()
    #attn_decoder1 = attn_decoder1.cuda()

trainIters(encoder_1, decoder_1, 75000)
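After training, the helper defined above gives a quick qualitative check (a usage sketch):

evaluateRandomly(encoder_1, decoder_1, n=5)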
Example #17
def main(config):

    if config.dataset == 'real':
        # initialize the dictionary
        lang_real = prepare.Lang_real('txt')
        lines = open('data/opensubtitles/vocab4000').read().strip().split('\n')
        for sen in lines:
            lang_real.addSentence(sen)
        lang_txt = lang_real

        train_data = prepare.get_dataset('data/opensubtitles/train.txt', batch_size=16, lang_txt=lang_real, task='real')
        shuffle(train_data)
        dev_data = prepare.get_dataset('data/opensubtitles/dev.txt', batch_size=16, lang_txt=lang_real, task='real')
        test_data = prepare.get_dataset('data/opensubtitles/test.txt', batch_size=16, lang_txt=lang_real, task='real')

    elif config.dataset == 'counting':
        lang_counting = prepare.Lang_counting('txt')
        lang_txt = lang_counting

        train_data = prepare.get_dataset('data/counting/train_counting.txt', batch_size=16, lang_txt=lang_counting, task='counting')
        shuffle(train_data)
        dev_data = prepare.get_dataset('data/counting/dev_counting.txt', batch_size=16, lang_txt=lang_counting, task='counting')
        test_data = prepare.get_dataset_test_counting('data/counting/test_counting.txt', batch_size=16)

    feature = config.feature
    encoder = model.EncoderRNN(feature, feature, lang_txt.n_words)
    decoder = model.DecoderRNN(feature, feature, lang_txt.n_words)
    evaluater = model.EvaluateR(feature)
    decoder_prev = model.DecoderRNN(feature, feature, lang_txt.n_words)
    encoder_prev = model.EncoderRNN(feature, feature, lang_txt.n_words)
    dis_encoder = model.disEncoderRNN(feature, feature, lang_txt.n_words)
    dis_decoder = model.disDecoderRNN(feature, feature, lang_txt.n_words)
    eva_encoder = model.disEncoderRNN(feature, feature, lang_txt.n_words)
    eva_decoder = model.disDecoderRNN(feature, feature, lang_txt.n_words)
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        evaluater = evaluater.cuda()
        decoder_prev = decoder_prev.cuda()
        encoder_prev = encoder_prev.cuda()
        dis_encoder = dis_encoder.cuda(0)
        dis_decoder = dis_decoder.cuda(0)
        eva_encoder = eva_encoder.cuda(0)
        eva_decoder = eva_decoder.cuda(0)


    print_every = config.print_every
    dev_every = config.dev_every
    use_ppo = config.use_ppo
    ppo_a1 = config.ppo_a1
    ppo_a2 = config.ppo_a2
    ppo_b1 = config.ppo_b1
    ppo_b2 = config.ppo_b2

    if config.type == 'reinforce':

        lr = config.lr
        test1 = train.seq2seq(lang_txt, dev_data, test_data, encoder, decoder, evaluater,
                              encoder_prev, decoder_prev,
                              task=config.dataset,
                              god_rs_dev=[],
                              god_loss_dev=[],
                              god_loss=[],
                              god_rs_test=[])

        losses, rewards = test1.trainIters(train_data, 1, 1,
                                           use_ppo=use_ppo, actor_fixed=False,
                                           min_rein_step=0, max_rein_step=5,
                                           ppo_b1=ppo_b1,
                                           ppo_b2=ppo_b2,
                                           ppo_a1=ppo_a1,
                                           ppo_a2=ppo_a2,
                                           ppo_a3=1e10,
                                           rate=1,
                                           lr=lr,
                                           dev_every=dev_every,
                                           print_every=print_every,
                                           plot_every=5000000000,
                                           name='_z',
                                           file_name='MIXER')
    elif config.type == 'gan':

        test_gan = train.ganSeq2seq(lang_txt, dev_data, test_data,
                                    encoder, decoder,
                                    dis_encoder, dis_decoder,
                                    eva_encoder, eva_decoder,
                                    encoder_prev,
                                    decoder_prev,
                                    god_rs_dev=[],
                                    god_loss_dev=[],
                                    god_loss=[],
                                    god_rs_test=[],
                                    task=config.dataset)

        loss_g, loss_d = test_gan.trainIters(train_data, 0, 0, 1,
                                             use_ppo=config.use_ppo,
                                             g_lr=config.g_lr, d_lr=config.d_lr,
                                             search_n=1, width=1,
                                             ppo_b1=ppo_b1,
                                             ppo_b2=ppo_b2,
                                             ppo_a1=ppo_a1,
                                             ppo_a2=ppo_a2,
                                             ppo_a3=10000000000,
                                             print_every=print_every,
                                             plot_every=50000000000,
                                             dev_every=dev_every)
Example #18
        for i in range(n):
            output_words = evaluate(encoder1, encoder2, decoder, d['image'][i],
                                    d['post'][i], d['tags'][i])
            output_sentence = ' '.join(output_words)
            print('ground truth:', d['comment'][i])
            print('generated:', output_sentence)
        break


def evaluateScore(encoder1, encoder2, decoder, weights):
    val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)
    total_score = 0
    # evaluate takes both encoders, as in the loop above.
    for d in val_loader:
        output_words = evaluate(encoder1, encoder2, decoder, d['image'][0],
                                d['post'][0], d['tags'][0])

        score = sentence_bleu([d['comment'][0].split(' ')],
                              output_words,
                              weights=weights)
        total_score += score
    return float(total_score) / val_data_size


encoder1 = model.EncoderRNN(300, post_hidden_size).to(device)
encoder2 = model.Encoder(input_size, final_hidden_size).to(device)
decoder = model.DecoderRNN(final_hidden_size, vocab.n_words).to(device)

trainIters(encoder1, encoder2, decoder, learning_rate=0.0001)
evaluateRandomly(encoder1, encoder2, decoder, 'val', 10)
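A corpus-level call sketch for evaluateScore, using uniform BLEU-2 weights (the weight tuple is the caller's choice):

bleu2 = evaluateScore(encoder1, encoder2, decoder, weights=(0.5, 0.5))
print('Validation BLEU-2: %.4f' % bleu2)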
Example #19
            bleu_total += bleu
        print_loss_total /= test_len
        bleu_total /= test_len
        print(f'Test loss: {print_loss_total}, bleu: {bleu_total}')
        
        with open(f'{latent_hidden_size}/train_loss', 'a') as f:
            f.write(f'{str(train_loss_total/tot_cnt)}\n')
        with open(f'{latent_hidden_size}/train_KL_loss', 'a') as f:
            f.write(f'{str(train_KL_total/tot_cnt)}\n')
        with open(f'{latent_hidden_size}/test_bleu', 'a') as f:
            f.write(f'{str(bleu_total)}\n')

        test_bleu_list.append(bleu_total)
        train_loss_list.append(train_loss_total/tot_cnt)
        train_KL_list.append(train_KL_total/tot_cnt)
        train_loss_total = 0
        train_KL_total = 0
        tot_cnt = 0

        if bleu_total > highest_score:
            highest_score = bleu_total
            torch.save(encoder, f'/home/karljackab/DL/lab5/{latent_hidden_size}/encoder_{str(bleu_total)}.pkl')
            torch.save(decoder, f'/home/karljackab/DL/lab5/{latent_hidden_size}/decoder_{str(bleu_total)}.pkl')
            torch.save(enc_last, f'/home/karljackab/DL/lab5/{latent_hidden_size}/enc_last_{str(bleu_total)}.pkl')
            print('save model')

enc_last = model.EncodeLast(hidden_size+4, latent_hidden_size, device).to(device)
encoder = model.EncoderRNN(vocab_size, hidden_size+4, device).to(device)
decoder = model.DecoderRNN(hidden_size+4, vocab_size, device).to(device)

trainIters(encoder, decoder, enc_last, 300, print_every=2000)
Example #20
            print_rec_total = print_rec_total / print_every
            print_kl_total = print_kl_total / print_every
            print('average kl =  %.4f' % print_kl_total)
            print('average reconstruction =  %.4f' % print_rec_total)

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
            print_kl_total = 0
            print_rec_total = 0


encoder = model.EncoderRNN(vocabulary.n_words, latent_space,
                           embeddings).to(device)
decoder = model.DecoderRNN(embedding_space, embeddings,
                           vocabulary.n_words).to(device)
linear = model.RGB_to_Hidden(latent_space, embedding_space).to(device)

trainIters(encoder,
           decoder,
           linear,
           epochs,
           plot_every=500,
           print_every=500,
           learning_rate=learning_r)

if SAVE:
    dirpath = os.getcwd()
    encoder_path = dirpath + '/enc'
    decoder_path = dirpath + '/dec'
    torch.save(encoder, encoder_path)
Example #21
                                         shuffle=True,
                                         collate_fn=dataload.collate_fn,
                                         **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          collate_fn=dataload.val_collate,
                                          **kwargs)
## Load the proper neural network model.
if args.model == 'Pretrained':
    # Problem 2 (no hidden layer, input -> output)
    model.encoder = model.EncoderCNN(10)
    model.decoder = model.DecoderRNN(encoder_dim=2048,
                                     decoder_dim=512,
                                     attention_dim=512,
                                     embed_size=512,
                                     hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size,
                                     num_layers=1,
                                     max_seq_length=15)
#elif args.model == 'resnet_common':
# Problem 5 (multiple hidden layers, input -> hidden layers -> output)
#   print("sruthi check 1")
#  model = models.resnetcommon.ResnetCommon(im_size, args.hidden_dim, args.kernel_size, n_classes)

else:
    raise Exception('Unknown model {}'.format(args.model))

## Define the loss function as cross-entropy.
## This is the softmax loss function (i.e., multiclass classification).
criterion = functional.cross_entropy


def main(args):

    # Random seed setup
    manualSeed = random.randint(1, 100)
    # print("Random Seed: ", manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    torch.cuda.manual_seed_all(manualSeed)

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    audio_len, comment_len, mfcc_dim = caculate_max_len(
        args.audio_dir, args.text_path, vocab)
    # mfcc_features = audio_preprocess(args.audio_dir, N, AUDIO_LEN, MFCC_DIM).astype(np.float32)

    # Build data loader
    data_loader = data_get(args.audio_dir, audio_len, args.text_path,
                           comment_len, vocab)

    # Build the models
    encoder = model.EncoderRNN(mfcc_dim, args.embed_size,
                               args.hidden_size).to(device)
    decoder = model.DecoderRNN(args.embed_size + Z_DIM, args.hidden_size,
                               len(vocab), args.num_layers).to(device)
    # decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device)

    # Loss and optimizer
    criterion_BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
    criterion_CrossEntropyLoss = nn.CrossEntropyLoss()

    # Loss and optimizer
    # criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # GAN discriminator (in_dim=1 here; could alternatively be len(vocab))
    netD = model.LSTMDiscriminator(in_dim=1, hidden_dim=256).to(device)
    # setup optimizer
    optimizerD = torch.optim.Adam(netD.parameters(), lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, ((audio, audio_len), (comment,
                                     comment_len)) in enumerate(data_loader):
            audio = audio.to(device)
            audio = audio.unsqueeze(0)
            comment = comment.to(device)
            comment = comment.unsqueeze(0)
            targets = pack_padded_sequence(comment, [comment_len],
                                           batch_first=True)[0]

            batch_size = comment.shape[0]
            seq_len = targets.shape[0]
            # discriminator:1 -- real comment
            # BCEWithLogitsLoss expects float targets.
            label0 = torch.zeros(batch_size, seq_len, 1, device=device)
            label1 = torch.ones(batch_size, seq_len, 1, device=device)
            # real sample
            logits_real = netD(comment, [comment_len])  # batch*seq
            errD_real = criterion_BCEWithLogitsLoss(logits_real, label1)

            # discriminator:2 -- generated (fake) comment
            audio_features = encoder(audio, [audio_len])
            if Z_DIM > 0:
                # Append a noise vector to the audio features.
                z = torch.randn(audio_features.shape[0], Z_DIM, device=device)
                audio_features = torch.cat([z, audio_features], 1)
            outputs = decoder(audio_features, comment, [comment_len])
            # generate comment discrimination
            max_v, max_index = outputs.detach().max(1)
            logits_fake = netD(max_index.unsqueeze(0),
                               [comment_len])  # batch*seq*1
            errD_fake = criterion_BCEWithLogitsLoss(logits_fake, label0)
            errD = errD_fake + errD_real
            optimizerD.zero_grad()
            errD.backward()
            optimizerD.step()

            # 2. generator
            audio_features = encoder(audio, [audio_len])
            if Z_DIM > 0:
                z = torch.randn(audio_features.shape[0], Z_DIM, device=device)
                audio_features = torch.cat([z, audio_features], 1)
            outputs = decoder(audio_features, comment, [comment_len])
            max_v, max_index = outputs.max(1)
            logits_fake = netD(max_index.unsqueeze(0),
                               [comment_len])  # batch*seq*1
            errG = criterion_BCEWithLogitsLoss(logits_fake, label1)
            loss = criterion_CrossEntropyLoss(outputs, targets) + errG
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}],  Loss_D:  {:.4f}, Loss_G: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, errD.item(),
                            loss.item(), np.exp(loss.item())))

        # Save the model checkpoints
        if (epoch + 1) % args.save_step == 0:
            torch.save(
                decoder.state_dict(),
                os.path.join(args.model_path,
                             'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
            torch.save(
                encoder.state_dict(),
                os.path.join(args.model_path,
                             'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))