Esempio n. 1
0
def main(preprocessed_dir_path, trained_dir_path, prefix, embedding_dim,
         hidden_dim, batch_size):
    """Evaluate trained encoder/decoder weights on the test split.

    Loads the pickled vocabularies, restores the saved checkpoint, runs
    batched prediction over the test expressions, and prints exact-match
    accuracy against the ground-truth answers.

    Args:
        preprocessed_dir_path: directory with the pickled test/vocab files.
        trained_dir_path: directory with the saved ``*.pth`` checkpoint.
        prefix: optional file-name prefix shared with the other scripts.
        embedding_dim: embedding size the model was trained with.
        hidden_dim: hidden state size the model was trained with.
        batch_size: batch size used during prediction.
    """
    print(20 * "=", "Preparing for test", 20 * "=")
    if prefix != "":
        test_file_name = f"{prefix}_test"
        vocab_file_name = f"{prefix}_vocab"
        revocab_file_name = f"{prefix}_revocab"
        weight_file_name = f"{prefix}_weight"
    else:
        test_file_name = "test"
        vocab_file_name = "vocab"
        revocab_file_name = "revocab"
        weight_file_name = "weight"

    with open(os.path.join(preprocessed_dir_path, vocab_file_name + ".pkl"),
              "rb") as fp:
        vocab = pickle.load(fp)

    with open(os.path.join(preprocessed_dir_path, revocab_file_name + ".pkl"),
              "rb") as fp:
        revocab = pickle.load(fp)

    vocab_size = len(vocab)
    # The space character is used as the padding token by preprocessing.
    padding_idx = vocab[" "]

    encoder = Encoder(vocab_size, embedding_dim, hidden_dim,
                      padding_idx).to(device)
    decoder = Decoder(vocab_size, embedding_dim, hidden_dim,
                      padding_idx).to(device)

    # Load the checkpoint once and reuse it for both sub-modules
    # (the original read the same file from disk twice).
    checkpoint = torch.load(
        os.path.join(trained_dir_path, f"{weight_file_name}.pth"))
    encoder.load_state_dict(checkpoint["encoder"])
    decoder.load_state_dict(checkpoint["decoder"])

    expression_ids, answer_ids = data_loader(
        os.path.join(preprocessed_dir_path, test_file_name + ".pkl"),
        padding_idx)

    print(20 * "=", "Testing", 20 * "=")
    predicted_answers = predict(encoder, decoder, vocab, revocab,
                                expression_ids, batch_size, padding_idx)

    # Decode each ground-truth id sequence back into an integer answer.
    answers = []
    for ids in answer_ids.detach().numpy().tolist():
        answer = int("".join([revocab[str(id)] for id in ids]))
        answers.append(answer)

    print(20 * "=", "Calculate Test Result", 20 * "=")
    score = 0
    missed = []  # (expected, predicted) pairs kept for inspection
    for i, answer in enumerate(answers):
        if predicted_answers[i] == answer:
            score += 1
        else:
            missed.append((answer, predicted_answers[i]))

    print(f"Accuracy: {score/len(answers) * 100} ({score}/{len(answers)})")
Esempio n. 2
0
def main():
    """Train the seq2seq chatbot on the twitter customer-support dataset.

    Runs a fixed number of epochs, evaluates on the validation split after
    each one, and saves the model whenever validation loss improves.
    """
    vocab, train_iter, val_iter, test_iter = dataset_factory(
        'twitter-customer-support')

    epochs = 100
    embedding_size = 20
    hidden_size = 100
    vocab_size = len(vocab)
    padding_idx = vocab.stoi['<pad>']

    encoder = Encoder(vocab_size, embedding_size, hidden_size)
    decoder = Decoder(vocab_size, embedding_size, hidden_size)
    seq2seq = cuda(Seq2Seq(encoder, decoder, vocab_size))

    optimizer = optim.Adam(seq2seq.parameters())

    best_val_loss = None
    for epoch in range(epochs):
        # calculate train and val loss
        train_loss = train(seq2seq, optimizer, train_iter, vocab_size, 5,
                           padding_idx)
        val_loss = evaluate(seq2seq, val_iter, vocab_size, padding_idx)
        print("[Epoch=%d] train_loss %f - val_loss %f" %
              (epoch, train_loss, val_loss))

        # Save only on genuine improvement.  Compare against None
        # explicitly: the original `not best_val_loss` also matched a best
        # loss of exactly 0.0 and would then save on every epoch.
        if best_val_loss is None or val_loss < best_val_loss:
            print('Saving model...')
            save_model(seq2seq, epoch, val_loss)
            best_val_loss = val_loss
Esempio n. 3
0
    def __init__(self, weights_file: str, device: str = "cpu") -> None:
        """Build the seq2seq network from MODEL_CONF and load saved weights.

        Args:
            weights_file: path to a state-dict checkpoint of the full model.
            device: torch device string the weights are mapped onto.
        """
        self.weights_file = weights_file
        self.device = device

        enc_conf = MODEL_CONF["encoder"]
        dec_conf = MODEL_CONF["decoder"]

        self.encoder = Encoder(
            enc_conf["input_dim"],
            enc_conf["hidden_dim"],
            enc_conf["n_layer"],
            enc_conf["dropout"],
        )
        self.decoder = Decoder(
            dec_conf["input_dim"],
            dec_conf["output_dim"],
            dec_conf["hidden_dim"],
            dec_conf["n_layer"],
            dec_conf["dropout"],
        )
        self.net = Seq2Seq(self.encoder, self.decoder, self.device)

        # Restore trained parameters onto the requested device.
        state = torch.load(self.weights_file, map_location=self.device)
        self.net.load_state_dict(state)
        print("weights loaded.")
Esempio n. 4
0
# Model / batching hyper-parameters.
max_len = 200  # maximum sequence length (passed to loaders and model)
h = 8  # presumably the number of attention heads -- confirm against Encoder
Num = 6  # presumably the number of encoder/decoder layers -- confirm
max_token = 4096  # per-batch budget passed to Batch_loader -- TODO confirm unit

# dataset
# NOTE(review): data_path, en_word2idx, de_word2idx, bpe, device, emb_size,
# d_ff, dropout, PAD and lr are defined earlier in this file (outside view).
dataset = Basedataset(data_path, en_word2idx, de_word2idx, bpe)
dataloader = Batch_loader(dataset, device, max_len, max_token)

total = len(dataset)

# model

encoder = Encoder(en_vocab_size, emb_size, d_ff, dropout, max_len, h, Num,
                  device)
decoder = Decoder(de_vocab_size, emb_size, d_ff, dropout, max_len, h, Num,
                  device)
model = Transformer(encoder, decoder, PAD, device, max_len).to(device)
'''
model = Transformer_fr(en_vocab_size, de_vocab_size, PAD, max_len, emb_size, device).to(device)
'''

# Optionally resume training from a saved checkpoint.
#model.load_state_dict(torch.load("./model.pt", map_location= device))
criterion = nn.CrossEntropyLoss()  #(ignore_index = 0)
#criterion = nn.CrossEntropyLoss()
# Adam betas/eps match the settings from "Attention Is All You Need".
optimizer = torch.optim.Adam(model.parameters(),
                             lr=lr,
                             betas=(0.9, 0.98),
                             eps=1e-9)

# Evaluation data uses a smaller per-batch token budget (1500).
test_dataset = Basedataset(test_path, en_word2idx, de_word2idx, bpe)
test_loader = Batch_loader(test_dataset, device, max_len, 1500)
Esempio n. 5
0
def main():
    """Build and train (or run inference for) the styled caption generator.

    Constructs the caption model plus two style language models (romantic
    and humorous) sharing RNN variables, trains them on alternating label
    batches, periodically validates and checkpoints, and saves the final
    model.  In 'inference' mode it delegates to ``inference`` instead.
    """
    # data
    data = Data(
        IMAGE_DIR,
        PICKLES_DIR,
        params['keep_words'],
        params=params,
        img_embed=params["img_embed"])
    data_dict = data.dictionary
    # define placeholders
    capt_inputs = tf.placeholder(tf.int32, [None, None])
    capt_labels = tf.placeholder(tf.int32, [None, None])
    seq_length = tf.placeholder(tf.int32, [None])
    # forward pass is expensive, so can use this method to reduce computation
    if params["img_embed"]== "vgg":
        n_features = 4096
    elif params["img_embed"] == "resnet":
        n_features = 2048
    # NOTE(review): n_features is unbound (NameError) below if img_embed is
    # neither "vgg" nor "resnet" -- confirm allowed values upstream.
    image_embs = tf.placeholder(tf.float32, [None, n_features])  # vgg16
    # With several captions per image, tile each image feature so feature
    # rows line up one-to-one with caption rows.
    if params['num_captions'] > 1 and params['mode'] == 'training':
        features_tiled = tf.tile(tf.expand_dims(image_embs, 1),
                                 [1, params['num_captions'], 1])
        features_tiled = tf.reshape(features_tiled,
                                    [tf.shape(image_embs
                                              )[0] * params['num_captions'],
                                     n_features])  # [5 * b_s, 4096]
    else:
        features_tiled = image_embs
    model = Decoder(capt_inputs, params['lstm_hidden'],
                    params['embed_dim'], seq_length,
                    data_dict, params['lstm_hidden'], image_embs,
                    params=params, reuse_text_emb=True)
    # Three forward passes share the same "rnn" variables: actual captions
    # plus the romantic and humorous language models.
    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        x_cmlogits, _ = model.forward(mode='train_capt',
                                      image_embs=features_tiled)
        x_lmrlogits, _ = model.forward(mode='train_lmr', lm_label='romantic')
        x_lmhlogits, _ = model.forward(mode='train_lmh', lm_label='humorous')

    # losses
    labels_flat = tf.reshape(capt_labels, [-1])
    cm_loss = masked_loss(labels_flat, x_cmlogits, mode='train_capt')
    lmh_loss = masked_loss(labels_flat, x_lmhlogits, mode='train_lmh')
    lmr_loss = masked_loss(labels_flat, x_lmrlogits, mode='train_lmr')
    # optimizers (the language models use a fixed 0.0005 learning rate)
    cm_opt = lstm_optimizer(cm_loss, params, params['learning_rate'],
                            mode='train_capt')
    lmh_opt = lstm_optimizer(lmh_loss, params, 0.0005,
                             mode='train_lmh')
    lmr_opt = lstm_optimizer(lmr_loss, params, 0.0005,
                             mode='train_lmr')
    # train
    saver = tf.train.Saver(tf.trainable_variables(),
                           max_to_keep=params['keep_cp'])
    gpu_options = tf.GPUOptions(
                    visible_device_list=params["gpu"],
                    allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if params['write_summary']:
            summary_writer = tf.summary.FileWriter('./logs', sess.graph)
            summary_writer.add_graph(sess.graph)
        # print(tf.trainable_variables())
        # train 3 networks, save S_act, S_hum, S_rom
        if params['restore']:
            print("Restoring from checkpoint")
            saver.restore(sess,
                          "./checkpoints/{}.ckpt".format(params['checkpoint']))
        # choose labels for the training
        tr_labels = ['actual']
        tr_style = params['tr_style']
        if tr_style == 'both':
            tr_labels.extend(['humorous', 'romantic'])
        else:
            tr_labels.append(tr_style)
        if params['mode'] == 'training':
            for e in range(params['epochs']):
                for label in tr_labels:
                    if label == 'actual':  # following the paper
                        batch_size = params['batch_size']
                    else:
                        batch_size = params['batch_size_lm']
                    for captions, lengths, image_f in data.get_batch(
                            batch_size, label=label, set='train'):
                        feed = {capt_inputs: captions[0],
                                capt_labels: captions[1],
                                image_embs: image_f,
                                seq_length: lengths}
                        # Pick the loss/optimizer pair matching this label.
                        if label == 'actual':
                            opt_loss, optim = cm_loss, cm_opt
                        elif label == 'humorous':
                            opt_loss, optim = lmh_loss, lmh_opt
                        elif label == 'romantic':
                            opt_loss, optim = lmr_loss, lmr_opt
                        loss_, _ = sess.run([opt_loss, optim], feed)
                    # NOTE(review): loss_ is unbound below if get_batch
                    # yields no batches for this label.
                    if e % 4 == 0:
                        # Validation pass for this label every 4 epochs.
                        losses = []
                        for captions, lengths, image_f in data.get_batch(
                            params['batch_size'], label=label, set='val'):
                            feed = {capt_inputs: captions[0],
                                    capt_labels: captions[1],
                                    image_embs: image_f,
                                    seq_length: lengths}
                            if label == 'actual':
                                opt_loss, optim = cm_loss, cm_opt
                            elif label == 'humorous':
                                opt_loss, optim = lmh_loss, lmh_opt
                            elif label == 'romantic':
                                opt_loss, optim = lmr_loss, lmr_opt
                            vloss_ = sess.run([opt_loss], feed)
                            losses.append(vloss_)
                        print("Validation Model: {} Epoch: {} Loss: {}".format(
                            label, e, np.mean(losses)))
                        # save model
                        if not os.path.exists("./checkpoints"):
                            os.makedirs("./checkpoints")
                    if e % 10 == 0 and e != 0:  # save every 10 epochs
                        save_path = saver.save(sess,
                                               "./checkpoints/{}.ckpt".format(
                                                   params['checkpoint']))
                        print("Model saved in file: %s" % save_path)
                    print("{} Model: Epoch: {} Loss: {}".format(label,
                                                                e, loss_))
            # save model
            if not os.path.exists("./checkpoints"):
                os.makedirs("./checkpoints")
            save_path = saver.save(sess, "./checkpoints/{}.ckpt".format(
                params['checkpoint']))
            print("Model saved in file: %s" % save_path)
        elif params['mode'] == 'inference':
            inference(params, model, data, saver, sess)
Esempio n. 6
0
    def build_model(self):
        """Instantiate all networks and their Adam optimizers.

        Stage one: encoder/decoder autoencoder plus a speaker classifier.
        Stage two: a generator (variant chosen by ``self.g_mode``) and a
        patch discriminator.  A standalone 3-class target classifier with
        its own optimizer is built last.
        """
        hps = self.hps
        ns = hps.ns
        enc_mode = self.enc_mode
        seg_len = hps.seg_len
        enc_size = hps.enc_size
        emb_size = hps.emb_size
        adam_betas = (0.5, 0.9)

        # ---stage one: autoencoder + speaker classifier--- #
        self.Encoder = cc(
            Encoder(ns=ns,
                    dp=hps.enc_dp,
                    enc_size=enc_size,
                    seg_len=seg_len,
                    enc_mode=enc_mode))
        self.Decoder = cc(
            Decoder(ns=ns,
                    c_in=enc_size,
                    c_h=emb_size,
                    c_a=hps.n_speakers,
                    seg_len=seg_len))

        # Classifier input width depends on how the encoder output is coded.
        if enc_mode == 'binary':
            clf_in = enc_size * enc_size
        elif enc_mode == 'multilabel_binary':
            clf_in = 2 * enc_size
        else:
            clf_in = enc_size
        self.SpeakerClassifier = cc(
            SpeakerClassifier(ns=ns,
                              c_in=clf_in,
                              c_h=emb_size,
                              n_class=hps.n_speakers,
                              dp=hps.dis_dp,
                              seg_len=seg_len))

        # ---stage one optimizers--- #
        ae_params = list(self.Encoder.parameters())
        ae_params += list(self.Decoder.parameters())
        self.ae_opt = optim.Adam(ae_params, lr=hps.lr, betas=adam_betas)
        self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                                  lr=hps.lr,
                                  betas=adam_betas)

        # ---stage two: generator variants--- #
        if self.g_mode == 'naive':
            self.Generator = cc(
                Decoder(ns=ns,
                        c_in=enc_size,
                        c_h=emb_size,
                        c_a=hps.n_speakers,
                        seg_len=seg_len))
        elif self.g_mode in ('targeted', 'targeted_residual'):
            # The residual variant additionally predicts an output mask.
            self.Generator = cc(
                Decoder(ns=ns,
                        c_in=enc_size,
                        c_h=emb_size,
                        c_a=hps.n_target_speakers,
                        seg_len=seg_len,
                        output_mask=(self.g_mode == 'targeted_residual')))
        elif self.g_mode == 'enhanced':
            self.Generator = cc(
                Enhanced_Generator(ns=ns,
                                   dp=hps.enc_dp,
                                   enc_size=1024,
                                   emb_size=1024,
                                   seg_len=seg_len,
                                   n_speakers=hps.n_speakers))
        elif self.g_mode == 'spectrogram':
            self.Generator = cc(
                Spectrogram_Patcher(ns=ns,
                                    c_in=513,
                                    c_h=emb_size,
                                    c_a=hps.n_target_speakers,
                                    seg_len=seg_len))
        elif self.g_mode == 'tacotron':
            self.Generator = cc(
                Tacotron(enc_size,
                         hps.n_target_speakers,
                         mel_dim=hp.n_mels,
                         linear_dim=int(hp.n_fft / 2) + 1))
            # Tacotron input lengths: one entry per batch element,
            # segment length downsampled by a factor of 8.
            self.tacotron_input_lengths = torch.tensor(
                [seg_len // 8 for _ in range(hps.batch_size)])
        else:
            raise NotImplementedError('Invalid Generator mode!')

        # Discriminator class count follows the generator's speaker set.
        if self.g_mode == 'naive':
            n_dis_class = hps.n_speakers
        else:
            n_dis_class = hps.n_target_speakers
        self.PatchDiscriminator = cc(
            nn.DataParallel(
                PatchDiscriminator(ns=ns,
                                   n_class=n_dis_class,
                                   seg_len=seg_len)))

        # ---stage two optimizers--- #
        self.gen_opt = optim.Adam(self.Generator.parameters(),
                                  lr=hps.lr,
                                  betas=adam_betas)
        self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                    lr=hps.lr,
                                    betas=adam_betas)

        # ---target classifier and its optimizer--- #
        self.TargetClassifier = cc(
            nn.DataParallel(TargetClassifier(ns=ns, n_class=3,
                                             seg_len=seg_len)))
        self.tclf_opt = optim.Adam(self.TargetClassifier.parameters(),
                                   lr=hps.lr,
                                   betas=adam_betas)
Esempio n. 7
0
def weights_init(m):
    """DCGAN-style weight initialization, applied via ``module.apply``.

    Conv layers get weights ~ N(0, 0.02); batch-norm layers get weights
    ~ N(1, 0.02) and zero bias.  All other module types are left alone.
    """
    layer_name = m.__class__.__name__
    if 'Conv' in layer_name:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif 'BatchNorm' in layer_name:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


# Build the three networks on GPU; wrap each in DataParallel when more
# than one GPU is available, then apply DCGAN-style initialization.
# NOTE(review): ngpu, ndf, nc, num, ngf, device, Encoder_loss and
# Decoder_loss are defined elsewhere in this file (outside this view).
netE = Encoder(ngpu, ndf, nc).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netE = nn.DataParallel(netE, list(range(ngpu)))
netE.apply(weights_init)

netG = Decoder(ngpu, num, ngf).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netG = nn.DataParallel(netG, list(range(ngpu)))
netG.apply(weights_init)

netD = Discriminator(ngpu, ndf, nc).cuda()
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))
netD.apply(weights_init)

# Loss functions and optimizers
criterionE = Encoder_loss
criterionG = Decoder_loss
criterionD = nn.BCELoss()

# Optimizers
Esempio n. 8
0
def main(preprocessed_dir_path, dest_dir_path, prefix, embedding_dim,
         hidden_dim, epoch, learning_rate, batch_size):
    """Train the encoder/decoder pair on the preprocessed train split.

    The checkpoint with the lowest epoch loss seen so far is written to
    ``<dest_dir_path>/<weight_file_name>.pth``; training stops early once
    the epoch loss drops below 1.

    Args:
        preprocessed_dir_path: directory with the pickled train/vocab files.
        dest_dir_path: directory the weight checkpoint is written to.
        prefix: optional file-name prefix shared with the other scripts.
        embedding_dim: embedding size for encoder and decoder.
        hidden_dim: hidden state size for encoder and decoder.
        epoch: number of training epochs.
        learning_rate: Adam learning rate for both optimizers.
        batch_size: number of samples per batch.
    """
    print(20 * "=", "Preparing for train", 20 * "=")

    if prefix != "":
        train_file_name = f"{prefix}_train"
        vocab_file_name = f"{prefix}_vocab"
        weight_file_name = f"{prefix}_weight"
    else:
        train_file_name = "train"
        vocab_file_name = "vocab"
        weight_file_name = "weight"

    with open(os.path.join(preprocessed_dir_path, vocab_file_name + ".pkl"),
              "rb") as fp:
        vocab = pickle.load(fp)

    vocab_size = len(vocab)
    # The space character is used as the padding token by preprocessing.
    padding_idx = vocab[" "]

    encoder = Encoder(vocab_size, embedding_dim, hidden_dim,
                      padding_idx).to(device)
    decoder = Decoder(vocab_size, embedding_dim, hidden_dim,
                      padding_idx).to(device)

    criterion = nn.CrossEntropyLoss()

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    expressions, answers = data_loader(
        os.path.join(preprocessed_dir_path, train_file_name + ".pkl"),
        padding_idx)

    expression_batches = batching(expressions, batch_size)
    answer_batches = batching(answers, batch_size)

    print(20 * "=", "Training", 20 * "=")

    all_losses = []
    all_times = []

    for i in range(1, epoch + 1):
        print(f"Epoch {i}")
        epoch_loss = 0
        start_time = time.time()

        # Iterate the parallel batch lists in lockstep (was an index loop).
        for expression_batch, answer_batch in zip(expression_batches,
                                                  answer_batches):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            encoder_state = encoder(expression_batch)

            # Teacher forcing: feed the answer shifted right, predict the
            # answer shifted left.
            source = answer_batch[:, :-1]
            target = answer_batch[:, 1:]

            decoder_output, _ = decoder(source, encoder_state)

            # Sum cross-entropy over every output position.
            loss = 0
            for k in range(decoder_output.size()[1]):
                loss += criterion(decoder_output[:, k, :], target[:, k])
            epoch_loss += loss.item()

            loss.backward()

            encoder_optimizer.step()
            decoder_optimizer.step()

        end_time = time.time()
        t = end_time - start_time
        print(
            f"  Loss:{epoch_loss} Time:{datetime.timedelta(seconds=int(t))}s")

        all_losses.append(epoch_loss)
        all_times.append(t)

        # Keep only the best checkpoint seen so far.
        if min(all_losses) == epoch_loss:
            torch.save(
                {
                    "encoder": encoder.state_dict(),
                    "decoder": decoder.state_dict()
                }, os.path.join(dest_dir_path, f"{weight_file_name}.pth"))

        if epoch_loss < 1:
            # Fixed typo in the user-facing message ("Ealry" -> "Early").
            print("  Early Stop")
            break

    print(20 * "=", "Done training", 20 * "=")
    print(f"Minimum Loss: {min(all_losses)}")
    print(f"Training time: {datetime.timedelta(seconds=int(sum(all_times)))}s")