def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix("input")

    # Construct model
    encoder = BidirectionalEncoder(vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16, theano_seed=1234)
    sampling_representation = encoder.apply(sampling_input, theano.tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation)
    model = Model(generated[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated outputs
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4


def test_search_model():

    # Create Theano variables
    floatX = theano.config.floatX
    source_sentence = theano.tensor.lmatrix("source")
    source_sentence_mask = theano.tensor.matrix("source_mask", dtype=floatX)
    target_sentence = theano.tensor.lmatrix("target")
    target_sentence_mask = theano.tensor.matrix("target_mask", dtype=floatX)

    # Construct model
    encoder = BidirectionalEncoder(vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask,
        target_sentence,
        target_sentence_mask,
    )

    # Compile a function for the cost
    f_cost = theano.function(
        inputs=[source_sentence, source_sentence_mask, target_sentence, target_sentence_mask], outputs=cost
    )

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(22, 4))
    y = numpy.random.randint(0, 12, size=(22, 6))
    x_mask = numpy.ones_like(x).astype(floatX)
    y_mask = numpy.ones_like(y).astype(floatX)

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    cost_ = f_cost(x, x_mask, y, y_mask)
    assert_allclose(cost_, 14.90944)
Example 3
def main(args):
    """
    Training and validation the model
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.debug("DEVICE: {}".format(device))

    # load vocabulary
    with open(args.vocab_path, "rb") as f:
        vocab = pickle.load(f)

    # encoder model setting
    encoder = EncoderResNet()
    encoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=args.encoder_lr) if args.fine_tune_encoder else None

    # decoder model setting
    decoder = Decoder(vis_dim=args.vis_dim,
                      vis_num=args.vis_num,
                      embed_dim=args.embed_dim,
                      hidden_dim=args.hidden_dim,
                      vocab_size=args.vocab_size,
                      num_layers=args.num_layers,
                      dropout_ratio=args.dropout_ratio)
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, decoder.parameters()),
                                         lr=args.decoder_lr)

    # move to GPU
    encoder = nn.DataParallel(encoder).to(device)
    decoder = nn.DataParallel(decoder).to(device)

    # loss function
    criterion = nn.CrossEntropyLoss()

    # data loader
    transform = set_transform(args.resize,
                              args.crop_size,
                              horizontal_flip=True,
                              normalize=True)
    train_img_dirc = os.path.join(args.root_img_dirc, "train2014")
    train_loader = get_image_loader(train_img_dirc, args.train_data_path,
                                    vocab, transform, args.batch_size,
                                    args.shuffle, args.num_workers)
    val_img_dirc = os.path.join(args.root_img_dirc, "val2014")
    val_loader = get_image_loader(val_img_dirc, args.val_data_path, vocab,
                                  transform, 1, args.shuffle, args.num_workers)

    # initialization
    best_bleu_score = -100
    not_improved_cnt = 0

    for epoch in range(1, args.num_epochs + 1):
        # training
        train(encoder, decoder, encoder_optimizer, decoder_optimizer,
              train_loader, criterion, epoch)

        # validation
        pred_df = validation(encoder, decoder, val_loader, criterion, epoch,
                             vocab, args.beam_size)

        # calculate BLEU-4 score
        pred_cap_lst = decode_caption(pred_df["pred"], vocab.idx2word)
        ans_cap_lst = decode_caption(pred_df["ans"], vocab.idx2word)
        assert len(pred_cap_lst) == len(ans_cap_lst)
        bleu_score_lst = []
        for i in range(len(pred_cap_lst)):
            bleu_score_lst.append(
                bleu(pred_cap_lst[i], ans_cap_lst[i], mode="4-gram"))
        bleu_score = np.mean(bleu_score_lst)

        # early stopping
        if bleu_score < best_bleu_score:
            not_improved_cnt += 1
        else:
            # learning is going well
            best_bleu_score = bleu_score
            not_improved_cnt = 0

            # save best params model
            torch.save(encoder.state_dict(), args.save_encoder_path)
            torch.save(decoder.state_dict(), args.save_decoder_path)

        # logging status
        logger.debug(
            "\n************************ VAL ************************\n"
            "EPOCH          : [{0}/{1}]\n"
            "BLEU-4         : {2}\n"
            "EARLY STOPPING : [{3}/{4}]\n"
            "*****************************************************\n".format(
                epoch, args.num_epochs, bleu_score, not_improved_cnt,
                args.stop_count))

        if not_improved_cnt == args.stop_count:
            logger.debug("Early Stopping")
            break

        # decay learning rate if there is no improvement for 10 consecutive epochs
        if not_improved_cnt > 0 and not_improved_cnt % 10 == 0:
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)
            adjust_learning_rate(decoder_optimizer, 0.8)
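
# Note: adjust_learning_rate is not defined in this snippet. A minimal sketch,
# assuming it simply scales every parameter group's learning rate by the given
# factor (0.8 above):
def adjust_learning_rate(optimizer, shrink_factor):
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor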
Example 4
    train_iter, valid_iter, test_iter = BucketIterator.splits(
                                            (train_data, valid_data, test_data),
                                            batch_size=BATCH_SIZE,
                                            device=device)

    IN_DIM = len(SRC.vocab)
    OUT_DIM = len(TRG.vocab)
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    HID_DIM = 512
    N_LAYER = 2
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    enc = Encoder(IN_DIM, ENC_EMB_DIM, HID_DIM, N_LAYER, ENC_DROPOUT)
    dec = Decoder(OUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYER, DEC_DROPOUT)

    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)

    print(f"Model has {count_parameters(model):,} trainable parameters")

    # TRG.pad_token = <pad>
    # TRG_PAD_IDX = 1
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    mode = 'train'
    # mode = 'eval'

    model.load_state_dict(torch.load('tut1-model.pt'))
Example 5
parser.add_argument('--lr', type=float, default=1e-4)
parser.add_argument('--lr_decay', type=float, default=5e-5)
parser.add_argument('--max_iter', type=int, default=600000)
parser.add_argument('--batch_size', type=int, default=5)
parser.add_argument('--style_weight', type=float, default=3.0)
parser.add_argument('--content_weight', type=float, default=1.0)
parser.add_argument('--temporal_weight', type=float, default=2.0)
parser.add_argument('--v_weight', type=float, default=20.0)
parser.add_argument('--n_threads', type=int, default=16)
parser.add_argument('--save_model_interval', type=int, default=10000)
parser.add_argument('--start_iter', type=float, default=500000)
args = parser.parse_args('')


device = torch.device('cuda')
decoder = Decoder('Decoder')
vgg = VGG('VGG19')

vgg.features.load_state_dict(torch.load(args.vgg))
vgg = nn.Sequential(*list(vgg.features.children())[:44])
network = Net(vgg, decoder, args.start_iter)
network.train()
network.to(device)


optimizer = torch.optim.Adam([
                              {'params': network.decoder.parameters()},
                              {'params': network.transform.parameters()}], lr=args.lr)


style_tf = train_transform()
Example 6
    # convert labels to indices
    indexed_target_test = prepare_data.label_to_idx(target_test, char2idx)
    indexed_target_word_test = prepare_data.word_to_idx(
        target_test, embeddings)

    test_data = prepare_data.combine_data(features_test, indexed_target_test)

    # initialize the Encoder
    encoder = Encoder(features_test[0].size(1), encoder_hidden_size,
                      encoder_layers, len(char2idx_ctc), batch_size,
                      device).to(device)

    # initialize the Decoder
    decoder = Decoder(embedding_dim_chars, encoder_hidden_size,
                      attention_hidden_size, num_filters,
                      len(char2idx) + 1, decoder_layers, encoder_layers,
                      batch_size, attention_type, device).to(device)

    # load the model
    checkpoint = torch.load('weights/libri/state_dict_10.pt',
                            map_location=torch.device('cpu'))
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])

    # evaluate
    batch_size = 1

    pairs_batch_train = DataLoader(dataset=test_data,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   collate_fn=prepare_data.collate,
Example 7
batch_size = 16
enc_lr = 0.0001
dec_lr = 0.0005
emb_lr = 0.0001

# -----------------------------------

text_embedding = nn.Embedding(num_embeddings=len(_symbol_to_id),
                              embedding_dim=512).to(device)
pos_embedding = nn.Embedding.from_pretrained(positional_encoding(512, 512),
                                             freeze=True).to(device)
pos_embedding_ = nn.Embedding.from_pretrained(positional_encoding(256, 512),
                                              freeze=True).to(device)

encoder = Encoder(emb_channels=512).to(device)
decoder = Decoder(mel_channels=80, enc_channels=512,
                  emb_channels=512).to(device)

optimizer = torch.optim.Adam([{
    'params': text_embedding.parameters(),
    'lr': emb_lr
}, {
    'params': encoder.parameters(),
    'lr': enc_lr
}, {
    'params': decoder.parameters(),
    'lr': dec_lr
}],
                             lr=0.001)

# -----------------------------------
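
# Note: positional_encoding is not defined in this snippet. A minimal sketch of
# a sinusoidal position table, assuming it returns an (n_positions, dim) float
# tensor suitable for nn.Embedding.from_pretrained:
def positional_encoding(n_positions, dim):
    positions = torch.arange(n_positions, dtype=torch.float32).unsqueeze(1)
    div_term = torch.pow(10000.0,
                         torch.arange(0, dim, 2, dtype=torch.float32) / dim)
    table = torch.zeros(n_positions, dim)
    table[:, 0::2] = torch.sin(positions / div_term)
    table[:, 1::2] = torch.cos(positions / div_term)
    return table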
Example 8
if args.resume_snapshot:
    model = torch.load(args.resume_snapshot,
                       map_location=lambda storage, loc: storage)
    print('load model from %s' % args.resume_snapshot)
else:
    word_embedding = torch.FloatTensor(vocab.word_embedding)
    char_embedding = torch.FloatTensor(vocab.char_embedding)
    args.word_embed_size = word_embedding.size(1)
    args.char_embed_size = char_embedding.size(1)

    print("[word_vocab]:%d [char_vocab]:%d" % (args.word_size, args.char_size))

    print("[!] Instantiating models...")
    encoder = Encoder(args, word_embedding)
    decoder = Decoder(args, char_embedding)
    model = Seq2Seq(encoder, decoder)

# set model dir
model_folder, model_prefix = utils.get_folder_prefix(args, model)
log_file = model_prefix + '.log'

# setup logger
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
fh = logging.FileHandler(log_file)
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.INFO)
formatter = logging.Formatter(fmt='%(asctime)s %(message)s',
                              datefmt='%m/%d/%Y %I:%M:%S')
Example 9
def train_dynamics(env, args, writer=None):
    """
    Trains the Dynamics module. Supervised.

    Arguments:
    env: the initialized environment (rllab/gym)
    args: input arguments
    writer: initialized summary writer for tensorboard
    """
    args.action_space = env.action_space

    # Initialize models
    enc = Encoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    dec = Decoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    d_module = D_Module(env.action_space.shape[0], args.dim, args.discrete)

    if args.from_checkpoint is not None:
        results_dict = torch.load(args.from_checkpoint)
        enc.load_state_dict(results_dict['enc'])
        dec.load_state_dict(results_dict['dec'])
        d_module.load_state_dict(results_dict['d_module'])

    all_params = chain(enc.parameters(), dec.parameters(),
                       d_module.parameters())

    if args.transfer:
        for p in enc.parameters():
            p.requires_grad = False

        for p in dec.parameters():
            p.requires_grad = False
        all_params = d_module.parameters()

    optimizer = torch.optim.Adam(all_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.gpu:
        enc = enc.cuda()
        dec = dec.cuda()
        d_module = d_module.cuda()

    # Initialize datasets
    val_loader = None
    train_dataset = DynamicsDataset(args.train_set,
                                    args.train_size,
                                    batch=args.train_batch,
                                    rollout=args.rollout)
    val_dataset = DynamicsDataset(args.test_set,
                                  5000,
                                  batch=args.test_batch,
                                  rollout=args.rollout)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)

    results_dict = {
        'dec_losses': [],
        'forward_losses': [],
        'inverse_losses': [],
        'total_losses': [],
        'enc': None,
        'dec': None,
        'd_module': None,
        'd_init': None,
        'args': args
    }

    total_action_taken = 0
    correct_predicted_a_hat = 0

    # create the mask here for re-weighting
    dec_mask = None
    if args.dec_mask is not None:
        dec_mask = torch.ones(9)
        game_vocab = dict([
            (b, a)
            for a, b in enumerate(sorted(env.game.all_possible_features()))
        ])
        dec_mask[game_vocab['Agent']] = args.dec_mask
        dec_mask[game_vocab['Goal']] = args.dec_mask
        dec_mask = dec_mask.expand(args.batch_size, args.maze_length,
                                   args.maze_length, 9).contiguous().view(-1)
        dec_mask = Variable(dec_mask, requires_grad=False)
        if args.gpu:
            dec_mask = dec_mask.cuda()

    for epoch in range(1, args.num_epochs + 1):
        enc.train()
        dec.train()
        d_module.train()

        if args.framework == "mazebase":
            d_init.train()  # d_init is defined elsewhere in the original file

        # for measuring the accuracy
        train_acc = 0
        current_epoch_actions = 0
        current_epoch_predicted_a_hat = 0

        start = time.time()
        for i, (states, target_actions) in enumerate(train_loader):

            optimizer.zero_grad()

            if args.framework != "mazebase":
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
            else:
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, current_epoch_predicted_a_hat, current_epoch_actions = multiple_forward(
                    i, states, target_actions, enc, dec, d_module, args,
                    d_init, dec_mask)

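            # combined supervised dynamics loss: forward-model error plus
            # weighted inverse-model and decoder (reconstruction) terms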
            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                        args.dec_loss_coef * dec_loss

            if i % args.log_interval == 0:
                log(
                    'Epoch [{}/{}]\tIter [{}/{}]\t'.format(
                        epoch, args.num_epochs, i+1, len(
                        train_dataset)//args.batch_size) + \
                    'Time: {:.2f}\t'.format(time.time() - start) + \
                    'Decoder Loss: {:.2f}\t'.format(dec_loss.data[0]) + \
                    'Forward Loss: {:.2f}\t'.format(forward_loss.data[0] ) + \
                    'Inverse Loss: {:.2f}\t'.format(inv_loss.data[0]) + \
                    'Loss: {:.2f}\t'.format(loss.data[0]))

                results_dict['dec_losses'].append(dec_loss.data[0])
                results_dict['forward_losses'].append(forward_loss.data[0])
                results_dict['inverse_losses'].append(inv_loss.data[0])
                results_dict['total_losses'].append(loss.data[0])

                # write the summaries here
                if writer:
                    writer.add_scalar('dynamics/total_loss', loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/decoder', dec_loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/reconstruction_loss',
                                      recon_loss.data[0], epoch)
                    writer.add_scalar('dynamics/next_state_prediction_loss',
                                      model_loss.data[0], epoch)
                    writer.add_scalar('dynamics/inv_loss', inv_loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/forward_loss',
                                      forward_loss.data[0], epoch)

                    writer.add_scalars(
                        'dynamics/all_losses', {
                            "total_loss": loss.data[0],
                            "reconstruction_loss": recon_loss.data[0],
                            "next_state_prediction_loss": model_loss.data[0],
                            "decoder_loss": dec_loss.data[0],
                            "inv_loss": inv_loss.data[0],
                            "forward_loss": forward_loss.data[0],
                        }, epoch)

            loss.backward()

            correct_predicted_a_hat += current_epoch_predicted_a_hat
            total_action_taken += current_epoch_actions

            # does it not work at all without grad clipping ?
            torch.nn.utils.clip_grad_norm(all_params, args.max_grad_norm)
            optimizer.step()

            # maybe add the generated image to add the logs
            # writer.add_image()

        # Run validation
        if val_loader is not None:
            enc.eval()
            dec.eval()
            d_module.eval()
            forward_loss, inv_loss, dec_loss = 0, 0, 0
            for i, (states, target_actions) in enumerate(val_loader):
                f_loss, i_loss, d_loss, _, _, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
                forward_loss += f_loss
                inv_loss += i_loss
                dec_loss += d_loss
            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                    args.dec_loss_coef * dec_loss
            # average over the number of validation batches (not the last loop
            # index, which undercounts by one and fails with a single batch)
            n_val = len(val_loader)
            if writer:
                writer.add_scalar('val/forward_loss',
                                  forward_loss.data[0] / n_val, epoch)
                writer.add_scalar('val/inverse_loss',
                                  inv_loss.data[0] / n_val, epoch)
                writer.add_scalar('val/decoder_loss',
                                  dec_loss.data[0] / n_val, epoch)
            log(
                '[Validation]\t' + \
                'Decoder Loss: {:.2f}\t'.format(dec_loss.data[0] / n_val) + \
                'Forward Loss: {:.2f}\t'.format(forward_loss.data[0] / n_val) + \
                'Inverse Loss: {:.2f}\t'.format(inv_loss.data[0] / n_val) + \
                'Loss: {:.2f}\t'.format(loss.data[0] / n_val))
        if epoch % args.checkpoint == 0:
            results_dict['enc'] = enc.state_dict()
            results_dict['dec'] = dec.state_dict()
            results_dict['d_module'] = d_module.state_dict()
            if args.framework == "mazebase":
                results_dict['d_init'] = d_init.state_dict()
            torch.save(
                results_dict,
                os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
            log('Saved model %s' % epoch)

    results_dict['enc'] = enc.state_dict()
    results_dict['dec'] = dec.state_dict()
    results_dict['d_module'] = d_module.state_dict()
    torch.save(results_dict,
               os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
    print(os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
Example 10
def main():
    make_deterministic()

    # region Prepare data
    with Timer('\nData preparation time: %s\n'):
        ru_lang = Language()
        en_lang = Language()

        yandex = Yandex(
            'datasets/yandex/corpus.en_ru.1m.ru',
            'datasets/yandex/corpus.en_ru.1m.en',
            ru_lang,
            en_lang,
            data_slice=H.dataset_slice,
        )

        paracrawl = ParaCrawl(
            'datasets/paracrawl/en-ru.txt',
            ru_lang,
            en_lang,
            data_slice=slice(0),
        )

        low = ru_lang.lower_than(H.ru_word_count_minimum)
        infrequent_words_n = max(
            ceil(ru_lang.words_n * H.infrequent_words_percent), len(low))
        if infrequent_words_n > 0:
            ru_lang.drop_words(ru_lang.lowk(infrequent_words_n))
            print(
                f'{infrequent_words_n:,} infrequent Russian words are dropped')

        low = en_lang.lower_than(H.en_word_count_minimum)
        if len(low) > 0:
            en_lang.drop_words(*low)
            print(f'{len(low):,} infrequent English words are dropped')

        print(
            f'Russian language: {ru_lang.words_n:,} words, {ru_lang.sentence_length:,} words in a sentence'
        )
        print(
            f'English language: {en_lang.words_n:,} words, {en_lang.sentence_length:,} words in a sentence'
        )

        batch = H.batch_size
        dataset = ConcatDataset((yandex, paracrawl))
        loader = DataLoader(dataset, batch, shuffle=True)
    # endregion

    # region Models and optimizers
    model = Seq2Seq(
        Encoder(ru_lang.words_n, H.encoder_embed_dim, H.encoder_hidden_dim,
                H.encoder_bi, H.decoder_hd),
        Attention(H.encoder_hd, H.decoder_hd),
        Decoder(en_lang.words_n, H.decoder_embed_dim, H.decoder_hidden_dim,
                H.encoder_hd),
    ).to(Device).train()

    optimizer = Adam(model.parameters(), lr=H.learning_rate)
    criterion = CrossEntropyLoss(ignore_index=Token_PAD, reduction='sum')
    # endregion

    # region Training
    teaching_percent = H.teaching_percent
    total = len(dataset)
    log_interval = max(5, round(total / batch / 1000))

    for epoch in range(1, H.epochs + 1):
        with Printer() as printer:
            printer.print(f'Train epoch {epoch}: starting...')
            for i, ((ru, ru_l), en_sos, en_eos) in enumerate(loader, 1):
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Run data through model
                predictions = model(ru, ru_l, en_sos, teaching_percent)
                # Calculate loss
                loss = criterion(predictions, en_eos)
                # Back propagate and perform optimization
                loss.backward()
                clip_grad_norm_(model.parameters(), H.gradient_norm_clip)
                optimizer.step()

                # Print log
                if i % log_interval == 0:
                    printer.print(
                        f'Train epoch {epoch}: {i * batch / total:.1%} [{i * batch:,}/{total:,}]'
                    )

            printer.print(f'Train epoch {epoch}: completed')
    # endregion

    torch.save(
        (
            ru_lang.__getnewargs__(),
            en_lang.__getnewargs__(),
            model.cpu().eval().data,
        ),
        'data/data.pt',
    )

    evaluate(model.to(Device), ru_lang, en_lang,
             'datasets/yandex/corpus.en_ru.1m.ru',
             slice(H.dataset_slice.stop + 1, H.dataset_slice.stop + 1 + 100))
Example 11
def random_batch(batch_size, data):
    # note: `random` here must be numpy.random; the stdlib random.choice takes
    # a single sequence argument
    batched_data = []
    for i in random.choice(len(data), batch_size):
        batched_data.append(data[i])
    return batched_data


data = load_data(data_dir, "weibo_pair_train_Q_after")
vocab_list, word2index, index2word, embedding, question_words = build_vocab(data_dir, data)
keywords_list, keywords_index, PMI = load_PMI()
key_to_vocab = [0] * len(keywords_list)
for i in range(len(keywords_list)):
    if keywords_list[i] in word2index:
        key_to_vocab[i] = word2index[keywords_list[i]]

encoder = Encoder(len(word2index), embedding_dim, hidden_dim)
decoder = Decoder(len(word2index), embedding_dim, hidden_dim)
model = Seq2Seq(encoder, decoder, device).to(device)

# note: SGD requires an explicit learning rate; the value below is an assumed placeholder
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss(ignore_index=0).to(device)

def train():
    model.train()
    for batch_id in range(0, 100):
        batched_data = random_batch(batch_size, data)
        posts_index, questions_index, keyword_tensor, word_type = batch_data(batched_data, question_words, keywords_index, key_to_vocab, word2index)
        optimizer.zero_grad()
        output = model(posts_index, questions_index)
        loss = criterion(output.view(-1, output.shape[2]), questions_index.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
Example 12
def main():
    max_len = 50
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    temperature = params.temperature
    assert torch.cuda.is_available()

    if os.path.exists("data/vocab.json"):
        vocab = Vocabulary()
        with open('data/vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)

        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        print("vocabulary doesn't exist!")
        return

    print("loading model...")
    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()

    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)
    print("successfully loaded!\n")

    utterance = ""
    while True:
        if utterance == "exit":
            break
        k1 = input("Type first Knowledge: ").lower()
        while not k1:
            print("Please type first Knowledge.\n")
            k1 = input("Type first Knowledge: ").lower()

        k2 = input("Type second Knowledge: ").lower()
        while not k2:
            print("Please type second Knowledge.\n")
            k2 = input("Type second Knowledge: ").lower()

        k3 = input("Type third Knowledge: ").lower()
        while not k3:
            print("Please type third Knowledge.\n")
            k3 = input("Type third Knowledge: ").lower()

        K = [k1, k2, k3]
        K = knowledgeToIndex(K, vocab)
        K = Kencoder(K)
        print()

        while True:
            utterance = input("you: ").lower()
            while not utterance:
                print("Please type utterance.\n")
                utterance = input("you: ")
            if utterance == "change knowledge" or utterance == "exit":
                print()
                break

            X = []
            tokens = nltk.word_tokenize(utterance)
            for word in tokens:
                if word in vocab.stoi:
                    X.append(vocab.stoi[word])
                else:
                    X.append(vocab.stoi["<UNK>"])
            X = torch.LongTensor(X).unsqueeze(0).cuda()  # X: [1, x_seq_len]

            encoder_outputs, hidden, x = encoder(X)
            k_i = manager(x, None, K)
            outputs = torch.zeros(
                max_len, 1, n_vocab).cuda()  # outputs: [max_len, 1, n_vocab]
            hidden = hidden[decoder.n_layer:]
            output = torch.LongTensor([params.SOS]).cuda()

            for t in range(max_len):
                output, hidden, attn_weights = decoder(output, k_i, hidden,
                                                       encoder_outputs)
                outputs[t] = output
                output = output.data.max(1)[1]

            outputs = outputs.max(2)[1]

            answer = ""
            for idx in outputs:
                if idx == params.EOS:
                    break
                answer += vocab.itos[idx] + " "

            print("bot:", answer[:-1], "\n")
Example 13
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from data import PersianLexicon, collate_fn
from model import Encoder, Decoder
from config import DataConfig, ModelConfig, TrainConfig

# data prep
ds = PersianLexicon(DataConfig.graphemes_path, DataConfig.phonemes_path,
                    DataConfig.lexicon_path)
dl = DataLoader(ds, collate_fn=collate_fn, batch_size=TrainConfig.batch_size)

# models
encoder_model = Encoder(ModelConfig.graphemes_size,
                        ModelConfig.hidden_size).to(TrainConfig.device)
decoder_model = Decoder(ModelConfig.phonemes_size,
                        ModelConfig.hidden_size).to(TrainConfig.device)

# log
log = SummaryWriter(TrainConfig.log_path)

# loss
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = torch.optim.Adam(list(encoder_model.parameters()) +
                             list(decoder_model.parameters()),
                             lr=TrainConfig.lr)

# training loop
counter = 0
for e in range(TrainConfig.epochs):
    pass  # per-epoch training body truncated in the original snippet
Example 14
File: DDF.py Project: EDG-Y/EDGY
def convert(cfg):
    dataset_path = Path(utils.to_absolute_path("datasets")) / cfg.dataset.path
    with open(dataset_path / "speakers.json") as file:
        speakers = sorted(json.load(file))

    synthesis_list_path = Path(utils.to_absolute_path(cfg.synthesis_list))
    with open(synthesis_list_path) as file:
        synthesis_list = json.load(file)

    in_dir = Path(utils.to_absolute_path(cfg.in_dir))
    out_dir = Path(utils.to_absolute_path(cfg.out_dir))
    out_dir.mkdir(exist_ok=True, parents=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoder = Encoder(**cfg.model.encoder)
    decoder = Decoder(**cfg.model.decoder)
    encoder.to(device)
    decoder.to(device)

    print("Load checkpoint from: {}:".format(cfg.checkpoint))
    checkpoint_path = utils.to_absolute_path(cfg.checkpoint)
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    encoder.load_state_dict(checkpoint["encoder"])
    decoder.load_state_dict(checkpoint["decoder"])

    encoder.eval()
    decoder.eval()

    meter = pyloudnorm.Meter(cfg.preprocessing.sr)

    for wav_path, speaker_id, out_filename in tqdm(synthesis_list):
        wav_path = in_dir / wav_path
        wav, _ = librosa.load(wav_path.with_suffix(".wav"),
                              sr=cfg.preprocessing.sr)
        ref_loudness = meter.integrated_loudness(wav)
        wav = wav / np.abs(wav).max() * 0.999

        mel = librosa.feature.melspectrogram(
            preemphasis(wav, cfg.preprocessing.preemph),
            sr=cfg.preprocessing.sr,
            n_fft=cfg.preprocessing.n_fft,
            n_mels=cfg.preprocessing.n_mels,
            hop_length=cfg.preprocessing.hop_length,
            win_length=cfg.preprocessing.win_length,
            fmin=cfg.preprocessing.fmin,
            power=1)
        logmel = librosa.amplitude_to_db(mel, top_db=cfg.preprocessing.top_db)
        logmel = logmel / cfg.preprocessing.top_db + 1

        mel = torch.FloatTensor(logmel).unsqueeze(0).to(device)
        speaker = torch.LongTensor([speakers.index(speaker_id)]).to(device)
        with torch.no_grad():
            z, _ = encoder.encode(mel)
            output = decoder.generate(z, speaker)

        output_loudness = meter.integrated_loudness(output)
        output = pyloudnorm.normalize.loudness(output, output_loudness,
                                               ref_loudness)
        path = out_dir / out_filename
        librosa.output.write_wav(path.with_suffix(".wav"),
                                 output.astype(np.float32),
                                 sr=cfg.preprocessing.sr)
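
# Note: preemphasis is not defined in this snippet. A minimal sketch, assuming
# the usual first-order pre-emphasis filter y[t] = x[t] - coef * x[t - 1]:
from scipy.signal import lfilter

def preemphasis(wav, coef):
    return lfilter([1.0, -coef], [1.0], wav)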
Example 15
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2, config['topical_embedding_dim'])
    topical_transformer = topicalq_transformer(
        config['topical_vocab_size'], config['topical_embedding_dim'],
        config['enc_nhids'], config['topical_word_num'], config['batch_size'])

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')
        source_topical_word = tensor.lmatrix('source_topical')
        source_topical_mask = tensor.matrix('source_topical_mask')

        # Get training and development set streams
        tr_stream = get_tr_stream_with_topicalq(**config)
        dev_stream = get_dev_stream_with_topicalq(**config)
        topic_embedding = topical_transformer.apply(source_topical_word)
        # Get cost of the model
        representation = encoder.apply(source_sentence, source_sentence_mask)
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :, (representation.shape[2] // 2):]

        cost = decoder.cost(
            representation, source_sentence_mask, tw_representation,
            source_topical_mask, target_sentence, target_sentence_mask,
            topic_embedding, content_embedding)

        logger.info('Creating computational graph')
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()
        topical_transformer.weights_init = IsotropicGaussian(
            config['weight_scale'])
        topical_transformer.biases_init = Constant(0)
        # (unclear whether this explicit allocation step is required here)
        topical_transformer.push_allocation_config()
        topical_transformer.look_up.weights_init = Orthogonal()
        topical_transformer.transformer.weights_init = Orthogonal()
        topical_transformer.initialize()
        word_topical_embedding = cPickle.load(
            open(config['topical_embeddings'], 'rb'))
        np_word_topical_embedding = numpy.array(word_topical_embedding,
                                                dtype='float32')
        topical_transformer.look_up.W.set_value(np_word_topical_embedding)
        topical_transformer.look_up.W.tag.role = []


        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in GroundHog
            logger.info('Applying dropout')
            dropout_inputs = [x for x in cg.intermediary_variables
                              if x.name == 'maxout_apply_output']
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(
                cg, enc_params+dec_params, config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                                   Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}"
                    .format(len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([cost], after_batch=True),
            Printing(after_batch=True),
            CheckpointNMT(config['saveto'],
                          every_n_batches=config['save_freq'])
        ]
        '''
        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(
                sampling_input, sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(
                bricks=[decoder.sequence_generator], name="outputs")(
                    ComputationGraph(generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model, data_stream=tr_stream,
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if config['bleu_script'] is not None:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input, samples=samples, config=config,
                              model=search_model, data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq']))
        '''

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En', channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,on_unused_sources='warn',
            step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                     eval(config['step_rule'])()])
        )

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(
            model=training_model,
            algorithm=algorithm,
            data_stream=tr_stream,
            extensions=extensions
        )

        # Train!
        main_loop.run()

    elif mode == 'translate':

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_topical_word=tensor.lmatrix('source_topical')

        # Get test set stream
        test_stream = get_dev_stream_with_topicalq(
            config['test_set'], config['src_vocab'], config['src_vocab_size'],
            config['topical_test_set'], config['topical_vocab'],
            config['topical_vocab_size'], config['unk_id'])
        ftrans = open(config['test_set'] + '.trans.out', 'w')

        # Helper utilities
        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1

        # Get beam search
        logger.info("Building sampling model")
        topic_embedding = topical_transformer.apply(source_topical_word)
        representation = encoder.apply(source_sentence,
                                       tensor.ones(source_sentence.shape))
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :, (representation.shape[2] // 2):]
        generated = decoder.generate(
            source_sentence, representation, tw_representation,
            topical_embedding=topic_embedding,
            content_embedding=content_embedding)


        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        # Get target vocabulary
        trg_vocab = _ensure_special_tokens(
            pickle.load(open(config['trg_vocab'], 'rb')), bos_idx=0,
            eos_idx=trg_eos_idx, unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Started translation: ")
        total_cost = 0.0

        for i, line in enumerate(test_stream.get_epoch_iterator()):

            seq = sutils._oov_to_unk(
                line[0], config['src_vocab_size'], unk_idx)
            seq2 = line[1]
            input_ = numpy.tile(seq, (config['beam_size'], 1))
            input_topical = numpy.tile(seq2, (config['beam_size'], 1))


            # draw sample, checking to ensure we don't get an empty string back
            trans, costs = beam_search.search(
                input_values={source_sentence: input_,
                              source_topical_word: input_topical},
                max_length=10 * len(seq), eol_symbol=src_eos_idx,
                ignore_first_eol=True)
            '''
            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths
            '''
            # keep the config['beam_size'] lowest-cost candidates
            best = numpy.argsort(costs)[0:config['beam_size']]
            for b in best:
                try:
                    total_cost += costs[b]
                    trans_out = trans[b]

                    # convert idx to words
                    trans_out = sutils._idx_to_word(trans_out, trg_ivocab)

                except ValueError:
                    logger.info(
                        "Can NOT find a translation for line: {}".format(i+1))
                    trans_out = '<UNK>'

                print(trans_out, file=ftrans)

            if i != 0 and i % 100 == 0:
                logger.info(
                    "Translated {} lines of test set...".format(i))

        logger.info("Total cost of the test: {}".format(total_cost))
        ftrans.close()
    elif mode == 'rerank':
        # Create Theano variables
        ftrans = open(config['val_set'] + '.scores.out', 'w')
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')

        config['src_data'] = config['val_set']
        config['trg_data'] = config['val_set_grndtruth']
        config['batch_size'] = 1
        config['sort_k_batches'] = 1
        test_stream = get_tr_stream_unsorted(**config)
        logger.info("Building sampling model")
        representations = encoder.apply(source_sentence, source_sentence_mask)
        costs = decoder.cost(representations, source_sentence_mask,
                             target_sentence, target_sentence_mask)
        logger.info("Loading the model..")
        model = Model(costs)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        costs_computer = function([source_sentence, source_sentence_mask,
                                   target_sentence, target_sentence_mask],
                                  costs)
        iterator = test_stream.get_epoch_iterator()

        scores = []
        for i, (src, src_mask, trg, trg_mask) in enumerate(iterator):
            costs = costs_computer(src, src_mask, trg, trg_mask)
            cost = costs.sum()
            print(i, cost)
            scores.append(cost)
            ftrans.write(str(cost) + "\n")
        ftrans.close()
Example 16
import mxnet as mx
import pickle
from model import Encoder, Decoder, beam_search_translate

with open(f"./data/in_vocab.pkl", "rb") as fp:
    in_vocab = pickle.load(fp)
with open(f"./data/out_vocab.pkl", "rb") as fp:
    out_vocab = pickle.load(fp)

embed_size, num_hiddens, num_layers, ctx = 200, 200, 3, mx.cpu()
attention_size, drop_prob = 20, 0.1

encoder = Encoder(len(in_vocab), embed_size, num_hiddens, num_layers,
                  drop_prob)
decoder = Decoder(len(out_vocab), embed_size, num_hiddens, num_layers,
                  attention_size, drop_prob)

encoder.load_parameters('./data/params_encoder_180')
decoder.load_parameters('./data/params_decoder_180')

# testing
"should return 我无法做到"
input_seq = "I can't do it ."
beam_search_translate(encoder, decoder, input_seq, 20, ctx, 3, in_vocab,
                      out_vocab)

"should return 他很穷"
input_seq = "He is poor ."
beam_search_translate(encoder, decoder, input_seq, 20, ctx, 3, in_vocab,
                      out_vocab)
Example 17
def train(resume=False):

    it = 0

    writer = SummaryWriter('../runs/' + hparams.exp_name)

    for k in hparams.__dict__.keys():
        writer.add_text(str(k), str(hparams.__dict__[k]))

    train_dataset = ChestData(
        data_csv=hparams.train_csv,
        data_dir=hparams.train_dir,
        transform=transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.485), (0.229))
        ]))

    validation_dataset = ChestData(
        data_csv=hparams.valid_csv,
        data_dir=hparams.valid_dir,
        transform=transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.485), (0.229))
        ]))

    train_loader = DataLoader(train_dataset,
                              batch_size=hparams.batch_size,
                              shuffle=True,
                              num_workers=0)

    validation_loader = DataLoader(validation_dataset,
                                   batch_size=hparams.batch_size,
                                   shuffle=True,
                                   num_workers=0)

    print('loaded train data of length : {}'.format(len(train_dataset)))

    Tensor = torch.cuda.FloatTensor if hparams.cuda else torch.FloatTensor

    def validation(encoder_, decoder_=None, send_stats=False, epoch=0):
        encoder_ = encoder_.eval()
        if decoder_:
            decoder_ = decoder_.eval()
        # print('Validating model on {0} examples. '.format(len(validation_loader)))
        with torch.no_grad():
            scores_list = []
            labels_list = []
            val_loss = 0
            for (img, labels, imgs_names) in validation_loader:
                img = Variable(img.float(), requires_grad=False)
                labels = Variable(labels.float(), requires_grad=False)
                scores = None
                if hparams.cuda:
                    img = img.cuda(hparams.gpu_device)
                    labels = labels.cuda(hparams.gpu_device)

                z = encoder_(img)

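                # Two scoring modes: with a decoder, the anomaly score is the
                # reconstruction error (autoencoder pretraining); without it,
                # the distance to the Deep SVDD center.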
                if decoder_:
                    outputs = decoder_(z)
                    scores = torch.sum(
                        (outputs - img)**2, dim=tuple(range(
                            1, outputs.dim())))  # (outputs - img) ** 2
                    # rec_loss = rec_loss.view(outputs.shape[0], -1)
                    # rec_loss = torch.sum(torch.sum(rec_loss, dim=1))
                    val_loss += torch.sum(scores)
                    save_image(img,
                               'tmp/img_{}.png'.format(epoch),
                               normalize=True)
                    save_image(outputs,
                               'tmp/reconstructed_{}.png'.format(epoch),
                               normalize=True)

                else:
                    dist = torch.sum((z - encoder_.center)**2, dim=1)
                    if hparams.objective == 'soft-boundary':
                        scores = dist - encoder_.radius**2
                        val_loss += (1 / hparams.nu) * torch.sum(
                            torch.max(torch.zeros_like(scores), scores))
                    else:
                        scores = dist
                        val_loss += torch.sum(dist)

                scores_list.append(scores)
                labels_list.append(labels)

            scores = torch.cat(scores_list, dim=0)
            labels = torch.cat(labels_list, dim=0)

            val_loss /= len(validation_dataset)
            # the R^2 term belongs to the soft-boundary SVDD objective (no decoder)
            val_loss += encoder_.radius**2 if not decoder_ and hparams.objective == 'soft-boundary' else 0

            if hparams.cuda:
                labels = labels.cpu()
                scores = scores.cpu()

            labels = labels.view(-1).numpy()
            scores = scores.view(-1).detach().numpy()

            auc = roc_auc_score(labels, scores)

        return auc, val_loss

    ### validation function ends.

    if hparams.cuda:
        encoder = Encoder().cuda(hparams.gpu_device)
        decoder = Decoder().cuda(hparams.gpu_device)
    else:
        encoder = Encoder()
        decoder = Decoder()

    params_count = 0
    for param in encoder.parameters():
        params_count += np.prod(param.size())
    for param in decoder.parameters():
        params_count += np.prod(param.size())
    print('Model has {0} trainable parameters'.format(params_count))

    if not hparams.load_model:
        encoder.apply(weights_init_normal)
        decoder.apply(weights_init_normal)

    optim_params = list(encoder.parameters())
    optimizer_train = optim.Adam(optim_params,
                                 lr=hparams.train_lr,
                                 weight_decay=hparams.weight_decay,
                                 amsgrad=hparams.optimizer == 'amsgrad')

    if hparams.pretrain:
        optim_params += list(decoder.parameters())
        optimizer_pre = optim.Adam(optim_params,
                                   lr=hparams.pretrain_lr,
                                   weight_decay=hparams.ae_weight_decay,
                                   amsgrad=hparams.optimizer == 'amsgrad')
        # scheduler_pre = ReduceLROnPlateau(optimizer_pre, mode='min', factor=0.5, patience=10, verbose=True, cooldown=20)
        scheduler_pre = MultiStepLR(optimizer_pre,
                                    milestones=hparams.lr_milestones,
                                    gamma=0.1)

    # scheduler_train = ReduceLROnPlateau(optimizer_train, mode='min', factor=0.5, patience=10, verbose=True, cooldown=20)
    scheduler_train = MultiStepLR(optimizer_train,
                                  milestones=hparams.lr_milestones,
                                  gamma=0.1)

    print('Starting training.. (log saved in:{})'.format(hparams.exp_name))
    start_time = time.time()

    mode = 'pretrain' if hparams.pretrain else 'train'
    best_valid_loss = float('inf')
    best_valid_auc = 0
    encoder = init_center(encoder, train_loader)

    # print(model)
    for epoch in range(hparams.num_epochs):
        if mode == 'pretrain' and epoch == hparams.pretrain_epoch:
            print('Pretraining done.')
            mode = 'train'
            best_valid_loss = float('inf')
            best_valid_auc = 0
            encoder = init_center(encoder, train_loader)
        for batch, (imgs, labels, _) in enumerate(train_loader):

            # imgs = Variable(imgs.float(), requires_grad=False)

            if hparams.cuda:
                imgs = imgs.cuda(hparams.gpu_device)

            if mode == 'pretrain':
                optimizer_pre.zero_grad()
                z = encoder(imgs)
                outputs = decoder(z)
                # print(torch.max(outputs), torch.mean(imgs), torch.min(outputs), torch.mean(imgs))
                scores = torch.sum((outputs - imgs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                # print(scores)
                loss = torch.mean(scores)
                loss.backward()
                optimizer_pre.step()
                writer.add_scalar('pretrain_loss',
                                  loss.item(),
                                  global_step=batch +
                                  len(train_loader) * epoch)

            else:
                optimizer_train.zero_grad()

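                # One-class training objective: pull encodings toward the fixed
                # center; 'soft-boundary' only penalizes distances beyond R^2.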
                z = encoder(imgs)
                dist = torch.sum((z - encoder.center)**2, dim=1)
                if hparams.objective == 'soft-boundary':
                    scores = dist - encoder.radius**2
                    loss = encoder.radius**2 + (1 / hparams.nu) * torch.mean(
                        torch.max(torch.zeros_like(scores), scores))
                else:
                    loss = torch.mean(dist)

                loss.backward()
                optimizer_train.step()

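                # Soft-boundary Deep SVDD: after warm-up, set radius R to the
                # (1 - nu)-quantile of the batch distances.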
                if hparams.objective == 'soft-boundary' and epoch >= hparams.warmup_epochs:
                    R = np.quantile(np.sqrt(dist.clone().data.cpu().numpy()),
                                    1 - hparams.nu)
                    encoder.radius = torch.tensor(R)
                    if hparams.cuda:
                        encoder.radius = encoder.radius.cuda(
                            hparams.gpu_device)
                    writer.add_scalar('radius',
                                      encoder.radius.item(),
                                      global_step=batch +
                                      len(train_loader) * epoch)
                writer.add_scalar('train_loss',
                                  loss.item(),
                                  global_step=batch +
                                  len(train_loader) * epoch)

            # pred_labels = (scores >= hparams.thresh)

            # save_image(imgs, 'train_imgs.png')
            # save_image(noisy_imgs, 'train_noisy.png')
            # save_image(gen_imgs, 'train_z.png')

            if batch % hparams.print_interval == 0:
                print('[Epoch - {0:.1f}, batch - {1:.3f}, loss - {2:.6f}]'.format(
                    1.0 * epoch, 100.0 * batch / len(train_loader), loss.item()))

        if mode == 'pretrain':
            val_auc, rec_loss = validation(copy.deepcopy(encoder),
                                           copy.deepcopy(decoder),
                                           epoch=epoch)
        else:
            val_auc, val_loss = validation(copy.deepcopy(encoder), epoch=epoch)

        writer.add_scalar('val_auc', val_auc, global_step=epoch)

        if mode == 'pretrain':
            best_valid_auc = max(best_valid_auc, val_auc)
            scheduler_pre.step()
            writer.add_scalar('rec_loss', rec_loss, global_step=epoch)
            writer.add_scalar('pretrain_lr',
                              optimizer_pre.param_groups[0]['lr'],
                              global_step=epoch)
            torch.save(
                {
                    'epoch': epoch,
                    'encoder_state_dict': encoder.state_dict(),
                    'decoder_state_dict': decoder.state_dict(),
                    'optimizer_pre_state_dict': optimizer_pre.state_dict(),
                }, hparams.model + '.pre')
            if best_valid_loss >= rec_loss:
                best_valid_loss = rec_loss
                torch.save(
                    {
                        'epoch': epoch,
                        'encoder_state_dict': encoder.state_dict(),
                        'decoder_state_dict': decoder.state_dict(),
                        'optimizer_pre_state_dict': optimizer_pre.state_dict(),
                    }, hparams.model + '.pre.best')
                print('best model on validation set saved.')
            print('[Epoch - {0:.1f} ---> rec_loss - {1:.4f}, current_lr - {2:.6f}, val_auc - {3:.4f}, best_valid_auc - {4:.4f}] - time - {5:.1f}'.format(
                1.0 * epoch, rec_loss, optimizer_pre.param_groups[0]['lr'], val_auc, best_valid_auc, time.time() - start_time))

        else:
            scheduler_train.step()
            writer.add_scalar('val_loss', val_loss, global_step=epoch)
            writer.add_scalar('train_lr',
                              optimizer_train.param_groups[0]['lr'],
                              global_step=epoch)
            torch.save(
                {
                    'epoch': epoch,
                    'encoder_state_dict': encoder.state_dict(),
                    'center': encoder.center,
                    'radius': encoder.radius,
                    'optimizer_train_state_dict': optimizer_train.state_dict(),
                }, hparams.model + '.train')
            if best_valid_loss >= val_loss:
                best_valid_loss = val_loss
                torch.save(
                    {
                        'epoch': epoch,
                        'encoder_state_dict': encoder.state_dict(),
                        'center': encoder.center,
                        'radius': encoder.radius,
                        'optimizer_train_state_dict':
                        optimizer_train.state_dict(),
                    }, hparams.model + '.train.best')
                print('best model on validation set saved.')
            if best_valid_auc <= val_auc:
                best_valid_auc = val_auc
                torch.save(
                    {
                        'epoch': epoch,
                        'encoder_state_dict': encoder.state_dict(),
                        'center': encoder.center,
                        'radius': encoder.radius,
                        'optimizer_train_state_dict':
                        optimizer_train.state_dict(),
                    }, hparams.model + '.train.auc')
                print('best model on validation set saved.')
            print('[Epoch - {0:.1f} ---> val_loss - {1:.4f}, current_lr - {2:.6f}, val_auc - {3:.4f}, best_valid_auc - {4:.4f}] - time - {5:.1f}'.format(
                1.0 * epoch, val_loss, optimizer_train.param_groups[0]['lr'], val_auc, best_valid_auc, time.time() - start_time))

        start_time = time.time()
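
The loop above interleaves the two Deep SVDD objectives with logging; a minimal standalone sketch of just the loss computation and radius update it performs (assuming z and center are tensors and radius and nu are floats; not the author's exact code):

import numpy as np
import torch

def svdd_loss(z, center, radius, nu, objective='one-class'):
    # Squared Euclidean distance of each embedding to the hypersphere center.
    dist = torch.sum((z - center) ** 2, dim=1)
    if objective == 'soft-boundary':
        scores = dist - radius ** 2  # positive for points outside the sphere
        return radius ** 2 + (1 / nu) * torch.mean(torch.clamp(scores, min=0))
    return torch.mean(dist)  # one-class: pull every embedding toward the center

def update_radius(dist, nu):
    # New radius is the (1 - nu)-quantile of the distances, as in the loop above.
    return float(np.quantile(np.sqrt(dist.detach().cpu().numpy()), 1 - nu))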
Esempio n. 18
opts_loss = {
    'title': 'sequence loss',
    'xlabel': 'every 200 batch',
    'ylabel': 'loss',
    'showlegend': 'true'
}
opts_acc = {
    'title': 'Accuracy',
    'xlabel': 'every 200 batch',
    'ylabel': 'accuracy',
    'showlegend': 'true'
}

# Create the encoder and decoder
encoder = Encoder().to(DEVICE)
decoder = Decoder().to(DEVICE)
viz.text("Seq2seq model built", win='summary')


# Count trainable parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print("encoder params %d" % count_parameters(encoder))
print("decoder params %d" % count_parameters(decoder))

# Load pretrained word embeddings
encoder.load_pretrained_vectors(EMBED_PATH_SRC)
decoder.load_pretrained_vectors(EMBED_PATH_TGT)
viz.text("Pretrained embeddings loaded", win='summary', append=True)
Esempio n. 19
    pickle
    print("Preprocessing complete!")

    return train_data, word2index, tag2index, intent2index


_, word2index, tag2index, intent2index = preprocessing('../dataset/corpus/atis-2.train.w-intent.iob', 60)

index2tag = {v: k for k, v in tag2index.items()}
index2intent = {v: k for k, v in intent2index.items()}

encoder = Encoder(len(word2index), 64, 64)
decoder = Decoder(len(tag2index), len(intent2index), len(tag2index) // 3, 64 * 2)

encoder.load_state_dict(torch.load('models_006_60_64_64_1_16_16_0p01/jointnlu-encoder.pkl'))
decoder.load_state_dict(torch.load('models_006_60_64_64_1_16_16_0p01/jointnlu-decoder.pkl'))
if USE_CUDA:
    encoder = encoder.cuda()
    decoder = decoder.cuda()


test = open("./dataset/corpus/atis.test.w-intent.iob", "r").readlines()
test = [t[:-1] for t in test]  # strip the trailing newline
# Each line is "words<TAB>slot-tags intent": split into (words, slot tags, intent)
test = [[t.split("\t")[0].split(" "), t.split("\t")[1].split(" ")[:-1], t.split("\t")[1].split(" ")[-1]] for t in test]
test = [[t[0][1:-1], t[1][1:], t[2]] for t in test]  # drop BOS/EOS markers and re-align the tags

# index = random.choice(range(len(test)))
error = 0
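
The chained list comprehensions above are dense; an equivalent per-line parser, assuming the ATIS w-intent.iob format where each line is "words<TAB>slot-tags intent" with BOS/EOS markers at the sequence edges:

def parse_iob_line(line):
    words, annotation = line.rstrip("\n").split("\t")
    tags = annotation.split(" ")
    slots, intent = tags[:-1], tags[-1]
    words = words.split(" ")[1:-1]  # drop the BOS/EOS markers
    slots = slots[1:]               # re-align the slot tags with the trimmed words
    return words, slots, intent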
Esempio n. 20
def main():
    options = parse_args()
    is_cuda = use_cuda and not options.no_cuda
    hardware = "cuda" if is_cuda else "cpu"
    device = torch.device(hardware)

    for dataset_name in options.dataset:
        results = {"best": {}, "mean": {}, "highest_prob": {}}
        for checkpoint_path in options.checkpoint:
            checkpoint_name, _ = os.path.splitext(
                os.path.basename(checkpoint_path))
            checkpoint = (load_checkpoint(checkpoint_path, cuda=is_cuda)
                          if checkpoint_path else default_checkpoint)
            encoder_checkpoint = checkpoint["model"].get("encoder")
            decoder_checkpoint = checkpoint["model"].get("decoder")

            test_set = test_sets[dataset_name]
            dataset = CrohmeDataset(
                test_set["groundtruth"],
                tokensfile,
                root=test_set["root"],
                transform=transformers,
            )
            data_loader = DataLoader(
                dataset,
                batch_size=options.batch_size,
                shuffle=False,
                num_workers=options.num_workers,
                collate_fn=collate_batch,
            )

            enc = Encoder(img_channels=3,
                          checkpoint=encoder_checkpoint).to(device)
            dec = Decoder(
                len(dataset.id_to_token),
                low_res_shape,
                high_res_shape,
                checkpoint=decoder_checkpoint,
                device=device,
            ).to(device)
            enc.eval()
            dec.eval()

            result = evaluate(
                enc,
                dec,
                data_loader=data_loader,
                device=device,
                checkpoint=checkpoint,
                beam_width=options.beam_width,
                prefix=options.prefix,
            )
            results["best"][checkpoint_name] = result["best"]
            results["mean"][checkpoint_name] = result["mean"]
            results["highest_prob"][checkpoint_name] = result["highest_prob"]

        highest_prob_err_table, highest_prob_correct_table = create_markdown_tables(
            results["highest_prob"])
        best_err_table, best_correct_table = create_markdown_tables(
            results["best"])
        mean_err_table, mean_correct_table = create_markdown_tables(
            results["mean"])
        print(("\n# Dataset {name}\n\n"
               "Beam width: {beam_width}\n\n"
               "## Highest Probability\n\n{highest_prob_err_table}\n\n"
               "{highest_prob_correct_table}\n\n"
               "## Best\n\n{best_err_table}\n\n{best_correct_table}\n\n"
               "## Mean\n\n{mean_err_table}\n\n{mean_correct_table}").format(
                   name=dataset_name,
                   beam_width=options.beam_width,
                   highest_prob_err_table=highest_prob_err_table,
                   highest_prob_correct_table=highest_prob_correct_table,
                   best_err_table=best_err_table,
                   best_correct_table=best_correct_table,
                   mean_err_table=mean_err_table,
                   mean_correct_table=mean_correct_table,
               ))
Esempio n. 21
def main():
    construct_vocab = False
    encode_images = False
    train = True

    # Read and Process Raw data
    data = CaptioningData()
    # Finding image files as data
    data.set_all_images(cfg.images_path)
    captions_dict = data.get_captions(cfg.token_file)
    caption_maxlen = data.get_caption_maxlen()

    # Construct vocabulary
    if construct_vocab:
        # get all captions to construct the vocab
        all_captions = data.get_all_captions()
        vocab = build_vocab(vocab_path=cfg.data_path,
                            vocab_name=cfg.vocab_name,
                            captions=all_captions,
                            threshold=2)
    else:
        vocab = load_vocab(vocab_path=cfg.data_path, vocab_name=cfg.vocab_name)
    # print(vocab.word2idx)
    inception_encoding = Encoder()

    # train data
    if train:
        train_images = data.get_train_images(cfg.train_image_files)
        train_pairs = [
            ImgCaptionPair(img_id, captions_dict[img_id])
            for img_id in train_images
        ]

        # Image Encoding

        if encode_images:
            train_img_encoding = inception_encoding.encode_images(
                file_path=cfg.images_path,
                image_list=train_images,
                encoding_file=cfg.train_img_encoding_file)
        else:
            train_img_encoding = inception_encoding.load_image_encoding(
                encoding_file=cfg.train_img_encoding_file)

        train_data_generator = data_generator(vocab,
                                              train_pairs,
                                              train_img_encoding,
                                              batch_size=1800,
                                              max_len=caption_maxlen)
        # next(g)

    # Decoder model
    decoder = Decoder(vocab_size=len(vocab),
                      embedding_size=300,
                      input_shape=2048,
                      caption_max_len=caption_maxlen)
    decoder_model = decoder.get_model()
    decoder_model.load_weights('best_weights.97-0.95.hdf5')

    if train:
        decoder_model.compile(loss='categorical_crossentropy',
                              optimizer=RMSprop(),
                              metrics=['accuracy'])
        ckpt = ModelCheckpoint('weights.{epoch:02d}-{loss:.2f}.hdf5',
                               monitor='loss',
                               verbose=0,
                               save_best_only=False,
                               save_weights_only=False,
                               mode='auto',
                               period=30)
        best_ckpt = ModelCheckpoint('best_weights.{epoch:02d}-{loss:.2f}.hdf5',
                                    monitor='loss',
                                    verbose=0,
                                    save_best_only=True,
                                    save_weights_only=False,
                                    mode='auto',
                                    period=1)
        decoder_model.fit_generator(train_data_generator,
                                    steps_per_epoch=30,
                                    epochs=100,
                                    callbacks=[ckpt, best_ckpt])

    decoder_model.save('decoder_model.h5')

    img_ids = data.get_val_images(cfg.val_image_files)
    img_name = img_ids[9]

    enc_img = inception_encoding.encode_single_img(file_path=cfg.images_path,
                                                   img_name=img_name)

    caption = ["<start>"]
    while True:
        par_caps = [vocab(i) for i in caption]
        par_caps = sequence.pad_sequences([par_caps],
                                          maxlen=40,
                                          padding='post')
        preds = decoder_model.predict(
            [np.array([enc_img]), np.array(par_caps)])
        word_pred = vocab.idx2word[np.argmax(preds[0])]
        caption.append(word_pred)

        if word_pred == "<end>" or len(caption) > 40:
            break

    full_img_path = os.path.join(cfg.images_path, img_name)
    print(captions_dict[img_name])
    print(full_img_path)
    print(' '.join(caption[1:-1]))
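
The generation loop above, refactored into a function for reuse (a sketch assuming the same interfaces as the snippet: vocab(word) returns an index, vocab.idx2word maps back, and sequence / np are the imports already in use):

def greedy_decode(decoder_model, enc_img, vocab, max_len=40):
    caption = ["<start>"]
    while True:
        idxs = [vocab(w) for w in caption]
        idxs = sequence.pad_sequences([idxs], maxlen=max_len, padding='post')
        preds = decoder_model.predict([np.array([enc_img]), np.array(idxs)])
        word_pred = vocab.idx2word[np.argmax(preds[0])]
        caption.append(word_pred)
        if word_pred == "<end>" or len(caption) > max_len:
            break
    return " ".join(caption[1:-1])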
Esempio n. 22
def train():
    opt = parse_args()
    cuda = True if torch.cuda.is_available() else False

    input_shape = (opt.channels, opt.img_width, opt.img_height)
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    transform = transforms.Compose([
        transforms.Resize(int(opt.img_height * 1.12), Image.BICUBIC),
        transforms.RandomCrop((opt.img_height, opt.img_width)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Get dataloader
    train_loader = coco_loader(opt, mode='train', transform=transform)
    test_loader = coco_loader(opt, mode='test', transform=transform)

    # Get vgg
    vgg = VGGNet()

    # Initialize two generators and the discriminator
    shared_E = Encoder(opt.channels, opt.dim, opt.n_downsample)
    shared_D = Decoder(3, 256, opt.n_upsample)

    G_A = GeneratorA(opt.n_residual, 256, shared_E, shared_D)
    G_B = GeneratorB(opt.n_residual, 256, shared_E, shared_D)

    D_B = Discriminator(input_shape)

    # Initialize weights
    G_A.apply(weights_init_normal)
    G_B.apply(weights_init_normal)
    D_B.apply(weights_init_normal)

    # Losses
    criterion_GAN = torch.nn.MSELoss()
    criterion_pixel = torch.nn.L1Loss()

    if cuda:
        vgg = vgg.cuda().eval()
        G_A = G_A.cuda()
        G_B = G_B.cuda()
        D_B = D_B.cuda()
        criterion_GAN.cuda()
        criterion_pixel.cuda()

    optimizer_G = torch.optim.Adam(itertools.chain(G_A.parameters(),
                                                   G_B.parameters()),
                                   lr=opt.lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(D_B.parameters(),
                                   lr=opt.lr,
                                   betas=(0.5, 0.999))

    lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G,
        lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
    lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D,
        lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)

    # Compute the style features in advance
    style_img = Variable(load_img(opt.style_img, transform).type(FloatTensor))
    style_feature = vgg(style_img)

    prev_time = time.time()
    for epoch in range(opt.epoch, opt.n_epochs):
        for batch_i, content_img in enumerate(train_loader):
            content_img = Variable(content_img.type(FloatTensor))

            valid = Variable(FloatTensor(
                np.ones((content_img.size(0), *D_B.output_shape))),
                             requires_grad=False)
            fake = Variable(FloatTensor(
                np.zeros((content_img.size(0), *D_B.output_shape))),
                            requires_grad=False)

            # ---------------------
            #  Train Generators
            # ---------------------

            optimizer_G.zero_grad()

            # The generated image is not de-normalized; keep the preprocessing of content, style, and generated images consistent!
            stylized_img = G_A(content_img)

            target_feature = vgg(stylized_img)
            content_feature = vgg(content_img)
            loss_st = opt.lambda_st * vgg.compute_st_loss(
                target_feature, content_feature, style_feature,
                opt.lambda_style)

            reconstructed_img = G_B(stylized_img)
            loss_adv = opt.lambda_adv * criterion_GAN(D_B(reconstructed_img),
                                                      valid)

            loss_G = loss_st + loss_adv
            loss_G.backward(retain_graph=True)
            optimizer_G.step()

            # ----------------------
            #  Train Discriminator
            # ----------------------

            optimizer_D.zero_grad()

            loss_D = criterion_GAN(D_B(content_img), valid) + criterion_GAN(
                D_B(reconstructed_img.detach()), fake)
            loss_D.backward()
            optimizer_D.step()

            # ------------------
            # Log Information
            # ------------------

            batches_done = epoch * len(train_loader) + batch_i
            batches_left = opt.n_epochs * len(train_loader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left *
                                           (time.time() - prev_time))
            prev_time = time.time()

            print(
                "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f] ETA: %s"
                % (epoch, opt.n_epochs, batch_i, len(train_loader),
                   loss_D.item(), loss_G.item(), time_left))

            if batches_done % opt.sample_interval == 0:
                save_sample(opt.style_name, test_loader, batches_done, G_A,
                            G_B, FloatTensor)

            if batches_done % opt.checkpoint_interval == 0:
                torch.save(
                    G_A.state_dict(),
                    "checkpoints/%s/G_A_%d.pth" % (opt.style_name, epoch))
                torch.save(
                    G_B.state_dict(),
                    "checkpoints/%s/G_B_%d.pth" % (opt.style_name, epoch))

        # Update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D.step()

    torch.save(G_A.state_dict(),
               "checkpoints/%s/G_A_done.pth" % opt.style_name)
    torch.save(G_B.state_dict(),
               "checkpoints/%s/G_B_done.pth" % opt.style_name)
    print("Training Process has been Done!")
Esempio n. 23
def train(args, logger):
    task_time = time.strftime("%Y-%m-%d %H:%M", time.localtime())
    Path("./saved_models/").mkdir(parents=True, exist_ok=True)
    Path("./pretrained_models/").mkdir(parents=True, exist_ok=True)
    MODEL_SAVE_PATH = './saved_models/'
    Pretrained_MODEL_PATH = './pretrained_models/'
    get_model_name = lambda part: f'{part}-{args.data}-{args.tasks}-{args.prefix}.pth'
    get_pretrain_model_name = lambda part: f'{part}-{args.data}-LP-{args.prefix}.pth'
    device_string = 'cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'
    print('Model training with ' + device_string)
    device = torch.device(device_string)

    g = load_graphs(f"./data/{args.data}.bin")[0][0]
    print(g)
    efeat_dim = g.edata['feat'].shape[1]
    nfeat_dim = efeat_dim

    train_loader, val_loader, test_loader, num_val_samples, num_test_samples = dataloader(
        args, g)

    encoder = Encoder(args,
                      nfeat_dim,
                      n_head=args.n_head,
                      dropout=args.dropout).to(device)
    decoder = Decoder(args, nfeat_dim).to(device)
    msg2mail = Msg2Mail(args, nfeat_dim)
    fraud_sampler = frauder_sampler(g)

    optimizer = torch.optim.Adam(list(encoder.parameters()) +
                                 list(decoder.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler_lr = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              T_max=40)
    if args.warmup:
        scheduler_warmup = GradualWarmupScheduler(optimizer,
                                                  multiplier=1,
                                                  total_epoch=3,
                                                  after_scheduler=scheduler_lr)
        optimizer.zero_grad()
        # Dummy step so the warmup scheduler can be stepped before the first real update.
        optimizer.step()
    loss_fcn = torch.nn.BCEWithLogitsLoss()

    loss_fcn = loss_fcn.to(device)

    early_stopper = EarlyStopMonitor(logger=logger,
                                     max_round=args.patience,
                                     higher_better=True)

    if args.pretrain:
        logger.info(
            f'Loading the linkpred pretrained attention based encoder model')
        encoder.load_state_dict(
            torch.load(Pretrained_MODEL_PATH +
                       get_pretrain_model_name('Encoder')))

    for epoch in range(args.n_epoch):
        # reset node state
        g.ndata['mail'] = torch.zeros(
            (g.num_nodes(), args.n_mail, nfeat_dim + 2), dtype=torch.float32)
        g.ndata['feat'] = torch.zeros(
            (g.num_nodes(), nfeat_dim), dtype=torch.float32
        )  # initialized to zero; other initializations are possible
        g.ndata['last_update'] = torch.zeros((g.num_nodes()),
                                             dtype=torch.float32)
        encoder.train()
        decoder.train()
        start_epoch = time.time()
        m_loss = []
        logger.info('start {} epoch, current optim lr is {}'.format(
            epoch, optimizer.param_groups[0]['lr']))
        for batch_idx, (input_nodes, pos_graph, neg_graph, blocks, frontier,
                        current_ts) in enumerate(train_loader):

            pos_graph = pos_graph.to(device)
            neg_graph = neg_graph.to(device) if neg_graph is not None else None

            if not args.no_time or not args.no_pos:
                current_ts, pos_ts, num_pos_nodes = get_current_ts(
                    args, pos_graph, neg_graph)
                pos_graph.ndata['ts'] = current_ts
            else:
                current_ts, pos_ts, num_pos_nodes = None, None, None

            neg_rev_graph = dgl.add_reverse_edges(
                neg_graph) if neg_graph is not None else None
            emb, _ = encoder(dgl.add_reverse_edges(pos_graph), neg_rev_graph,
                             num_pos_nodes)
            if batch_idx != 0:
                if 'LP' not in args.tasks and args.balance:
                    neg_graph = fraud_sampler.sample_fraud_event(
                        g, args.bs // 5,
                        current_ts.max().cpu()).to(device)
                logits, labels = decoder(emb, pos_graph, neg_graph)

                loss = loss_fcn(logits, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                m_loss.append(loss.item())

            # MSG Passing
            with torch.no_grad():
                mail = msg2mail.gen_mail(args, emb, input_nodes, pos_graph,
                                         frontier, 'train')

                if not args.no_time:
                    g.ndata['last_update'][pos_graph.ndata[dgl.NID]
                                           [:num_pos_nodes]] = pos_ts.to('cpu')
                g.ndata['feat'][pos_graph.ndata[dgl.NID]] = emb.to('cpu')
                g.ndata['mail'][input_nodes] = mail
            if batch_idx % 100 == 1:
                gpu_mem = torch.cuda.max_memory_allocated(
                ) / 1.074e9 if torch.cuda.is_available(
                ) and args.gpu >= 0 else 0
                torch.cuda.empty_cache()
                mem_perc = psutil.virtual_memory().percent
                cpu_perc = psutil.cpu_percent(interval=None)
                output_string = f'Epoch {epoch} | Step {batch_idx}/{len(train_loader)} | CPU {cpu_perc:.1f}% | Sys Mem {mem_perc:.1f}% | GPU Mem {gpu_mem:.4f}GB '

                output_string += f'| {args.tasks} Loss {np.mean(m_loss):.4f}'

                logger.info(output_string)

        total_epoch_time = time.time() - start_epoch
        logger.info(' training epoch: {} took {:.4f}s'.format(
            epoch, total_epoch_time))
        val_ap, val_auc, val_acc, val_loss = eval_epoch(
            args, logger, g, val_loader, encoder, decoder, msg2mail, loss_fcn,
            device, num_val_samples)
        logger.info(
            'Val {} Task | ap: {:.4f} | auc: {:.4f} | acc: {:.4f} | Loss: {:.4f}'
            .format(args.tasks, val_ap, val_auc, val_acc, val_loss))

        if args.warmup:
            scheduler_warmup.step(epoch)
        else:
            scheduler_lr.step()

        early_stopper_metric = val_ap if 'LP' in args.tasks else val_auc

        if early_stopper.early_stop_check(early_stopper_metric):
            logger.info('No improvement over {} epochs, stop training'.format(
                early_stopper.max_round))
            logger.info(
                f'Loading the best model at epoch {early_stopper.best_epoch}')
            encoder.load_state_dict(
                torch.load(MODEL_SAVE_PATH + get_model_name('Encoder')))
            decoder.load_state_dict(
                torch.load(MODEL_SAVE_PATH + get_model_name('Decoder')))

            test_result = [
                early_stopper.best_ap, early_stopper.best_auc,
                early_stopper.best_acc, early_stopper.best_loss
            ]
            break

        test_ap, test_auc, test_acc, test_loss = eval_epoch(
            args, logger, g, test_loader, encoder, decoder, msg2mail, loss_fcn,
            device, num_test_samples)
        logger.info(
            'Test {} Task | ap: {:.4f} | auc: {:.4f} | acc: {:.4f} | Loss: {:.4f}'
            .format(args.tasks, test_ap, test_auc, test_acc, test_loss))
        test_result = [test_ap, test_auc, test_acc, test_loss]

        if early_stopper.best_epoch == epoch:
            early_stopper.best_ap = test_ap
            early_stopper.best_auc = test_auc
            early_stopper.best_acc = test_acc
            early_stopper.best_loss = test_loss
            logger.info(
                f'Saving the best model at epoch {early_stopper.best_epoch}')
            torch.save(encoder.state_dict(),
                       MODEL_SAVE_PATH + get_model_name('Encoder'))
            torch.save(decoder.state_dict(),
                       MODEL_SAVE_PATH + get_model_name('Decoder'))
Esempio n. 24
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.info("Device type is '%s'" % (device))
    logging.info("Load %s" % (INDEX2WORD_PATH))
    with open(INDEX2WORD_PATH) as f:
        i2w = json.load(f)
    with open(WORD2INDEX_PATH) as f:
        w2i = json.load(f)
    logging.info("Load word embedding")
    word_embedding = np.load(WORD_EMBEDDING_DATA)
    logging.info("Read validation dataset")
    valSet = buildS2SDatasetForTest.build(VAL_DATASET_PATH)

    logging.info("Build data loader for validation data")
    validation_generator = data.DataLoader(valSet,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False,
                                           collate_fn=valSet.collate_fn)
    logging.info("Data loader loading is complete")

    loss_function = nn.CrossEntropyLoss(ignore_index=0)
    logging.info("Build encoder with hidden dimension %s" % (HIDDEN_DIMENSION))
    encoder = Encoder(EMBEDDING_DIMENSION, HIDDEN_DIMENSION,
                      word_embedding.shape[0], word_embedding, RNN_LAYER,
                      DROPOUT, BIDIRECTION)
    logging.info("Build decoder with hidden dimension %s" % (HIDDEN_DIMENSION))
    decoder = Decoder(EMBEDDING_DIMENSION, HIDDEN_DIMENSION,
                      word_embedding.shape[0], word_embedding, RNN_LAYER,
                      DROPOUT)
    logging.info("Build seq2seq model")
    model = Seq2Seq(encoder, decoder, device)
    del word_embedding
    model = torch.load(MODEL)
    model.to(device)
    check_model_performance = -1
    #torch.set_printoptions(threshold=100000)
    model.eval()
    logging.info("Start validation")
    final = []
    box = []
    ID = []
    with torch.no_grad():
        for step, d in enumerate(validation_generator):
            if step % 50 == 0: logging.info("Valid step %s" % (step))
            text = d['text'].to(device, dtype=torch.long)
            length = d['len_text']
            mask = d['attention_mask'].to(device, dtype=torch.long)
            out, predict = model.predict(text, 1, 2, length, mask)
            box.append(predict)
            ID.append(d['id'])
            del text, mask, out, predict
    for predict, test_idx in zip(box, ID):
        pre = 3
        for idx, ii in enumerate(predict):
            ans = []
            for s, j in enumerate(ii):
                if j == pre or i2w[j] == "<unk>": continue
                if i2w[j] == "</s>" or s > 80: break
                ans.append(i2w[j])
                pre = j
            sent = " ".join(ans)
            s_ans = {"id": test_idx[idx], "predict": sent}
            final.append(s_ans)
    logging.info("end predict")
    with open(OUTPUT_PATH, "w") as f:
        f.write("\n".join([json.dumps(p) for p in final]) + "\n")
Esempio n. 25
    val_dataset = CelebA(args.ann_file, args.image_dir, eval_index_list,
                         transform_val, transform_val, args.att_num)
    val_loader = DataLoader(val_dataset,
                            shuffle=True,
                            batch_size=args.batch_size,
                            num_workers=args.nthreads)

    print("| Data Loaded: # training data: %d, # val data: %d" %
          (len(train_loader) * args.batch_size,
           len(val_loader) * args.batch_size))

    ###############################################################################
    # Build the model
    ###############################################################################
    encoder = Encoder()
    decoder = Decoder(att_num=args.att_num)
    classifier = Classifier(args.att_num)

    optimizer = optim.Adam(
        [
            {
                'params': encoder.parameters()
            },
            {
                'params': decoder.parameters()
            },
            #                      {'params': classifier.parameters()}
        ],
        lr=args.lr,
        weight_decay=args.weight_decay)
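
Each dict passed to optim.Adam above defines a PyTorch parameter group; the shared lr and weight_decay apply to every group, and any group can override them individually, e.g. a smaller learning rate for the encoder (illustrative values, not part of the original script):

optimizer = optim.Adam(
    [
        {'params': encoder.parameters(), 'lr': args.lr * 0.1},  # hypothetical per-group override
        {'params': decoder.parameters()},                       # falls back to the shared lr
    ],
    lr=args.lr,
    weight_decay=args.weight_decay)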
Esempio n. 26
def main():
    options = parse_args()
    torch.manual_seed(options.seed)
    is_cuda = use_cuda and not options.no_cuda
    hardware = "cuda" if is_cuda else "cpu"
    device = torch.device(hardware)

    checkpoint = (load_checkpoint(options.checkpoint, cuda=is_cuda)
                  if options.checkpoint else default_checkpoint)
    print("Running {} epochs on {}".format(options.num_epochs, hardware))
    encoder_checkpoint = checkpoint["model"].get("encoder")
    decoder_checkpoint = checkpoint["model"].get("decoder")
    if encoder_checkpoint is not None:
        print(("Resuming from - Epoch {}: "
               "Train Accuracy = {train_accuracy:.5f}, "
               "Train Loss = {train_loss:.5f}, "
               "Validation Accuracy = {validation_accuracy:.5f}, "
               "Validation Loss = {validation_loss:.5f}, ").format(
                   checkpoint["epoch"],
                   train_accuracy=checkpoint["train_accuracy"][-1],
                   train_loss=checkpoint["train_losses"][-1],
                   validation_accuracy=checkpoint["validation_accuracy"][-1],
                   validation_loss=checkpoint["validation_losses"][-1],
               ))

    train_dataset = CrohmeDataset(gt_train,
                                  tokensfile,
                                  root=root,
                                  crop=options.crop,
                                  transform=transformers)
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=options.batch_size,
        shuffle=True,
        num_workers=options.num_workers,
        collate_fn=collate_batch,
    )
    validation_dataset = CrohmeDataset(gt_validation,
                                       tokensfile,
                                       root=root,
                                       crop=options.crop,
                                       transform=transformers)
    validation_data_loader = DataLoader(
        validation_dataset,
        batch_size=options.batch_size,
        shuffle=True,
        num_workers=options.num_workers,
        collate_fn=collate_batch,
    )
    criterion = nn.CrossEntropyLoss().to(device)
    enc = Encoder(img_channels=3,
                  dropout_rate=options.dropout_rate,
                  checkpoint=encoder_checkpoint).to(device)
    dec = Decoder(
        len(train_dataset.id_to_token),
        low_res_shape,
        high_res_shape,
        checkpoint=decoder_checkpoint,
        device=device,
    ).to(device)
    enc.train()
    dec.train()

    enc_params_to_optimise = [
        param for param in enc.parameters() if param.requires_grad
    ]
    dec_params_to_optimise = [
        param for param in dec.parameters() if param.requires_grad
    ]
    params_to_optimise = [*enc_params_to_optimise, *dec_params_to_optimise]
    optimiser = optim.Adadelta(params_to_optimise,
                               lr=options.lr,
                               weight_decay=options.weight_decay)
    optimiser_state = checkpoint.get("optimiser")
    if optimiser_state:
        optimiser.load_state_dict(optimiser_state)
    # Set the learning rate instead of using the previous state.
    # The scheduler somehow overwrites the LR to the initial LR after loading,
    # which would always reset it to the first used learning rate instead of
    # the one from the previous checkpoint. So might as well set it manually.
    for param_group in optimiser.param_groups:
        param_group["initial_lr"] = options.lr
    # Decay learning rate by a factor of lr_factor (default: 0.1)
    # every lr_epochs (default: 3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimiser,
                                             step_size=options.lr_epochs,
                                             gamma=options.lr_factor)

    train(
        enc,
        dec,
        optimiser,
        criterion,
        train_data_loader,
        validation_data_loader,
        teacher_forcing_ratio=options.teacher_forcing,
        lr_scheduler=lr_scheduler,
        print_epochs=options.print_epochs,
        device=device,
        num_epochs=options.num_epochs,
        checkpoint=checkpoint,
        prefix=options.prefix,
        max_grad_norm=options.max_grad_norm,
    )
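
max_grad_norm is handed to train() above; the conventional use inside the training step is PyTorch's built-in norm clipping, applied between the backward pass and the optimiser step (a sketch of the presumed call, not code shown here):

# after loss.backward() and before optimiser.step():
torch.nn.utils.clip_grad_norm_(params_to_optimise, options.max_grad_norm)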
Esempio n. 27
0
    def __init__(self):

        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = './logs/gradient_tape/' + current_time + '/train'
        test_log_dir = './logs/gradient_tape/' + current_time + '/test'
        self.train_summary_writer = tf.summary.create_file_writer(
            train_log_dir)
        self.test_summary_writer = tf.summary.create_file_writer(test_log_dir)

        self.m = tf.keras.metrics.SparseCategoricalAccuracy()
        # self.recall = tf.keras.metrics.Recall()
        self.recall = [0]
        # self.F1Score = 2*self.m.result()*self.recall.result()/(self.recall.result()+self.m.result())
        self.BATCH_SIZE = 128
        self.embedding_dim = 24
        self.units = 64
        # Try experimenting with datasets of different sizes
        stop_word_dir = './stop_words.utf8'
        self.stop_words = self.get_stop_words(stop_word_dir) + ['']
        num_examples = 30000
        QA_dir = './QA_data.txt'
        # QA_dir = 'C:/Users/Administrator/raw_chat_corpus/qingyun-11w/qinyun-11w.csv'
        self.input_tensor, self.target_tensor, self.inp_tokenizer, self.targ_tokenizer = self.load_dataset(
            QA_dir, num_examples)
        self.num_classes = len(self.targ_tokenizer.index_word)  # number of target word classes
        # Initialize the confusion matrices (one for training, one for testing):
        self.train_confusion_matrix = tfa.metrics.MultiLabelConfusionMatrix(
            num_classes=self.num_classes)
        self.test_confusion_matrix = tfa.metrics.MultiLabelConfusionMatrix(
            num_classes=self.num_classes)

        self.F1Score = tfa.metrics.F1Score(num_classes=len(
            self.targ_tokenizer.index_word),
                                           average="micro")
        # self.F1Score = tfa.metrics.F1Score(num_classes=self.max_length_targ, average="micro")
        # input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        #     self.input_tensor,
        #     self.target_tensor,
        #     test_size=0.2)
        # self.load_split_dataset(input_tensor_train,target_tensor_train)
        self.vocab_inp_size = len(self.inp_tokenizer.word_index) + 1
        self.vocab_tar_size = len(self.targ_tokenizer.word_index) + 1

        # Initialize the encoder
        self.encoder = Encoder(self.vocab_inp_size, self.embedding_dim,
                               self.units, self.BATCH_SIZE)
        plot_model(self.encoder,
                   to_file='encoder.png',
                   show_shapes=True,
                   show_layer_names=True,
                   rankdir='TB',
                   dpi=900,
                   expand_nested=True)
        # Sample input
        # sample_hidden = self.encoder.initialize_hidden_state()
        # sample_output, sample_hidden = self.encoder.call(self.example_input_batch, sample_hidden)
        # print('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
        # print('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

        # Initialize the attention layer
        attention_layer = BahdanauAttention(10)
        # attention_result, attention_weights = attention_layer(sample_hidden, sample_output)
        plot_model(attention_layer,
                   to_file='attention_layer.png',
                   show_shapes=True,
                   show_layer_names=True,
                   rankdir='TB',
                   dpi=900,
                   expand_nested=True)

        # print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
        # print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

        # Initialize the decoder
        self.decoder = Decoder(self.vocab_tar_size, self.embedding_dim,
                               self.units, self.BATCH_SIZE)
        plot_model(self.decoder,
                   to_file='decoder.png',
                   show_shapes=True,
                   show_layer_names=True,
                   rankdir='TB',
                   dpi=900,
                   expand_nested=True)
        # sample_decoder_output, _, _ = self.decoder(tf.random.uniform((self.BATCH_SIZE, 1)),
        #                                       sample_hidden, sample_output)
        #
        # print('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

        # Initialize the optimizer
        self.optimizer = tf.keras.optimizers.Adam()
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')

        # checkpoint & save model as object 初始化
        self.checkpoint_dir = './training_checkpoints'
        self.checkpoint_prefix = os.path.join(self.checkpoint_dir, "ckpt")
        self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer,
                                              encoder=self.encoder,
                                              decoder=self.decoder)
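
Because loss_object above is built with reduction='none', it returns a per-token loss; in the TensorFlow seq2seq tutorial this code appears to follow, it is paired with a padding mask before averaging (a sketch assuming padding index 0):

import tensorflow as tf

def loss_function(real, pred, loss_object):
    # Zero out the loss at padded positions before taking the mean.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    return tf.reduce_mean(loss_ * mask)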
Esempio n. 28
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2,
                      config['topical_embedding_dim'])
    topical_transformer = topicalq_transformer(config['topical_vocab_size'],
                                               config['topical_embedding_dim'],
                                               config['enc_nhids'],
                                               config['topical_word_num'],
                                               config['batch_size'])

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')
        source_topical_word = tensor.lmatrix('source_topical')
        source_topical_mask = tensor.matrix('source_topical_mask')

        # Get training and development set streams
        tr_stream = get_tr_stream_with_topicalq(**config)
        dev_stream = get_dev_stream_with_topicalq(**config)
        topic_embedding = topical_transformer.apply(source_topical_word)
        # Get cost of the model
        representation = encoder.apply(source_sentence, source_sentence_mask)
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :,
                                           (representation.shape[2] // 2):]

        cost = decoder.cost(representation, source_sentence_mask,
                            tw_representation, source_topical_mask,
                            target_sentence, target_sentence_mask,
                            topic_embedding, content_embedding)

        logger.info('Creating computational graph')
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()
        topical_transformer.weights_init = IsotropicGaussian(
            config['weight_scale'])
        topical_transformer.biases_init = Constant(0)
        topical_transformer.push_allocation_config()
        # not sure what this initialization is for
        topical_transformer.look_up.weights_init = Orthogonal()
        topical_transformer.transformer.weights_init = Orthogonal()
        topical_transformer.initialize()
        word_topical_embedding = cPickle.load(
            open(config['topical_embeddings'], 'rb'))
        np_word_topical_embedding = numpy.array(word_topical_embedding,
                                                dtype='float32')
        topical_transformer.look_up.W.set_value(np_word_topical_embedding)
        topical_transformer.look_up.W.tag.role = []

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in ghog
            logger.info('Applying dropout')
            dropout_inputs = [
                x for x in cg.intermediary_variables
                if x.name == 'maxout_apply_output'
            ]
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(cg, enc_params + dec_params,
                             config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(
            Selector(encoder).get_parameters(),
            Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}".format(
            len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([cost], after_batch=True),
            Printing(after_batch=True),
            CheckpointNMT(config['saveto'],
                          every_n_batches=config['save_freq'])
        ]
        '''
        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(
                sampling_input, sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(
                bricks=[decoder.sequence_generator], name="outputs")(
                    ComputationGraph(generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model, data_stream=tr_stream,
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if config['bleu_script'] is not None:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input, samples=samples, config=config,
                              model=search_model, data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq']))
        '''

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En',
                     channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    on_unused_sources='warn',
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(model=training_model,
                             algorithm=algorithm,
                             data_stream=tr_stream,
                             extensions=extensions)

        # Train!
        main_loop.run()

    elif mode == 'translate':

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_topical_word = tensor.lmatrix('source_topical')

        # Get test set stream
        test_stream = get_dev_stream_with_topicalq(
            config['test_set'], config['src_vocab'], config['src_vocab_size'],
            config['topical_test_set'], config['topical_vocab'],
            config['topical_vocab_size'], config['unk_id'])
        ftrans = open(config['test_set'] + '.trans.out', 'w')

        # Helper utilities
        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1

        # Get beam search
        logger.info("Building sampling model")
        topic_embedding = topical_transformer.apply(source_topical_word)
        representation = encoder.apply(source_sentence,
                                       tensor.ones(source_sentence.shape))
        tw_representation = topical_transformer.look_up.apply(
            source_topical_word.T)
        content_embedding = representation[0, :,
                                           (representation.shape[2] // 2):]
        generated = decoder.generate(source_sentence,
                                     representation,
                                     tw_representation,
                                     topical_embedding=topic_embedding,
                                     content_embedding=content_embedding)

        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        # Get target vocabulary
        trg_vocab = _ensure_special_tokens(pickle.load(
            open(config['trg_vocab'], 'rb')),
                                           bos_idx=0,
                                           eos_idx=trg_eos_idx,
                                           unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Started translation: ")
        total_cost = 0.0

        for i, line in enumerate(test_stream.get_epoch_iterator()):

            seq = sutils._oov_to_unk(line[0], config['src_vocab_size'],
                                     unk_idx)
            seq2 = line[1]
            input_ = numpy.tile(seq, (config['beam_size'], 1))
            input_topical = numpy.tile(seq2, (config['beam_size'], 1))

            # draw sample, checking to ensure we don't get an empty string back
            trans, costs = \
                beam_search.search(
                    input_values={source_sentence: input_,source_topical_word:input_topical},
                    max_length=10*len(seq), eol_symbol=src_eos_idx,
                    ignore_first_eol=True)
            '''
            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths
            '''
            #best = numpy.argsort(costs)[0]
            best = numpy.argsort(costs)[0:config['beam_size']]
            for b in best:
                try:
                    total_cost += costs[b]
                    trans_out = trans[b]

                    # convert idx to words
                    trans_out = sutils._idx_to_word(trans_out, trg_ivocab)

                except ValueError:
                    logger.info(
                        "Can NOT find a translation for line: {}".format(i +
                                                                         1))
                    trans_out = '<UNK>'

                print(trans_out, file=ftrans)

            if i != 0 and i % 100 == 0:
                logger.info("Translated {} lines of test set...".format(i))

        logger.info("Total cost of the test: {}".format(total_cost))
        ftrans.close()
    elif mode == 'rerank':
        # Create Theano variables
        ftrans = open(config['val_set'] + '.scores.out', 'w')
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')

        config['src_data'] = config['val_set']
        config['trg_data'] = config['val_set_grndtruth']
        config['batch_size'] = 1
        config['sort_k_batches'] = 1
        test_stream = get_tr_stream_unsorted(**config)
        logger.info("Building sampling model")
        representations = encoder.apply(source_sentence, source_sentence_mask)
        costs = decoder.cost(representations, source_sentence_mask,
                             target_sentence, target_sentence_mask)
        logger.info("Loading the model..")
        model = Model(costs)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        costs_computer = function([
            source_sentence, source_sentence_mask, target_sentence,
            target_sentence_mask
        ], costs)
        iterator = test_stream.get_epoch_iterator()

        scores = []
        for i, (src, src_mask, trg, trg_mask) in enumerate(iterator):
            costs = costs_computer(*[src, src_mask, trg, trg_mask])
            cost = costs.sum()
            print(i, cost)
            scores.append(cost)
            ftrans.write(str(cost) + "\n")
        ftrans.close()
Esempio n. 29
train_x = torch.from_numpy(train_x).to(device)
train_y = torch.from_numpy(train_y).to(device)

test_x = torch.from_numpy(test_x).to(device)
test_y = torch.from_numpy(test_y).to(device)

# Prepare the model
INPUT_DIM = 1
OUTPUT_DIM = 1
N_LAYERS = args.n_layers
HID_DIM = args.hidden_dim
DROPOUT_RATE = args.dropout_rate

enc = Encoder(INPUT_DIM, HID_DIM, N_LAYERS, DROPOUT_RATE)
dec = Decoder(OUTPUT_DIM, INPUT_DIM, HID_DIM, N_LAYERS, DROPOUT_RATE)
model = Seq2Seq(enc, dec, device).to(device)


# Initialize the model parameters
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)


print(model.apply(init_weights))

# Configure the optimizer
if args.optimizer == "sgd":
    optimizer = optim.SGD(model.parameters(), lr=0.01)
else:
Esempio n. 30
def main():
    parser = argparse.ArgumentParser(description='Style Swap by Pytorch')
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=3,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--patch_size',
                        '-p',
                        type=int,
                        default=5,
                        help='Size of extracted patches from style features')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=int,
                        default=1e-4,
                        help='learning rate for Adam')
    parser.add_argument('--tv_weight',
                        type=float,
                        default=1e-6,
                        help='weight for total variation loss')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=500,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir',
                        type=str,
                        default='/data/chen/content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir',
                        type=str,
                        default='/data/chen/style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir',
                        type=str,
                        default='/data/chen/content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir',
                        type=str,
                        default='/data/chen/style',
                        help='style images directory for test')
    parser.add_argument('--save_dir',
                        type=str,
                        default='result',
                        help='save directory for result and loss')

    args = parser.parse_args()

    # create directory to save
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'

    # Create each output directory independently, so a pre-existing
    # loss_dir does not skip the other two.
    for d in (loss_dir, model_state_dir, image_dir):
        os.makedirs(d, exist_ok=True)

    # set device on GPU if available, else CPU
    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device(f'cuda:{args.gpu}')
        print(f'# CUDA available: {torch.cuda.get_device_name(args.gpu)}')
    else:
        device = 'cpu'

    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and dataLoader
    train_dataset = PreprocessDataset(args.train_content_dir,
                                      args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir,
                                     args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=True)
    test_iter = iter(test_loader)

    # set model and optimizer
    encoder = VGGEncoder().to(device)
    decoder = Decoder().to(device)
    optimizer = Adam(decoder.parameters(), lr=args.learning_rate)

    # start training
    criterion = nn.MSELoss()
    loss_list = []

    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.to(device)
            style = style.to(device)
            content_feature = encoder(content)
            style_feature = encoder(style)

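            # Style-swap each sample in the batch independently: patches of
            # the content feature map are replaced by their nearest style patches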
            style_swap_res = []
            for b in range(content_feature.shape[0]):
                c = content_feature[b].unsqueeze(0)
                s = style_feature[b].unsqueeze(0)
                cs = style_swap(c, s, args.patch_size, 1)
                style_swap_res.append(cs)
            style_swap_res = torch.cat(style_swap_res, 0)

            out_style_swap = decoder(style_swap_res)
            out_content = decoder(content_feature)
            out_style = decoder(style_feature)

            out_style_swap_latent = encoder(out_style_swap)
            out_content_latent = encoder(out_content)
            out_style_latent = encoder(out_style)

            image_reconstruction_loss = criterion(
                content, out_content) + criterion(style, out_style)

            feature_reconstruction_loss = criterion(style_feature, out_style_latent) +\
                criterion(content_feature, out_content_latent) +\
                criterion(style_swap_res, out_style_swap_latent)

            tv_loss = TVloss(out_style_swap, args.tv_weight) + TVloss(out_content, args.tv_weight) \
                + TVloss(out_style, args.tv_weight)

            loss = image_reconstruction_loss + feature_reconstruction_loss + tv_loss

            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(
                f'[{e}/{args.epoch} epoch], '
                f'[{i}/{round(iters/args.batch_size)} iteration]: {loss.item()}'
            )

            if i % args.snapshot_interval == 0:
                try:
                    content, style = next(test_iter)
                except StopIteration:
                    # Restart the snapshot iterator once the test loader is exhausted.
                    test_iter = iter(test_loader)
                    content, style = next(test_iter)
                content = content.to(device)
                style = style.to(device)
                with torch.no_grad():
                    content_feature = encoder(content)
                    style_feature = encoder(style)
                    style_swap_res = []
                    for b in range(content_feature.shape[0]):
                        c = content_feature[b].unsqueeze(0)
                        s = style_feature[b].unsqueeze(0)
                        cs = style_swap(c, s, args.patch_size, 1)
                        style_swap_res.append(cs)
                    style_swap_res = torch.cat(style_swap_res, 0)
                    out_style_swap = decoder(style_swap_res)
                    out_content = decoder(content_feature)
                    out_style = decoder(style_feature)

                content = denorm(content, device)
                style = denorm(style, device)
                out_style_swap = denorm(out_style_swap, device)
                out_content = denorm(out_content, device)
                out_style = denorm(out_style, device)
                res = torch.cat(
                    [content, style, out_content, out_style, out_style_swap],
                    dim=0)
                res = res.to('cpu')
                save_image(res,
                           f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=content_feature.shape[0])
        torch.save(decoder.state_dict(), f'{model_state_dir}/{e}_epoch.pth')
    plt.plot(range(len(loss_list)), loss_list)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.savefig(f'{loss_dir}/train_loss.png')
    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list:
            f.write(f'{l}\n')
    print(f'Loss saved in {loss_dir}')
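TVloss and denorm above come from helper modules this snippet does not include. A minimal total-variation sketch, assuming the weight simply scales the mean absolute difference between neighbouring pixels:

import torch

def TVloss(x, tv_weight):
    # x: (batch, channels, height, width); penalize abrupt changes along
    # both spatial axes.
    h_diff = torch.abs(x[:, :, 1:, :] - x[:, :, :-1, :]).mean()
    w_diff = torch.abs(x[:, :, :, 1:] - x[:, :, :, :-1]).mean()
    return tv_weight * (h_diff + w_diff)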
        print "NOT FINETUNING"
    if test_model is True:
        assert args.snapshot is not None
    else:
        if args.sort_by_freq is False:
            assert args.order_free in ["pla", "mla"]
        else:
            if args.order_free:
                raise ValueError(
                    'Sort by freq and order_free are mutually exclusive.')
    resume = 0
    highest_f1 = 0
    epochs_without_imp = 0
    iterations = 0
    encoder = Encoder(encoder_weights=args.encoder_weights)
    decoder = Decoder(args.hidden_size, args.embed_size, args.attention_size,
                      args.dropout)
    encoder = encoder.to('cuda')
    decoder = decoder.to('cuda')

    snapshot = args.snapshot
    test_model = args.test_model
    train_from_scratch = args.train_from_scratch
    swa_params = ast.literal_eval(args.swa_params)  # assumes a Python literal (e.g. a list); ast.literal_eval avoids eval on CLI input
    finetune_encoder = args.finetune_encoder

    if not test_model:
        if finetune_encoder:
            encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                                 lr=args.encoder_lr)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                             lr=args.decoder_lr)
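Since the encoder optimizer only exists when finetune_encoder is set, every encoder-side update has to be guarded. A minimal sketch of one training step under that setup (loss computation elided; names follow the snippet above):

def train_step(loss, decoder_optimizer, encoder_optimizer=None):
    # Zero, backprop, and step both optimizers, skipping the encoder's
    # when the encoder is frozen.
    decoder_optimizer.zero_grad()
    if encoder_optimizer is not None:
        encoder_optimizer.zero_grad()
    loss.backward()
    decoder_optimizer.step()
    if encoder_optimizer is not None:
        encoder_optimizer.step()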
Esempio n. 32
parser.add_argument('--torch_pretrained', default='ckpt/panofull_lay_pretrained.t7',
                    help='path to load pretrained .t7 file')
parser.add_argument('--encoder', default='ckpt/pre_encoder.pth',
                    help='dump path. skip if not given')
parser.add_argument('--edg_decoder', default='ckpt/pre_edg_decoder.pth',
                    help='dump path. skip if not given')
parser.add_argument('--cor_decoder', default='ckpt/pre_cor_decoder.pth',
                    help='dump path. skip if not given')
args = parser.parse_args()


torch_pretrained = torchfile.load(args.torch_pretrained)
if args.encoder:
    encoder = Encoder()
if args.edg_decoder:
    edg_decoder = Decoder(skip_num=2, out_planes=3)
if args.cor_decoder:
    cor_decoder = Decoder(skip_num=3, out_planes=1)


# Check number of parameters
print('torch parameters num:', torch_pretrained.shape[0])
total_parameter = 0
if args.encoder:
    for p in encoder.parameters():
        total_parameter += np.prod(p.size())
if args.edg_decoder:
    for p in edg_decoder.parameters():
        total_parameter += np.prod(p.size())
if args.cor_decoder:
    for p in cor_decoder.parameters():
        total_parameter += np.prod(p.size())
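The snippet is cut off at this point; a plausible closing line, mirroring the torch-side count printed above, would be:

print('pytorch parameters num:', total_parameter)  # assumed, not in the source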
Esempio n. 33
def main(config, tr_stream, dev_stream):
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.btensor3('source_sample_matrix')
    source_char_aux = tensor.bmatrix('source_char_aux')
    source_word_mask = tensor.bmatrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.bmatrix('target_char_aux')
    target_char_mask = tensor.bmatrix('target_char_mask')
    target_sample_matrix = tensor.btensor3('target_sample_matrix')
    target_word_mask = tensor.bmatrix('target_word_mask')
    target_resample_matrix = tensor.btensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.bmatrix('target_prev_char_aux')
    target_bos_idx = tr_stream.trg_bos
    target_space_idx = tr_stream.space_idx['target']

    # Construct model
    logger.info('Building RNN encoder-decoder')

    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'],
                                   config['char_enc_nhids'],
                                   config['enc_nhids'],
                                   config['encoder_layers'])

    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['char_dec_nhids'], config['dec_nhids'],
                      config['enc_nhids'] * 2, config['transition_layers'],
                      target_space_idx, target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix,
                                   source_char_aux, source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq,
                        target_sample_matrix, target_resample_matrix,
                        target_char_aux, target_char_mask, target_word_mask,
                        target_prev_char_seq, target_prev_char_aux)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    for layer_n in range(config['encoder_layers']):
        encoder.decimator.dgru.transitions[layer_n].weights_init = Orthogonal()
        encoder.children[
            1 + layer_n].prototype.recurrent.weights_init = Orthogonal()
    decoder.interpolator.igru.weights_init = Orthogonal()
    decoder.interpolator.feedback_brick.dgru.transitions[
        0].weights_init = Orthogonal()
    for layer_n in range(config['transition_layers']):
        decoder.transition.transitions[layer_n].weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params + dec_params,
                         config['weight_noise_ff'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(str(shape), count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(
        Selector(encoder).get_parameters(),
        Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(str(value.get_value().shape), name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)
    # Set up training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=CompositeRule([
                                    StepClipping(config['step_clipping']),
                                    eval(config['step_rule'])()
                                ]))

    # Set extensions
    logger.info("Initializing extensions")
    # Extensions
    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    train_monitor = CostCurve([cost, gradient_norm, step_norm],
                              config=config,
                              after_batch=True,
                              before_first_epoch=True,
                              prefix='tra')
    extensions = [
        train_monitor,
        Timing(),
        Printing(after_batch=True),
        FinishAfter(after_n_batches=config['finish_after']),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        generated = decoder.generate(representation, source_word_mask)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[config['transition_layers']])
            )  # generated[transition_layers] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model,
                    data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    transition_layers=config['transition_layers'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(source_char_seq,
                          source_sample_matrix,
                          source_char_aux,
                          source_word_mask,
                          samples=samples,
                          config=config,
                          model=search_model,
                          data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
Esempio n. 34
for i in range(1):

    logger.info('Creating theano variables')
    print("create theano variables")
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask') # what is the source_mask
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    #sampling_input = tensor.lmatrix('input')
# Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    #cost = decoder.cost(encoder.apply(source_sentence, source_sentence_mask),  # is source_sentence_mask an embedding matrix here? is it a free variable?
    #        source_sentence_mask, target_sentence, target_sentence_mask)  # defines the cost function

    cost = decoder.cost(
        encoder.apply(source_sentence, tensor.ones(source_sentence.shape)),
        tensor.ones(source_sentence.shape),
        target_sentence,
        tensor.ones(target_sentence.shape))
    
    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)  # build the computational graph for gradient computation; cg holds every weight of the full computation
    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()  # push_initialization_config is predefined on Initializable
    decoder.push_initialization_config()