Code Example #1
File: evaluate.py  Project: cchengv5/t2vec
def evaluator(args):
    """
    do evaluation interactively
    """
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        while True:
            try:
                print("> ", end="")
                src = input()
                src = [int(x) for x in src.split()]
                trg = evaluate(src, (m0, m1), args.max_length)
                print(" ".join(map(str, trg)))
            except KeyboardInterrupt:
                break
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
Code Example #2
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadAttention(h, d_model)
    ff = PositionWiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
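A short usage sketch for `make_model`; the vocabulary sizes below are toy values, and the helper classes used above (`MultiHeadAttention`, `Encoder`, `Decoder`, etc.) are assumed to be importable from the same module:

# Hypothetical toy configuration; any positive vocabulary sizes work.
model = make_model(src_vocab=1000, tgt_vocab=1000, N=2,
                   d_model=256, d_ff=1024, h=4, dropout=0.1)
num_params = sum(p.numel() for p in model.parameters())
print("parameters:", num_params)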
Code Example #3
def t2vec(args):
    "read source sequences from trj.t and write the tensor into file trj.h5"
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            m0.cuda()
        m0.eval()
        vecs = []
        scaner = DataOrderScaner(
            os.path.join(args.data, "{}-trj.t".format(args.prefix)),
            args.t2vec_batch)
        scaner.load()
        i = 0
        while True:
            if i % 100 == 0:
                print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
            i = i + 1
            # src: (max trajectory length in this batch) x num_seqs (number of trajectories in this batch)
            src, lengths, invp = scaner.getbatch()
            if src is None: break
            if torch.cuda.is_available():
                src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
            h, _ = m0.encoder(src, lengths)  # (num_layers * num_directions, batch, hidden_size), e.g. (6, 10, 128)
            ## (num_layers, batch, hidden_size * num_directions), e.g. (3, 10, 256)
            h = m0.encoder_hn2decoder_h0(h)
            ## (batch, num_layers, hidden_size * num_directions), e.g. (10, 3, 256)
            h = h.transpose(0, 1).contiguous()
            ## (batch, *)
            #h = h.view(h.size(0), -1)
            vecs.append(h[invp].cpu().data)
        ## (num_seqs, num_layers, hidden_size * num_directions)

        vecs = torch.cat(vecs)  # [10,3,256]
        ## (num_layers, num_seqs, hidden_size * num_directions)
        vecs = vecs.transpose(0, 1).contiguous()  ## [3,10,256]
        path = os.path.join(args.data, "{}-trj.h5".format(args.prefix))
        print("=> saving vectors into {}".format(path))
        ## store the hidden states of all layers: each layer dataset holds batch vectors of size hidden_size * num_directions
        with h5py.File(path, "w") as f:
            for i in range(m0.num_layers):
                f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
        #torch.save(vecs.data, path)
        #return vecs.data
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        return None
    return vecs[m0.num_layers - 1]
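The HDF5 file written above contains one dataset per layer, `layer1` through `layerN`, each of shape (num_seqs, hidden_size * num_directions). A sketch of reading it back; the path and the assumption of three layers are illustrative:

import h5py
import torch

with h5py.File("data/exp-trj.h5", "r") as f:  # hypothetical path
    # deepest layer; shape (num_seqs, hidden_size * num_directions)
    trj_vecs = torch.from_numpy(f["layer3"][...])
print(trj_vecs.shape)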
Code Example #4
def experiments():
    use_cuda = True
    num_attr = 39
    to_swap = 'Blond_Hair'
    #to_swap = '5_o_Clock_Shadow'
    encoder_decoder_fpath = join('data', 'weights', 'adver.params')
    encoder_decoder = EncoderDecoder(num_attr)
    encoder_decoder.load_state_dict(torch.load(encoder_decoder_fpath))
    if use_cuda:
        encoder_decoder.cuda()

    _, _, test = split_train_val_test('data')
    test_iter = DataLoader(test, batch_size=32, shuffle=False)

    swap_idx, = np.where(test.attribute_names == to_swap)[0]

    encoder_decoder.eval()
    for iteration, (x, yb, yt) in enumerate(test_iter, start=1):
        yb[:, 2 * swap_idx] = 1 - yb[:, 2 * swap_idx]
        yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
        if use_cuda:
            x, yb, yt = x.cuda(), yb.cuda(), yt.cuda()
        x, yb, yt = Variable(x), Variable(yb), Variable(yt)

        _, x_hat = encoder_decoder(x, yb)
        plot_samples(x, x_hat, prefix='test_%d' % (iteration))
Code Example #5
 def create_model(self) -> torch.nn.Module:
     return EncoderDecoder(len(self.train_set.in_vocabulary),
                           len(self.train_set.out_vocabulary),
                           self.helper.opt.state_size,
                           self.helper.opt.n_layers,
                           self.helper.opt.encoder_decoder.embedding_size,
                           self.helper.opt.dropout,
                           self.train_set.max_out_len)
Code Example #6
def t2vec(args):
    "read source sequences from trj.t and write the tensor into file trj.h5"
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            m0.cuda()
        m0.eval()
        vecs = []
        scaner = DataOrderScaner(os.path.join(args.data, "trj.t"),
                                 args.t2vec_batch)
        scaner.load()
        i = 0
        while True:
            if i % 10 == 0:
                print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
            i = i + 1
            src, lengths, invp = scaner.getbatch()
            if src is None: break
            src, lengths = Variable(src), Variable(lengths)
            if torch.cuda.is_available():
                src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
            h, _ = m0.encoder(src, lengths)
            ## (num_layers, batch, hidden_size * num_directions)
            h = m0.encoder_hn2decoder_h0(h)
            ## (batch, num_layers, hidden_size * num_directions)
            h = h.transpose(0, 1).contiguous()
            ## (batch, *)
            #h = h.view(h.size(0), -1)
            vecs.append(h[invp].cpu().data)
        ## (num_seqs, num_layers, hidden_size * num_directions)
        vecs = torch.cat(vecs)
        ## (num_layers, num_seqs, hidden_size * num_directions)
        vecs = vecs.transpose(0, 1).contiguous()
        path = os.path.join(args.data, "trj.h5")
        print("=> saving vectors into {}".format(path))
        with h5py.File(path, "w") as f:
            for i in range(m0.num_layers):
                f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
        #torch.save(vecs.data, path)
        #return vecs.data
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
Code Example #7
def getPredict(src, args):
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    trg = []
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        print("> ", end="")
        trg = evaluate(src, (m0, m1), args.max_length)
        trg = [trg[ii].tolist() for ii in range(len(trg))]
        print(trg)
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
    return trg
Code Example #8
def model_init(args):
    "read source sequences from trj.t and write the tensor into file trj.h5"
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size,
                        args.hidden_size, args.num_layers,
                        args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        #print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint, map_location='cpu')
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            print('m0 on cuda')
            m0.cuda()
        m0.eval()
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
    return m0
Code Example #9
def test():
    print('Loading vocab and test dataset...')
    embed = {}
    with open(args.embed_path, 'r') as f:
        f.readline()
        for line in f.readlines():
            line = line.strip().split()
            vec = [float(_) for _ in line[1:]]
            embed[line[0]] = vec
    vocab = Vocab(args, embed)

    train_data, val_data, test_data = [], [], []
    fns = os.listdir(args.train_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()
        vocab.add_sentence(train_data[-1]['reviewText'].split())
        vocab.add_sentence(train_data[-1]['summary'].split())
    fns = os.listdir(args.valid_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()
        vocab.add_sentence(val_data[-1]['reviewText'].split())
        vocab.add_sentence(val_data[-1]['summary'].split())
    fns = os.listdir(args.test_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.test_dir + fn, 'r')
        test_data.append(json.load(f))
        f.close()
        vocab.add_sentence(test_data[-1]['reviewText'].split())
        vocab.add_sentence(test_data[-1]['summary'].split())
    embed = vocab.trim()
    args.embed_num = len(embed)
    args.embed_dim = len(embed[0])
    test_dataset = Dataset(test_data)
    test_iter = DataLoader(dataset=test_dataset,
                           batch_size=args.batch_size,
                           shuffle=False)

    print('Loading model...')
    checkpoint = torch.load(args.save_path + args.load_model)
    net = EncoderDecoder(checkpoint['args'], embed)
    net.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        net.cuda()
    criterion = nn.NLLLoss(ignore_index=vocab.PAD_IDX, reduction='sum')

    print('Begin testing...')
    loss, r1, r2, rl = evaluate(net, criterion, vocab, test_iter, False)
    print('Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f' % (loss, r1, r2, rl))
Code Example #10
File: train.py  Project: maaeedee/t2vec
def train(args):
    logging.basicConfig(filename="training.log", level=logging.INFO)

    trainsrc = os.path.join(args.data, "train.src")
    traintrg = os.path.join(args.data, "train.trg")
    trainData = DataLoader(trainsrc, traintrg, args.batch, args.bucketsize)
    print("Reading training data...")
    trainData.load(args.max_num_line)
    print("Allocation: {}".format(trainData.allocation))
    print("Percent: {}".format(trainData.p))

    valsrc = os.path.join(args.data, "val.src")
    valtrg = os.path.join(args.data, "val.trg")
    if os.path.isfile(valsrc) and os.path.isfile(valtrg):
        valData = DataLoader(valsrc, valtrg, args.batch, args.bucketsize, True)
        print("Reading validation data...")
        valData.load()
        assert valData.size > 0, "Validation data size must be greater than 0"
        print("Loaded validation data size {}".format(valData.size))
    else:
        print("No validation data found, training without validating...")

    ## create criterion, model, optimizer
    if args.criterion_name == "NLL":
        criterion = NLLcriterion(args.vocab_size)
        lossF = lambda o, t: criterion(o, t)
    else:
        assert os.path.isfile(args.knearestvocabs),\
            "{} does not exist".format(args.knearestvocabs)
        print("Loading vocab distance file {}...".format(args.knearestvocabs))
        with h5py.File(args.knearestvocabs, "r") as f:
            V, D = f["V"][...], f["D"][...]
            V, D = torch.LongTensor(V), torch.FloatTensor(D)
        D = dist2weight(D, args.dist_decay_speed)
        if args.cuda and torch.cuda.is_available():
            V, D = V.cuda(), D.cuda()
        criterion = KLDIVcriterion(args.vocab_size)
        lossF = lambda o, t: KLDIVloss(o, t, criterion, V, D)

    m0 = EncoderDecoder(args.vocab_size,
                        args.embedding_size,
                        args.hidden_size,
                        args.num_layers,
                        args.dropout,
                        args.bidirectional)
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if args.cuda and torch.cuda.is_available():
        print("=> training with GPU")
        m0.cuda()
        m1.cuda()
        criterion.cuda()
        #m0 = nn.DataParallel(m0, dim=1)
    else:
        print("=> training with CPU")

    m0_optimizer = torch.optim.Adam(m0.parameters(), lr=args.learning_rate)
    m1_optimizer = torch.optim.Adam(m1.parameters(), lr=args.learning_rate)

    ## load model state and optmizer state
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        logging.info("Restore training @ {}".format(time.ctime()))
        checkpoint = torch.load(args.checkpoint)
        args.start_iteration = checkpoint["iteration"]
        best_prec_loss = checkpoint["best_prec_loss"]
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        m0_optimizer.load_state_dict(checkpoint["m0_optimizer"])
        m1_optimizer.load_state_dict(checkpoint["m1_optimizer"])
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        logging.info("Start training @ {}".format(time.ctime()))
        best_prec_loss = float('inf')
        #print("=> initializing the parameters...")
        #init_parameters(m0)
        #init_parameters(m1)
        ## here: load pretrained word (cell) embedding

    num_iteration = args.epochs * sum(trainData.allocation) // args.batch
    print("Iteration starts at {} "
          "and will end at {}".format(args.start_iteration, num_iteration-1))
    ## training
    for iteration in range(args.start_iteration, num_iteration):
        try:
            input, lengths, target = trainData.getbatch()
            if args.cuda and torch.cuda.is_available():
                input, lengths, target = input.cuda(), lengths.cuda(), target.cuda()

            m0_optimizer.zero_grad()
            m1_optimizer.zero_grad()
            ## forward computation
            output = m0(input, lengths, target)
            loss = batchloss(output, target, m1, lossF, args.generator_batch)
            ## compute the gradients
            loss.backward()
            ## clip the gradients
            clip_grad_norm_(m0.parameters(), args.max_grad_norm)
            clip_grad_norm_(m1.parameters(), args.max_grad_norm)
            ## one step optimization
            m0_optimizer.step()
            m1_optimizer.step()
            ## average loss for one word
            avg_loss = loss.item() / target.size(0)
            if iteration % args.print_freq == 0:
                print("Iteration: {}\tLoss: {}".format(iteration, avg_loss))
            if iteration % args.save_freq == 0 and iteration > 0:
                prec_loss = validate(valData, (m0, m1), lossF, args)
                if prec_loss < best_prec_loss:
                    best_prec_loss = prec_loss
                    logging.info("Best model with loss {} at iteration {} @ {}"\
                                 .format(best_prec_loss, iteration, time.ctime()))
                    is_best = True
                else:
                    is_best = False
                print("Saving the model at iteration {} validation loss {}"\
                      .format(iteration, prec_loss))
                savecheckpoint({
                    "iteration": iteration,
                    "best_prec_loss": best_prec_loss,
                    "m0": m0.state_dict(),
                    "m1": m1.state_dict(),
                    "m0_optimizer": m0_optimizer.state_dict(),
                    "m1_optimizer": m1_optimizer.state_dict()
                }, is_best)
        except KeyboardInterrupt:
            break
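The training loop above relies on a `savecheckpoint(state, is_best)` helper. The project's own implementation is not shown here; a minimal sketch of what such a helper typically looks like:

import shutil
import torch

def savecheckpoint(state, is_best, filename="checkpoint.pt"):
    # Sketch only; the real t2vec helper may use different file names.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, "best_model.pt")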
Code Example #11
File: m2p_runner.py  Project: Ydkwim/Pre-training
    def set_model(self):
        print('[Runner] - Initializing Transformer model...')

        # text_model does speech2text; speech_model does text2speech.
        self.text_model = EncoderDecoder(
            encoder_config=self.speech_encoder_config,
            decoder_config=self.text_decoder_config,
            modality="text")
        self.text_model.to(self.device)
        self.text_model.train()

        self.speech_model = EncoderDecoder(
            encoder_config=self.text_encoder_config,
            decoder_config=self.speech_decoder_config,
            modality="speech")
        self.speech_model.to(self.device)
        self.speech_model.train()

        if self.args.multi_gpu:
            self.text_model = torch.nn.DataParallel(self.text_model)
            self.speech_model = torch.nn.DataParallel(self.speech_model)
            print('[Runner] - Multi-GPU training Enabled: ' +
                  str(torch.cuda.device_count()))

        print('[Runner] - Number of parameters: ' + str(sum(p.numel() for p in self.text_model.parameters() if p.requires_grad) + \
                                                        sum(p.numel() for p in self.speech_model.parameters() if p.requires_grad)))

        param_optimizer = list(self.text_model.named_parameters()) + list(
            self.speech_model.named_parameters())

        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]

        if 'type' not in self.config['optimizer']:
            self.config['optimizer']['type'] = 'adam'
        print('[Runner] - Optimizer: ' +
              ('apex Fused Adam' if self.
               apex else str(self.config['optimizer']['type'])))
        if self.apex:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if self.config['optimizer']['loss_scale'] == 0:
                self.optimizer = FP16_Optimizer(optimizer,
                                                dynamic_loss_scale=True)
            else:
                self.optimizer = FP16_Optimizer(
                    optimizer,
                    static_loss_scale=self.config['optimizer']['loss_scale'])
            self.warmup_linear = WarmupLinearSchedule(
                warmup=self.warmup_proportion, t_total=self.total_steps)
        elif self.config['optimizer']['type'] == 'adam':
            self.optimizer = BertAdam(optimizer_grouped_parameters,
                                      lr=self.learning_rate,
                                      warmup=self.warmup_proportion,
                                      t_total=self.total_steps,
                                      schedule='warmup_linear')
        elif self.config['optimizer']['type'] == 'lamb' or self.config[
                'optimizer']['type'] == 'adamW':
            self.optimizer = Lamb(
                optimizer_grouped_parameters,
                lr=self.learning_rate,
                warmup=self.warmup_proportion,
                t_total=self.total_steps,
                schedule='warmup_linear',
                adam=True
                if self.config['optimizer']['type'] == 'adamW' else False,
                correct_bias=True
                if self.config['optimizer']['type'] == 'adamW' else False)
        else:
            raise NotImplementedError()

        if self.args.resume is not None:
            self.load_model(self.args.resume)
Code Example #12
def train_fader_network():
    gpu_id = 1
    use_cuda = True
    data_dir = 'data'
    sample_every = 10
    test_dir = join(data_dir, 'test-samples')
    encoder_decoder_fpath = join(data_dir, 'weights', 'adver.params')
    discriminator_fpath = join(data_dir, 'weights', 'discr.params')

    train, valid, test = split_train_val_test(data_dir)

    num_attr = train.attribute_names.shape[0]
    encoder_decoder = EncoderDecoder(num_attr, gpu_id=gpu_id)
    discriminator   = Discriminator(num_attr)
    if use_cuda:
        encoder_decoder.cuda(gpu_id)
        discriminator.cuda(gpu_id)

    train_iter = DataLoader(train, batch_size=32, shuffle=True, num_workers=8)
    valid_iter = DataLoader(valid, batch_size=32, shuffle=False, num_workers=8)
    test_iter  = DataLoader(test, batch_size=32, shuffle=False, num_workers=8)

    max_epochs = 1000
    lr, beta1 = 2e-3, 0.5
    adversarial_optimizer = optim.Adam(encoder_decoder.parameters(),
                                       lr=lr, betas=(beta1, 0.999))
    discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                         lr=lr, betas=(beta1, 0.999))
    mse_loss = nn.MSELoss(size_average=True)
    bce_loss = nn.BCELoss(size_average=True)

    num_iters = 0
    lambda_e = np.linspace(0, 1e-4, 500000)

    try:
        for epoch in range(1, max_epochs):
            encoder_decoder.train()
            discriminator.train()
            for iteration, (x, yb, yt, _) in enumerate(train_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                #print yb.data.cpu().numpy().shape
                #print yt.data.cpu().numpy().shape
                adversarial_optimizer.zero_grad()
                z, x_hat = encoder_decoder(x, yb)

                #if (epoch == 1) or (epoch % sample_every == 0):
                #if (epoch % sample_every == 0):
                #    plot_samples(x, x_hat, prefix='train_%d_%d' % (
                #        epoch, iteration))

                # send the output of the encoder as a new Variable that is not
                # part of the backward pass
                # not sure if this is the correct way to do so
                # https://discuss.pytorch.org/t/how-to-copy-a-variable-in-a-network-graph/1603/9
                z_in = Variable(z.data, requires_grad=False)
                discriminator_optimizer.zero_grad()
                y_hat = discriminator(z_in)

                # adversarial loss
                y_in = Variable(y_hat.data, requires_grad=False)
                le_idx = min(500000 - 1, num_iters)
                le_val = Variable(
                    torch.FloatTensor([lambda_e[le_idx]]).float(),
                    requires_grad=False)
                if use_cuda:
                    le_val = le_val.cuda(gpu_id)
                advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                advers_loss.backward()
                adversarial_optimizer.step()

                # discriminative loss
                discrim_loss = bce_loss(y_hat, yt)
                discrim_loss.backward()
                discriminator_optimizer.step()

                print(' Train epoch %d, iter %d (lambda_e = %.2e)' % (
                    epoch, iteration, le_val.data[0]))
                print('  adv. loss = %.6f' % (advers_loss.data[0]))
                print('  dsc. loss = %.6f' % (discrim_loss.data[0]))

                num_iters += 1

            encoder_decoder.eval()
            discriminator.eval()
            for iteration, (x, yb, yt, _) in enumerate(valid_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                z, x_hat = encoder_decoder(x, yb)

                #plot_samples(x, x_hat, prefix='valid_%d_%d' % (
                #    epoch, iteration))

                z_in = Variable(z.data, requires_grad=False)
                y_hat = discriminator(z_in)

                y_in = Variable(y_hat.data, requires_grad=False)
                valid_advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                valid_discrim_loss = bce_loss(y_hat, yt)
                print(' Valid epoch %d, iter %d (lambda_e = %.2e)' % (
                    epoch, iteration, le_val.data[0]))
                print('  adv. loss = %.6f' % (valid_advers_loss.data[0]))
                print('  dsc. loss = %.6f' % (valid_discrim_loss.data[0]))

            if (epoch % sample_every == 0):
                encoder_decoder.eval()
                for iteration, (x, yb, ys, fp) in enumerate(test_iter, 1):
                    # randomly choose an attribute and swap the targets
                    to_swap = np.random.choice(test.attribute_names)
                    swap_idx, = np.where(test.attribute_names == to_swap)[0]
                    # map (0, 1) --> (1, 0), and (1, 0) --> (0, 1)
                    yb[:, 2 * swap_idx]     = 1 - yb[:, 2 * swap_idx]
                    yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
                    if use_cuda:
                        x, yb = x.cuda(gpu_id), yb.cuda(gpu_id)
                    x, yb = Variable(x), Variable(yb)
                    _, x_hat = encoder_decoder(x, yb)
                    sample_dir = join(test_dir, '%s' % epoch, '%s' % to_swap)
                    if not exists(sample_dir):
                        makedirs(sample_dir)
                    fnames = ['%s.png' % splitext(basename(f))[0] for f in fp]
                    fpaths = [join(sample_dir, f) for f in fnames]
                    plot_samples(x, x_hat, fpaths)
    except KeyboardInterrupt:
        print('Caught Ctrl-C, interrupting training.')
    except RuntimeError:
        print('RuntimeError')
    print('Saving encoder/decoder parameters to %s' % (encoder_decoder_fpath))
    torch.save(encoder_decoder.state_dict(), encoder_decoder_fpath)
    print('Saving discriminator parameters to %s' % (discriminator_fpath))
    torch.save(discriminator.state_dict(), discriminator_fpath)
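The adversarial weight `lambda_e` above is a linear ramp from 0 to 1e-4 over the first 500,000 iterations, indexed with `le_idx = min(500000 - 1, num_iters)`. An equivalent closed-form sketch of that schedule:

def lambda_e_at(step, max_val=1e-4, ramp_steps=500000):
    # Equivalent to indexing np.linspace(0, max_val, ramp_steps) at min(step, ramp_steps - 1).
    return max_val * min(step, ramp_steps - 1) / (ramp_steps - 1)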
Code Example #13
File: train.py  Project: boyuanzheng010/ReviewSum
def train():
    embed = None
    if args.embed_path is not None and os.path.exists(args.embed_path):
        print('Loading pretrained word embedding...')
        embed = {}
        with open(args.embed_path, 'r') as f:
            f.readline()
            for line in f.readlines():
                line = line.strip().split()
                vec = [float(_) for _ in line[1:]]
                embed[line[0]] = vec
    vocab = Vocab(args, embed)
    print('Loading datasets...')
    train_data, val_data, test_data = [], [], []
    fns = os.listdir(args.train_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.train_dir + fn, 'r')
        train_data.append(json.load(f))
        f.close()
        vocab.add_sentence(train_data[-1]['reviewText'].split())
        vocab.add_sentence(train_data[-1]['summary'].split())
        vocab.add_user(train_data[-1]['userID'])
        vocab.add_product(train_data[-1]['productID'])
    fns = os.listdir(args.valid_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.valid_dir + fn, 'r')
        val_data.append(json.load(f))
        f.close()
        vocab.add_sentence(val_data[-1]['reviewText'].split())
        vocab.add_sentence(val_data[-1]['summary'].split())
        vocab.add_user(val_data[-1]['userID'])
        vocab.add_product(val_data[-1]['productID'])
    fns = os.listdir(args.test_dir)
    fns.sort(key=lambda p: int(p.split('.')[0]))
    for fn in tqdm(fns):
        f = open(args.test_dir + fn, 'r')
        test_data.append(json.load(f))
        f.close()
        vocab.add_sentence(test_data[-1]['reviewText'].split())
        vocab.add_sentence(test_data[-1]['summary'].split())
        vocab.add_user(test_data[-1]['userID'])
        vocab.add_product(test_data[-1]['productID'])

    print('Deleting rare words...')
    embed = vocab.trim()

    args.embed_num = len(embed)
    args.embed_dim = len(embed[0])
    args.user_num = vocab.user_num
    args.product_num = vocab.product_num

    train_dataset = Dataset(train_data)
    val_dataset = Dataset(val_data)
    train_iter = DataLoader(dataset=train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)
    val_iter = DataLoader(dataset=val_dataset,
                          batch_size=args.batch_size,
                          shuffle=False)

    net = EncoderDecoder(args, embed)
    if args.load_model is not None:
        print('Loading model...')
        checkpoint = torch.load(args.save_path + args.load_model)
        net = EncoderDecoder(checkpoint['args'], embed)
        net.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        net.cuda()
    criterion = nn.NLLLoss(ignore_index=vocab.PAD_IDX, reduction='sum')
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)

    print('Begin training...')
    for epoch in range(args.begin_epoch, args.epochs + 1):
        if epoch >= args.lr_decay_start:
            adjust_learning_rate(optim, epoch - args.lr_decay_start + 1)
        for i, batch in enumerate(train_iter):
            src, trg, src_embed, trg_embed, src_user, src_product, src_mask, src_lens, trg_lens, _1, _2 = vocab.read_batch(
                batch)
            pre_output = net(src, trg, src_embed, trg_embed, src_user,
                             src_product, vocab.word_num, src_mask, src_lens,
                             trg_lens)
            pre_output = torch.log(
                pre_output.view(-1, pre_output.size(-1)) + 1e-20)
            trg_output = trg.view(-1)
            loss = criterion(pre_output, trg_output) / len(src_lens)
            loss.backward()
            clip_grad_norm_(net.parameters(), args.max_norm)
            optim.step()
            optim.zero_grad()

            cnt = (epoch - 1) * len(train_iter) + i
            if cnt % args.print_every == 0:
                print('EPOCH [%d/%d]: BATCH_ID=[%d/%d] loss=%f' %
                      (epoch, args.epochs, i, len(train_iter), loss.data))

            if cnt % args.valid_every == 0:
                print('Begin valid... Epoch %d, Batch %d' % (epoch, i))
                cur_loss, r1, r2, rl = evaluate(net, criterion, vocab,
                                                val_iter, True)
                save_path = args.save_path + 'valid_%d_%.4f_%.4f_%.4f_%.4f' % (
                    cnt / args.valid_every, cur_loss, r1, r2, rl)
                net.save(save_path)
                print(
                    'Epoch: %2d Cur_Val_Loss: %f Rouge-1: %f Rouge-2: %f Rouge-l: %f'
                    % (epoch, cur_loss, r1, r2, rl))

    return
Code Example #14
def t2vec(args):
    "read source sequences from trj.t and write the tensor into file trj.h5"
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        checkpoint = torch.load(args.checkpoint)
        m0.load_state_dict(checkpoint["m0"])
        if torch.cuda.is_available():
            m0.cuda()
        m0.eval()
        vecs = []
        scaner = DataOrderScaner(
            os.path.join(
                args.data,
                "trj-{}-{}{:.1f}.t".format(args.cityname, args.encode_data,
                                           args.grid_size)), args.t2vec_batch)
        scaner.load()
        i = 0
        encode_time = 0.0
        while True:
            if i % 10 == 0:
                print("{}: Encoding {} trjs...".format(i, args.t2vec_batch))
            i = i + 1
            src, lengths, invp = scaner.getbatch()
            if src is None: break
            if torch.cuda.is_available():
                src, lengths, invp = src.cuda(), lengths.cuda(), invp.cuda()
            start_time = time.time()
            h, _ = m0.encoder(src, lengths)
            ## (num_layers, batch, hidden_size * num_directions)
            h = m0.encoder_hn2decoder_h0(h)
            ## (batch, num_layers, hidden_size * num_directions)
            h = h.transpose(0, 1).contiguous()
            end_time = time.time()
            encode_time += end_time - start_time
            ## (batch, *)
            #h = h.view(h.size(0), -1)
            vecs.append(h[invp].cpu().data)
        print("Encode data time is : ", encode_time)
        ## (num_seqs, num_layers, hidden_size * num_directions)
        vecs = torch.cat(vecs)
        ## (num_layers, num_seqs, hidden_size * num_directions)
        vecs = vecs.transpose(0, 1).contiguous()
        if not os.path.exists(os.path.join(args.data, 'traj_emb')):
            os.mkdir(os.path.join(args.data, 'traj_emb'))
        path = os.path.join(
            args.data,
            "traj_emb/trj_{}_{}{:.0f}.h5".format(args.cityname,
                                                 args.encode_data,
                                                 args.grid_size))
        print("=> saving vectors into {}".format(path))
        with h5py.File(path, "w") as f:
            for i in range(m0.num_layers):
                f["layer" + str(i + 1)] = vecs[i].squeeze(0).numpy()
        #torch.save(vecs.data, path)
        #return vecs.data
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))


#args = FakeArgs()
#args.t2vec_batch = 128
#args.num_layers = 2
#args.hidden_size = 64
#vecs = t2vec(args)
#vecs
Code Example #15
    fake_label = tf.ones([1, 1, 30, 30], tf.int32)

    real_A, real_B = np.asarray(train[0],
                                dtype=np.float32) / 255.0, np.asarray(
                                    train[1], dtype=np.float32) / 255.0

    # real_data = tf.placeholder(tf.float32, shape=[1, 256, 256, 3], name='')
    # t = tf.placeholder(tf.float32, shape=[None, 1])
    real_A = real_A.transpose(2, 0, 1)
    real_B = real_B.transpose(2, 0, 1)
    real_A = real_A.reshape(1, 3, 256, 256)
    real_B = real_B.reshape(1, 3, 256, 256)

    real_A = tf.Variable(real_A)
    real_B = tf.Variable(real_B)
    output = EncoderDecoder(tf.concat((real_A, real_B), 1))
    print("output ", output)

    break

#     label = (real_label)
#     err_d_real = loss_dis(output, label)
#     err_d_real.backward()
#     fake_b = encoderdecoder_model(real_A)
#     output = discriminator_model(F.concat((real_A, fake_b), axis=1))
#     label = (fake_label)
#     err_d_fake = loss_dis(output, label)
#     err_d_fake.backward()
#     err_d = (err_d_real + err_d_fake) / 2.0
#     optimizer_discriminator.update()
Code Example #16
def train(args):
    logging.basicConfig(filename=os.path.join(args.data, "training.log"),
                        level=logging.INFO)
    trainData, valData = loadTrainDataAndValidateDate(args)
    # create the loss function, the model, and the optimizer
    lossF = setLossF(args)
    triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)

    # m0: the full encoder-decoder mapping from input to output
    m0 = EncoderDecoder(args.vocab_size, args.embedding_size, args.hidden_size,
                        args.num_layers, args.dropout, args.bidirectional)
    # m1: maps the EncoderDecoder output to vocabulary scores and applies log-softmax
    m1 = nn.Sequential(nn.Linear(args.hidden_size, args.vocab_size),
                       nn.LogSoftmax(dim=1))
    if args.cuda and torch.cuda.is_available():
        print("=> training with GPU")
        m0.cuda()
        m1.cuda()
        # criterion.cuda()  (changed manually)
        #m0 = nn.DataParallel(m0, dim=1)
    else:
        print("=> training with CPU")

    m0_optimizer = torch.optim.Adam(m0.parameters(), lr=args.learning_rate)
    m1_optimizer = torch.optim.Adam(m1.parameters(), lr=args.learning_rate)

    ## load the model state and optimizer state
    ## resume from a saved training state if one exists; otherwise start from scratch
    if os.path.isfile(args.checkpoint):
        print("=> loading checkpoint '{}'".format(args.checkpoint))
        logging.info("Restore training @ {}".format(time.ctime()))
        checkpoint = torch.load(args.checkpoint)
        args.start_iteration = checkpoint["iteration"]
        best_prec_loss = checkpoint["best_prec_loss"]
        m0.load_state_dict(checkpoint["m0"])
        m1.load_state_dict(checkpoint["m1"])
        m0_optimizer.load_state_dict(checkpoint["m0_optimizer"])
        m1_optimizer.load_state_dict(checkpoint["m1_optimizer"])
    else:
        print("=> no checkpoint found at '{}'".format(args.checkpoint))
        logging.info("Start training @ {}".format(time.ctime()))
        best_prec_loss = float('inf')
        #print("=> initializing the parameters...")
        #init_parameters(m0)
        #init_parameters(m1)
        ## here: load pretrained word (cell) embedding

    # num_iteration = 67000*128 // args.batch
    num_iteration = args.iter_num
    print("开始训练:" + str(time.ctime()))
    print("Iteration starts at {} and will end at {} \n".format(
        args.start_iteration, num_iteration - 1))
    ## training
    for iteration in range(args.start_iteration + 1, num_iteration):
        try:
            # zero the gradients
            m0_optimizer.zero_grad()
            m1_optimizer.zero_grad()
            ## forward pass: compute predictions and the loss
            # fetch one padded and transposed batch with fields ['src', 'lengths', 'trg', 'invp']
            # src (seq_len1, batch), lengths (1, batch), trg (seq_len2, batch)
            gendata = trainData.getbatch_generative()
            # compute the generative loss
            genloss = genLoss(gendata, m0, m1, lossF, args)
            ## discriminative loss
            disloss_cross, disloss_inner = 0, 0
            # compute the discriminative loss once every 10 iterations
            if args.use_discriminative and iteration % 10 == 0:
                # trajectories in a and p are closer to each other; a.src.size = [max_length, 128]
                a, p, n = trainData.getbatch_discriminative_cross()
                disloss_cross = disLoss(a, p, n, m0, triplet_loss, args)
                # a, p, n are new sets of 128 downsampled trajectories drawn from the same batch of 128 trajectories
                a, p, n = trainData.getbatch_discriminative_inner()
                disloss_inner = disLoss(a, p, n, m0, triplet_loss, args)
                # print("计算三元损失:"+str(time.ctime()))
            # 损失按一定权重相加 genloss: 使损失尽可能小 discriminative——loss: 使序列尽可能相似
            loss = genloss + args.discriminative_w * (disloss_cross +
                                                      disloss_inner)
            ## backpropagate the loss to compute gradients
            loss.backward()
            ## clip the gradients to bound their norm and avoid exploding gradients
            clip_grad_norm_(m0.parameters(), args.max_grad_norm)
            clip_grad_norm_(m1.parameters(), args.max_grad_norm)
            ## take one optimization step over all parameters
            m0_optimizer.step()
            m1_optimizer.step()
            ## average loss per target token
            avg_genloss = genloss.item() / gendata.trg.size(0)
            ## periodically print training status
            if iteration % args.print_freq == 0:
                print("\n当前时间:" + str(time.ctime()))
                print("Iteration: {0:}\nGenerative Loss: {1:.3f}"\
                      "\nDiscriminative Cross Loss: {2:.3f}\nDiscriminative Inner Loss: {3:.3f}"\
                      .format(iteration, avg_genloss, disloss_cross, disloss_inner))

            ## periodically checkpoint: compute the validation loss and save the latest parameters whenever it improves

            if iteration % args.save_freq == 0 and iteration >= 1000:
                print("验证并存储训练状态:" + str(time.ctime()))
                prec_loss = validate(valData, (m0, m1), lossF, args)
                if prec_loss < best_prec_loss:
                    best_prec_loss = prec_loss
                    logging.info("Best model with loss {} at iteration {} @ {}"\
                                 .format(best_prec_loss, iteration, time.ctime()))
                    is_best = True
                else:
                    is_best = False
                print("Saving the model at iteration {} validation loss {}".
                      format(iteration, prec_loss) + str(time.ctime()))
                savecheckpoint(
                    {
                        "iteration": iteration,
                        "best_prec_loss": best_prec_loss,
                        "m0": m0.state_dict(),
                        "m1": m1.state_dict(),
                        "m0_optimizer": m0_optimizer.state_dict(),
                        "m1_optimizer": m1_optimizer.state_dict()
                    }, is_best, args)
        except KeyboardInterrupt:
            break
Code Example #17
from models import EncoderDecoder
from data import get_datasets, PadAndOneHot
from training import Trainer
from helper_functions import one_hot_to_string

# Generate datasets from text file
data_path = "data"
train_dataset, valid_dataset, test_dataset = get_datasets(data_path)
checkpoint_path = "."

# Initialize model
model = EncoderDecoder(
    num_encoder_layers=2,
    num_encoder_hidden=512,
    num_decoder_layers=2,
    num_decoder_hidden=512,
    Sx_size=len(train_dataset.Sx),  # input alphabet
    Sy_size=len(train_dataset.Sy),  # output alphabet
    y_eos=train_dataset.y_eos,  # index of end-of-sequence symbol for output
    dropout=0.1,
    use_attention=True)

# Train the model
num_epochs = 0
trainer = Trainer(model, lr=0.0001)
trainer.load_checkpoint(checkpoint_path)

for epoch in range(num_epochs):
    print("========= Epoch %d of %d =========" % (epoch + 1, num_epochs))
    train_acc, train_loss = trainer.train(train_dataset)
    valid_acc, valid_loss = trainer.test(valid_dataset)
    trainer.save_checkpoint(epoch, checkpoint_path)
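With the checkpoint restored, a final pass over the held-out split could reuse the same `Trainer.test` call as the validation step above (a sketch under that assumption):

test_acc, test_loss = trainer.test(test_dataset)
print("test acc = %.4f, test loss = %.4f" % (test_acc, test_loss))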
Code Example #18
File: m2p_runner.py  Project: Ydkwim/Pre-training
class Runner():
    ''' Handler for the complete pre-training process of upstream models '''
    def __init__(self, args, config, dae_dataloader, tokenizer, ckpdir):

        self.device = torch.device('cuda') if (
            args.gpu and torch.cuda.is_available()) else torch.device('cpu')
        if torch.cuda.is_available(): print('[Runner] - CUDA is available!')
        self.model_kept = []
        self.global_step = 1
        self.log = SummaryWriter(ckpdir)

        self.args = args
        self.config = config
        self.dae_dataloader = dae_dataloader
        self.tokenizer = tokenizer

        self.ckpdir = ckpdir

        # optimizer
        self.learning_rate = float(config['optimizer']['learning_rate'])
        self.warmup_proportion = config['optimizer']['warmup_proportion']
        self.gradient_accumulation_steps = config['optimizer'][
            'gradient_accumulation_steps']
        self.gradient_clipping = config['optimizer']['gradient_clipping']

        # Training details
        self.apex = config['runner']['apex']
        self.total_steps = config['runner']['total_steps']
        self.warm_up_epochs = config['runner']['warm_up_epochs']
        self.log_step = config['runner']['log_step']
        self.save_step = config['runner']['save_step']
        self.duo_feature = config['runner']['duo_feature']
        self.max_keep = config['runner']['max_keep']

        # Model configs
        self.text_encoder_config = RobertaConfig(**config['semantic'])
        self.text_encoder_config.is_decoder = False
        self.text_encoder_config.add_cross_attention = False
        self.text_decoder_config = RobertaConfig(**config['semantic'])
        self.text_decoder_config.is_decoder = True
        self.text_decoder_config.add_cross_attention = True

        self.speech_encoder_config = RobertaConfig(**config['acoustic'])
        self.speech_encoder_config.is_decoder = False
        self.speech_encoder_config.add_cross_attention = False
        self.speech_decoder_config = RobertaConfig(**config['acoustic'])
        self.speech_decoder_config.is_decoder = True
        self.speech_decoder_config.add_cross_attention = True

    def set_model(self):
        print('[Runner] - Initializing Transformer model...')

        # text_model does speech2text; speech_model does text2speech.
        self.text_model = EncoderDecoder(
            encoder_config=self.speech_encoder_config,
            decoder_config=self.text_decoder_config,
            modality="text")
        self.text_model.to(self.device)
        self.text_model.train()

        self.speech_model = EncoderDecoder(
            encoder_config=self.text_encoder_config,
            decoder_config=self.speech_decoder_config,
            modality="speech")
        self.speech_model.to(self.device)
        self.speech_model.train()

        if self.args.multi_gpu:
            self.text_model = torch.nn.DataParallel(self.text_model)
            self.speech_model = torch.nn.DataParallel(self.speech_model)
            print('[Runner] - Multi-GPU training Enabled: ' +
                  str(torch.cuda.device_count()))

        print('[Runner] - Number of parameters: ' + str(sum(p.numel() for p in self.text_model.parameters() if p.requires_grad) + \
                                                        sum(p.numel() for p in self.speech_model.parameters() if p.requires_grad)))

        param_optimizer = list(self.text_model.named_parameters()) + list(
            self.speech_model.named_parameters())

        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]

        if 'type' not in self.config['optimizer']:
            self.config['optimizer']['type'] = 'adam'
        print('[Runner] - Optimizer: ' +
              ('apex Fused Adam' if self.
               apex else str(self.config['optimizer']['type'])))
        if self.apex:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if self.config['optimizer']['loss_scale'] == 0:
                self.optimizer = FP16_Optimizer(optimizer,
                                                dynamic_loss_scale=True)
            else:
                self.optimizer = FP16_Optimizer(
                    optimizer,
                    static_loss_scale=self.config['optimizer']['loss_scale'])
            self.warmup_linear = WarmupLinearSchedule(
                warmup=self.warmup_proportion, t_total=self.total_steps)
        elif self.config['optimizer']['type'] == 'adam':
            self.optimizer = BertAdam(optimizer_grouped_parameters,
                                      lr=self.learning_rate,
                                      warmup=self.warmup_proportion,
                                      t_total=self.total_steps,
                                      schedule='warmup_linear')
        elif self.config['optimizer']['type'] == 'lamb' or self.config[
                'optimizer']['type'] == 'adamW':
            self.optimizer = Lamb(
                optimizer_grouped_parameters,
                lr=self.learning_rate,
                warmup=self.warmup_proportion,
                t_total=self.total_steps,
                schedule='warmup_linear',
                adam=True
                if self.config['optimizer']['type'] == 'adamW' else False,
                correct_bias=True
                if self.config['optimizer']['type'] == 'adamW' else False)
        else:
            raise NotImplementedError()

        if self.args.resume is not None:
            self.load_model(self.args.resume)

    def process_acoustic_data(self, acoustic_inputs):
        """Process training data for the masked acoustic model"""
        with torch.no_grad():

            assert (
                len(acoustic_inputs) == 4
            ), 'dataloader should return (a_inputs, a_mask_labels, a_attn_mask, a_labels)'
            # Unpack and Hack bucket: Bucketing should cause acoustic feature to have shape 1xBxTxD'
            a_inputs = acoustic_inputs[0].squeeze(0)
            a_mask_labels = acoustic_inputs[1].squeeze(0)
            a_attention_mask = acoustic_inputs[2].squeeze(0)
            a_labels = acoustic_inputs[3].squeeze(0)

            a_inputs = a_inputs.float().to(device=self.device)
            a_mask_labels = a_mask_labels.bool().to(device=self.device)
            a_attention_mask = a_attention_mask.float().to(device=self.device)
            a_labels = a_labels.float().to(device=self.device)

        return a_inputs, a_mask_labels, a_attention_mask, a_labels

    def process_semantic_data(self, semantic_inputs):
        with torch.no_grad():

            assert (
                len(semantic_inputs) == 4
            ), 'dataloader should return (s_inputs, s_attention_mask, s_labels, s_raw)'
            s_inputs = semantic_inputs[0].squeeze(0)
            s_attention_mask = semantic_inputs[1].squeeze(0)
            s_labels = semantic_inputs[2].squeeze(0)
            s_raw = semantic_inputs[3].squeeze(0)

            s_inputs = s_inputs.long().to(device=self.device)
            s_attention_mask = s_attention_mask.float().to(device=self.device)
            s_labels = s_labels.long().to(device=self.device)
            s_raw = s_raw.long().to(device=self.device)

        return s_inputs, s_attention_mask, s_labels, s_raw

    def load_model(self, ckptpth):
        ckpt = torch.load(ckptpth)
        self.text_model.load_state_dict(ckpt['semantic_model'])
        self.speech_model.load_state_dict(ckpt['acoustic_model'])
        self.optimizer.load_state_dict(ckpt['Optimizer'])
        self.global_step = ckpt['Global_step']

    def save_model(self, name='states', to_path=None):
        all_states = {
            'semantic_model':
            self.text_model.state_dict() if not self.args.multi_gpu else
            self.text_model.module.state_dict(),
            'acoustic_model':
            self.speech_model.state_dict() if not self.args.multi_gpu else
            self.speech_model.module.state_dict(),
        }
        all_states['Optimizer'] = self.optimizer.state_dict()
        all_states['Global_step'] = self.global_step
        all_states['Settings'] = {'Config': self.config, 'Paras': self.args}

        if to_path is None:
            new_model_path = '{}/{}-{}.ckpt'.format(self.ckpdir, name,
                                                    self.global_step)
        else:
            new_model_path = to_path

        torch.save(all_states, new_model_path)
        self.model_kept.append(new_model_path)

        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def train(self, ):

        print("Start warm up with parallel data.")

        warmup_dataset = SupervisedDataset(
            file_path=self.config['dataloader']['data_path'],
            sets=self.config['dataloader']['sup_train_set'],
            bucket_size=self.config['dataloader']['batch_size'],
            max_timestep=self.config['dataloader']['max_timestep'],
            drop=True,
            acoustic_config=self.config['acoustic'],
            semantic_config=self.config['semantic'],
            tokenizer=self.tokenizer,
            main_random_noise=False,
            mask_proportion=1.0)  # mask everything with [MASK]

        warmup_dataloader = DataLoader(
            dataset=warmup_dataset,
            batch_size=1,
            shuffle=True,
            drop_last=False,
            num_workers=self.config['dataloader']['n_jobs'],
            pin_memory=True)

        tk0 = tqdm(range(self.warm_up_epochs),
                   total=self.warm_up_epochs,
                   desc="warm up training with parallel data.")
        for _ in tk0:

            accum_step = 0
            accum_text_sup_loss = 0
            accum_speech_sup_loss = 0

            for warmup_batch in warmup_dataloader:
                warmup_batch_is_valid, warmup_speech_batch, warmup_text_batch = warmup_batch

                if not warmup_batch_is_valid:
                    continue

                speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                    warmup_speech_batch)
                text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                    warmup_text_batch)

                text_sup_loss = self.text_model(
                    encoder_inputs=speech_labels,
                    encoder_attention_mask=speech_attention_mask,
                    decoder_inputs=text_inputs,
                    decoder_attention_mask=text_attention_mask,
                    decoder_labels=text_labels)

                speech_sup_loss = self.speech_model(
                    encoder_inputs=text_raw,
                    encoder_attention_mask=text_attention_mask,
                    decoder_inputs=speech_inputs,
                    decoder_attention_mask=speech_attention_mask,
                    decoder_labels=(speech_labels, speech_mask_labels))

                loss = text_sup_loss + speech_sup_loss
                if self.args.multi_gpu:
                    loss = loss.mean()
                    text_sup_loss = text_sup_loss.mean()
                    speech_sup_loss = speech_sup_loss.mean()

                loss.backward()

                grad_norm = torch.nn.utils.clip_grad_norm_(
                    list(self.text_model.parameters()) +
                    list(self.speech_model.parameters()),
                    self.gradient_clipping)

                self.optimizer.step()
                self.optimizer.zero_grad()

                batch_size = text_inputs.size(0)
                accum_step += batch_size
                accum_text_sup_loss += text_sup_loss.item() * batch_size
                accum_speech_sup_loss += speech_sup_loss.item() * batch_size

            tk0.set_postfix(text_loss=accum_text_sup_loss / accum_step,
                            speech_loss=accum_speech_sup_loss / accum_step)

        del warmup_dataset, warmup_dataloader
        gc.collect()

        previous_speech2text_dataloader = None
        previous_text2speech_dataloader = None

        epoch = 0

        pbar = tqdm(total=self.total_steps)
        pbar.n = self.global_step - 1
        while self.global_step <= self.total_steps:

            print("\nStart Generation. Epoch: {}.\n".format(epoch))
            self.text_model.eval()
            self.speech_model.eval()

            if previous_speech2text_dataloader is None or previous_text2speech_dataloader is None:

                gen_dataset = DAEDataset(
                    file_path=self.config['dataloader']['data_path'],
                    sets=self.config['dataloader']['sup_train_set'] +
                    self.config['dataloader']['dt_train_set'],
                    bucket_size=self.config['dataloader']['batch_size'],
                    max_timestep=self.config['dataloader']['max_timestep'],
                    drop=True,
                    acoustic_config=self.config['acoustic'],
                    semantic_config=self.config['semantic'],
                    tokenizer=self.tokenizer,
                    main_random_noise=False)

                gen_dataloader = DataLoader(
                    dataset=gen_dataset,
                    batch_size=1,
                    shuffle=True,
                    drop_last=False,
                    num_workers=self.config['dataloader']['n_jobs'],
                    pin_memory=True)

            all_speech = []
            all_speech_mask = []
            all_gen_text = []

            all_text = []
            all_text_mask = []
            all_gen_speech = []
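
            # Generation phase: on the first epoch the decoders are fed fully
            # masked inputs ([MASK] tokens for text, zero frames for speech);
            # on later epochs they are fed the previous epoch's generations.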

            with torch.no_grad():
                if previous_speech2text_dataloader is None or previous_text2speech_dataloader is None:
                    for gen_batch in tqdm(gen_dataloader,
                                          desc="Generating First Time."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue

                        # The speech and text here are not paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)

                        batch_size = speech_labels.size(0)
                        text_mask_inputs = torch.ones((batch_size, self.text_decoder_config.max_output_length), dtype=torch.long).to(self.device) * \
                            self.tokenizer.mask_token_id

                        output_text = self.text_model(
                            encoder_inputs=speech_labels,
                            encoder_attention_mask=speech_attention_mask,
                            decoder_inputs=text_mask_inputs,
                        )

                        all_speech.append(speech_labels.detach().cpu().numpy())
                        all_speech_mask.append(
                            speech_attention_mask.detach().cpu().numpy())
                        all_gen_text.append(output_text.detach().cpu().numpy())

                        batch_size = text_raw.size(0)
                        speech_mask_inputs = torch.zeros(
                            (batch_size,
                             self.speech_decoder_config.max_output_length,
                             self.speech_decoder_config.audio_size *
                             self.speech_decoder_config.downsample_rate),
                            dtype=torch.float).to(self.device)

                        output_speech = self.speech_model(
                            encoder_inputs=text_raw,
                            encoder_attention_mask=text_attention_mask,
                            decoder_inputs=speech_mask_inputs,
                        )

                        all_text.append(text_raw.detach().cpu().numpy())
                        all_text_mask.append(
                            text_attention_mask.detach().cpu().numpy())
                        all_gen_speech.append(
                            output_speech.detach().cpu().numpy())

                    del gen_dataset, gen_dataloader
                    gc.collect()

                else:
                    for gen_batch in tqdm(previous_speech2text_dataloader,
                                          desc="Generating Text."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue

                        # The speech and text here are paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)

                        output_text = self.text_model(
                            encoder_inputs=speech_labels,
                            encoder_attention_mask=speech_attention_mask,
                            decoder_inputs_embeds=text_raw,  # generated in the previous round, as token embeddings.
                        )

                        all_speech.append(speech_labels.detach().cpu().numpy())
                        all_speech_mask.append(
                            speech_attention_mask.detach().cpu().numpy())
                        all_gen_text.append(output_text.detach().cpu().numpy())

                    del speech2text_dt_dataset, speech2text_dt_dataloader, previous_speech2text_dataloader
                    gc.collect()

                    for gen_batch in tqdm(previous_text2speech_dataloader,
                                          desc="Generating Speech."):
                        gen_batch_is_valid, gen_speech_batch, gen_text_batch = gen_batch
                        if not gen_batch_is_valid:
                            continue

                        # The speech and text here are paired.
                        speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                            gen_speech_batch)
                        text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                            gen_text_batch)

                        output_speech = self.speech_model(
                            encoder_inputs=text_raw,
                            encoder_attention_mask=text_attention_mask,
                            decoder_inputs=speech_labels,  # generated in the previous round, as mel spectrograms.
                        )

                        all_text.append(text_raw.detach().cpu().numpy())
                        all_text_mask.append(
                            text_attention_mask.detach().cpu().numpy())
                        all_gen_speech.append(
                            output_speech.detach().cpu().numpy())

                    del text2speech_dt_dataset, text2speech_dt_dataloader, previous_text2speech_dataloader
                    gc.collect()

            speech2text = ((all_speech, all_speech_mask), all_gen_text)
            text2speech = ((all_text, all_text_mask), all_gen_speech)
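
            # Curriculum masking: the DT mask proportion starts at 0.30 and grows
            # by 0.01 per epoch, capped at the larger of the configured acoustic /
            # semantic dt_mask_proportion values.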

            current_epoch_dt_mask_prop = min(
                max(self.config['semantic']['dt_mask_proportion'],
                    self.config['acoustic']['dt_mask_proportion']),
                0.3 + 0.01 * epoch)

            speech2text_dt_dataset = Speech2TextDTDataset(
                speech2text,
                bucket_size=self.config['dataloader']['batch_size'],
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_dt_mask_prop)

            speech2text_dt_dataloader = DataLoader(
                dataset=speech2text_dt_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)

            text2speech_dt_dataset = Text2SpeechDTDataset(
                text2speech,
                bucket_size=self.config['dataloader']['batch_size'],
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_dt_mask_prop)

            text2speech_dt_dataloader = DataLoader(
                dataset=text2speech_dt_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)

            previous_speech2text_dataloader = speech2text_dt_dataloader
            previous_text2speech_dataloader = text2speech_dt_dataloader

            del speech2text, text2speech
            gc.collect()
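
            # The supervised mask proportion follows the same 0.30 + 0.01-per-epoch
            # schedule, capped at the larger of the configured sup_mask_proportion
            # values.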

            current_epoch_sup_mask_prop = min(
                max(self.config['semantic']['sup_mask_proportion'],
                    self.config['acoustic']['sup_mask_proportion']),
                0.3 + 0.01 * epoch)

            sup_dataset = SupervisedDataset(
                file_path=self.config['dataloader']['data_path'],
                sets=self.config['dataloader']['sup_train_set'],
                bucket_size=self.config['dataloader']['batch_size'],
                max_timestep=self.config['dataloader']['max_timestep'],
                drop=True,
                acoustic_config=self.config['acoustic'],
                semantic_config=self.config['semantic'],
                tokenizer=self.tokenizer,
                main_random_noise=False,
                mask_proportion=current_epoch_sup_mask_prop)

            sup_dataloader = DataLoader(
                dataset=sup_dataset,
                batch_size=1,
                shuffle=True,
                drop_last=False,
                num_workers=self.config['dataloader']['n_jobs'],
                pin_memory=True)

            ##################################################
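
            # Main training step: each batch combines dual-transformation,
            # supervised, and DAE losses; the DT and DAE terms get weight 1.0
            # while the supervised term is down-weighted to 0.1.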

            progress = tqdm(self.dae_dataloader,
                            desc="Main Training Iteration.")

            s2t_dt_iter = iter(speech2text_dt_dataloader)
            t2s_dt_iter = iter(text2speech_dt_dataloader)
            sup_iter = iter(sup_dataloader)

            loss_val = 0
            speech_dt_loss_val, text_dt_loss_val, speech_dt_stop_loss_val = 0, 0, 0
            speech_sup_loss_val, text_sup_loss_val, speech_sup_stop_loss_val = 0, 0, 0
            speech_dae_loss_val, text_dae_loss_val, speech_dae_stop_loss_val = 0, 0, 0

            self.text_model.train()
            self.speech_model.train()

            for step, dae_batch in enumerate(progress):
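                # The DT and supervised dataloaders may be shorter than the DAE
                # dataloader, so each is restarted with a fresh iterator once it
                # is exhausted.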

                try:
                    s2t_dt_batch = next(s2t_dt_iter)
                except StopIteration:
                    del s2t_dt_iter
                    gc.collect()

                    s2t_dt_iter = iter(speech2text_dt_dataloader)
                    s2t_dt_batch = next(s2t_dt_iter)

                try:
                    t2s_dt_batch = next(t2s_dt_iter)
                except StopIteration:
                    del t2s_dt_iter
                    gc.collect()

                    t2s_dt_iter = iter(text2speech_dt_dataloader)
                    t2s_dt_batch = next(t2s_dt_iter)

                try:
                    sup_batch = next(sup_iter)
                except StopIteration:
                    del sup_iter
                    gc.collect()

                    sup_iter = iter(sup_dataloader)
                    sup_batch = next(sup_iter)

                dae_batch_is_valid, dae_speech_batch, dae_text_batch = dae_batch
                s2t_dt_batch_is_valid, s2t_dt_speech_batch, s2t_dt_text_batch = s2t_dt_batch
                t2s_dt_batch_is_valid, t2s_dt_speech_batch, t2s_dt_text_batch = t2s_dt_batch
                sup_batch_is_valid, sup_speech_batch, sup_text_batch = sup_batch

                try:
                    if self.global_step > self.total_steps: break
                    if not s2t_dt_batch_is_valid or not t2s_dt_batch_is_valid or not dae_batch_is_valid or not sup_batch_is_valid:
                        continue

                    ######## Dual Transformation ######
                    # The datasets must be kept separate: the generated text plus the
                    # original audio is used to reconstruct the audio, and vice versa.
                    # Generated data goes into the encoder, real data into the decoder.

                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        t2s_dt_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        t2s_dt_text_batch)

                    text_dt_loss = self.text_model(
                        encoder_inputs=speech_labels,  # generated speech.
                        encoder_attention_mask=speech_attention_mask,
                        decoder_inputs=text_inputs,
                        decoder_attention_mask=text_attention_mask,
                        decoder_labels=text_labels)

                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        s2t_dt_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        s2t_dt_text_batch)

                    speech_dt_loss = self.speech_model(
                        encoder_inputs_embeds=text_raw,  # generated text.
                        encoder_attention_mask=text_attention_mask,
                        decoder_inputs=speech_inputs,
                        decoder_attention_mask=speech_attention_mask,
                        decoder_labels=(speech_labels, speech_mask_labels))

                    ######## Supervised #######

                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        sup_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        sup_text_batch)

                    text_sup_loss = self.text_model(
                        encoder_inputs=speech_inputs,
                        encoder_attention_mask=speech_attention_mask,
                        decoder_inputs=text_inputs,
                        decoder_attention_mask=text_attention_mask,
                        decoder_labels=text_labels)

                    speech_sup_loss = self.speech_model(
                        encoder_inputs=text_inputs,
                        encoder_attention_mask=text_attention_mask,
                        decoder_inputs=speech_inputs,
                        decoder_attention_mask=speech_attention_mask,
                        decoder_labels=(speech_labels, speech_mask_labels))

                    ######## Denoise AutoEncoding ########

                    speech_inputs, speech_mask_labels, speech_attention_mask, speech_labels = self.process_acoustic_data(
                        dae_speech_batch)
                    text_inputs, text_attention_mask, text_labels, text_raw = self.process_semantic_data(
                        dae_text_batch)

                    text_dae_loss = self.text_model(
                        encoder_inputs=speech_inputs,
                        encoder_attention_mask=speech_attention_mask,
                        encoder_labels=(speech_labels, speech_mask_labels))

                    speech_dae_loss = self.speech_model(
                        encoder_inputs=text_inputs,
                        encoder_attention_mask=text_attention_mask,
                        encoder_labels=text_labels)

                    #######################################

                    if self.args.multi_gpu:
                        text_dt_loss = text_dt_loss.mean()
                        speech_dt_loss = speech_dt_loss.mean()

                        text_sup_loss = text_sup_loss.mean()
                        speech_sup_loss = speech_sup_loss.mean()

                        text_dae_loss = text_dae_loss.mean()
                        speech_dae_loss = speech_dae_loss.mean()

                    loss = (text_dt_loss + speech_dt_loss) + \
                           0.1 * (text_sup_loss + speech_sup_loss) + \
                           (text_dae_loss + speech_dae_loss)

                    # Accumulate Loss
                    if self.gradient_accumulation_steps > 1:
                        loss = loss / self.gradient_accumulation_steps

                    if self.apex and self.args.multi_gpu:
                        raise NotImplementedError
                    elif self.apex:
                        self.optimizer.backward(loss)
                    else:
                        loss.backward()

                    loss_val += loss.item()

                    speech_dt_loss_val += speech_dt_loss.item()
                    text_dt_loss_val += text_dt_loss.item()

                    speech_sup_loss_val += speech_sup_loss.item()
                    text_sup_loss_val += text_sup_loss.item()

                    speech_dae_loss_val += speech_dae_loss.item()
                    text_dae_loss_val += text_dae_loss.item()

                    # Step the optimizer only once every gradient_accumulation_steps batches.
                    if (step + 1) % self.gradient_accumulation_steps == 0:
                        if self.apex:
                            # modify learning rate with special warm up BERT uses
                            # if config.apex is False, BertAdam is used and handles this automatically
                            lr_this_step = self.learning_rate * self.warmup_linear.get_lr(
                                self.global_step, self.warmup_proportion)
                            for param_group in self.optimizer.param_groups:
                                param_group['lr'] = lr_this_step

                        # Step
                        grad_norm = torch.nn.utils.clip_grad_norm_(
                            list(self.text_model.parameters()) +
                            list(self.speech_model.parameters()),
                            self.gradient_clipping)
                        if math.isnan(grad_norm):
                            print(
                                '[Runner] - Error : grad norm is NaN @ step ' +
                                str(self.global_step))
                        else:
                            self.optimizer.step()
                        self.optimizer.zero_grad()

                        if self.global_step % self.log_step == 0:
                            # Log
                            self.log.add_scalar('lr',
                                                self.optimizer.get_lr()[0],
                                                self.global_step)
                            self.log.add_scalar('loss', (loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dt_loss',
                                                (speech_dt_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_dt_loss',
                                                (text_dt_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dt_stop_loss',
                                                (speech_dt_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_sup_loss',
                                                (speech_sup_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_sup_loss',
                                                (text_sup_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_sup_stop_loss',
                                                (speech_sup_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dae_loss',
                                                (speech_dae_loss_val),
                                                self.global_step)
                            self.log.add_scalar('text_dae_loss',
                                                (text_dae_loss_val),
                                                self.global_step)
                            self.log.add_scalar('speech_dae_stop_loss',
                                                (speech_dae_stop_loss_val),
                                                self.global_step)
                            self.log.add_scalar('gradient norm', grad_norm,
                                                self.global_step)

                        progress.set_description(
                            "Loss {:.4f} - DT Loss {:.4f} - SUP Loss {:.4f} - DAE Loss {:.4f}"
                            .format(loss_val,
                                    (speech_dt_loss_val + text_dt_loss_val +
                                     speech_dt_stop_loss_val),
                                    (speech_sup_loss_val + text_sup_loss_val +
                                     speech_sup_stop_loss_val),
                                    (speech_dae_loss_val + text_dae_loss_val +
                                     speech_dae_stop_loss_val)))

                        if self.global_step % self.save_step == 0:
                            self.save_model('states')

                        loss_val = 0
                        speech_dt_loss_val, text_dt_loss_val, speech_dt_stop_loss_val = 0, 0, 0
                        speech_sup_loss_val, text_sup_loss_val, speech_sup_stop_loss_val = 0, 0, 0
                        speech_dae_loss_val, text_dae_loss_val, speech_dae_stop_loss_val = 0, 0, 0

                        pbar.update(1)
                        self.global_step += 1

                except RuntimeError as e:
                    if 'CUDA out of memory' in str(e):
                        print('CUDA out of memory at step: ', self.global_step)
                        torch.cuda.empty_cache()
                        self.optimizer.zero_grad()
                    else:
                        raise

            epoch += 1

            del sup_dataset, sup_dataloader, sup_iter, s2t_dt_iter, t2s_dt_iter
            gc.collect()

        pbar.close()
        self.log.close()
コード例 #19
0
print('# Minibatch-size: {}'.format(args.batchsize))
print('# epoch: {}'.format(args.epoch))

print('===> Loading datasets')
root_path = "dataset/"
train_set = get_training_set(root_path + args.dataset)
test_set = get_test_set(root_path + args.dataset)

# for iteration, batch in enumerate(train_set, 1):
#     print("iteration", iteration)
#     print(batch[0].shape)
#     print(batch[1].shape)
#     break

print('===> Building model')
encoderdecoder_model = EncoderDecoder(args.input_nc, args.output_nc, args.ngf)
discriminator_model = Discriminator(args.input_nc, args.output_nc, args.ngf)

if args.gpu >= 0:
    print("use gpu")
    chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
    encoderdecoder_model.to_gpu()
    discriminator_model.to_gpu()

optimizer_encoderdecoder = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
optimizer_encoderdecoder.setup(encoderdecoder_model)
serializers.save_npz("encoderdecoder_model_" + str(1), encoderdecoder_model)

if args.gpu >= 0:
    xp = cuda.cupy
    label = xp.random.randn(args.batchsize)