Example #1
def train_ae(batch, total_loss_ae, start_time, i):
    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # Create sentence length mask over padding
    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    # examples x ntokens
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    # output: batch x seq_len x ntokens
    output = autoencoder(source, lengths, noise=True)

    # output_size: batch_size, maxlen, self.ntokens
    flattened_output = output.view(-1, ntokens)

    masked_output = \
        flattened_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output/args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        # accuracy
        probs = F.softmax(masked_output)
        max_vals, max_indices = torch.max(probs, 1)
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data[0]

        cur_loss = total_loss_ae[0] / args.log_interval
        elapsed = time.time() - start_time
        print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
              'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}'
              .format(epoch, i, len(train_data),
                      elapsed * 1000 / args.log_interval,
                      cur_loss, math.exp(cur_loss), accuracy))

        with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
            f.write('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}\n'.
                    format(epoch, i, len(train_data),
                           elapsed * 1000 / args.log_interval,
                           cur_loss, math.exp(cur_loss), accuracy))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time
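Every example on this page funnels tensors through a to_gpu helper whose definition is not shown. A minimal sketch matching the dominant call shape above (a CUDA flag followed by the object to move; some projects below use one-argument or flag-last variants):

def to_gpu(gpu, var):
    # Hypothetical helper inferred from the call sites: move a
    # tensor/Variable (or module) onto the GPU when the flag is set.
    if gpu:
        return var.cuda()
    return var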
Example #2
def evaluate_autoencoder(data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        # output: batch x seq_len x ntokens
        output = autoencoder(source, lengths, noise=True)
        flattened_output = output.view(-1, ntokens)

        masked_output = \
            flattened_output.masked_select(output_mask).view(-1, ntokens)
        total_loss += criterion_ce(masked_output/args.temp, masked_target).data

        # accuracy
        max_vals, max_indices = torch.max(masked_output, 1)
        all_accuracies += \
            torch.mean(max_indices.eq(masked_target).float()).data[0]
        bcnt += 1

        aeoutf = "./output/%s/%d_autoencoder.txt" % (args.outf, epoch)
        with open(aeoutf, "a") as f:
            max_values, max_indices = torch.max(output, 2)
            max_indices = \
                max_indices.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            for t, idx in zip(target, max_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f.write(chars)
                f.write("\n")
                # autoencoder output sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx])
                f.write(chars)
                f.write("\n\n")

    return total_loss[0] / len(data_source), all_accuracies/bcnt
Example #3
    def encode(self, indices, lengths, noise):
        embeddings = self.embedding(indices)
        packed_embeddings = pack_padded_sequence(input=embeddings,
                                                 lengths=lengths,
                                                 batch_first=True)

        # Encode
        packed_output, state = self.encoder(packed_embeddings)

        hidden, cell = state
        # batch_size x nhidden
        hidden = hidden[-1]  # get hidden state of last layer of encoder

        # normalize to unit ball (l2 norm of 1) - p=2, dim=1
        norms = torch.norm(hidden, 2, 1)
        
        # For older versions of PyTorch use:
        hidden = torch.div(hidden, norms.expand_as(hidden))
        # For newest version of PyTorch (as of 8/25) use this:
        # hidden = torch.div(hidden, norms.unsqueeze(1).expand_as(hidden))

        if noise and self.noise_radius > 0:
            gauss_noise = torch.normal(means=torch.zeros(hidden.size()),
                                       std=self.noise_radius)
            hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise))

        return hidden
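On current PyTorch the whole normalize-to-unit-ball step collapses to one call; an equivalent, assuming hidden is batch x nhidden:

import torch.nn.functional as F
hidden = F.normalize(hidden, p=2, dim=1)  # divide each row by its L2 norm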
Example #4
def train_gan_d(batch):
    # clamp parameters to a cube
    for p in gan_disc.parameters():
        p.data.clamp_(-args.gan_clamp, args.gan_clamp)

    autoencoder.train()
    autoencoder.zero_grad()
    gan_disc.train()
    gan_disc.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # batch_size x nhidden
    real_hidden = autoencoder(source, lengths, noise=False, encode_only=True)
    real_hidden.register_hook(grad_hook)

    # loss / backprop
    errD_real = gan_disc(real_hidden)
    errD_real.backward(one)

    # negative samples ----------------------------
    # generate fake codes
    noise = to_gpu(args.cuda,
                   Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    # loss / backprop
    fake_hidden = gan_gen(noise)
    errD_fake = gan_disc(fake_hidden.detach())
    errD_fake.backward(mone)

    # `clip_grad_norm` to prevent exploding gradient problem in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)

    optimizer_gan_d.step()
    optimizer_ae.step()
    errD = -(errD_real - errD_fake)

    return errD, errD_real, errD_fake
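grad_hook is defined elsewhere in the project. In the reference ARAE code it rescales the critic's gradient before it reaches the encoder and flips its sign; a hedged sketch (names such as autoencoder.grad_norm and args.gan_toenc come from that reference implementation and may differ here):

def grad_hook(grad):
    # Match the critic's gradient norm to the autoencoder's, then apply a
    # weight and a sign flip so the encoder is updated adversarially.
    gan_norm = torch.norm(grad, 2, 1).detach().data.mean()
    normed_grad = grad * autoencoder.grad_norm / gan_norm
    return normed_grad * -math.fabs(args.gan_toenc)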
Example #5
def train_gan_g():
    gan_gen.train()
    gan_gen.zero_grad()

    noise = to_gpu(args.cuda,
                   Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    fake_hidden = gan_gen(noise)
    errG = gan_disc(fake_hidden)

    # loss / backprop
    errG.backward(one)
    optimizer_gan_g.step()

    return errG
Example #6
def train_lm(eval_path, save_path):
    # generate examples
    indices = []
    noise = to_gpu(args.cuda, Variable(torch.ones(100, args.z_size)))
    for i in range(1000):
        noise.data.normal_(0, 1)

        fake_hidden = gan_gen(noise)
        max_indices = autoencoder.generate(fake_hidden, args.maxlen)
        indices.append(max_indices.data.cpu().numpy())

    indices = np.concatenate(indices, axis=0)

    # write generated sentences to text file
    with open(save_path+".txt", "w") as f:
        # laplacian smoothing
        for word in corpus.dictionary.word2idx.keys():
            f.write(word+"\n")
        for idx in indices:
            # generated sentence
            words = [corpus.dictionary.idx2word[x] for x in idx]
            # truncate sentences to first occurrence of <eos>
            truncated_sent = []
            for w in words:
                if w != '<eos>':
                    truncated_sent.append(w)
                else:
                    break
            chars = " ".join(truncated_sent)
            f.write(chars+"\n")

    # train language model on generated examples
    lm = train_ngram_lm(kenlm_path=args.kenlm_path,
                        data_path=save_path+".txt",
                        output_path=save_path+".arpa",
                        N=args.N)

    # load sentences to evaluate on
    with open(eval_path, 'r') as f:
        lines = f.readlines()
    sentences = [l.replace('\n', '') for l in lines]
    ppl = get_ppl(lm, sentences)

    return ppl
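train_ngram_lm and get_ppl are project wrappers around KenLM. A plausible sketch of get_ppl (an assumption: corpus-level perplexity computed from KenLM's base-10 sentence log-probabilities):

def get_ppl(lm, sentences):
    # kenlm's score(s) returns log10 P(s), including the </s> token.
    total_logprob, total_words = 0.0, 0
    for sent in sentences:
        total_logprob += lm.score(sent)
        total_words += len(sent.split()) + 1  # +1 for </s>
    return 10 ** (-total_logprob / total_words)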
Example #7
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=None,
                 dilation=1,
                 bias=True,
                 w_init_gain='linear'):
        super(ConvNorm, self).__init__()
        if padding is None:
            assert (kernel_size % 2 == 1)
            padding = int(dilation * (kernel_size - 1) / 2)

        self.conv = torch.nn.Conv1d(in_channels,
                                    out_channels,
                                    kernel_size=kernel_size,
                                    stride=stride,
                                    padding=padding,
                                    dilation=dilation,
                                    bias=bias)
        self.conv = to_gpu(self.conv)
        torch.nn.init.xavier_uniform_(
            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain))
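The padding default, dilation * (kernel_size - 1) / 2, keeps the output length equal to the input length for odd kernel sizes. A quick hypothetical check (note that __init__ moves the conv onto the GPU via to_gpu, so inputs would need to live there too):

conv = ConvNorm(in_channels=80, out_channels=512, kernel_size=5)
x = torch.randn(8, 80, 100)   # batch x channels x time, on the same device
y = conv.conv(x)              # padding = 2, so y has shape (8, 512, 100)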
Example #8
    def encode(self, indices, lengths=None, noise=True):
        embeddings = self.embedding(indices)
        embeddings = embeddings.transpose(1, 2)
        c_pre_lin = self.encoder(embeddings)
        c_pre_lin = c_pre_lin.squeeze(2)
        hidden = self.linear(c_pre_lin.permute(0, 2, 1))
        # normalize to unit ball (l2 norm of 1) - p=2, dim=1
        norms = torch.norm(hidden, 2, 1)
        if norms.ndimension() == 1:
            norms = norms.unsqueeze(1)
        hidden = torch.div(hidden, norms.expand_as(hidden))  # norms is (batch, 1) here

        if noise and self.noise_radius > 0:
            normal = Normal(torch.zeros(hidden.size()),
                            self.noise_radius * torch.ones(hidden.size()))
            gauss_noise = normal.sample()
            # gauss_noise = torch.normal(means=torch.zeros(hidden.size()),
            # std=self.noise_radius*torch.ones(hidden.size()))
            if self.gpu:
                gauss_noise = gauss_noise.cuda()

            hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise))

        return hidden
Example #9
    def __init__(self, emsize, nhidden, ntokens, nlayers, noise_radius=0.2,
                 hidden_init=False, dropout=0, gpu=False):
        super(Seq2Seq, self).__init__()
        self.nhidden = nhidden
        self.emsize = emsize
        self.ntokens = ntokens
        self.nlayers = nlayers
        self.noise_radius = noise_radius
        self.hidden_init = hidden_init
        self.dropout = dropout
        self.gpu = gpu

        self.start_symbols = to_gpu(gpu, Variable(torch.ones(10, 1).long()))

        # Vocabulary embedding
        self.embedding = nn.Embedding(ntokens, emsize)
        self.embedding_decoder = nn.Embedding(ntokens, emsize)

        # RNN Encoder and Decoder
        self.encoder = nn.LSTM(input_size=emsize,
                               hidden_size=nhidden,
                               num_layers=nlayers,
                               dropout=dropout,
                               batch_first=True)

        decoder_input_size = emsize+nhidden
        self.decoder = nn.LSTM(input_size=decoder_input_size,
                               hidden_size=nhidden,
                               num_layers=1,
                               dropout=dropout,
                               batch_first=True)

        # Initialize Linear Transformation
        self.linear = nn.Linear(nhidden, ntokens)

        self.init_weights()
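decoder_input_size = emsize + nhidden because this Seq2Seq feeds the decoder, at each time step, the token embedding concatenated with the fixed latent code. A sketch of that step, with names assumed from the constructor above:

embeddings = self.embedding_decoder(indices)                   # batch x seq_len x emsize
code = hidden.unsqueeze(1).expand(-1, embeddings.size(1), -1)  # batch x seq_len x nhidden
decoder_input = torch.cat([embeddings, code], dim=2)           # batch x seq_len x (emsize + nhidden)
output, state = self.decoder(decoder_input)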
Example #10
    def parse_batch(self, batch):
        # text_padded, input_lengths, mel_padded, gate_padded, \
        # output_lengths = batch
        input_lengths, mask_padded, words_sorted, \
               select_target_padded, mel_padded, gate_padded, output_lengths = batch

        input_lengths = to_gpu(input_lengths).long()
        mask_padded = to_gpu(mask_padded).float()
        # mask_padded = to_gpu(mask_padded).long()

        select_target_padded = to_gpu(select_target_padded).long()

        max_len = torch.max(input_lengths.data).item()
        mel_padded = to_gpu(mel_padded).float()
        gate_padded = to_gpu(gate_padded).float()
        output_lengths = to_gpu(output_lengths).long()

        return ((input_lengths, mask_padded, select_target_padded,
                 words_sorted, mel_padded, max_len, output_lengths),
                (mel_padded, gate_padded, select_target_padded))
Example #11
def train(model_directory, epochs, learning_rate, epochs_per_checkpoint,
          batch_size, seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    criterion = CrossEntropyLoss()
    model = WaveNet(**wavenet_config).cuda()
    # model.upsample = torch.nn.Sequential() #replace the upsample step with no operation as we manually control samples
    # model.upsample.weight = None
    # model.upsample.bias = None

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    checkpoint_path = find_checkpoint(model_directory)
    if checkpoint_path is not None:
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = SimpleWaveLoader()
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=None,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):

            model.zero_grad()

            x, y = batch
            x = to_gpu(x).float()
            y = to_gpu(y)
            x = (x, y)  # auto-regressive takes outputs as inputs

            y_pred = model(x)
            loss = criterion(y_pred, y)
            reduced_loss = loss.data.item()
            loss.backward()
            optimizer.step()

            # print out the loss and save it to a file
            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            with open(os.path.join(model_directory, 'loss_history.txt'),
                      'a') as f:
                f.write('%s\n' % str(reduced_loss))

            iteration += 1
            torch.cuda.empty_cache()

        if (epoch != 0 and epoch % epochs_per_checkpoint == 0):
            checkpoint_path = os.path.join(model_directory,
                                           'checkpoint_%d' % iteration)
            save_checkpoint(model, optimizer, learning_rate, iteration,
                            checkpoint_path)
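find_checkpoint is a project helper; a minimal sketch consistent with how it is used above (return the newest checkpoint in the directory, or None to start from scratch):

import glob, os

def find_checkpoint(model_directory):
    # Hypothetical helper: pick the 'checkpoint_<iteration>' file with the
    # highest iteration number, if any exist.
    candidates = glob.glob(os.path.join(model_directory, 'checkpoint_*'))
    if not candidates:
        return None
    return max(candidates, key=lambda p: int(p.rsplit('_', 1)[-1]))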
Example #12
    def alignment(self, sentences, poss, customs, lengths, bert_sent,
                  bert_sent_type, bert_sent_mask):

        batch_size = lengths.size(0)

        if self.config.use_bert:
            bert_output = self.bertmodel(input_ids=bert_sent,
                                         attention_mask=bert_sent_mask,
                                         token_type_ids=bert_sent_type)

            bert_output = bert_output[0]

            # masked mean
            masked_output = torch.mul(bert_sent_mask.unsqueeze(2), bert_output)
            mask_len = torch.sum(bert_sent_mask, dim=1, keepdim=True)
            bert_output = torch.sum(masked_output, dim=1,
                                    keepdim=False) / mask_len

            utterance_text = bert_output
        else:
            # extract features from text modality
            sentences = self.embed(sentences)
            final_h1t, final_h2t = self.extract_features(
                sentences, lengths, self.trnn1, self.trnn2, self.tlayer_norm)
            utterance_text = torch.cat(
                (final_h1t, final_h2t),
                dim=2).permute(1, 0, 2).contiguous().view(batch_size, -1)

        # extract features from pos modality (can be swapped in directly)
        final_h1p, final_h2p = self.extract_features(poss, lengths, self.prnn1,
                                                     self.prnn2,
                                                     self.player_norm)
        utterance_pos = torch.cat(
            (final_h1p, final_h2p),
            dim=2).permute(1, 0, 2).contiguous().view(batch_size, -1)

        utterance_cust = customs

        # Shared-private encoders
        self.shared_private(utterance_text, utterance_pos, utterance_cust)

        # For reconstruction
        self.reconstruct()

        # 1-LAYER TRANSFORMER FUSION
        # shape = [9*96*32]
        h = torch.stack((
            self.utt_private_t,
            self.utt_private_p,
            self.utt_private_c,
            self.utt_shared_t,
            self.utt_shared_p,
            self.utt_shared_c,
        ),
                        dim=0)
        h = self.transformer_encoder(h)

        h = torch.cat((
            h[0],
            h[1],
            h[2],
            h[3],
            h[4],
            h[5],
        ), dim=1)

        features = to_gpu(torch.empty((0, 12 * self.config.hidden_size)))
        hx = self.hx
        for x in h:
            x = x.unsqueeze(0).unsqueeze(0)
            # if self.config.rnncell == "lstm":
            #     _, (hx, _) = self.conversation_rnn(input=x, hx=hx.detach())
            # else:
            #     _, hx = self.conversation_rnn(input=x, hx=hx.detach())
            _, hx = self.conversation_rnn(input=x, hx=hx.detach())
            features = torch.cat((features, hx.view(1, -1)), dim=0)
        self.hx = hx.detach()

        o = self.fusion(features)

        return o
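The loop above grows `features` with one torch.cat per utterance. An equivalent variant (assumption: identical behavior) collects the per-step hidden states and concatenates once, avoiding repeated reallocation:

feats = []
for x in h:
    _, hx = self.conversation_rnn(input=x.unsqueeze(0).unsqueeze(0), hx=hx.detach())
    feats.append(hx.view(1, -1))
features = torch.cat(feats, dim=0)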
Example #13
def epoch_step(loader,
               desc,
               model,
               criterion,
               metrics,
               scaler,
               opt=None,
               batch_accum=1):
    is_train = opt is not None
    if is_train:
        model.train()
        criterion.train()
    else:
        model.eval()
        criterion.eval()

    pbar = tqdm.tqdm(total=len(loader), desc=desc, leave=False, mininterval=2)
    loc_loss = n = 0
    loc_accum = 1

    for x, y in loader:
        x = to_gpu(x, args.dist.gpu)
        y = to_gpu(y, args.dist.gpu)
        # x = x.to(memory_format=torch.channels_last)

        with torch.cuda.amp.autocast():
            logits = model(x)
            loss = criterion(logits, y) / batch_accum

        if is_train:
            scaler.scale(loss).backward()

            if loc_accum == batch_accum:
                scaler.step(opt)
                scaler.update()
                for p in model.parameters():
                    p.grad = None
                # opt.zero_grad()

                loc_accum = 1
            else:
                loc_accum += 1

            logits = logits.detach()

        bs = len(x)
        loc_loss += loss.item() * bs * batch_accum
        n += bs

        for metric in metrics.values():
            metric.update(logits, y)

        torch.cuda.synchronize()

        if args.dist.local_rank == 0:
            postfix = {"loss": f"{loc_loss / n:.3f}"}
            postfix.update({
                k: f"{metric.evaluate():.3f}"
                for k, metric in metrics.items()
            })
            if is_train:
                postfix.update(
                    {"lr": f'{next(iter(opt.param_groups))["lr"]:.3}'})
            pbar.set_postfix(**postfix)
            pbar.update()

    if is_train and loc_accum != batch_accum:
        scaler.step(opt)
        scaler.update()
        for p in model.parameters():
            p.grad = None
        # opt.zero_grad()

    pbar.close()

    return loc_loss / n
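Note the accumulation scheme above: the loss is pre-divided by batch_accum, gradients from batch_accum micro-batches are summed, and the optimizer steps once, so with batch_accum=4 the update matches a single batch four times larger; the trailing scaler.step after the loop flushes any leftover partial accumulation.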
Example #14
    def __init__(self,
                 emsize,
                 nhidden,
                 ntokens,
                 nlayers,
                 conv_windows="5-5-3",
                 conv_strides="2-2-2",
                 conv_layer="500-700-1000",
                 activation=nn.LeakyReLU(0.2, inplace=True),
                 noise_r=0.2,
                 share_decoder_emb=False,
                 hidden_init=False,
                 dropout=0,
                 gpu=False,
                 pooling_enc="avg"):
        super(Seq2Seq2CNNLSTMEncoderDecoder, self).__init__()
        self.nhidden = nhidden
        self.emsize = emsize
        self.ntokens = ntokens
        self.nlayers = nlayers
        self.noise_r = noise_r
        self.hidden_init = hidden_init
        self.dropout = dropout
        self.gpu = gpu
        # for CNN encoder
        self.arch_conv_filters = conv_layer
        self.arch_conv_strides = conv_strides
        self.arch_conv_windows = conv_windows

        self.start_symbols = to_gpu(gpu, Variable(torch.ones(10, 1).long()))

        # Vocabulary embedding
        self.embedding = nn.Embedding(ntokens, emsize)
        self.embedding_prem = nn.Embedding(ntokens, emsize)
        self.embedding_decoder1 = nn.Embedding(ntokens, emsize)
        self.embedding_decoder2 = nn.Embedding(ntokens, emsize)
        self.embedding_decoder3 = nn.Embedding(ntokens, emsize)

        # for CNN hypo encoder
        conv_layer_sizes = [emsize] + [int(x) for x in conv_layer.split('-')]
        conv_strides_sizes = [int(x) for x in conv_strides.split('-')]
        conv_windows_sizes = [int(x) for x in conv_windows.split('-')]
        self.encoder = nn.Sequential()

        for i in range(len(conv_layer_sizes) - 1):
            layer = nn.Conv1d(conv_layer_sizes[i], conv_layer_sizes[i + 1], \
                              conv_windows_sizes[i], stride=conv_strides_sizes[i])
            self.encoder.add_module("layer-" + str(i + 1), layer)

            bn = nn.BatchNorm1d(conv_layer_sizes[i + 1])
            self.encoder.add_module("bn-" + str(i + 1), bn)

            self.encoder.add_module("activation-" + str(i + 1), activation)

        if pooling_enc == "max":
            self.pooling_enc = nn.AdaptiveMaxPool1d(1)
        else:
            self.pooling_enc = nn.AdaptiveAvgPool1d(1)
        self.linear_enc = nn.Linear(1000, nhidden)

        # LSTM prem encoder
        self.encoder_prem = nn.LSTM(input_size=emsize,
                                    hidden_size=nhidden,
                                    num_layers=nlayers,
                                    dropout=dropout,
                                    batch_first=True)

        decoder_input_size = emsize + nhidden * 2
        self.decoder1 = nn.LSTM(input_size=decoder_input_size,
                                hidden_size=nhidden,
                                num_layers=1,
                                dropout=dropout,
                                batch_first=True)
        self.decoder2 = nn.LSTM(input_size=decoder_input_size,
                                hidden_size=nhidden,
                                num_layers=1,
                                dropout=dropout,
                                batch_first=True)
        self.decoder3 = nn.LSTM(input_size=decoder_input_size,
                                hidden_size=nhidden,
                                num_layers=1,
                                dropout=dropout,
                                batch_first=True)

        # Initialize Linear Transformation
        self.linear = nn.Linear(nhidden, ntokens)

        self.init_weights()

        if share_decoder_emb:
            self.embedding_decoder2.weight = self.embedding_decoder1.weight
            self.embedding_decoder3.weight = self.embedding_decoder1.weight
Example #15
def evaluate_autoencoder(data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    enc_classifier.eval()
    total_loss = 0
    ntokens = args.ntokens
    nclasses = args.nclasses
    all_accuracies = 0
    all_class_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths, tags = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        # output: batch x seq_len x ntokens
        output = autoencoder(source, lengths, noise=True)
        output_encode_only = autoencoder(source,
                                         lengths,
                                         noise=False,
                                         encode_only=True)
        output_classifier = enc_classifier(output_encode_only)
        _, output_classifier = torch.max(output_classifier, -1)

        flattened_output = output.view(-1, ntokens)

        masked_output = \
            flattened_output.masked_select(output_mask).view(-1, ntokens)
        total_loss += criterion_ce(masked_output / args.temp,
                                   masked_target).data

        # accuracy
        max_vals, max_indices = torch.max(masked_output, 1)
        all_accuracies += \
            torch.mean(max_indices.eq(masked_target).float()).item()
        bcnt += 1

        output_classifier = output_classifier.data.cpu().numpy()
        tags = tags.numpy()
        all_class_accuracies += \
            np.equal(output_classifier, tags).sum()

        aeoutf = "./output/%s/%d_autoencoder.txt" % (args.outf, epoch)
        with open(aeoutf, "a") as f:
            max_values, max_indices = torch.max(output, 2)
            max_indices = \
                max_indices.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()

            for t, idx, cls, cls_real in zip(target, max_indices,
                                             output_classifier, tags):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f.write(str(cls_real))
                f.write("\t")
                f.write(chars)
                f.write("\n")
                # autoencoder output sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx])
                f.write(str(cls))
                f.write("\t")
                f.write(chars)
                f.write("\n\n")

    return (total_loss.item() / bcnt, all_accuracies / bcnt,
            all_class_accuracies / len(data_source))
Example #16
    def inference(self, inputs, mask_padded):

        hidden_features, label_scores_charts, embedding_outputs = self.tree_encoder.parse_batch(
            inputs, return_label_scores_charts=True)
        batch_size_inner = hidden_features.size(0)
        sentence_max_length = hidden_features.size(1)
        structure_features = []
        for i, label_scores_chart in enumerate(label_scores_charts):
            sentence_length = label_scores_chart.size(0)
            label_scores_cnn_output = self.structure_cnn(label_scores_chart)
            label_scores_cnn_output = label_scores_cnn_output[1:-1, :]
            # label_scores_cnn_output = to_gpu(label_scores_cnn_output).float()
            label_scores_cnn_output = label_scores_cnn_output.float()
            label_scores_cnn_output_padder = torch.zeros(
                [sentence_max_length - sentence_length + 2, 300])
            label_scores_cnn_output_padder = to_gpu(
                label_scores_cnn_output_padder).float()
            # label_scores_cnn_output_padder = label_scores_cnn_output_padder.float()
            label_scores_cnn_output_padded = torch.cat(
                [label_scores_cnn_output, label_scores_cnn_output_padder], 0)
            structure_features.append(label_scores_cnn_output_padded)
        structure_features_reshape = torch.cat(structure_features, 0)
        structure_features = structure_features_reshape
        structure_features = torch.reshape(structure_features,
                                           [batch_size_inner, -1, 300])

        additional_select_features = torch.cat(
            [embedding_outputs, structure_features], 2)
        # select_pred = self.poly_phoneme_classifier.inference(additional_select_features)
        select_pred = self.poly_phoneme_classifier(additional_select_features,
                                                   mask_padded)

        yinsu_id_inputs = torch.matmul(select_pred, self.pinyin_to_yinsu_dict)
        yinsu_id_inputs = torch.reshape(yinsu_id_inputs,
                                        [batch_size_inner, -1]).long()
        yinsu_embedded_inputs = self.yinsu_embedding(yinsu_id_inputs)

        hidden_inputs = hidden_features.transpose(1, 2)
        structure_features = structure_features.transpose(1, 2)
        additional_features = torch.cat([hidden_inputs, structure_features], 1)
        additional_features = additional_features.permute(0, 2, 1)

        additional_features_repeat = torch.repeat_interleave(
            additional_features, repeats=4, dim=1)
        features_for_encoder = torch.cat(
            [additional_features_repeat, yinsu_embedded_inputs], 2)
        features_for_encoder = features_for_encoder.permute(0, 2, 1)
        encoder_outputs = self.encoder.inference(features_for_encoder)
        mel_outputs, gate_outputs, alignments = self.decoder.inference(
            encoder_outputs)

        # hidden_inputs = hidden_features.transpose(1, 2)
        # structure_features = structure_features.transpose(1, 2)
        # additional_features = torch.cat([hidden_inputs, structure_features], 1)
        # additional_features = additional_features.permute(0, 2, 1)
        # additional_features_repeat = torch.repeat_interleave(additional_features, repeats=4, dim=1)
        # features_for_decoder = torch.cat([additional_features_repeat, yinsu_embedded_inputs], 2)
        # mel_outputs, gate_outputs, alignments = self.decoder.inference(features_for_decoder)

        mel_outputs_postnet = self.postnet(mel_outputs)
        mel_outputs_postnet = mel_outputs + mel_outputs_postnet

        # embedded_inputs = self.embedding(inputs).transpose(1, 2)
        # encoder_outputs = self.encoder.inference(embedded_inputs)
        # mel_outputs, gate_outputs, alignments = self.decoder.inference(
        # encoder_outputs)

        # mel_outputs_postnet = self.postnet(mel_outputs)
        # mel_outputs_postnet = mel_outputs + mel_outputs_postnet

        outputs = self.parse_output(
            [mel_outputs, mel_outputs_postnet, gate_outputs, alignments],
            select_pred)

        return outputs
Example #17
    def init_hidden(self, bsz):
        zeros1 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
        zeros2 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
        return (to_gpu(self.gpu, zeros1), to_gpu(self.gpu, zeros2))
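init_hidden builds the (h0, c0) pair that nn.LSTM expects, one zero tensor per layer; a typical hypothetical call site would be output, (hn, cn) = self.encoder(packed_embeddings, self.init_hidden(bsz)).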
Example #18
def train(num_gpus, rank, group_name, output_directory, log_directory,
          checkpoint_path, hparams):
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(hparams.sigma)
    model = WaveGlow(hparams).cuda()

    Taco2 = load_pretrained_taco('tacotron2.pt', hparams)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if hparams.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path:
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = TextMelLoader(hparams.training_files, hparams)
    collate_fn = TextMelCollate()
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    batch_size = hparams.batch_size
    train_loader = DataLoader(trainset,
                              num_workers=0,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True,
                              collate_fn=collate_fn)

    # Get shared output_directory ready

    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if hparams.with_tensorboard and rank == 0:
        logger = prepare_directories_and_logger(output_directory,
                                                log_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    print("Total Epochs: {}".format(hparams.epochs))
    print("Batch Size: {}".format(hparams.batch_size))
    print("learning rate: {}".format(hparams.learning_rate))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            text_padded, input_lengths, mel_padded, max_len, output_lengths = parse_batch(
                batch)
            with torch.no_grad():
                enc_outputs, alignments = Taco2(
                    (text_padded, input_lengths, mel_padded, max_len,
                     output_lengths))

            # mel_padded = mel_padded.transpose(1, 2)
            # mel_padded = mel_padded / torch.abs(mel_padded).max().item()
            mel_pos = torch.arange(1000)
            mel_pos = to_gpu(mel_pos).long().unsqueeze(0)
            mel_pos = mel_pos.expand(hparams.batch_size, -1)
            src_pos = torch.arange(hparams.n_position)
            src_pos = to_gpu(src_pos).long().unsqueeze(0)
            src_pos = src_pos.expand(hparams.batch_size, -1)

            mel_padded = (mel_padded + 5) / 10

            z, log_s_list, log_det_w_list, dec_enc_attn = model(
                mel_padded, enc_outputs, mel_pos, src_pos, input_lengths)
            outputs = (z, log_s_list, log_det_w_list, dec_enc_attn)
            loss = criterion(outputs, alignments)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), hparams.grad_clip_thresh)
            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if hparams.with_tensorboard and rank == 0:
                logger.log_training(reduced_loss, grad_norm, learning_rate,
                                    iteration)

            if (iteration % hparams.iters_per_checkpoint == 0):
                if rank == 0:
                    mel_predict, test_attn = model.test(
                        mel_padded, enc_outputs, mel_pos, src_pos,
                        input_lengths)
                    logger.log_alignment(model, dec_enc_attn, alignments,
                                         mel_padded, mel_predict, test_attn,
                                         iteration)
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
Example #19
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, iters_per_eval, batch_size, seed, checkpoint_path, log_dir, ema_decay=0.9999):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    if train_data_config["no_chunks"]:
        criterion = MaskedCrossEntropyLoss()
    else:
        criterion = CrossEntropyLoss()
    model = WaveNet(**wavenet_config).cuda()
    ema = ExponentialMovingAverage(ema_decay)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=200000, gamma=0.5)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, scheduler, iteration, ema = load_checkpoint(checkpoint_path, model,
                                                                      optimizer, scheduler, ema)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2SampOnehot(audio_config=audio_config, verbose=True, **train_data_config)
    validset = Mel2SampOnehot(audio_config=audio_config, verbose=False, **valid_data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    valid_sampler = DistributedSampler(validset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    print(train_data_config)
    if train_data_config["no_chunks"]:
        collate_fn = utils.collate_fn
    else:
        collate_fn = torch.utils.data.dataloader.default_collate
    train_loader = DataLoader(trainset, num_workers=1, shuffle=False,
                              collate_fn=collate_fn,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(validset, num_workers=1, shuffle=False,
                              sampler=valid_sampler, batch_size=1, pin_memory=True)
    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)
    
    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    writer = SummaryWriter(log_dir)
    print("Checkpoints writing to: {}".format(log_dir))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            if low_memory:
                torch.cuda.empty_cache()
            scheduler.step()
            model.zero_grad()

            if train_data_config["no_chunks"]:
                x, y, seq_lens = batch
                seq_lens = to_gpu(seq_lens)
            else:
                x, y = batch
            x = to_gpu(x).float()
            y = to_gpu(y)
            x = (x, y)  # auto-regressive takes outputs as inputs
            y_pred = model(x)
            if train_data_config["no_chunks"]:
                loss = criterion(y_pred, y, seq_lens)
            else:
                loss = criterion(y_pred, y)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus)[0]
            else:
                reduced_loss = loss.data[0]
            loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if rank == 0:
                writer.add_scalar('loss', reduced_loss, iteration)
            if (iteration % iters_per_checkpoint == 0 and iteration):
                if rank == 0:
                    checkpoint_path = "{}/wavenet_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, scheduler, learning_rate, iteration,
                                    checkpoint_path, ema, wavenet_config)
            if (iteration % iters_per_eval == 0 and iteration > 0 and not config["no_validation"]):
                if low_memory:
                    torch.cuda.empty_cache()
                if rank == 0:
                    model_eval = nv_wavenet.NVWaveNet(**(model.export_weights()))
                    for j, valid_batch in enumerate(valid_loader):
                        mel, audio = valid_batch
                        mel = to_gpu(mel).float()
                        cond_input = model.get_cond_input(mel)
                        predicted_audio = model_eval.infer(cond_input, nv_wavenet.Impl.AUTO)
                        predicted_audio = utils.mu_law_decode_numpy(predicted_audio[0, :].cpu().numpy(), 256)
                        writer.add_audio("valid/predicted_audio_{}".format(j),
                                         predicted_audio,
                                         iteration,
                                         22050)
                        audio = utils.mu_law_decode_numpy(audio[0, :].cpu().numpy(), 256)
                        writer.add_audio("valid_true/audio_{}".format(j),
                                         audio,
                                         iteration,
                                         22050)
                        if low_memory:
                            torch.cuda.empty_cache()
            iteration += 1
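ExponentialMovingAverage is a project helper; the register/update calls above match the usual shadow-parameter scheme. A plausible sketch (an assumption, not necessarily the project's exact class):

class ExponentialMovingAverage:
    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, value):
        self.shadow[name] = value.clone()

    def update(self, name, value):
        # shadow <- decay * shadow + (1 - decay) * value
        self.shadow[name] = (self.decay * self.shadow[name]
                             + (1.0 - self.decay) * value)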
Example #20
    def forward(self, inputs):
        text_lengths, mask_padded, select_target, words_sorted, mels, max_len, output_lengths = inputs
        text_lengths, output_lengths = text_lengths.data, output_lengths.data

        # batch_size_inner = select_target.size(0)
        # sentence_max_length = select_target.size(1)
        # print('CHECK words_sorted:', words_sorted)
        # predicted_trees, scores = self.tree_encoder.parse_batch(words_sorted)
        # print('CHECK scores:', scores)
        # tree_shows = [p.convert().linearize() for p in predicted_trees]
        # print('CHECK scores:', tree_shows)

        hidden_features, label_scores_charts, embedding_outputs = self.tree_encoder.parse_batch(
            words_sorted, return_label_scores_charts=True)
        # print('CHECK character_padded:', character_padded.shape)
        # print('CHECK hidden_features:', hidden_features.shape)

        batch_size_inner = hidden_features.size(0)
        sentence_max_length = hidden_features.size(1)

        structure_features = []
        for i, label_scores_chart in enumerate(label_scores_charts):
            sentence_length = label_scores_chart.size(0)
            label_scores_cnn_output = self.structure_cnn(label_scores_chart)
            label_scores_cnn_output = label_scores_cnn_output[1:-1, :]
            # label_scores_cnn_output = to_gpu(label_scores_cnn_output).float()
            label_scores_cnn_output = label_scores_cnn_output.float()
            label_scores_cnn_output_padder = torch.zeros(
                [sentence_max_length - sentence_length + 2, 300])
            label_scores_cnn_output_padder = to_gpu(
                label_scores_cnn_output_padder).float()
            # label_scores_cnn_output_padder = label_scores_cnn_output_padder.float()
            label_scores_cnn_output_padded = torch.cat(
                [label_scores_cnn_output, label_scores_cnn_output_padder], 0)
            structure_features.append(label_scores_cnn_output_padded)
        structure_features_reshape = torch.cat(structure_features, 0)
        structure_features = structure_features_reshape
        structure_features = torch.reshape(structure_features,
                                           [batch_size_inner, -1, 300])
        # print('CHECK structure_features:', structure_features.shape)

        # select_target_to_loss = torch.reshape(select_target, [-1, 6])
        # print('CHECK select_target:', select_target_to_loss)
        # select_target = select_target_to_loss.unsqueeze(-1)

        # character_embedded_inputs = self.character_embedding(character_padded)
        # poly_yinsu_embedded_inputs = self.yinsu_embedding(poly_yinsu_padded)

        # Pretrain to have structure features with character_embedded_inputs [B, L, 512], actually 300
        # character_embedded_inputs = self.character_embedding(character_padded)

        additional_select_features = torch.cat(
            [embedding_outputs, structure_features], 2)
        # print('CHECK embedding_outputs:', embedding_outputs)
        # print('CHECK mask_padded:', mask_padded)
        # select_pred = self.poly_phoneme_classifier(embedding_outputs, mask_padded)
        select_pred = self.poly_phoneme_classifier(additional_select_features,
                                                   mask_padded)
        # print('CHECK select_pred:', select_pred)

        # select_pred_to_loss = torch.reshape(select_pred, [-1, 6])
        # print('CHECK select_pred:', select_pred_to_loss)
        # select_pred = select_pred_to_loss.unsqueeze(-1)

        select_accuracy = self.poly_phoneme_classifier.select_acc(
            select_target, select_pred, mask_padded)
        print('CHECK select_accuracy:', select_accuracy)

        # poly_yinsu_embedded_inputs = torch.reshape(poly_yinsu_embedded_inputs, [-1, 6, 512])
        # poly_yinsu_embedded_inputs = poly_yinsu_embedded_inputs.permute(0, 2, 1)
        # phoneme_selected_inputs = torch.bmm(poly_yinsu_embedded_inputs, select_pred)
        # phoneme_selected_inputs = phoneme_selected_inputs.squeeze(-1)
        # phoneme_selected_inputs = torch.reshape(phoneme_selected_inputs, [batch_size_inner, -1, 512])
        # phoneme_selected_inputs = phoneme_selected_inputs.permute(0, 2, 1)

        # print('CHECK pinyin_to_yinsu_dict:', self.pinyin_to_yinsu_dict)
        # print('CHECK pinyin_to_yinsu_dict:', self.pinyin_to_yinsu_dict.shape)
        # print('CHECK select_pred:', select_pred.shape)
        # yinsu_id_pred = torch.argmax(select_pred, 2)
        yinsu_id_inputs = torch.matmul(select_pred, self.pinyin_to_yinsu_dict)
        yinsu_id_inputs = torch.reshape(yinsu_id_inputs,
                                        [batch_size_inner, -1]).long()
        yinsu_embedded_inputs = self.yinsu_embedding(yinsu_id_inputs)
        # print('CHECK yinsu_embedded_inputs:', yinsu_embedded_inputs.shape)
        # print('CHECK yinsu_embedded_inputs:', yinsu_embedded_inputs)

        # Encoder Features Shape = [B, Features length, L]
        hidden_inputs = hidden_features.transpose(1, 2)
        structure_features = structure_features.transpose(1, 2)
        # additional_features = torch.cat([hidden_inputs, phoneme_selected_inputs, structure_features], 1)
        additional_features = torch.cat([hidden_inputs, structure_features], 1)
        # print('CHECK additional_features:', additional_features.shape)
        additional_features = additional_features.permute(0, 2, 1)

        additional_features_repeat = torch.repeat_interleave(
            additional_features, repeats=4, dim=1)
        # features_for_decoder = torch.cat([additional_features_repeat, yinsu_embedded_inputs], 2)
        features_for_encoder = torch.cat(
            [additional_features_repeat, yinsu_embedded_inputs], 2)
        features_for_encoder = features_for_encoder.permute(0, 2, 1)

        encoder_outputs = self.encoder(features_for_encoder, text_lengths * 4)
        # print('CHECK encoder_outputs:', encoder_outputs.shape)

        # mel_outputs, gate_outputs, alignments = self.decoder(features_for_decoder, mels, memory_lengths=text_lengths*4)
        mel_outputs, gate_outputs, alignments = self.decoder(
            encoder_outputs, mels, memory_lengths=text_lengths * 4)

        mel_outputs_postnet = self.postnet(mel_outputs)
        mel_outputs_postnet = mel_outputs + mel_outputs_postnet

        return self.parse_output(
            [mel_outputs, mel_outputs_postnet, gate_outputs, alignments],
            select_pred, output_lengths)
Example #21
corpus = Corpus(datafiles,
                maxlen=args.maxlen,
                vocab_size=args.vocab_size,
                lowercase=args.lowercase,
                vocab=vocabdict)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens

eval_batch_size = 100
en_data = batchify(corpus.data[args.corpus_name],
                   eval_batch_size,
                   shuffle=False)
print(len(en_data))
print("Loaded data!")

model_args, idx2word, autoencoder, gan_gen, gan_disc = load_models(
    args.outf, args.epochs, twodecoders=True)

if args.cuda:
    autoencoder = autoencoder.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()

one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

evaluate_generator(1, False)
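Here one and mone are the +1/-1 weights passed to backward() in the WGAN-style updates (Examples 4, 5, and 22): calling errD_real.backward(one) and errD_fake.backward(mone) accumulates gradients with opposite signs, which is equivalent to descending errD_real - errD_fake.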
Example #22
File: train.py Project: manna/ARAE
def train_gan_d(ae_index, batch):
    autoencoder, optimizer_ae = autoencoders[ae_index], ae_optimizers[ae_index]
    gan_disc, optimizer_gan_d = gan_discs[ae_index], gan_d_optimizers[ae_index]

    # clamp parameters to a cube
    for p in gan_disc.parameters():
        p.data.clamp_(-args.gan_clamp, args.gan_clamp)

    autoencoder.train()
    autoencoder.zero_grad()
    gan_disc.train()
    gan_disc.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # batch_size x nhidden
    real_hidden = autoencoder(source, lengths, noise=False, encode_only=True)
    real_hidden.register_hook(make_grad_hook(autoencoder))

    # loss / backprop
    errD_real = gan_disc(real_hidden)
    errD_real.backward(one)

    # negative samples ----------------------------
    # generate fake codes
    # noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size, args.z_size)))
    # noise.data.normal_(0, 1)

    fake_hiddens = []
    for other_index, other_autoencoder in enumerate(autoencoders):
        if other_index == ae_index: continue
        fake_hidden = other_autoencoder(source,
                                        lengths,
                                        noise=False,
                                        encode_only=True)  # TODO: noise=True
        fake_hidden.register_hook(make_grad_hook(
            other_autoencoder))  # maybe register hook? Not sure.
        fake_hiddens.append(fake_hidden)

    # loss / backprop
    # fake_hidden = gan_gen(noise)
    total_errD_fake = None
    errD_fakes = [gan_disc(fh.detach()) for fh in fake_hiddens]
    for errD_fake in errD_fakes:
        errD_fake.backward(mone)
        if total_errD_fake is None:
            total_errD_fake = errD_fake
        else:
            total_errD_fake += errD_fake
    # Alternatively, we might prefer: total_errD_fake.backward(mone)

    # `clip_grad_norm` to prevent exploding gradient problem in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)

    optimizer_gan_d.step()
    optimizer_ae.step()
    errD = -(errD_real - total_errD_fake)

    return errD, errD_real, total_errD_fake
Example #23
File: train.py Project: manna/ARAE
def train_ae(ae_index, batch, total_loss_ae, start_time, i):
    autoencoder, ae_optimizer = autoencoders[ae_index], ae_optimizers[ae_index]
    ae_args = autoencoders_args[ae_index]

    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # Create sentence length mask over padding
    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    # examples x ntokens
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    # output: batch x seq_len x ntokens
    output = autoencoder(source, lengths, noise=True)

    # output_size: batch_size, maxlen, self.ntokens
    flattened_output = output.view(-1, ntokens)

    masked_output = \
        flattened_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output / args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    ae_optimizer.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        # accuracy
        probs = F.softmax(masked_output)
        max_vals, max_indices = torch.max(probs, 1)
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data[0]

        cur_loss = total_loss_ae[0] / args.log_interval
        elapsed = time.time() - start_time
        print(
            '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
            'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}'.format(
                epoch, i, len(ae_args.train_data),
                elapsed * 1000 / args.log_interval, cur_loss,
                math.exp(cur_loss), accuracy))

        with open("./output/{}/logs.txt".format(ae_args.outf), 'a') as f:
            f.write('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}\n'.format(
                        epoch, i, len(ae_args.train_data),
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss), accuracy))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time
Example #24
    def __init__(self, config):
        super(MISA, self).__init__()

        self.config = config
        self.text_size = config.embedding_size
        self.pos_size = config.pos_size
        self.cust_size = config.custom_size

        self.input_sizes = input_sizes = [
            self.text_size, self.pos_size, self.cust_size
        ]
        self.hidden_sizes = hidden_sizes = [
            int(self.text_size),
            int(self.pos_size),
            int(self.cust_size)
        ]
        self.output_size = output_size = config.num_classes
        self.dropout_rate = dropout_rate = config.dropout
        self.activation = self.config.activation()
        self.tanh = nn.Tanh()
        self.hx = to_gpu(torch.randn(2, 1, config.hidden_size * 6))

        rnn = nn.LSTM if self.config.rnncell == "lstm" else nn.GRU
        # defining modules - two layer bidirectional LSTM with layer norm in between

        if self.config.use_bert:

            # Initialize a bert-base-uncased style BERT configuration
            bertconfig = BertConfig.from_pretrained('bert-base-uncased',
                                                    output_hidden_states=True)
            self.bertmodel = BertModel.from_pretrained('bert-base-uncased',
                                                       config=bertconfig)
        else:
            # self.embed = nn.Embedding(len(config.word2id), input_sizes[0])
            self.embed = self.add_embeddings
            self.trnn1 = rnn(input_sizes[0],
                             hidden_sizes[0],
                             bidirectional=True)
            self.trnn2 = rnn(2 * hidden_sizes[0],
                             hidden_sizes[0],
                             bidirectional=True)

        self.prnn1 = rnn(input_sizes[1], hidden_sizes[1], bidirectional=True)
        self.prnn2 = rnn(2 * hidden_sizes[1],
                         hidden_sizes[1],
                         bidirectional=True)

        self.conversation_rnn = nn.GRU(input_size=config.hidden_size * 6,
                                       hidden_size=config.hidden_size * 6,
                                       num_layers=1,
                                       bidirectional=True,
                                       batch_first=True)

        ##########################################
        # mapping modalities to same sized space
        ##########################################
        if self.config.use_bert:
            self.project_t = nn.Sequential()
            self.project_t.add_module(
                'project_t',
                nn.Linear(in_features=768, out_features=config.hidden_size))
            self.project_t.add_module('project_t_activation', self.activation)
            self.project_t.add_module('project_t_layer_norm',
                                      nn.LayerNorm(config.hidden_size))
        else:
            self.project_t = nn.Sequential()
            self.project_t.add_module(
                'project_t',
                nn.Linear(in_features=hidden_sizes[0] * 4,
                          out_features=config.hidden_size))
            self.project_t.add_module('project_t_activation', self.activation)
            self.project_t.add_module('project_t_layer_norm',
                                      nn.LayerNorm(config.hidden_size))

        self.project_p = nn.Sequential()
        self.project_p.add_module(
            'project_p',
            nn.Linear(in_features=hidden_sizes[1] * 4,
                      out_features=config.hidden_size))
        self.project_p.add_module('project_p_activation', self.activation)
        self.project_p.add_module('project_p_layer_norm',
                                  nn.LayerNorm(config.hidden_size))

        self.project_c = nn.Sequential()
        self.project_c.add_module(
            'project_c',
            nn.Linear(in_features=self.cust_size,
                      out_features=config.hidden_size))
        self.project_c.add_module('project_c_activation', self.activation)
        self.project_c.add_module('project_c_layer_norm',
                                  nn.LayerNorm(config.hidden_size))

        ##########################################
        # private encoders
        ##########################################
        self.private_t = nn.Sequential()
        self.private_t.add_module(
            'private_t_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_t.add_module('private_t_activation_1', self.activation)

        self.private_p = nn.Sequential()
        self.private_p.add_module(
            'private_p_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_p.add_module('private_p_activation_1', self.activation)

        self.private_c = nn.Sequential()
        self.private_c.add_module(
            'private_c_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_c.add_module('private_c_activation_1', self.activation)

        ##########################################
        # shared encoder
        ##########################################
        self.shared = nn.Sequential()
        self.shared.add_module(
            'shared_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.shared.add_module('shared_activation_1', self.activation)

        ##########################################
        # reconstruct
        ##########################################
        self.recon_t = nn.Sequential()
        self.recon_t.add_module(
            'recon_t_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.recon_p = nn.Sequential()
        self.recon_p.add_module(
            'recon_p_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.recon_c = nn.Sequential()
        self.recon_c.add_module(
            'recon_c_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))

        self.fusion = nn.Sequential()
        self.fusion.add_module(
            'fusion_layer_1',
            nn.Linear(in_features=self.config.hidden_size * 12,
                      out_features=self.config.hidden_size))
        self.fusion.add_module('fusion_layer_1_dropout',
                               nn.Dropout(dropout_rate))
        self.fusion.add_module('fusion_layer_1_activation', self.activation)

        self.fusion.add_module(
            'fusion_layer_3',
            nn.Linear(in_features=self.config.hidden_size,
                      out_features=output_size))

        self.tlayer_norm = nn.LayerNorm((hidden_sizes[0] * 2, ))
        self.player_norm = nn.LayerNorm((hidden_sizes[1] * 2, ))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.config.hidden_size, nhead=2)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                         num_layers=1)
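
All of the projection and encoder heads above share one pattern: an nn.Sequential assembled with add_module('name', layer) so each stage gets a readable name in the module tree. A self-contained sketch of that pattern with hypothetical sizes (768 in, 128 out):

import torch
import torch.nn as nn

hidden_size = 128  # hypothetical

project = nn.Sequential()
project.add_module('project', nn.Linear(in_features=768, out_features=hidden_size))
project.add_module('project_activation', nn.ReLU())
project.add_module('project_layer_norm', nn.LayerNorm(hidden_size))

x = torch.randn(4, 768)   # e.g. a batch of BERT pooled outputs
print(project(x).shape)   # torch.Size([4, 128])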
Ejemplo n.º 26
0
print("Training...")
with open("{}/log.txt".format(args.outf), 'a') as f:
    f.write('Training...\n')

# schedule of increasing GAN training loops
if args.niters_gan_schedule != "":
    gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 25

fixed_noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

for epoch in range(1, args.epochs + 1):
    # update gan training schedule
    if epoch in gan_schedule:
        niter_gan += 1
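
The schedule logic above bumps `niter_gan` by one at every epoch listed in `args.niters_gan_schedule`. A hedged sketch with a hypothetical flag value of "2-4-6" and the starting value of 1 used by the other examples on this page:

gan_schedule = [int(x) for x in "2-4-6".split("-")]  # hypothetical flag value
niter_gan = 1
for epoch in range(1, 8):
    if epoch in gan_schedule:
        niter_gan += 1
    # niter_gan is 1 at epoch 1, 2 from epoch 2, 3 from epoch 4, 4 from epoch 6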
Ejemplo n.º 27
0
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          iters_per_checkpoint, batch_size, seed, checkpoint_path):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = CrossEntropyLoss()
    model = WaveNet(**wavenet_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    #trainset = Mel2SampOnehot(**data_config)
    trainset = DeepMels(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset,
                              num_workers=1,
                              shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        total_loss = 0
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            x, y = batch
            x = to_gpu(x).float()
            y = to_gpu(y)
            x = (x, y)  # auto-regressive takes outputs as inputs
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus)[0]
            else:
                reduced_loss = loss.data[0]
            loss.backward()
            optimizer.step()

            total_loss += reduced_loss

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/wavenet_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
        print("epoch:{}, total epoch loss:{}".format(epoch, total_loss))
Ejemplo n.º 28
0
def main():
    state_dict = torch.load(args.ae_model)
    with open(args.ae_args) as f:
        ae_args = json.load(f)

    corpus = Corpus(args.data_file,
                    args.dict_file,
                    vocab_size=ae_args['vocab_size'])
    autoencoder = Seq2Seq(emsize=ae_args['emsize'],
                          nhidden=ae_args['nhidden'],
                          ntokens=ae_args['ntokens'],
                          nlayers=ae_args['nlayers'],
                          noise_radius=ae_args['noise_radius'],
                          hidden_init=ae_args['hidden_init'],
                          dropout=ae_args['dropout'],
                          gpu=args.cuda)
    autoencoder.load_state_dict(state_dict)
    for param in autoencoder.parameters():
        param.requires_grad = False
    # save arguments
    with open(os.path.join(out_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f)
    log.info('[Data and AE model loaded.]')

    gan_gen = MLP_G(ninput=args.nhidden,
                    noutput=args.nhidden,
                    layers=args.arch_g)
    gan_disc = MLP_D(ninput=2 * args.nhidden, noutput=1, layers=args.arch_d)
    optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                                 lr=args.lr_gan_g,
                                 betas=(args.beta1, 0.999))
    optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                                 lr=args.lr_gan_d,
                                 betas=(args.beta1, 0.999))
    criterion_ce = nn.CrossEntropyLoss()

    if args.cuda:
        autoencoder = autoencoder.cuda()
        gan_gen = gan_gen.cuda()
        gan_disc = gan_disc.cuda()
        criterion_ce = criterion_ce.cuda()

    one = to_gpu(args.cuda, torch.FloatTensor([1]))
    mone = one * -1
    train_pairs = BatchGen(corpus.get_chunks(size=2), args.batch_size)

    def train_gan_g(batch):
        gan_gen.train()
        gan_gen.zero_grad()

        source, _ = batch
        source = to_gpu(args.cuda, Variable(source))
        source_hidden = autoencoder(source, noise=False, encode_only=True)

        fake_hidden = gan_gen(source_hidden)
        errG = gan_disc(source_hidden, fake_hidden)

        # loss / backprop
        errG.backward(one)
        optimizer_gan_g.step()

        return errG

    def train_gan_d(batch):
        # clamp parameters to a cube
        for p in gan_disc.parameters():
            p.data.clamp_(-args.gan_clamp, args.gan_clamp)

        gan_disc.train()
        gan_disc.zero_grad()

        # positive samples ----------------------------
        # generate real codes
        source, target = batch
        source = to_gpu(args.cuda, Variable(source))
        target = to_gpu(args.cuda, Variable(target))

        # batch_size x nhidden
        source_hidden = autoencoder(source, noise=False, encode_only=True)
        target_hidden = autoencoder(target, noise=False, encode_only=True)

        # loss / backprop
        errD_real = gan_disc(source_hidden, target_hidden)
        errD_real.backward(one)

        # negative samples ----------------------------

        # loss / backprop
        fake_hidden = gan_gen(source_hidden)
        errD_fake = gan_disc(source_hidden.detach(), fake_hidden.detach())
        errD_fake.backward(mone)

        optimizer_gan_d.step()
        errD = -(errD_real - errD_fake)

        return errD, errD_real, errD_fake

    niter = 0
    start_time = datetime.now()

    for t in range(args.updates):
        niter += 1

        # train discriminator/critic
        for i in range(args.niters_gan_d):
            # feed a seen sample within this epoch; good for early training
            errD, errD_real, errD_fake = \
                train_gan_d(next(train_pairs))

        # train generator
        for i in range(args.niters_gan_g):
            errG = train_gan_g(next(train_pairs))

        if niter % args.log_interval == 0:
            eta = str((datetime.now() - start_time) / (t + 1) *
                      (args.updates - t - 1)).split('.')[0]
            log.info('[{}/{}] Loss_D: {:.6f} (real: {:.6f} '
                     'fake: {:.6f}) Loss_G: {:.6f} ETA: {}'.format(
                         niter, args.updates,
                         errD.data.cpu()[0],
                         errD_real.data.cpu()[0],
                         errD_fake.data.cpu()[0],
                         errG.data.cpu()[0], eta))
        if niter % args.save_interval == 0:
            save_model(gan_gen, out_dir, 'gan_gen_model_{}.pt'.format(t))
            save_model(gan_disc, out_dir, 'gan_disc_model_{}.pt'.format(t))
Ejemplo n.º 29
0
    return errD, errD_real, errD_fake


print("Training...")

# schedule of increasing GAN training loops
if niters_gan_schedule != "":
    gan_schedule = [int(x) for x in niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 1

fixed_noise = to_gpu(cuda, Variable(torch.ones(batch_size, z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(cuda, torch.FloatTensor([1]))
mone = one * -1

best_ppl = None
impatience = 0
all_ppl = []
for epoch in range(1, epochs + 1):
    # update gan training schedule
    if epoch in gan_schedule:
        niter_gan += 1
        print("GAN training loop schedule increased to {}".format(niter_gan))
Ejemplo n.º 30
0
def evaluate_autoencoder(whichdecoder, data_source, epoch, seper=""):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        hidden = autoencoder(0, source, lengths, noise=False, encode_only=True)

        # output: batch x seq_len x ntokens
        if whichdecoder == 1:
            output = autoencoder(1, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals1, max_indices1 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices1.eq(masked_target).float()).data[0]
        
            max_values1, max_indices1 = torch.max(output, 2)
            max_indices2 = autoencoder.generate(2, hidden, maxlen=50)
        else:
            output = autoencoder(2, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals2, max_indices2 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices2.eq(masked_target).float()).data[0]

            max_values2, max_indices2 = torch.max(output, 2)
            max_indices1 = autoencoder.generate(1, hidden, maxlen=50)
        
        total_loss += criterion_ce(masked_output/args.temp, masked_target).data
        bcnt += 1

        aeoutf_targ = "%s/%d_output_decoder_%d_targ%s.txt"%(args.outf, epoch, whichdecoder, seper)
        aeoutf_one = "%s/%d_output_decoder_%d_one%s.txt"%(args.outf, epoch, whichdecoder, seper)
        aeoutf_two = "%s/%d_output_decoder_%d_two%s.txt"%(args.outf, epoch, whichdecoder, seper)
        with open(aeoutf_targ, 'w') as f_targ, open(aeoutf_one,'w') as f_one, open(aeoutf_two,'w') as f_two:
            max_indices1 = \
                max_indices1.view(output.size(0), -1).data.cpu().numpy()
            max_indices2 = \
                max_indices2.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            for t, idx1, idx2 in zip(target, max_indices1, max_indices2):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f_targ.write(chars + "\n")
                # transfer sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx1])
                f_one.write(chars + "\n")
                # transfer sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx2])
                f_two.write(chars + "\n")

    return total_loss[0] / len(data_source), all_accuracies/bcnt
Ejemplo n.º 31
0
def train_ae_and_classifier(batch,
                            total_loss_ae,
                            start_time,
                            i,
                            perturb=None,
                            epsilon=0.0,
                            alpha=0.0,
                            pgd_iters=0):
    autoencoder.train()
    autoencoder.zero_grad()
    enc_classifier.train()
    enc_classifier.zero_grad()

    source, target, lengths, tags = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))
    tags = to_gpu(args.cuda, Variable(tags))

    # Create sentence length mask over padding
    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    # examples x ntokens
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    # output: batch x seq_len x ntokens
    output = autoencoder(source, lengths, noise=True)
    # output tags: batch_size x nclasses
    output_encode_only = autoencoder(source,
                                     lengths,
                                     noise=False,
                                     encode_only=True)
    output_classifier = enc_classifier(output_encode_only)

    perturbed_code = None
    if perturb == 'fgsm':
        output_encode_only.retain_grad()
        classifier_loss = criterion_ce(output_classifier, tags)
        enc_classifier.zero_grad()
        classifier_loss.backward(retain_graph=True)
        code_grad = output_encode_only.grad.data
        perturbed_code = fgsm_attack(output_encode_only, epsilon, code_grad)
    elif perturb == 'pgd':
        perturbed_code = output_encode_only.clone().detach()
        for step_idx in range(pgd_iters):
            perturbed_code.requires_grad = True
            adv_scores = enc_classifier(perturbed_code)
            tmp_loss = criterion_ce(adv_scores, tags)
            enc_classifier.zero_grad()
            tmp_loss.backward(retain_graph=True)

            # step in the direction of the gradient
            perturbed_code = perturbed_code + \
                alpha * perturbed_code.grad.sign()

            # Workaround as PyTorch doesn't have elementwise clip
            # from: https://gist.github.com/oscarknagg/45b187c236c6262b1c4bbe2d0920ded6#file-projected_gradient_descent-py
            perturbed_code = torch.max(
                torch.min(perturbed_code, output_encode_only + epsilon),
                output_encode_only - epsilon).detach()
            perturbed_code = torch.clamp(perturbed_code, -0.34, 0.32)
    # output_size: batch_size, maxlen, self.ntokens
    flattened_output = output.view(-1, ntokens)

    masked_output = \
        flattened_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output / args.temp, masked_target)
    classifier_loss = criterion_ce(output_classifier, tags)
    loss += classifier_loss

    if perturbed_code is not None:
        output_classifier_adversarial = enc_classifier(perturbed_code)
        classifier_adversarial_loss = criterion_ce(
            output_classifier_adversarial, tags)
        loss += classifier_adversarial_loss

    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    torch.nn.utils.clip_grad_norm(enc_classifier.parameters(), args.clip)
    optimizer_ae.step()
    optimizer_enc_classifier.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        # accuracy
        probs = F.softmax(masked_output, dim=-1)
        max_vals, max_indices = torch.max(probs, 1)
        _, predicted_tags = torch.max(output_classifier, 1)

        accuracy = torch.mean(max_indices.eq(masked_target).float()).item()
        accuracy_classifier = torch.mean(
            predicted_tags.eq(tags).float()).item()

        cur_loss = total_loss_ae.item() / args.log_interval
        cur_loss_classifier = classifier_loss.item()
        elapsed = time.time() - start_time
        print(
            '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
            'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f} | acc_cla {:8.2f} | loss_cla {:8.2f}'
            .format(epoch, i, len(train_data),
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss), accuracy, accuracy_classifier,
                    cur_loss_classifier))

        with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
            f.write(
                '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f} | acc_cla {:8.2f} | loss_cla {:8.2f}\n'
                .format(epoch, i, len(train_data),
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss), accuracy, accuracy_classifier,
                        cur_loss_classifier))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time
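
`fgsm_attack` is called in the snippet above but not defined in it. A minimal sketch of the standard FGSM step it presumably implements (the name and argument order come from the call site; the body is an assumption):

def fgsm_attack(code, epsilon, code_grad):
    # Fast Gradient Sign Method: step by epsilon along the gradient sign
    return code + epsilon * code_grad.sign()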
Ejemplo n.º 32
0
 def parse_batch(self, batch):
     a, b = batch
     a = to_gpu(a).float()
     b = b.cuda()
     return (a, b)
Ejemplo n.º 33
0
        inverter = inverter.cpu()
        gan_gen = gan_gen.cpu()
        gan_disc = gan_disc.cpu()

    print("Training...")
    with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
        f.write('Training...\n')

    # schedule of increasing GAN training loops
    if args.niters_gan_schedule != "":
        gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
    else:
        gan_schedule = []
    niter_gan = 1

    fixed_noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size, args.z_size)))
    fixed_noise.data.normal_(0, 1)
    one = to_gpu(args.cuda, torch.tensor(1, dtype=torch.float))
    mone = one * -1

    impatience = 0
    all_ppl = []
    best_ppl = None

    for epoch in range(start_epoch, args.epochs + 1):

        # update gan training schedule
        if epoch in gan_schedule:
            niter_gan += 1
            print("GAN training loop schedule increased to {}".format(niter_gan))
            with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
Ejemplo n.º 34
0
    def train(self):

        curr_patience = patience = self.train_config.patience
        num_trials = 1

        self.criterion = criterion = nn.CrossEntropyLoss(reduction="mean")

        self.loss_diff = DiffLoss()
        self.loss_recon = MSE()
        self.loss_cmd = CMD()
        self.loss_sim = SimLoss()

        best_valid_loss = float('inf')

        # lr_scheduler
        # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, factor=0.5, patience=2)
        # lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3, gamma=0.65)
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer,
                                                              gamma=0.3)

        train_losses = []
        # self.eval(mode="test", to_print=True)
        for epoch in range(self.train_config.n_epoch):
            print("epoch: ", epoch)
            print(
                f"learning rate: {self.optimizer.state_dict()['param_groups'][0]['lr']}"
            )

            self.model.train()

            train_loss, train_loss_cls, train_loss_sim, train_loss_diff, train_loss_recon, train_floor = [], [], [], [], [], []

            for batch in self.train_data_loader:
                # print(f"learning rate batch: {self.optimizer.state_dict()['param_groups'][0]['lr']}")
                self.model.zero_grad()
                # t = text, p = pos, c = cust, y = label, l = length
                t, p, c, y, l, bert_sent, bert_sent_type, bert_sent_mask = batch

                t = to_gpu(t)
                p = to_gpu(p)
                c = to_gpu(c)
                y = to_gpu(y)
                l = to_gpu(l)

                bert_sent = to_gpu(bert_sent)
                bert_sent_type = to_gpu(bert_sent_type)
                bert_sent_mask = to_gpu(bert_sent_mask)

                y_tilde = self.model(t, p, c, l, bert_sent, bert_sent_type,
                                     bert_sent_mask)

                y = y.squeeze()
                cls_loss = criterion(y_tilde, y.long())

                diff_loss = self.get_diff_loss()

                recon_loss = self.get_recon_loss()

                similarity_loss = self.get_sim_loss()


                loss = cls_loss + \
                    self.train_config.diff_weight * diff_loss + \
                    self.train_config.sim_weight * similarity_loss + \
                    self.train_config.recon_weight * recon_loss
                # loss = cls_loss
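                # (assumption) the |loss - bottom| + bottom construction below
                # resembles the "flooding" regularizer: under the 0.6 floor the
                # gradient flips sign, pushing the loss back up toward the floor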
                bottom = 0.6
                floor = (loss - bottom).abs() + bottom
                floor.backward()

                torch.nn.utils.clip_grad_value_([
                    param
                    for param in self.model.parameters() if param.requires_grad
                ], 1.5 * self.train_config.clip)
                self.optimizer.step()

                train_loss_cls.append(cls_loss.item())
                train_loss_diff.append(diff_loss.item())
                train_loss_recon.append(recon_loss.item())
                train_loss_sim.append(similarity_loss.item())
                train_loss.append(loss.item())
                train_floor.append(floor.item())

            train_losses.append(train_loss)
            print(f"Training loss: {round(np.mean(train_loss), 4)}")
            print(f"Training floor: {round(np.mean(train_floor), 4)}")
            print(f"cls loss: {round(np.mean(train_loss_cls), 4)}")
            print(f"diff loss: {round(np.mean(train_loss_diff), 4)}")
            print(f"sim loss: {round(np.mean(train_loss_sim), 4)}")
            print(f"recon loss: {round(np.mean(train_loss_recon), 4)}")

            valid_loss, valid_acc = self.eval(mode="dev")

            print(f"valid_loss : {round(valid_loss,4)}")
            print(f"Current patience: {curr_patience}.")
            if valid_loss <= best_valid_loss:
                best_valid_loss = round(valid_loss, 6)
                print("Found new best model on dev set!")
                if not os.path.exists('checkpoints'):
                    os.makedirs('checkpoints')

                torch.save(self.model.state_dict(), f'checkpoints/model.std')
                torch.save(self.optimizer.state_dict(),
                           f'checkpoints/optim.std')
                self.eval(mode="test", to_print=True)
                curr_patience = patience
            else:
                print(f"best_valid_loss : {round(best_valid_loss, 6)}")
                curr_patience -= 1
                if curr_patience <= -1:

                    print(
                        "Running out of patience, loading previous best model."
                    )
                    self.eval(mode="test", to_print=True)
                    num_trials -= 1
                    curr_patience = patience
                    self.model.load_state_dict(
                        torch.load(f'checkpoints/model.std'))
                    self.optimizer.load_state_dict(
                        torch.load(f'checkpoints/optim.std'))
                    lr_scheduler.step()
                    print(
                        f"Current learning rate: {self.optimizer.state_dict()['param_groups'][0]['lr']}"
                    )

            if num_trials <= 0:
                print("Running out of patience, early stopping.")
                break

        self.eval(mode="test", to_print=True)
Ejemplo n.º 35
0
def grad_hook(grad):
    global g_factor
    newgrad = grad * to_gpu(args.cuda, Variable(g_factor))
    return newgrad
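
A hypothetical usage sketch for a hook like `grad_hook`: register_hook attaches it to a tensor, PyTorch calls it with that tensor's gradient during backward, and the returned tensor replaces the gradient.

import torch

g_factor = torch.tensor(0.5)            # hypothetical scaling factor

x = torch.randn(3, requires_grad=True)
handle = x.register_hook(lambda grad: grad * g_factor)  # same idea as grad_hook
y = (x * 2).sum()
y.backward()
print(x.grad)        # each entry is 2 * 0.5 = 1.0
handle.remove()      # detach the hook when done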
Ejemplo n.º 36
0
 def init_hidden(self, bsz):
     zeros1 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
     zeros2 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
     return (to_gpu(self.gpu, zeros1), to_gpu(self.gpu, zeros2))
Ejemplo n.º 37
0
 def init_state(self, bsz):
     zeros = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
     return to_gpu(self.gpu, zeros)
Ejemplo n.º 38
0
 def init_state(self, bsz):
     zeros = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
     return to_gpu(self.gpu, zeros)
Ejemplo n.º 39
0
def evaluate_autoencoder(whichdecoder, data_source, references, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        hidden = autoencoder(0, source, lengths, noise=False, encode_only=True)

        # output: batch x seq_len x ntokens
        if whichdecoder == 1:
            output = autoencoder(1, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals1, max_indices1 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices1.eq(masked_target).float()).data[0]

            max_values1, max_indices1 = torch.max(output, 2)
            max_indices2 = autoencoder.generate(2, hidden, maxlen=50)
        else:
            output = autoencoder(2, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals2, max_indices2 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices2.eq(masked_target).float()).data[0]

            max_values2, max_indices2 = torch.max(output, 2)
            max_indices1 = autoencoder.generate(1, hidden, maxlen=50)

        total_loss += criterion_ce(masked_output /
                                   args.temp, masked_target).data
        bcnt += 1

        aeoutf_from = "{}/{}_output_decoder_{}_from.txt".format(
            args.outf, epoch, whichdecoder)
        aeoutf_tran = "{}/{}_output_decoder_{}_tran.txt".format(
            args.outf, epoch, whichdecoder)
        aeoutf_bleu = "{}/{}_output_decoder_{}_bleu.txt".format(
            args.outf, epoch, whichdecoder)
        candidate = []
        counter = 0
        with open(aeoutf_from, 'w') as f_from, open(aeoutf_tran, 'w') as f_trans, open(aeoutf_bleu, 'w') as f_bleu:
            max_indices1 = \
                max_indices1.view(output.size(0), -1).data.cpu().numpy()
            max_indices2 = \
                max_indices2.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            tran_indices = max_indices2 if whichdecoder == 1 else max_indices1
            for t, tran_idx in zip(target, tran_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f_from.write(chars)
                f_from.write("\n")
                # transfer sentence
                chars = " ".join([corpus.dictionary.idx2word[x]
                                  for x in tran_idx])
                # sentence_bleu expects a tokenized hypothesis, not a string
                candidate = [corpus.dictionary.idx2word[x] for x in tran_idx]
                f_trans.write(chars)
                f_trans.write("\n")
                if counter < len(references):
                    BLEU_score = sentence_bleu(references[counter], candidate)
                    f_bleu.write(str(BLEU_score))
                    f_bleu.write("\n")
                counter += 1

    return total_loss[0] / len(data_source), all_accuracies / bcnt
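
`sentence_bleu` above is presumably NLTK's; it expects tokenized references and a tokenized hypothesis, and short sentences usually need a smoothing function to avoid zero n-gram counts. A minimal sketch under those assumptions:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

references = [["the", "cat", "sat", "on", "the", "mat"]]  # list of tokenized references
candidate = ["the", "cat", "is", "on", "the", "mat"]      # tokenized hypothesis
smooth = SmoothingFunction().method1
score = sentence_bleu(references, candidate, smoothing_function=smooth)
print("BLEU: {:.4f}".format(score))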
Ejemplo n.º 40
0
    return errD, errD_real, errD_fake


print("Training...")
with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
    f.write('Training...\n')

# schedule of increasing GAN training loops
if args.niters_gan_schedule != "":
    gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 1

fixed_noise = to_gpu(args.cuda,
                     Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

best_ppl = None
impatience = 0
all_ppl = []
for epoch in range(1, args.epochs+1):
    # update gan training schedule
    if epoch in gan_schedule:
        niter_gan += 1
        print("GAN training loop schedule increased to {}".format(niter_gan))
        with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
            f.write("GAN training loop schedule increased to {}\n".
                    format(niter_gan))