def batchify(data, bsz):
    if args.model == "FNN":
        # Implement sliding window to generate data in sizes of bsz
        data = np.stack([np.array(data[i:i + bsz]) for i in range(data.shape[0] - bsz + 1)])
        data = torch.as_tensor(data, dtype=torch.int64)
        return data.to(device)
    else:
        # Work out how cleanly we can divide the dataset into bsz parts.
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        data = data.view(bsz, -1).t().contiguous()
        return data.to(device)     
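# A quick sanity check of the two branches above (a sketch: the script-level
# device is stubbed out here, and a window size of 4 stands in for bsz).
import numpy as np
import torch

device = torch.device("cpu")   # stand-in for the script-level device
tokens = torch.arange(10)      # pretend token ids 0..9

# FNN branch: sliding windows of length 4 -> shape (7, 4)
windows = torch.as_tensor(
    np.stack([np.array(tokens[i:i + 4]) for i in range(tokens.shape[0] - 4 + 1)]),
    dtype=torch.int64)
print(windows.shape)           # torch.Size([7, 4])

# RNN branch: trim to a multiple of 4 and fold into 4 columns -> shape (2, 4)
folded = tokens.narrow(0, 0, (tokens.size(0) // 4) * 4).view(4, -1).t().contiguous()
print(folded.shape)            # torch.Size([2, 4])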
Example #2
def train():
    model.train(True)
    total_loss = 0.0
    total_score = 0.0
    batch_num = 0
    end_flag = False
    data_loader.set_train()
    while not end_flag:
        data, target, end_flag = data_loader.get_batch()
        l, b = target.size(0), target.size(1)
        data = data.to(device)
        optimizer.zero_grad()
        output, _ = model(data)

        loss = criterion(output, target.to(device).view(-1))
        loss.backward()

        # gradient clipping
        # torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)

        optimizer.step()
        total_loss += loss.item()

        output = torch.argmax(output, 1).view(l, b).t().contiguous().cpu()
        target = target.t().contiguous()
        total_score += bleu_metric(output, target, 2)

        batch_num += 1

    loss = total_loss / batch_num
    score = total_score / batch_num
    perplexity = math.exp(loss)
    return loss, perplexity, score
Example #3
    def _train_epoch(self, epoch):
        noiseset = [35, 45, 55]
        for i, batch in enumerate(self.train_loader):
            # generate path
            data = batch
            data = data.to(self.device)

            noise = torch.zeros(data.size())
            stdN = np.random.choice(noiseset, size=noise.size()[0])
            for n in range(noise.size()[0]):
                sizeN = noise[0,:,:,:].size()
                noise[n,:,:,:] = torch.FloatTensor(sizeN).normal_(mean=0, std=stdN[n]/255.)
            noise = noise.cuda()
            imgn = data+noise
            model_loss = self._update_model(imgn, noise)
            net_loss = self._update_policy(imgn, noise)

            if i%self.args.iters_per_eval==0:
                print('Epoch: {}, Step: {}, Model loss: {}, Net loss: {}'.format(
                    epoch, i, model_loss, net_loss))

        # pdb.set_trace()
        log = {
            'epo': epoch,
        }

        return log
Example #4
def batchify(data, batch_size):
    """
    Divide the data into batch size

    From sequential data, batchify arranges the dataset into columns

    Example:
        a g m s
        b h n t
        c i o u
        d j p v
        e k q w
        f l r x

        Each of the column is treated independently. This means that
        the depednece of e. g. "g" on "f" cannot be learned, but allows
        for more efficient batch processing

    Args:
        data: List of Tensors, this  are ids obtained after tokenization
        batch_size: Int, size of the batch

    Return:
        batched ids, list of tensors
    """
    # Split the data into
    num_batch = data.size(0) // batch_size

    # Trim off excess elements that do not fit
    data = data.narrow(0, 0, num_batch * batch_size)

    # Evenly divide data across batches
    data = data.view(batch_size, -1).t().contiguous()

    return data.to(device)
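# A small illustration of the column layout described in the docstring above
# (a sketch: torch and a module-level device, e.g. torch.device('cpu'), are
# assumed as in the snippets here; integer ids 0..23 stand in for tokens a..x).
ids = torch.arange(24)
cols = batchify(ids, 4)
print(cols)
# tensor([[ 0,  6, 12, 18],
#         [ 1,  7, 13, 19],
#         [ 2,  8, 14, 20],
#         [ 3,  9, 15, 21],
#         [ 4, 10, 16, 22],
#         [ 5, 11, 17, 23]])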
Example #5
def evaluate(mode='valid'):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    masksum = 0
    with torch.no_grad():
        for i in range(0, len(corpus.textind[mode]), args.batch_size):
            data, lengths = corpus.get_batch(args.batch_size, False, i, mode)
            data = data.to(device)
            lengths = lengths.to(device)
            hidden = model.init_hidden(data.shape[1])
            for seqind, j in enumerate(range(0, data.shape[0] - 1, args.bptt)):
                ei = min(j + args.bptt, data.shape[0] - 1)
                partoutput, hidden = model(data[j:ei], hidden)
                lossmat = criterion(partoutput.transpose(1, 2),
                                    data[j + 1:ei + 1])
                if (lengths >= ei).sum() == lengths.shape[0]:
                    # every sequence in the batch extends past this chunk,
                    # so no masking is needed
                    total_loss += lossmat.sum()
                    masksum += lossmat.shape[0] * lossmat.shape[1]
                else:
                    # mask out loss terms that fall beyond each sequence's length
                    mask = (torch.arange(ei - j).to(device).expand(
                        len(lengths), ei - j) <
                            (lengths - j).unsqueeze(1)).t().float()
                    total_loss += (lossmat * mask).sum()
                    masksum += mask.sum()
    return total_loss / masksum
Example #6
def evaluate():
    model.train(False)
    total_loss = 0.0
    total_score = 0.0
    total_score3 = 0.0
    total_score4 = 0.0
    batch_num = 0
    end_flag = False
    data_loader.set_valid()
    while not end_flag:
        data, target, end_flag = data_loader.get_batch()
        data = data.to(device)
        l, b = target.size(0), target.size(1)
        output, _ = model(data)

        loss = criterion(output, target.to(device).view(-1))
        total_loss += loss.item()

        output = torch.argmax(output, 1).view(l, b).t().contiguous().cpu()
        target = target.t().contiguous()
        total_score += bleu_metric(output, target, 2)
        total_score3 += bleu_metric(output, target, 3)
        total_score4 += bleu_metric(output, target, 4)

        batch_num += 1

    loss = total_loss / batch_num
    score = total_score / batch_num
    # print("{:.4f}, {:.4f}\n".format(total_score3 / batch_num, total_score4 / batch_num))
    perplexity = math.exp(loss)
    return loss, perplexity, score
Example #7
def train():
    # Turn on training mode which enables dropout.
    model.train()
    random.shuffle(corpus.textind['train'])
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    for batch, i in enumerate(
            range(0, len(corpus.textind['train']), args.batch_size)):
        data, lengths = corpus.get_batch(args.batch_size, False, i, 'train')
        data = data.to(device)
        lengths = lengths.to(device)
        hidden = model.init_hidden(data.shape[1])
        loss = 0
        masksum = 0
        model.zero_grad()
        for seqind, j in enumerate(range(0, data.shape[0] - 1, args.bptt)):
            # data.shape[0] - 1 to not let EOU pass as input
            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            ei = min(j + args.bptt, data.shape[0] - 1)
            hidden = repackage_hidden(hidden)
            partoutput, hidden = model(data[j:ei], hidden)
            lossmat = criterion(partoutput.transpose(1, 2), data[j + 1:ei + 1])
            if (lengths >= ei).sum() == lengths.shape[0]:
                temploss = lossmat.sum()
                tempmasksum = lossmat.shape[0] * lossmat.shape[1]
            else:
                mask = (torch.arange(ei - j).to(device).expand(
                    len(lengths), ei - j) <
                        (lengths - j).unsqueeze(1)).t().float()
                temploss = (lossmat * mask).sum()
                tempmasksum = mask.sum()
            loss += temploss.data
            masksum += tempmasksum.data
            (temploss / tempmasksum).backward()
        loss /= masksum
        # loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-lr)

        total_loss += loss

        if batch % args.log_interval == 0 and batch != 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.3f} | ppl {:8.3f}'.format(
                    epoch, batch,
                    len(corpus.textind['train']) // args.batch_size, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time()
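# repackage_hidden is not shown in this listing; a minimal sketch of the usual
# definition (as in the PyTorch word_language_model example), which detaches the
# hidden state so gradients do not flow back past the current BPTT chunk:
def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detached from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)   # handles LSTM (h, c) tuples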
Example #8
def test(i, key, shape, rand=False, randFactor=256):
    global best_acc
    test_loss = 0
    correct = 0
    if (not rand) or (len(shape) != 4):
        model = nin.Net()
        pretrained_model = torch.load(args.pretrained)
        best_acc = pretrained_model['best_acc']
        model.load_state_dict(pretrained_model['state_dict'])
        model.to(device)
        bin_op = util.BinOp(model)
        model.eval()
        bin_op.binarization()
        state_dict = model.state_dict()

    if len(shape) == 4:
        size1 = shape[1]
        size2 = shape[2]
        size3 = shape[3]
        if rand:
            if (int(i / (size2 * size3)) % int(size1)) == torch.randint(
                    0, size1 - 1, [1]):
                model = nin.Net()
                pretrained_model = torch.load(args.pretrained)
                model.load_state_dict(pretrained_model['state_dict'])
                model.to(device)
                bin_op = util.BinOp(model)
                model.eval()
                bin_op.binarization()
                state_dict = model.state_dict()
                (state_dict[key][int(i / size1 / size2 / size3)][int(
                    i / size2 / size3 % size1)][int(i / size3 % size2)][int(
                        i % size3)]).mul_(-1)
            else:
                return 100
        else:
            (state_dict[key][int(i / size1 / size2 / size3)][int(
                i / size2 / size3 % size1)][int(i / size3 % size2)][int(
                    i % size3)]).mul_(-1)

    if len(shape) == 1:
        state_dict[key][i].mul_(-1)

    if len(shape) == 2:
        size = state_dict[key].shape[1]
        (state_dict[key][int(i / size)][i % size]).mul_(-1)

    with torch.no_grad():
        for data, target in testloader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            test_loss += criterion(output, target).data.item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    bin_op.restore()
    acc = 100. * float(correct) / len(testloader.dataset)
    return acc
Example #9
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz  # GAM~ size of 'data' floor-divided by 'bsz' (batch size): the integer quotient
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # GAM~ i.e. the elements excluded by the floor division above
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # GAM~ makes a matrix of size nbatch x bsz (batch size, default 20)
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #10
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #11
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #12
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    # Tensor.size(n) returns the size of the n-th dimension; 0 is the first dimension.
    # Here the data is a one-dimensional Tensor.

    # bsz is the batch size.
    # nbatch is the number of batches.
    # Note the floor division: there may be leftover data, which is handled below.
    nbatch = data.size(0) // bsz
    # Tensor.narrow(dim, start, length):
    # for a 2*3*4*5 tensor, dim ranges over [-4, 3]; 0 selects the first dimension
    # (the one of size 2), 1 the second (size 3), 2 the third (size 4), and so on.
    # Along the chosen dimension, the kept range is [start, start + length).
    # For example:
    # x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) is a 3*3 matrix
    # torch.narrow(x, 0, 0, 2) ===>
    # tensor([[ 1,  2,  3],
    #         [ 4,  5,  6]])
    # torch.narrow(x, 1, 1, 2) ===>
    # tensor([[ 2,  3],
    #         [ 5,  6],
    #         [ 8,  9]])

    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # Keep only batch-size * number-of-batches elements; slicing along the
    # first dimension preserves the original order.
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # view() essentially reshapes the Tensor; -1 means that dimension is inferred.
    # t() transposes a 2-D matrix (the input must be 2-D), e.g. 2*3 becomes 3*2.

    # Note that reshape and transpose order the elements differently; the example
    # below gives the same shape but a different element order:
    # x = torch.Tensor(2,3)
    # x.reshape(3,2) != x.t()

    # data.view(bsz, -1) lays each of the bsz streams out as a row;
    # the following .t() turns each stream into a column.

    # So the question is: why not reshape directly to (nbatch, bsz)?
    # The re-ordering appears to be deliberate:
    # 1 2 3 4 5 6 7 8 9 10 11 12
    # == == == == == == == == == == == == == ==
    # call view()
    # bsz = 4
    # nbatch = 3
    # 1   2   3
    # 4   5   6
    # 7   8   9
    # 10  11  12
    # == == == == == == == == == == == == == ==
    # call t()
    # 1   4   7   10
    # 2   5   8   11
    # 3   6   9   12
    # == == == == == == == == == == == == == ==
    # contiguous()?
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
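# A runnable version of the 1..12 walk-through in the comments above, showing
# that view(bsz, -1).t() and a direct view(-1, bsz) give the same shape but a
# different element order (sketch; only torch is needed):
import torch

x = torch.arange(1, 13)
print(x.view(4, -1).t())   # the layout used above: each column is a contiguous stream
# tensor([[ 1,  4,  7, 10],
#         [ 2,  5,  8, 11],
#         [ 3,  6,  9, 12]])
print(x.view(-1, 4))       # a direct reshape: same shape, different element order
# tensor([[ 1,  2,  3,  4],
#         [ 5,  6,  7,  8],
#         [ 9, 10, 11, 12]])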
Example #13
    def _valid_epoch(self):
        """
        validation after training an epoch
        :return:
        """
        noiseset = [35, 45, 55]
        self.score_net.eval()
        loss_all = 0
        stop_true = list()
        stop_pred = list()
        q_all = list()
        # check over all
        for i, batch in enumerate(self.val_loader):
            data = batch
            data = data.to(self.device)
            with torch.no_grad():
                noise = torch.zeros(data.size())
                stdN = np.random.choice(noiseset, size=noise.size()[0])
                for n in range(noise.size()[0]):
                    sizeN = noise[0,:,:,:].size()
                    noise[n,:,:,:] = torch.FloatTensor(sizeN).normal_(mean=0, std=stdN[n]/255.)
                noise = noise.cuda()
                xhs = self.model(data+noise)
                scores = self.score_net(data+noise, xhs)

                stop_idx = self.q_posterior(self.args.policy_type, scores,
                    stochastic=False, device=self.device)
                q = self.q_posterior(self.args.policy_type, scores, stochastic=True,
                    device=self.device)
                stop_pred.append(stop_idx)
                q_all.append(q)

                p_true, _ = self.true_posterior(self.args, xhs, noise)
                p = max_onehot(p_true, dim=-1, device=self.device)
                stop_true.append(p)
                # validation loss
                if self.args.kl_type == 'forward':
                    loss, _ = self.forward_kl_loss(noise, xhs, scores, p_det=True)
                else:
                    assert self.args.kl_type == 'backward'
                    loss, _ = self.backward_kl_loss(noise, xhs, scores)
                loss_all += loss
        

        # pdb.set_trace()

        if self.args.stochastic:
            log = {
                'val loss': loss_all / (i + 1),  # average over the number of batches
                'sto q': torch.mean(torch.cat(q_all, dim=0), dim=0)
            }
        else:
            log = {
                'val loss': loss_all / (i + 1),  # average over the number of batches
                'det q': torch.mean(torch.cat(stop_pred, dim=0), dim=0)
            }
        return log, log
Example #14
def validate_model(net, criterion, valid_loader):
    valid_loss = 0.0
    net.eval()
    for data, target in valid_loader:
        data, target = data.to(device), target.to(device)
        output = net(data)
        loss = criterion(output, target)
        valid_loss += loss.item() * data.size(0)
    return valid_loss
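# A possible way to consume the return value (an assumption about the caller,
# which is not shown in this listing): the loop accumulates loss.item() * data.size(0),
# so dividing by the dataset size gives the mean per-sample validation loss.
avg_valid_loss = validate_model(net, criterion, valid_loader) / len(valid_loader.dataset)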
Example #15
def cnn_training_step(model, optimizer, data, labels, device='cpu'):
    b_x = data.to(device)   # batch x
    b_y = labels.to(device)   # batch y
    output = model(b_x)            # cnn final output
    criterion = af.get_loss_criterion()
    loss = criterion(output, b_y)   # cross entropy loss
    optimizer.zero_grad()           # clear gradients for this training step
    loss.backward()                 # backpropagation, compute gradients
    optimizer.step()                # apply gradients
Example #16
def get_batch(source, source_batch, i):
    """Construct the model's input and target data for one batch."""
    data = torch.zeros(args.bsz, 1, args.bptt, args.ninp)
    target = torch.zeros(args.bsz, dtype=torch.long)
    batch_index = source_batch[i]
    for j in range(args.bsz):
        data[j, 0, :, :] = torch.from_numpy(source[0][batch_index[j] - args.bptt + 1: batch_index[j] + 1]).float()
        target[j] = int(source[1][batch_index[j]])
    return data.to(device), target.to(device)
Example #17
    def _train_epoch(self, epoch):
        # n outputs, n-1 nets
        self.score_net.train()
        noiseset = [35, 45, 55]

        total_loss = 0.0
        for i, batch in enumerate(self.train_loader):
            # generate path
            data = batch
            data = data.to(self.device)

            noise = torch.zeros(data.size())
            stdN = np.random.choice(noiseset, size=noise.size()[0])
            for n in range(noise.size()[0]):
                sizeN = noise[0,:,:,:].size()
                noise[n,:,:,:] = torch.FloatTensor(sizeN).normal_(mean=0, std=stdN[n]/255.)
            noise = noise.cuda()

            xhs = self.model(data+noise)
            scores = self.score_net(data+noise, xhs)

            # stop_idx = max_onehot(scores, dim=-1, device=self.device)
            # pred_idx = torch.argmax(stop_idx, dim=-1)
            # p_true, _ = self.true_posterior(self.args, xhs, noise)
            # p_true = torch.stack([p_true[:, t] for t in self.nz_post.values()], dim=1)
            # true_idx = max_onehot(p_true, dim=-1, device=self.device)
            # true_idx = torch.argmax(true_idx, dim=-1)
            # pdb.set_trace()

            self.optimizer.zero_grad()
            # loss
            if self.args.kl_type == 'forward':
                loss, _ = self.forward_kl_loss(noise, xhs, scores, p_det=True)
            else:
                assert self.args.kl_type == 'backward'
                loss, _ = self.backward_kl_loss(noise, xhs, scores)

            # backward
            loss.backward()
            self.optimizer.step()

            if i%self.args.iters_per_eval==0:
                q = self.q_posterior(self.args.policy_type, scores, stochastic=True,
                    device=self.device)
                print('Epoch: {}, Step: {}, Loss: {}'.format(epoch, i, loss))
                print(torch.mean(q, dim=0).detach().cpu().numpy())

            total_loss += loss.item()

        log = {
            'epo': epoch,
            'train loss': total_loss / (i + 1)  # average over the number of batches
        }

        return log
Example #18
def validate_model(net, criterion, valid_loader):
    valid_loss = 0.0
    net.eval()
    accs = []
    for data, target in valid_loader:
        data, target = data.to(device), target.to(device)
        output = net(data)
        loss = criterion(output, target)
        valid_loss += loss.item() * data.size(0)
        accs.append(metrics.acc(output.detach(), target))
    return valid_loss, np.mean(accs)
Example #19
def train_model(net, optimizer, criterion, train_loader):
    train_loss = 0.0
    net.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
    return train_loss
Example #20
def batchify(data, bsz):
    # data : [len(train.txt),]
    # Work out how cleanly we can divide the dataset into bsz parts.
    # data.size(0) == len(tokens) + 1 ('<eos>')
    nbatch = data.size(0) // bsz  # integer quotient
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # The effect is to discard the leftover remainder at the end of the data.
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # .t() takes the transpose.
    # .contiguous() returns a tensor with the same data laid out contiguously in
    # memory; if the original tensor is already contiguous, it is returned as-is.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
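# A quick check of the .contiguous() note above (sketch; only torch is needed):
# transposing only changes strides, so the transposed view is not contiguous
# until .contiguous() copies the data into row-major order.
import torch

x = torch.arange(12)
cols = x.view(4, -1).t()                   # transposed view, shape (3, 4)
print(cols.is_contiguous())                # False
print(cols.contiguous().is_contiguous())   # True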
Example #21
def batchify(data, bsz, random_start_idx=False):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Randomize the starting offset within the trimmed-off remainder (if any)
    remainder = data.size(0) % bsz
    if random_start_idx and remainder > 0:
        start_idx = random.randint(0, remainder - 1)
    else:
        start_idx = 0
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, start_idx, nbatch * bsz)
    # Evenly divide the data across the bsz batches
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #22
def train_model(net, optimizer, criterion, train_loader):
    train_loss = 0.0
    net.train()
    accs = []
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        accs.append(metrics.acc(output.detach(), target))
    return train_loss, np.mean(accs)
Example #23
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    with torch.no_grad():
        total_steps = data_source.size(1) - args.n - 1
        for step_num in range(1, total_steps):
            data, targets = get_batch(data_source, step_num, args.n)
            data = data.to(device)
            targets = targets.to(device)
            output = model(data)
            total_loss += criterion(output, targets).item()
    return total_loss / total_steps
Example #24
def evaluate(eval_hidden, epoch):
    net.eval()
    data_loader.set_valid()
    data, labels, end = data_loader.get_batch()
    data = data.to(device)
    labels = labels.to(device)

    output, eval_hidden = net(data, eval_hidden)
    loss = criterion(output, labels)
    pp = torch.exp(loss)
    if args.save:
        writer.add_scalar('eval loss', loss, epoch)
        writer.add_scalar('eval pp', pp, epoch)

    return eval_hidden, loss, pp
Example #25
def train_epoch(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break
Example #26
def batchify(data, bptt, bsz):
    # Parisa's modification
    lcm = int(bptt * bsz)
    print('number of tokens in data tensor for each batch is {}'.format(lcm))
    # Parisa's modification
    
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // lcm
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * lcm)
    
    #Parisa's Modification
    # Evenly divide the data across the bsz batches.
    data = data.view(-1, bptt).contiguous()
    #Parisa's Modification
    
    return data.to(device)
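# A quick shape check of this variant (a sketch: torch and a module-level device
# are assumed as above). Unlike the standard batchify, each row here is one
# contiguous bptt-length segment rather than one time step across bsz streams.
x = torch.arange(24)
print(batchify(x, bptt=4, bsz=3).shape)   # torch.Size([6, 4]); rows are segments [0..3], [4..7], ...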
Example #27
def train(optimizer):
    epoch_loss = 0.
    interval_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)

    # Turn on training mode which enables dropout.
    model.train()
    # Training for the n-gram case has n - 1 fewer steps over the whole corpus, because a prediction is only valid once n words of context are available.
    total_steps = train_data.size(1) - args.n - 1
    for step_num in range(1, total_steps):
        data, target = get_batch(train_data, step_num, args.n)
        data = data.to(device)
        target = target.to(device)

        predicted_logits = model(data)
        loss = criterion(predicted_logits, target)

        # reset gradients, backpropagate the loss, and step the optimizer
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        interval_loss += loss.item()
        if step_num % args.log_interval == 0 and step_num > 0:
            cur_loss = interval_loss / args.log_interval
            elapsed = time.time() - start_time
            ppl = 0
            try:
                ppl = math.exp(cur_loss)
            except OverflowError:
                print(
                    "Loss too large to exponentiate; using inf for perplexity instead."
                )
                ppl = float('inf')
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, step_num, total_steps, args.lr,
                    elapsed * 1000 / args.log_interval, cur_loss, ppl))
            epoch_loss += interval_loss
            interval_loss = 0
            start_time = time.time()
        if args.dry_run:
            break
    return epoch_loss / total_steps
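# get_batch is not included in this listing; a hypothetical sketch of what it
# might look like for the n-gram setup above, assuming train_data has shape
# (batch_size, sequence_length) as suggested by the use of train_data.size(1):
def get_batch(source, i, n):
    # the previous n tokens are the context, the following token is the target
    data = source[:, i:i + n]
    target = source[:, i + n]
    return data, target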
Example #28
    def train(audio_model, epoch, log_interval):
        audio_model.train()
        for batch_idx, (data, target) in enumerate(train_loader):

            data = data.to(device)
            target = target.to(device)

            # apply transform and model on whole batch directly on device
            data = transform(data)
            output = audio_model(data)

            # negative log-likelihood for a tensor of size (batch x 1 x n_output)
            loss = F.nll_loss(output.squeeze(), target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print training stats
            if batch_idx % log_interval == 0:
                print(
                    f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"
                )

            # update progress bar
            pbar.update(pbar_update)
            # record loss
            losses.append(loss.item())

        tensorboard_writer.add_scalar("train loss", losses[-1], epoch)

        # save model
        checkpoints_path = os.path.join(save_path, "checkpoints")
        if not os.path.exists(checkpoints_path):
            os.makedirs(checkpoints_path)
        model_save_path = os.path.join(checkpoints_path,
                                       "model_{}.pt".format(epoch))
        print("saving to", model_save_path, "...")
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': audio_model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }, model_save_path)
        print("...saved")
Example #29
    def save_imgs(args, score_net, model, data_loader, nz_post, device, folder,
            noiseset=[35, 45, 55, 65, 75], noiseL_B=[0,75]):
        if not os.path.exists(folder):
            os.makedirs(folder)
        model.eval()
        score_net.eval()
        np.random.seed(seed=args.seed)
        test_noiseL = np.random.choice(noiseset, size=len(data_loader.dataset))
        print('Average noise level: ', np.average(test_noiseL))
        predictions = list()
        stops = list()
        b_y = list()
        imgns = list()
        psnrs = list()
        img_pred = list()
        for i, batch in enumerate(data_loader):
            data = batch
            data = data.to(device)
            noise = torch.FloatTensor(data.size()).normal_(mean=0, 
                std=test_noiseL[i]/255., generator=torch.manual_seed(args.seed))
            noise = noise.cuda()
            with torch.no_grad():
                imgn = data+noise
                xhs = model(imgn)
            scores = score_net(imgn, xhs)
            stop_idx = PolicyKL.stop_idx(args.policy_type, scores, stochastic=False,
                device=device)
            q = PolicyKL.q_posterior(args.policy_type, scores, stochastic=True,
                device=device)

            index = torch.argmax(stop_idx, axis=-1)
            # pdb.set_trace()
            prediction = xhs[nz_post[index.cpu().numpy()[0]]]
            pred = torch.clamp(imgn-prediction, 0., 1.)
            psnr = batch_PSNR(pred, data, 1.)
            psnrs.append(psnr)
            # b_y.append(data)
            # imgns.append(imgn)
            # img_pred.append(pred)
            # pdb.set_trace()
            save_image(data[0], os.path.join(folder, '{}_raw.png'.format(i)))
            save_image(imgn[0], os.path.join(folder, '{}_imgn.png'.format(i)))
            save_image(pred[0], os.path.join(folder, '{}_pred.png'.format(i)))
        print('The test PSNR is ', np.average(psnrs))
        np.save(os.path.join(folder,'psnr.npy'), np.array(psnrs))
Example #30
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example #31
def train(model, writer, train_loader, optimizer, criterion, epoch, task):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(args.device), target.to(args.device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(train_loader.dataset),
                100.0 * batch_idx / len(train_loader),
                loss.item(),
            ))

            t = (len(train_loader) * epoch + batch_idx) * args.batch_size
            writer.add_scalar("train_{}/loss".format(task), loss.item(), t)