Example #1
def data_generator(dataset, batch_size, shuffle):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=True, num_workers=0)
    for _x, _y, _B in dataloader:
        # replicate the grayscale channel to 3 channels and scale to [0, 1]
        x = _x[:, None].expand(_x.shape[0], 3, _x.shape[1], _x.shape[2]).float() / 255.
        y = _y.squeeze(1)
        b = _B.squeeze(1).float() / 200  # scale bounding boxes (images are 200x200 elsewhere in this codebase)
        yield cuda(x), cuda(y), cuda(b)
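A minimal usage sketch (the dataset name is hypothetical; cuda is the same helper used throughout these examples):

train_loader = data_generator(mnist_train, batch_size=32, shuffle=True)
x, y, b = next(train_loader)  # x: (32, 3, H, W) floats in [0, 1]; b: scaled boxes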
Example #2
def train(model, optimizer, train_iter, vocab_size, grad_clip, padding_idx):
    model.train()  # put model in train mode (this is important because of dropout)

    total_loss = 0
    for batch in train_iter:
        # calculate model predictions
        question, answer = cuda(batch.question), cuda(batch.answer)
        outputs = model(question, answer)

        # calculate loss and backpropagate errors
        loss = F.nll_loss(
            outputs.view(-1, vocab_size),
            answer[1:].view(-1),  # answer[1:] skips the <sos> token
            ignore_index=padding_idx)
        optimizer.zero_grad()  # reset gradients once per batch, not once per epoch
        loss.backward()

        total_loss += loss.item()  # .item() replaces the deprecated loss.data[0]

        # clip gradients to avoid exploding gradient
        clip_grad_norm_(model.parameters(), grad_clip)

        # update parameters
        optimizer.step()

    return total_loss / len(train_iter)
Example #3
def preprocess_mnistmulti(item):
    _x, _y, _B = item
    # replicate the grayscale channel to 3 channels and scale to [0, 1]
    x = _x[:, None].expand(_x.shape[0], 3, _x.shape[1],
                           _x.shape[2]).float() / 255.
    # sort the digits, then pack them into a single decimal label
    y = _y.sort(dim=-1)[0]
    n_digits = y.shape[1]
    new_y = y[:, 0]
    for digit in range(1, n_digits):
        new_y = new_y * 10 + y[:, digit]
    y = new_y
    b = _B.squeeze(1).float()
    return cuda(x), cuda(y), cuda(b)
Example #4
def main():
    vocab, train_iter, val_iter, test_iter = dataset_factory(
        'twitter-customer-support')

    epochs = 100
    embedding_size = 20
    hidden_size = 100
    vocab_size = len(vocab)
    padding_idx = vocab.stoi['<pad>']

    encoder = Encoder(vocab_size, embedding_size, hidden_size)
    decoder = Decoder(vocab_size, embedding_size, hidden_size)
    seq2seq = cuda(Seq2Seq(encoder, decoder, vocab_size))

    optimizer = optim.Adam(seq2seq.parameters())

    best_val_loss = None
    for epoch in range(epochs):
        # calculate train and val loss
        train_loss = train(seq2seq, optimizer, train_iter, vocab_size, 5,
                           padding_idx)
        val_loss = evaluate(seq2seq, val_iter, vocab_size, padding_idx)
        print("[Epoch=%d] train_loss %f - val_loss %f" %
              (epoch, train_loss, val_loss))

        # save model if model achieved best val loss
        if best_val_loss is None or val_loss < best_val_loss:
            print('Saving model...')
            save_model(seq2seq, epoch, val_loss)
            best_val_loss = val_loss
Example #5
def evaluate(model, val_iter, vocab_size, padding_idx):
    model.eval()  # put model in eval mode (this is important because of dropout)

    total_loss = 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for batch in val_iter:
            # calculate model predictions
            question, answer = cuda(batch.question), cuda(batch.answer)
            outputs = model(question, answer)

            # calculate batch loss
            loss = F.nll_loss(
                outputs.view(-1, vocab_size),
                answer[1:].view(-1),  # answer[1:] skips the <sos> token
                ignore_index=padding_idx)
            total_loss += loss.item()  # .item() replaces the deprecated loss.data[0]

    return total_loss / len(val_iter)
Example #6
    def forward(self, src, trg):
        batch_size = src.size(1)
        # -1 because the first token in every sequence is <sos>, so the decoder
        # produces one step fewer than the target length
        trg_seq_len = trg.size(0) - 1
        outputs = cuda(torch.zeros(trg_seq_len, batch_size, self.vocab_size))

        encoder_outputs, h_n = self.encoder(src)

        hidden = h_n  # final hidden state of every encoder layer (t = seq_len)
        input_word = trg[0]  # <sos> token for the whole batch
        for t in range(trg_seq_len):
            output, hidden = self.decoder(input_word, hidden)
            outputs[t] = output
            # greedy decoding: feed the most likely word back into the decoder
            _, argmax = output.detach().max(dim=1)
            input_word = argmax

        return outputs
Example #7
    def __init__(self, embeddings, ignore_idx):
        super().__init__()
        voc_size = embeddings.shape[0]

        # Compute pairwise cosine similarities, one row per vocabulary word
        print("Computing word similarities...")
        similarities = []
        for i in tqdm(range(voc_size)):
            similarities.append(
                F.cosine_similarity(embeddings[i].expand_as(embeddings),
                                    embeddings))
        similarities = cuda(torch.stack(similarities))

        # Zero out the row for the padding index so it incurs no penalty
        similarities[ignore_idx] = 0
        self.similarities = similarities
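The cached matrix is indexed by word pairs; a lookup sketch under the same assumptions (instance name and indices are hypothetical):

# reg = the regularizer instance built above; similarity of word 5 to word 12:
# reg.similarities[5, 12] == F.cosine_similarity(embeddings[5], embeddings[12], dim=0)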
Example #8
    def forward(self, t, row, col, lvl=None):
        """
        row and col are not required here (unified interface with the
        resolution regularizer).
        """
        if lvl is None:
            lvl = self.n_levels

        # Pair every node's bounding-box parameters with its parent's
        pc_par = []
        pc_chd = []

        for l in range(1, lvl + 1):
            for i in noderange(self.n_branches, l):
                pc_par.append(t[(i - 1) // self.n_branches].b)
                pc_chd.append(t[i].b)

        if lvl >= 1:
            loss_pc = F_reg_pc(T.stack(pc_par, 1), T.stack(pc_chd, 1))
        else:
            loss_pc = cuda(T.zeros(1))
        return self.coef * loss_pc
Example #9
def Train_dis_BCE(netD, netG, real_loader, epochs=1, out=None):
    best_loss = np.inf
    for _ in range(epochs):
        for i, real in enumerate(real_loader):
            size = len(real)
            fake = netG.sample(size)
            # concatenate generated and real samples; label fakes 0 and reals 1
            data = util.Variable(T.cat([fake, util.cuda(real)], dim=0))
            label = util.Variable(T.cat([T.zeros(size, 1), T.ones(size, 1)]))
            netD.optim.zero_grad()
            loss = netD.BCELoss(data, label)
            loss.backward()
            netD.optim.step()
            # decay the learning rate by 3% every 10 batches
            if i % 10 == 0 and i != 0:
                for param_group in netD.optim.param_groups:
                    param_group['lr'] *= (1 - 0.03)
        if out and loss.item() < best_loss:
            T.save(netD.state_dict(), out + ".pkg")
            best_loss = loss.item()
    return loss.item()  # loss of the last batch seen
Example #10
    def __init__(self,
                 stop_idcs,
                 embeddings,
                 ignore_idx,
                 N=5,
                 normalization=True):
        super().__init__()
        voc_size = embeddings.shape[0]
        all_targets = []
        print("Computing word similarities...")
        for word_idx in tqdm(range(voc_size)):
            target = torch.zeros(voc_size)
            if word_idx != ignore_idx:
                if word_idx not in stop_idcs:
                    embedding = embeddings[word_idx]
                    # Compute similarities
                    similarities = F.cosine_similarity(
                        embedding.expand_as(embeddings), embeddings)

                    # Get top N word neighbors with their similarities
                    similarities, indices = torch.sort(similarities,
                                                       descending=True)
                    indices = indices[:N]
                    similarities = similarities[:N]

                    # Normalize computed similarities
                    if normalization:
                        normalization_factor = torch.sum(similarities)
                    else:
                        normalization_factor = 1
                    weights = similarities / normalization_factor
                    for i, idx in enumerate(indices):
                        target[idx] = weights[i]
                else:
                    # stop words are their own sole target (weight 1 on themselves)
                    target[word_idx] = 1
            all_targets.append(target)
        soft_targets = cuda(torch.stack(all_targets))
        self.soft_targets = soft_targets
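A sketch of a single resulting row, with hypothetical neighbor indices and similarities:

# if N=3 and word 42's nearest neighbors are words 42, 17, 99 with cosine
# similarities 1.0, 0.8, 0.7, normalization divides by 2.5, so the target row
# holds target[42] = 0.40, target[17] = 0.32, target[99] = 0.28 (summing to 1)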
Example #11
    def __init__(self,
                 hidden_size,
                 voc_size,
                 padding_idx,
                 init_idx,
                 max_len,
                 embeddings=None,
                 embedding_dim=300):
        super().__init__()

        # Sizes
        if embeddings is not None:
            self.embedding_dim = embeddings.shape[1]
        else:
            self.embedding_dim = embedding_dim

        self.hidden_size = hidden_size
        self.voc_size = voc_size
        self.max_len = max_len

        # Indices
        self.init_idx = init_idx
        self.padding_idx = padding_idx

        # Layers
        if embeddings is not None:
            self.embeddings = cuda(embeddings)
            self.emb = nn.Embedding.from_pretrained(self.embeddings,
                                                    freeze=True)
        else:
            self.emb = nn.Embedding(self.voc_size, self.embedding_dim)
        self.enc = nn.LSTM(self.embedding_dim,
                           self.hidden_size,
                           batch_first=True)
        self.dec = nn.LSTMCell(self.embedding_dim, self.hidden_size)
        self.lin = nn.Linear(self.hidden_size, self.voc_size)
        self.dropout = nn.Dropout(p=0.5)
Example #12
class TemporaryModule(T.nn.Module):
    '''
    Hacks around the restriction in cleverhans that requires a 2D logits tensor.
    '''
    def __init__(self, model):
        T.nn.Module.__init__(self)
        self.model = model

    def forward(self, x):
        y = self.model(x)
        if y.dim() == 3:
            return y.squeeze(1)
        else:
            return y


#model = cuda(DFSGlimpseSingleObjectClassifier())
model = cuda(tvmodels.ResNet(tvmodels.resnet.BasicBlock, [2, 2, 2, 2], 10))
model.load_state_dict(T.load('model.pt'))

s = tf.Session()
x_op = tf.placeholder(tf.float32, shape=(None, 3, 200, 200))

tf_model_fn = convert_pytorch_model_to_tf(cuda(TemporaryModule(model)))
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

fgsm_op = FastGradientMethod(cleverhans_model, sess=s)
fgsm_params = {'eps': 0.01, 'clip_min': 0, 'clip_max': 1}
adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
adv_preds_op = tf_model_fn(adv_x_op)
preds_op = tf_model_fn(x_op)

total = 0
Example #13
def kl_temperature(y, lbl, temperature=0.01):
    batch_size = y.shape[0]
    n_classes = y.shape[1]
    # build one-hot logits from the integer labels
    y_logit = cuda(T.zeros(batch_size, n_classes))
    y_logit.scatter_(1, lbl.unsqueeze(-1), 1)
    # sharpen the one-hot target with a low temperature before taking the KL divergence
    return F.kl_div(F.log_softmax(y, dim=1),
                    F.softmax(y_logit / temperature, dim=1),
                    reduction='sum') / batch_size
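A minimal usage sketch (tensors are hypothetical; T, F, and cuda as in the other examples):

logits = cuda(T.randn(4, 10))          # 4 samples, 10 classes
labels = cuda(T.tensor([3, 1, 7, 7]))  # integer class labels
loss = kl_temperature(logits, labels)  # KL against a sharpened one-hot target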
Example #14
def preprocess_bird(item):
    _x, _y = item
    return cuda(_x), cuda(_y.squeeze(1)), None
Example #15
def preprocess_imagenet(item):
    _x, _y, _ = item
    return cuda(_x), cuda(_y.squeeze(1)), None
Example #16
def preprocess_cifar10(item):
    _x, _y = item
    return cuda(_x), cuda(_y), None
Example #17
    def forward(self, t, row, col, lvl=None):
        """
        row and col are not required here (unified interface with the
        resolution regularizer).
        """
        if lvl is None:
            lvl = self.n_levels

        cc_chd_a = []
        cc_chd_b = []

        # for every complete group of siblings, collect all pairs of child boxes
        for l in range(1, lvl + 1):
            current_level = noderange(self.n_branches, l)
            for k in range(len(current_level)):
                if (k + 1) % self.n_branches == 0:
                    for i, j in itertools.combinations(
                            range(k - self.n_branches + 1, k + 1), 2):
                        cc_chd_a.append(t[current_level[i]].b)
                        cc_chd_b.append(t[current_level[j]].b)

        if lvl >= 1 and self.n_branches > 1:
            loss_cc = F_reg_cc(T.stack(cc_chd_a, 1), T.stack(cc_chd_b, 1))
        else:
            loss_cc = cuda(T.zeros(1))
        return self.coef * loss_cc
Example #18
        expr_setting, args.resume))
else:
    regularizer_classes = {
        PCRegularizer: args.pc_coef,
        CCRegularizer: args.cc_coef,
        ResRegularizer: args.res_coef
    }
    network_params = NETWORK_PARAMS[args.dataset]
    builder = cuda(
        nn.DataParallel(
            TreeBuilder(n_branches=n_branches,
                        n_levels=n_levels,
                        n_classes=network_params['n_classes'],
                        share=args.share,
                        regularizer_classes=regularizer_classes,
                        glimpse_type=args.glm_type,
                        glimpse_size=(args.glm_size, args.glm_size),
                        fm_target_size=network_params['fm_target_size'],
                        final_pool_size=network_params['final_pool_size'],
                        final_n_channels=network_params['final_n_channels'],
                        what__cnn=network_params['cnn'],
                        what__fix=args.fix,
                        what__in_dims=network_params['in_dims'])))
    readout = cuda(
        nn.DataParallel(
            create_readout(args.readout,
                           share=args.share,
                           final_n_channels=network_params['final_n_channels'],
                           n_branches=n_branches,
                           n_levels=n_levels,
                           n_classes=network_params['n_classes'])))
Example #19
        with open(w2vec_loc, 'rb') as f:
            w2vec = pickle.load(f)
    else:
        w2vec = {}
        print("Loading word vectors...")
        with open(fasttext_loc) as f:
            next(f)  # skip the header line of the fastText .vec file
            for line in tqdm(f):
                items = line.strip().split(' ')
                token = items[0]
                vector = np.array(items[1:]).astype(float)
                w2vec[token] = vector
        with open(w2vec_loc, 'wb') as f:
            pickle.dump(w2vec, f)
    dim = len(random.choice(list(w2vec.values())))
    embeddings = cuda(
        torch.FloatTensor(match_embeddings(idx2w, w2vec, dim, bigram)))

    # DATASET #
    dataset_train = AutoencoderDataset(train_loc, voc, max_len, bigram=bigram)
    dataset_dev = AutoencoderDataset(dev_loc, voc, max_len, bigram=bigram)

    dataloader_train = DataLoader(dataset_train, batch_size, shuffle=True)
    dataloader_dev = DataLoader(dataset_dev, batch_size, shuffle=True)
    dataloaders = {'train': dataloader_train, 'dev': dataloader_dev}

    # MODEL #
    model = cuda(
        Autoencoder(hidden_size,
                    voc_size,
                    pad_idx,
                    init_idx,
Example #20
                          glimpse_size=(15, 15),
                          n_glimpses=n_glimpses),
        cnn,
    )
    #module = cnn
else:
    #cnn = miniresnet20(num_classes=10)
    #cnn = getattr(pytorch_cifar.models, args.cnn)(1000)
    cnn = getattr(torchvision.models, args.cnn)(pretrained=True)
    cnn.fc = T.nn.Linear(512 * 4, 120)
    module = T.nn.DataParallel(
        T.nn.Sequential(
            #MultiscaleGlimpse(glimpse_type='gaussian', glimpse_size=(50, 50), n_glimpses=n_glimpses),
            cnn, ))

module = cuda(module)
#module.load_state_dict(T.load('cnn.pt'))
#module.load_state_dict(dfs.update_module.cnn.state_dict())
'''
net = skorch.NeuralNetClassifier(
        module=module,
        #module=CNN,
        #module__cnn='cnn',
        #module__input_size=(15, 15),
        #module__h_dims=128,
        #module__n_classes=10,
        #module__kernel_size=(3, 3),
        #module__final_pool_size=(2, 2),
        #module__filters=[16, 32, 64, 128, 256],
        criterion=T.nn.CrossEntropyLoss,
        max_epochs=50,
Example #21
    y = np.maximum(np.minimum(val.numpy(), 50), -50)
    x = np.arange(len(y))
    ax[i, j].plot(x, y, color='b')
    ax[i, j].plot(x, [0] * len(y), color='g')
    ax[i, j].set_title(title)

def display_image(fig, ax, i, j, image, title):
    ax[i, j].imshow(image, cmap='gray')
    ax[i, j].set_title(title)

if os.path.exists('grad_cnn.pt'):
    cnn = T.load('grad_cnn.pt')
    net_h = T.load('grad_net_h.pt')
    #plt.subplots_adjust(wspace=0, hspace =100)
    valid_loader = data_generator(mnist_valid, 1, valid_shuffle)
    whole_glim = cuda(T.tensor([[0.5, 0.5, 1.0, 1.0, 0.5, 0.5]]))
    cnt = 0
    for x, y, b in valid_loader:
        glim = bbox_to_glimpse(b)
        grads = []
        losses = []
        for j in np.linspace(0, 1, 41):
            new_glim = glim * j + whole_glim * (1 - j)
            new_glim.requires_grad = True
            g = glimpse(x, new_glim.unsqueeze(1))[:, 0]
            if j == 0.:
                g_first = g[0][0].detach().cpu()
            if j == 1.:
                g_last = g[0][0].detach().cpu()
            out = net_h(cnn(g).view(1, -1))
            loss = F.cross_entropy(
Example #22
                   n_digits=1,
                   backrand=0,
                   image_rows=size,
                   image_cols=size,
                   download=True)
n_glimpses = 3

glimpse = MultiscaleGlimpse(glimpse_type='gaussian',
                            glimpse_size=(15, 15),
                            n_glimpses=n_glimpses)
module = cuda(
    CNN(cnn='cnn',
        input_size=(15, 15),
        h_dims=128,
        n_classes=10,
        kernel_size=(3, 3),
        final_pool_size=(1, 1),
        filters=[16, 32, 64, 128, 256],
        pred=True,
        in_channels=3,
        n_patches=n_glimpses,
        coalesce_mode='sample'))
seq = T.nn.Sequential(glimpse, module)
seq.load_state_dict(T.load('cnntest.pt'))

rec = []

for i in range(100):
    x = cuda(mnist.train_data[i:i + 1, None].repeat(1, 3, 1, 1).float() / 255.)
    y = cuda(mnist.train_labels[i:i + 1, 0])
    b = cuda(T.zeros(1, 6))
    b.requires_grad = True
Example #23
    if args.dataset == 'imagenet':
        n_classes = 1000
        cnn = 'resnet18'
    elif args.dataset == 'cifar10':
        n_classes = 10
        cnn = None
    elif args.dataset.startswith('mnist'):
        n_classes = 10**args.n_digits
        cnn = None

    builder = cuda(
        TreeBuilder(
            n_branches=n_branches,
            n_levels=n_levels,
            att_type=args.att_type,
            pc_coef=args.pc_coef,
            cc_coef=args.cc_coef,
            n_classes=n_classes,
            glimpse_type=args.glm_type,
            glimpse_size=GLIMPSE_SIZE,
            cnn=cnn,
        ))
    readout = cuda(
        ReadoutModule(n_branches=n_branches,
                      n_levels=n_levels,
                      n_classes=n_classes))
    batch_size = args.batch_size
    builder.load_state_dict(
        T.load('checkpoints/{}_builder_best.pt'.format(expr_setting)))
    readout.load_state_dict(
        T.load('checkpoints/{}_readout_best.pt'.format(expr_setting)))