Example #1
    class simple_cnn_model(object):
        def __init__(self, epochs, batch_size, lr):
            self.epochs = epochs
            self.batch_size = batch_size
            self.lr = lr

        def load_data(self):
            # load data from cifar100 folder
            (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
            return x_train, y_train, x_test, y_test

        def train_model(self, layers, loss_metrics, x_train, y_train):
            # build model
            self.model = Sequential(layers, loss_metrics)
            # train the model
            loss = self.model.fit(x_train,
                                  y_train,
                                  self.epochs,
                                  self.lr,
                                  self.batch_size,
                                  print_output=True)
            avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
            return avg_loss

        def test_model(self, x_test, y_test):
            # make a prediction
            pred_result = self.model.predict(x_test)
            accuracy = np.mean(pred_result == y_test)
            return accuracy
Example #2
 def __init__(self, blocks=None, net_dim=None):
     super(SeqNet, self).__init__()
     self.is_double = False
     self.dims = []
     self.net_dim = net_dim
     self.transform = None if net_dim is None else Upsample(size=self.net_dim, mode="bilinear", consolidate_errors=True)
     self.blocks = [] if self.transform is None else [self.transform]
     if blocks is not None:
         self.blocks += [*blocks]
         self.blocks = Sequential(*self.blocks)
Example #3
 def train_model(self, layers, loss_metrics, x_train, y_train):
     # build model
     self.model = Sequential(layers, loss_metrics)
     # train the model
     loss = self.model.fit(x_train,
                           y_train,
                           self.epochs,
                           self.lr,
                           self.batch_size,
                           print_output=True)
     avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
     return avg_loss
Example #4
    def _init_layers(self):  # FIXME: multi-layer LSTM does not actually work yet
        if self.n_layers == 1:
            self.lstm_encoder = LstmLayer(self.input_size, self.hidden_size)
            self.lstm_decoder = LstmLayer(self.hidden_size, self.input_size)
        else:
            lstm_encoders = OrderedDict()
            lstm_encoders['lstm_0'] = LstmLayer(self.input_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_encoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_encoder = Sequential(lstm_encoders)

            lstm_decoders = OrderedDict()
            lstm_decoders['lstm_0'] = LstmLayer(self.hidden_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_decoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_decoder = Sequential(lstm_decoders)
Example #5
    def get_block(self, in_planes, out_planes, n_blocks, stride, dim):
        strides = [stride] + [1]*(n_blocks-1)
        layers = []
        downsample = None  # only set below when the channel count changes

        if in_planes != out_planes:
            if self.res_block == FixupBasicBlock:
                downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
                # downsample = AvgPool2d(1, stride=stride)
            elif self.res_block == WideBlock:
                downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
            else:
                downsample = [Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)]
                # downsample = [AvgPool2d(1, stride=stride)]
                downsample += [BatchNorm2d(out_planes)]
                downsample = Sequential(*downsample)

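        # only the first block receives the requested stride and the downsample projection;
        # the remaining blocks run with stride 1 on out_planes channels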
        for stride in strides:
            layers += [self.res_block(dim, in_planes, out_planes, stride, downsample)]
            downsample = None
            in_planes = out_planes
            dim = dim // stride
        return dim, Sequential(*layers)
Example #6
 def converter(self, net):
     if isinstance(net, nn.Sequential):
         seq_model = net
     else:
         seq_model = net.module
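     # copy the weights and biases of each Linear/Conv2d layer into the corresponding wrapped layer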
     for idx, l in enumerate(seq_model):
         if isinstance(l, nn.Linear):
             self.layers[idx + 1].linear.weight.data.copy_(l.weight.data)
             self.layers[idx + 1].linear.bias.data.copy_(l.bias.data)
         if isinstance(l, nn.Conv2d):
             self.layers[idx + 1].conv.weight.data.copy_(l.weight.data)
             self.layers[idx + 1].conv.bias.data.copy_(l.bias.data)
     self.blocks = Sequential(*self.layers)
Example #7
    def __init__(self,
                 device,
                 dataset,
                 n_class=10,
                 input_size=32,
                 input_channel=3,
                 width1=1,
                 width2=1,
                 width3=1,
                 linear_size=100):
        super(ConvMedBig, self).__init__()

        mean, sigma = get_mean_sigma(device, dataset)
        self.normalizer = Normalization(mean, sigma)

        layers = [
            Normalization(mean, sigma),
            Conv2d(input_channel,
                   16 * width1,
                   3,
                   stride=1,
                   padding=1,
                   dim=input_size),
            ReLU((16 * width1, input_size, input_size)),
            Conv2d(16 * width1,
                   16 * width2,
                   4,
                   stride=2,
                   padding=1,
                   dim=input_size // 2),
            ReLU((16 * width2, input_size // 2, input_size // 2)),
            Conv2d(16 * width2,
                   32 * width3,
                   4,
                   stride=2,
                   padding=1,
                   dim=input_size // 2),
            ReLU((32 * width3, input_size // 4, input_size // 4)),
            Flatten(),
            Linear(32 * width3 * (input_size // 4) * (input_size // 4),
                   linear_size),
            ReLU(linear_size),
            Linear(linear_size, n_class),
        ]
        self.blocks = Sequential(*layers)
Example #8
    def __init__(self, device, dataset, adv_pre, input_size, net, net_dim):
        super(UpscaleNet, self).__init__()
        self.net = net
        self.net_dim = net_dim
        self.blocks = []

        if input_size == net_dim:
            self.transform = None
        else:
            self.transform = Upsample(size=self.net_dim, mode="nearest", align_corners=False, consolidate_errors=False)
            self.blocks += [self.transform]

        if adv_pre:
            self.blocks += [Scale(2, fixed=True), Bias(-1, fixed=True)]
            self.normalization = Sequential(*self.blocks)
        else:
            mean, sigma = get_mean_sigma(device, dataset)
            self.normalization = Normalization(mean, sigma)
            self.blocks += [self.normalization]

        self.blocks += [self.net]
Example #9
    def __init__(self,
                 device,
                 dataset,
                 sizes,
                 n_class=10,
                 input_size=32,
                 input_channel=3):
        super(FFNN, self).__init__()

        mean, sigma = get_mean_sigma(device, dataset)
        self.normalizer = Normalization(mean, sigma)

        layers = [
            Flatten(),
            Linear(input_size * input_size * input_channel, sizes[0]),
            ReLU(sizes[0])
        ]
        for i in range(1, len(sizes)):
            layers += [
                Linear(sizes[i - 1], sizes[i]),
                ReLU(sizes[i]),
            ]
        layers += [Linear(sizes[-1], n_class)]
        self.blocks = Sequential(*layers)
Example #10
class NumberSortModule(Module):
    def __init__(self, input_size: int, seq_len: int, hidden_size: int,
                 n_layers: int) -> None:
        super().__init__()
        assert isinstance(input_size, int)
        assert isinstance(seq_len, int)
        assert isinstance(hidden_size, int)
        assert isinstance(n_layers, int)
        assert seq_len >= 1
        assert n_layers >= 1
        assert hidden_size >= 1

        self.input_size = input_size
        self.seq_len: int = seq_len
        self.hidden_size: int = hidden_size
        self.n_layers: int = n_layers

        self._init_layers()

        self.register_module_parameters('lstm_encoder', self.lstm_encoder)
        self.register_module_parameters('lstm_decoder', self.lstm_decoder)
        # self.register_module_parameters('decoder', self.decoder)

    def forward(self, x_input: np.ndarray) -> np.ndarray:
        """forward propagation
        
        Arguments:
            x_input {np.ndarray} -- [description]
        
        Returns:
            np.ndarray -- probabilities of permutation values
        """
        assert x_input.shape[-2] == self.seq_len
        assert x_input.shape[-1] == self.input_size

        # align shape to batch-like (batch_size, seq_len, input_size)
        x_input_aligned = x_input.reshape((-1, self.seq_len, self.input_size))
        batch_size, *_ = x_input_aligned.shape

        predictions = np.empty((batch_size, self.seq_len, self.input_size))

        for sample in range(batch_size):
            self._reload()
            lstm_encoder_out, _ = self.lstm_encoder.forward(
                x_input_aligned[sample])
            # print("LSTM output", lstm_encoder_out)
            lstm_encoder_out = np.tile(lstm_encoder_out, (self.seq_len, 1))
            _, lstm_decoder_out = self.lstm_decoder.forward(lstm_encoder_out)
            lstm_decoder_out = lstm_decoder_out.reshape(
                -1, self.seq_len, self.input_size)
            # decoder_out = self.decoder.forward(lstm_encoder_out)
            predictions[sample, :] = lstm_decoder_out

        return predictions.reshape(-1, self.seq_len, self.input_size)

    def backward(self, d_output: np.ndarray) -> np.ndarray:
        # as we use time distributed derivative for decoder
        d_dummy = np.zeros(d_output.shape[1:])[np.newaxis, ...]
        d_decoder_hidden, d_decoder_cell, d_decoder = self.lstm_decoder.backward(
            d_dummy, d_output)
        *_, d_encoder = self.lstm_encoder.backward(d_decoder[np.newaxis, ...])

        return d_encoder

    def predict(self, x_input: np.ndarray) -> np.ndarray:
        predictions_proba = self.forward(x_input).reshape(
            (-1, self.seq_len, self.input_size))
        predicted_permutation = np.argmax(predictions_proba, axis=-1)

        return predicted_permutation

    def _init_layers(self):  # FIXME: multi-layer LSTM does not actually work yet
        if self.n_layers == 1:
            self.lstm_encoder = LstmLayer(self.input_size, self.hidden_size)
            self.lstm_decoder = LstmLayer(self.hidden_size, self.input_size)
        else:
            lstm_encoders = OrderedDict()
            lstm_encoders['lstm_0'] = LstmLayer(self.input_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_encoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_encoder = Sequential(lstm_encoders)

            lstm_decoders = OrderedDict()
            lstm_decoders['lstm_0'] = LstmLayer(self.hidden_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_decoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_decoder = Sequential(lstm_decoders)

        # self.decoder = FullyConnectedLayer(self.hidden_size, self.seq_len * self.seq_len)

    def _reload(self):
        if self.n_layers == 1:
            self.lstm_encoder.reload()
        else:
            for lstm in self.lstm_encoder.modules.values():
                lstm.reload()
"""
Created on Sun Mar 25 19:52:43 2018

@author: kaushik
"""
import time
import numpy as np
import matplotlib.pyplot as plt
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

(x_train, y_train), (x_test, y_test) = cifar100(1337)
model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
                           ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
                           FlattenLayer(), FullLayer(8 * 8 * 32,
                                                     4), SoftMaxLayer()),
                   loss=CrossEntropyLayer())
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
lr_vals = [0.1]
losses_train = list()
losses_test = list()
test_acc = np.zeros(len(lr_vals))
for j in range(len(lr_vals)):
    train_loss, test_loss = model.fit(x_train,
                                      y_train,
                                      x_test,
                                      y_test,
                                      epochs=8,
                                      lr=lr_vals[j],
                                      batch_size=128)
Example #12
import numpy as np
from layers.dataset import cifar100
import matplotlib.pyplot as plt

# Please make sure that cifar-100-python is present in the same folder as dataset.py

(x_train, y_train), (x_test, y_test) = cifar100(1212356299)

from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential)

model = Sequential(layers=(FullLayer(3072,
                                     500), ReluLayer(), FullLayer(500, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())

lr_accuracies = np.zeros((3, ))

loss1 = model.fit(x_train, y_train, lr=0.01, epochs=15)
y_predict = model.predict(x_test)

count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1

lr_accuracies[0] = (100.0 * count) / np.shape(y_predict)[0]

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)

y_predict = model.predict(x_test)
Example #13
import numpy as np
import matplotlib.pyplot as plt

from cross_entropy import CrossEntropyLayer
from conv_new import ConvLayer
from maxpool_new import MaxPoolLayer
from flatten_new import FlattenLayer
from sequential import Sequential
from layers.dataset_new import cifar100
from dataset_new import onehot
# ReluLayer, FullLayer and SoftMaxLayer (used below) must also be imported
# from their corresponding layer modules in this repository.
(x_train, y_train), (x_test, y_test) = cifar100(1213268041)

test_accuracy = []
epochs = 15
finalloss = []
testloss = []
model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(2),
                           ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(2),
                           FlattenLayer(), FullLayer(8 * 8 * 32,
                                                     4), SoftMaxLayer()),
                   loss=CrossEntropyLayer())

train_loss, valid_loss = model.fit(x_train,
                                   y_train,
                                   x_test,
                                   y_test,
                                   epochs=15,
                                   lr=0.1,
                                   batch_size=128)
y_pred = model.predict(x_test)
accuracy = (np.mean(y_test == onehot(y_pred)))
print('Accuracy: %.2f' % accuracy)
#np.append(test_accuracy, accuracy)
plt.plot(range(len(train_loss)), train_loss, label='Training loss')
Example #14
    def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3, conv_widths=None,
                 kernel_sizes=None, linear_sizes=None, depth_conv=None, paddings=None, strides=None,
                 dilations=None, pool=False, net_dim=None, bn=False, max=False, scale_width=True):
        super(myNet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
        if kernel_sizes is None:
            kernel_sizes = [3]
        if conv_widths is None:
            conv_widths = [2]
        if linear_sizes is None:
            linear_sizes = [200]
        if paddings is None:
            paddings = [1]
        if strides is None:
            strides = [2]
        if dilations is None:
            dilations = [1]
        if net_dim is None:
            net_dim = input_size

        if len(conv_widths) != len(kernel_sizes):
            kernel_sizes = len(conv_widths) * [kernel_sizes[0]]
        if len(conv_widths) != len(paddings):
            paddings = len(conv_widths) * [paddings[0]]
        if len(conv_widths) != len(strides):
            strides = len(conv_widths) * [strides[0]]
        if len(conv_widths) != len(dilations):
            dilations = len(conv_widths) * [dilations[0]]

        self.n_class=n_class
        self.input_size=input_size
        self.input_channel=input_channel
        self.conv_widths=conv_widths
        self.kernel_sizes=kernel_sizes
        self.paddings=paddings
        self.strides=strides
        self.dilations = dilations
        self.linear_sizes=linear_sizes
        self.depth_conv=depth_conv
        self.net_dim = net_dim
        self.bn=bn
        self.max=max

        mean, sigma = get_mean_sigma(device, dataset)
        layers = self.blocks
        layers += [Normalization(mean, sigma)]

        N = net_dim
        n_channels = input_channel
        self.dims += [(n_channels,N,N)]

        for width, kernel_size, padding, stride, dilation in zip(conv_widths, kernel_sizes, paddings, strides, dilations):
            if scale_width:
                width *= 16
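            # standard convolution output size: floor((N + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1)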
            N = int(np.floor((N + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1))
            layers += [Conv2d(n_channels, int(width), kernel_size, stride=stride, padding=padding, dilation=dilation)]
            if self.bn:
                layers += [BatchNorm2d(int(width))]
            if self.max:
                layers += [MaxPool2d(int(width))]
            layers += [ReLU((int(width), N, N))]
            n_channels = int(width)
            self.dims += 2*[(n_channels,N,N)]


        if depth_conv is not None:
            layers += [Conv2d(n_channels, depth_conv, 1, stride=1, padding=0),
                       ReLU((n_channels, N, N))]
            n_channels = depth_conv
            self.dims += 2*[(n_channels,N,N)]

        if pool:
            layers += [GlobalAvgPool2d()]
            self.dims += 2 * [(n_channels, 1, 1)]
            N=1

        layers += [Flatten()]
        N = n_channels * N ** 2
        self.dims += [(N,)]


        for width in linear_sizes:
            if width == 0:
                continue
            layers += [Linear(int(N), int(width)),
                       ReLU(width)]
            N = width
            self.dims+=2*[(N,)]

        layers += [Linear(N, n_class)]
        self.dims+=[(n_class,)]

        self.blocks = Sequential(*layers)
Example #15
    def __init__(self, device, args, dataset, trunk_net, input_size, input_channel, n_class, n_branches, gate_type,
                 branch_net_names, gate_net_names, evalFn, lossFn):
        super(MyDeepTrunkNet, self).__init__()
        self.dataset = dataset
        self.input_size = input_size
        self.input_channel = input_channel
        self.n_class = n_class
        self.gate_type = gate_type
        self.n_branches = n_branches
        self.trunk_net = trunk_net
        self.evalFn = evalFn
        self.lossFn = lossFn

        assert gate_type in ["entropy", "net"], f"Unknown gate mode: {gate_type:s}"

        self.exit_ids = [-1] + list(range(n_branches))

        self.threshold = {exit_idx: args.gate_threshold for exit_idx in self.exit_ids[1:]}
        self.gate_nets = {}
        self.branch_nets = {}

        if len(branch_net_names) != n_branches:
            print("Number of branches does not match branch net names")
            branch_net_names = n_branches * branch_net_names[0:1]

        if gate_net_names is None:
            gate_net_names = branch_net_names
        elif len(gate_net_names) != n_branches:
            print("Number of branches does not match gate net names")
            gate_net_names = n_branches * gate_net_names[0:1]

        if args.load_branch_model is not None and len(args.load_branch_model) != n_branches:
            args.load_branch_model = n_branches * args.load_branch_model[0:1]
        if args.load_gate_model is not None and len(args.load_gate_model) != n_branches:
            args.load_gate_model = n_branches * args.load_gate_model[0:1]

        for i, branch_net_name in zip(range(n_branches), branch_net_names):
            exit_idx = self.exit_ids[i+1]
            self.branch_nets[exit_idx] = get_net(device, dataset, branch_net_name, input_size, input_channel, n_class,
                                                 load_model=None if args.load_branch_model is None else args.load_branch_model[i],
                                                  net_dim=args.cert_net_dim)

            if gate_type == "net":
                self.gate_nets[exit_idx] = get_net(device, dataset, gate_net_names[i], input_size, input_channel, 1,
                                                   load_model=None if args.load_gate_model is None else args.load_gate_model[i],
                                                   net_dim=args.cert_net_dim)
            else:
                self.gate_nets[exit_idx] = SeqNet(Sequential(*[*self.branch_nets[exit_idx].blocks, Entropy(n_class, low_mem=True, neg=True)]))
                self.gate_nets[exit_idx].determine_dims(torch.randn((2, input_channel, input_size, input_size), dtype=torch.float).to(device))
                init_slopes(self.gate_nets[exit_idx], device, trainable=False)

            self.add_module("gateNet_{}".format(exit_idx), self.gate_nets[exit_idx])
            self.add_module("branchNet_{}".format(exit_idx), self.branch_nets[exit_idx])

        if args.load_model is not None:
            old_state = self.state_dict()
            load_state = torch.load(args.load_model)
            if args.cert_net_dim is not None and not ("gateNet_0.blocks.layers.1.mean" in load_state.keys()): # Only change keys if loading from a non mixed resolution to mixed resolution
                new_dict = {}
                for k in load_state.keys():
                    if k.startswith("trunk"):
                        new_k = k
                    else:
                        k_match = re.match(r"(^.*\.layers\.)([0-9]+)(\..*$)", k)
                        new_k = "%s%d%s" % (k_match.group(1), int(k_match.group(2)) + 1, k_match.group(3))
                    new_dict[new_k] = load_state[k]
                load_state.update(new_dict)

            # LiRPA requires parameters to have zero batch dimension. This makes old models compatible
            for k, v in load_state.items():
                if k.endswith("mean") or k.endswith("sigma"):
                    if k in old_state:
                        load_state.update({k: v.reshape(old_state[k].shape)})

            old_state.update({k:v.view(old_state[k].shape) for k,v in load_state.items() if
                              k in old_state and (
                              (k.startswith("trunk") and args.load_trunk_model is None)
                              or (k.startswith("gate") and args.load_gate_model is None)
                              or (k.startswith("branch") and args.load_branch_model is None))})
            missing_keys, extra_keys = self.load_state_dict(old_state, strict=False)
            assert len([x for x in missing_keys if "gateNet" in x or "branchNet" in x]) == 0
            print("Whole model loaded from %s" % args.load_model)

            ## Trunk and branch nets have to be loaded after the whole model
            if args.load_trunk_model is not None:
                load_net_state(self.trunk_net, args.load_trunk_model)

        if (args.load_model is not None or args.load_gate_model is not None) and args.gate_feature_extraction is not None:
            for i, net in enumerate(self.gate_nets.values()):
                extraction_layer = [ii for ii in range(len(net.blocks)) if isinstance(net.blocks[ii],Linear)]
                extraction_layer = extraction_layer[-min(len(extraction_layer),args.gate_feature_extraction)]
                net.freeze(extraction_layer-1)

        self.trunk_cnet = None
        self.gate_cnets = {k: None for k in self.gate_nets.keys()}
        self.branch_cnets = {k: None for k in self.branch_nets.keys()}
Example #16
    
    plt.plot(loss)
    plt.show()
    
    ret, _ = model.forward(x_test_1D)
    correct = 0
    for i in range(ret.shape[0]):
        if np.argmax(y_test[i]) == np.argmax(ret[i]):
            correct += 1
    print(correct/ret.shape[0])
'''

if __name__ == '__main__':
    # simple linear data: y = w1*x1 + w2*x2 + ... wi*xi + b
    in_features = 5
    num_samples = 1000
    X = np.random.randn(num_samples, in_features)
    W = np.random.randn(in_features, 1)
    B = np.random.randn(1)
    Y = X @ W + B + 0.01 * np.random.randn(num_samples, 1)

    m = Linear(in_features, 1)
    model = Sequential(m)
    loss = Learner(model, mse_loss, SGDOptimizer(lr=0.01)).fit(X,
                                                               Y,
                                                               epochs=100,
                                                               bs=100)

    plt.plot(loss)
    plt.show()
Example #17
    def __init__(self, device, dataset, n_blocks, n_class=10, input_size=32, input_channel=3, block='basic',
                 in_planes=32, net_dim=None, widen_factor=1, pooling="global"):
        super(MyResnet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
        if block == 'basic':
            self.res_block = BasicBlock
        elif block == 'preact':
            self.res_block = PreActBlock
        elif block == 'wide':
            self.res_block = WideBlock
        elif block == 'fixup':
            self.res_block = FixupBasicBlock
        else:
            assert False
        self.n_layers = sum(n_blocks)
        mean, sigma = get_mean_sigma(device, dataset)
        dim = input_size

        k = widen_factor

        layers = [Normalization(mean, sigma),
                  Conv2d(input_channel, in_planes, kernel_size=3, stride=1, padding=1, bias=(block == "wide"), dim=dim)]

        if not block == "wide":
            layers += [Bias() if block == 'fixup' else BatchNorm2d(in_planes),
                       ReLU((in_planes, input_size, input_size))]

        strides = [1, 2] + ([2] if len(n_blocks) > 2 else []) + [1] * max(0,(len(n_blocks)-3))

        n_filters = in_planes
        for n_block, n_stride in zip(n_blocks, strides):
            if n_stride > 1:
                n_filters *= 2
            dim, block_layers = self.get_block(in_planes, n_filters*k, n_block, n_stride, dim=dim)
            in_planes = n_filters*k
            layers += [block_layers]

        if block == 'fixup':
            layers += [Bias()]
        else:
            layers += [BatchNorm2d(n_filters*k)]

        if block == "wide":
            layers += [ReLU((n_filters*k, dim, dim))]

        if pooling == "global":
            layers += [GlobalAvgPool2d()]
            N = n_filters * k
        elif pooling == "None":      # old networks miss pooling layer and wont load
            N = n_filters * dim * dim * k
        elif isinstance(pooling, int):
            layers += [AvgPool2d(pooling)]
            dim = dim//pooling
            N = n_filters * dim * dim * k

        layers += [Flatten(), ReLU(N)]

        if block == 'fixup':
            layers += [Bias()]

        layers += [Linear(N, n_class)]

        self.blocks = Sequential(*layers)

        # Fixup initialization
        if block == 'fixup':
            for m in self.modules():
                if isinstance(m, FixupBasicBlock):
                    conv1, conv2 = m.residual[1].conv, m.residual[5].conv
                    nn.init.normal_(conv1.weight,
                                    mean=0,
                                    std=np.sqrt(2 / (conv1.weight.shape[0] * np.prod(conv1.weight.shape[2:]))) * self.n_layers ** (-0.5))
                    nn.init.constant_(conv2.weight, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.constant_(m.weight, 0)
                    nn.init.constant_(m.bias, 0)
Example #18
class SeqNet(nn.Module):

    def __init__(self, blocks=None, net_dim=None):
        super(SeqNet, self).__init__()
        self.is_double = False
        self.dims = []
        self.net_dim = net_dim
        self.transform = None if net_dim is None else Upsample(size=self.net_dim, mode="bilinear", consolidate_errors=True)
        self.blocks = [] if self.transform is None else [self.transform]
        if blocks is not None:
            self.blocks += [*blocks]
            self.blocks = Sequential(*self.blocks)

    def forward(self, x, residual=None, input_idx=-1):
        if isinstance(x, torch.Tensor) and self.is_double:
            x = x.to(dtype=torch.float64)
        x = self.forward_between(input_idx+1, None, x, residual)
        return x

    def verify(self, inputs, targets, eps, domain, threshold_min=0, input_min=0, input_max=1, return_abs=False):
        n_class = self.blocks[-1].out_features
        device = inputs.device

        if self.transform is not None and self.transform.consolidate_errors:
            abs_input = HybridZonotope.construct_from_noise(inputs, eps, "box",
                                                            data_range=(input_min, input_max))
            abs_input.domain = domain
        else:
            abs_input = HybridZonotope.construct_from_noise(inputs, eps, domain,
                                                            data_range=(input_min, input_max))

        if domain in ["box","hbox"] and n_class > 1:
            C = torch.stack([self.get_c_mat(n_class, x, device) for x in targets], dim=0)
            I = (~(targets.unsqueeze(1) == torch.arange(n_class, dtype=torch.float32, device=device).unsqueeze(0)))
            abs_outputs = self.forward_between(0, len(self.blocks) - 1, abs_input)
            abs_outputs = abs_outputs.linear(self.blocks[-1].linear.weight, self.blocks[-1].linear.bias, C)
            threshold_n = abs_outputs.concretize()[0][I].view(targets.size(0), n_class - 1).min(dim=1)[0]
            ver_corr = threshold_n > threshold_min
            ver = ver_corr
        else:
            abs_outputs = self(abs_input)
            ver, ver_corr, threshold_n = abs_outputs.verify(targets, threshold_min=threshold_min,
                                                            corr_only=abs_outputs.size(-1) > 10)

        if return_abs:
            return ver, ver_corr, threshold_n, abs_outputs
        else:
            return ver, ver_corr, threshold_n

    @staticmethod
    def get_c_mat(n_class, target, device):
        c = torch.eye(n_class, dtype=torch.float32, device=device)[target].unsqueeze(dim=0) \
            - torch.eye(n_class, dtype=torch.float32, device=device)
        return c

    def freeze(self, layer_idx):
        for i in range(layer_idx+1):
            self.blocks[i].requires_grad_(False)
            if isinstance(self.blocks[i],BatchNorm1d) or isinstance(self.blocks[i],BatchNorm2d):
                self.blocks[i].training = False

    def reset_bounds(self):
        for block in self.blocks:
            block.reset_bounds()

    def to_double(self):
        self.is_double = True
        for param_name, param_value in self.named_parameters():
            param_value.data = param_value.data.to(dtype=torch.float64)

    def forward_between(self, i_from, i_to, x, residual=None):
        """ Forward from (inclusive) to (exclusive)"""
        if i_to is None:
            i_to = len(self.blocks)
        if i_from is None:
            i_from = 0
        x = self.blocks.forward_between(i_from, i_to, x, residual=residual)
        return x

    def forward_until(self, i, x):
        """ Forward until layer i (inclusive) """
        x = self.forward_between(None, i+1, x, residual=None)
        return x

    def forward_from(self, i, x):
        """ Forward from layer i (exclusive) """
        x = self.forward_between(i+1, None, x, residual=None)
        return x

    def temp_freeze(self):
        param_state = {}
        for name, param in self.named_parameters():
            param_state[name] = param.requires_grad
            param.requires_grad = False
        return param_state

    def get_freeze_state(self):
        param_state = {}
        for name, param in self.named_parameters():
            param_state[name] = param.requires_grad
        return param_state

    def restore_freeze(self, param_state):
        for name, param in self.named_parameters():
            param.requires_grad = param_state[name]

    def determine_dims(self, x, force=False, blocks=None):
        if len(self.dims)>0 and not force:
            return
        if blocks is None:
            blocks = self.blocks
        for layer in blocks:
            if hasattr(layer, "layers"):
                for sub_layers in layer.layers:
                    sub_layers = sub_layers if not hasattr(sub_layers, "residual") else sub_layers.residual
                    x = self.determine_dims(x, force=True, blocks=sub_layers)
            else:
                x = layer(x)
            self.dims += [tuple(x.size()[1:])]
        return x

    def get_subNet_blocks(self,startBlock=0, endBlock=None):
        if endBlock is None:
            endBlock=len(self.blocks)
        assert endBlock<=len(self.blocks)
        return self.blocks[startBlock:endBlock]
Example #19
from layers import Sequential, Dense
import numpy as np
from utils import load_ionosphere

(X_train, y_train), (X_test, y_test) = load_ionosphere(0.7,
                                                       normalize=True,
                                                       shuffled=True)

model = Sequential()
model.add(Dense(15, input_shape=(X_train.shape[1], )))
model.add(Dense(15))
model.add(Dense(y_train.shape[1]))

model.fit(X_train,
          y_train,
          reg_factor=0.0,
          epochs=1000,
          learning_rate=0.1,
          batch_size=32,
          validation_data=(X_test, y_test),
          verbose=True)

model.plot_error()

# model.plot_train_error()
Example #20
import numpy as np
from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    loss.backward()
    optim.step()
    print(loss)