class simple_cnn_model(object):
    def __init__(self, epochs, batch_size, lr):
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

    def load_data(self):
        # load data from cifar100 folder
        (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
        return x_train, y_train, x_test, y_test

    def train_model(self, layers, loss_metrics, x_train, y_train):
        # build model
        self.model = Sequential(layers, loss_metrics)
        # train the model
        loss = self.model.fit(x_train, y_train, self.epochs, self.lr, self.batch_size, print_output=True)
        avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
        return avg_loss

    def test_model(self, x_test, y_test):
        # make a prediction
        pred_result = self.model.predict(x_test)
        accuracy = np.mean(pred_result == y_test)
        return accuracy
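# Hedged usage sketch (assumptions: the ConvLayer/ReluLayer/MaxPoolLayer/FlattenLayer/
# FullLayer/SoftMaxLayer/CrossEntropyLayer classes come from the same layers package as
# the scripts further below, and the CIFAR-100 subset has 4 classes as in those scripts).
if __name__ == '__main__':
    cnn = simple_cnn_model(epochs=8, batch_size=128, lr=0.1)
    x_train, y_train, x_test, y_test = cnn.load_data()
    layers = (ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
              ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
              FlattenLayer(), FullLayer(8 * 8 * 32, 4), SoftMaxLayer())
    avg_loss = cnn.train_model(layers, CrossEntropyLayer(), x_train, y_train)
    print("average loss per epoch:", avg_loss)
    print("test accuracy:", cnn.test_model(x_test, y_test))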
def get_block(self, in_planes, out_planes, n_blocks, stride, dim):
    strides = [stride] + [1] * (n_blocks - 1)
    layers = []
    # initialize downsample so the loop below also works when in_planes == out_planes
    downsample = None
    if in_planes != out_planes:
        if self.res_block == FixupBasicBlock:
            downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
            # downsample = AvgPool2d(1, stride=stride)
        elif self.res_block == WideBlock:
            downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
        else:
            downsample = [Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)]
            # downsample = [AvgPool2d(1, stride=stride)]
            downsample += [BatchNorm2d(out_planes)]
            downsample = Sequential(*downsample)
    for stride in strides:
        layers += [self.res_block(dim, in_planes, out_planes, stride, downsample)]
        downsample = None
        in_planes = out_planes
        dim = dim // stride
    return dim, Sequential(*layers)
def converter(self, net):
    if isinstance(net, nn.Sequential):
        seq_model = net
    else:
        seq_model = net.module
    for idx, l in enumerate(seq_model):
        if isinstance(l, nn.Linear):
            self.layers[idx + 1].linear.weight.data.copy_(l.weight.data)
            self.layers[idx + 1].linear.bias.data.copy_(l.bias.data)
        if isinstance(l, nn.Conv2d):
            self.layers[idx + 1].conv.weight.data.copy_(l.weight.data)
            self.layers[idx + 1].conv.bias.data.copy_(l.bias.data)
    self.blocks = Sequential(*self.layers)
def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3,
             width1=1, width2=1, width3=1, linear_size=100):
    super(ConvMedBig, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, 16 * width1, 3, stride=1, padding=1, dim=input_size),
        ReLU((16 * width1, input_size, input_size)),
        Conv2d(16 * width1, 16 * width2, 4, stride=2, padding=1, dim=input_size // 2),
        ReLU((16 * width2, input_size // 2, input_size // 2)),
        Conv2d(16 * width2, 32 * width3, 4, stride=2, padding=1, dim=input_size // 2),
        ReLU((32 * width3, input_size // 4, input_size // 4)),
        Flatten(),
        Linear(32 * width3 * (input_size // 4) * (input_size // 4), linear_size),
        ReLU(linear_size),
        Linear(linear_size, n_class),
    ]
    self.blocks = Sequential(*layers)
def __init__(self, device, dataset, adv_pre, input_size, net, net_dim):
    super(UpscaleNet, self).__init__()
    self.net = net
    self.net_dim = net_dim
    self.blocks = []

    if input_size == net_dim:
        self.transform = None
    else:
        self.transform = Upsample(size=self.net_dim, mode="nearest", align_corners=False,
                                  consolidate_errors=False)
        self.blocks += [self.transform]

    if adv_pre:
        self.blocks += [Scale(2, fixed=True), Bias(-1, fixed=True)]
        self.normalization = Sequential(*self.blocks)
    else:
        mean, sigma = get_mean_sigma(device, dataset)
        self.normalization = Normalization(mean, sigma)
        self.blocks += [self.normalization]

    self.blocks += [self.net]
def __init__(self, device, dataset, sizes, n_class=10, input_size=32, input_channel=3):
    super(FFNN, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    layers = [
        Flatten(),
        Linear(input_size * input_size * input_channel, sizes[0]),
        ReLU(sizes[0]),
    ]
    for i in range(1, len(sizes)):
        layers += [
            Linear(sizes[i - 1], sizes[i]),
            ReLU(sizes[i]),
        ]
    layers += [Linear(sizes[-1], n_class)]
    self.blocks = Sequential(*layers)
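# Hedged usage sketch (assumptions: FFNN inherits the SeqNet forward defined below, the
# "cifar10" dataset string is accepted by get_mean_sigma, and the hidden sizes are illustrative).
import torch

device = torch.device("cpu")
ffnn = FFNN(device, "cifar10", sizes=[200, 200])
logits = ffnn(torch.randn(2, 3, 32, 32))  # two random 3x32x32 inputs through the Sequential blocks
print(logits.shape)                        # expected: torch.Size([2, 10])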
class NumberSortModule(Module):
    def __init__(self, input_size: int, seq_len: int, hidden_size: int, n_layers: int) -> None:
        super().__init__()
        assert isinstance(input_size, int)
        assert isinstance(seq_len, int)
        assert isinstance(hidden_size, int)
        assert isinstance(n_layers, int)
        assert seq_len >= 1
        assert n_layers >= 1
        assert hidden_size >= 1

        self.input_size = input_size
        self.seq_len: int = seq_len
        self.hidden_size: int = hidden_size
        self.n_layers: int = n_layers

        self._init_layers()
        self.register_module_parameters('lstm_encoder', self.lstm_encoder)
        self.register_module_parameters('lstm_decoder', self.lstm_decoder)
        # self.register_module_parameters('decoder', self.decoder)

    def forward(self, x_input: np.ndarray) -> np.ndarray:
        """forward propagation

        Arguments:
            x_input {np.ndarray} -- input sequences of shape (..., seq_len, input_size)

        Returns:
            np.ndarray -- probabilities of permutation values
        """
        assert x_input.shape[-2] == self.seq_len
        assert x_input.shape[-1] == self.input_size

        # align shape to batch-like (batch_size, seq_len, input_size)
        x_input_aligned = x_input.reshape((-1, self.seq_len, self.input_size))
        batch_size, *_ = x_input_aligned.shape

        predictions = np.empty((batch_size, self.seq_len, self.input_size))
        for sample in range(batch_size):
            self._reload()
            lstm_encoder_out, _ = self.lstm_encoder.forward(x_input_aligned[sample])
            # print("LSTM output", lstm_encoder_out)
            lstm_encoder_out = np.tile(lstm_encoder_out, (self.seq_len, 1))
            _, lstm_decoder_out = self.lstm_decoder.forward(lstm_encoder_out)
            lstm_decoder_out = lstm_decoder_out.reshape(-1, self.seq_len, self.input_size)
            # decoder_out = self.decoder.forward(lstm_encoder_out)
            predictions[sample, :] = lstm_decoder_out

        return predictions.reshape(-1, self.seq_len, self.input_size)

    def backward(self, d_output: np.ndarray) -> np.ndarray:
        # as we use time distributed derivative for decoder
        d_dummy = np.zeros(d_output.shape[1:])[np.newaxis, ...]
        d_decoder_hidden, d_decoder_cell, d_decoder = self.lstm_decoder.backward(d_dummy, d_output)
        *_, d_encoder = self.lstm_encoder.backward(d_decoder[np.newaxis, ...])
        return d_encoder

    def predict(self, x_input: np.ndarray) -> np.ndarray:
        predictions_proba = self.forward(x_input).reshape((-1, self.seq_len, self.input_size))
        predicted_permutation = np.argmax(predictions_proba, axis=-1)
        return predicted_permutation

    def _init_layers(self):
        # FIXME: multi-layer LSTM does not actually work yet
        if self.n_layers == 1:
            self.lstm_encoder = LstmLayer(self.input_size, self.hidden_size)
            self.lstm_decoder = LstmLayer(self.hidden_size, self.input_size)
        else:
            lstm_encoders = OrderedDict()
            lstm_encoders['lstm_0'] = LstmLayer(self.input_size, self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_encoders['lstm_' + str(i)] = LstmLayer(self.hidden_size, self.hidden_size)
            self.lstm_encoder = Sequential(lstm_encoders)

            lstm_decoders = OrderedDict()
            lstm_decoders['lstm_0'] = LstmLayer(self.hidden_size, self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_decoders['lstm_' + str(i)] = LstmLayer(self.hidden_size, self.hidden_size)
            self.lstm_decoder = Sequential(lstm_decoders)
        # self.decoder = FullyConnectedLayer(self.hidden_size, self.seq_len * self.seq_len)

    def _reload(self):
        if self.n_layers == 1:
            self.lstm_encoder.reload()
        else:
            for lstm in self.lstm_encoder.modules.values():
                lstm.reload()
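# Hedged usage sketch (assumption: this lives in the same module, so NumberSortModule,
# numpy and its LstmLayer dependency are available). Sizes and the one-hot encoding are
# illustrative only; an untrained module will not produce a meaningful permutation.
if __name__ == '__main__':
    seq_len = input_size = 5
    sorter = NumberSortModule(input_size=input_size, seq_len=seq_len, hidden_size=32, n_layers=1)
    permutation = np.random.permutation(seq_len)
    x = np.eye(input_size)[permutation]  # shape (seq_len, input_size), one-hot rows
    predicted = sorter.predict(x)        # argmax over the decoder outputs
    print("input permutation:", permutation)
    print("predicted (untrained):", predicted[0])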
""" Created on Sun Mar 25 19:52:43 2018 @author: kaushik """ import time import numpy as np import matplotlib.pyplot as plt from layers.dataset import cifar100 from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential) (x_train, y_train), (x_test, y_test) = cifar100(1337) model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(), ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(), FlattenLayer(), FullLayer(8 * 8 * 32, 4), SoftMaxLayer()), loss=CrossEntropyLayer()) start_time = time.clock() lr_vals = [0.1] losses_train = list() losses_test = list() test_acc = np.zeros(len(lr_vals)) for j in range(len(lr_vals)): train_loss, test_loss = model.fit(x_train, y_train, x_test, y_test, epochs=8, lr=lr_vals[j], batch_size=128)
import numpy as np
import matplotlib.pyplot as plt

from layers.dataset import cifar100
from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

# Please make sure that cifar-100-python is present in the same folder as dataset.py
(x_train, y_train), (x_test, y_test) = cifar100(1212356299)

model = Sequential(layers=(FullLayer(3072, 500),
                           ReluLayer(),
                           FullLayer(500, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())

lr_accuracies = np.zeros((3, ))

loss1 = model.fit(x_train, y_train, lr=0.01, epochs=15)
y_predict = model.predict(x_test)
count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1
lr_accuracies[0] = (100.0 * count) / np.shape(y_predict)[0]

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)
y_predict = model.predict(x_test)
import numpy as np
import matplotlib.pyplot as plt

from cross_entropy import CrossEntropyLayer
from conv_new import ConvLayer
from maxpool_new import MaxPoolLayer
from flatten_new import FlattenLayer
from sequential import Sequential
from layers.dataset_new import cifar100
from dataset_new import onehot
# ReluLayer, FullLayer and SoftMaxLayer were not imported in the original script;
# they are assumed to come from the layers package used by the other scripts.
from layers import ReluLayer, FullLayer, SoftMaxLayer

(x_train, y_train), (x_test, y_test) = cifar100(1213268041)

test_accuracy = []
epochs = 15
finalloss = []
testloss = []

model = Sequential(layers=(ConvLayer(3, 16, 3),
                           ReluLayer(),
                           MaxPoolLayer(2),
                           ConvLayer(16, 32, 3),
                           ReluLayer(),
                           MaxPoolLayer(2),
                           FlattenLayer(),
                           FullLayer(8 * 8 * 32, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())

train_loss, valid_loss = model.fit(x_train, y_train, x_test, y_test, epochs=15, lr=0.1, batch_size=128)

y_pred = model.predict(x_test)
accuracy = np.mean(y_test == onehot(y_pred))
print('Accuracy: %.2f' % accuracy)
# np.append(test_accuracy, accuracy)

plt.plot(range(len(train_loss)), train_loss, label='Training loss')
def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3, conv_widths=None,
             kernel_sizes=None, linear_sizes=None, depth_conv=None, paddings=None, strides=None,
             dilations=None, pool=False, net_dim=None, bn=False, max=False, scale_width=True):
    super(myNet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
    if kernel_sizes is None:
        kernel_sizes = [3]
    if conv_widths is None:
        conv_widths = [2]
    if linear_sizes is None:
        linear_sizes = [200]
    if paddings is None:
        paddings = [1]
    if strides is None:
        strides = [2]
    if dilations is None:
        dilations = [1]
    if net_dim is None:
        net_dim = input_size

    if len(conv_widths) != len(kernel_sizes):
        kernel_sizes = len(conv_widths) * [kernel_sizes[0]]
    if len(conv_widths) != len(paddings):
        paddings = len(conv_widths) * [paddings[0]]
    if len(conv_widths) != len(strides):
        strides = len(conv_widths) * [strides[0]]
    if len(conv_widths) != len(dilations):
        dilations = len(conv_widths) * [dilations[0]]

    self.n_class = n_class
    self.input_size = input_size
    self.input_channel = input_channel
    self.conv_widths = conv_widths
    self.kernel_sizes = kernel_sizes
    self.paddings = paddings
    self.strides = strides
    self.dilations = dilations
    self.linear_sizes = linear_sizes
    self.depth_conv = depth_conv
    self.net_dim = net_dim
    self.bn = bn
    self.max = max

    mean, sigma = get_mean_sigma(device, dataset)
    layers = self.blocks
    layers += [Normalization(mean, sigma)]

    N = net_dim
    n_channels = input_channel
    self.dims += [(n_channels, N, N)]

    for width, kernel_size, padding, stride, dilation in zip(conv_widths, kernel_sizes, paddings, strides, dilations):
        if scale_width:
            width *= 16
        N = int(np.floor((N + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1))
        layers += [Conv2d(n_channels, int(width), kernel_size, stride=stride, padding=padding, dilation=dilation)]
        if self.bn:
            layers += [BatchNorm2d(int(width))]
        if self.max:
            layers += [MaxPool2d(int(width))]
        layers += [ReLU((int(width), N, N))]
        n_channels = int(width)
        self.dims += 2 * [(n_channels, N, N)]

    if depth_conv is not None:
        layers += [Conv2d(n_channels, depth_conv, 1, stride=1, padding=0),
                   ReLU((n_channels, N, N))]
        n_channels = depth_conv
        self.dims += 2 * [(n_channels, N, N)]

    if pool:
        layers += [GlobalAvgPool2d()]
        self.dims += 2 * [(n_channels, 1, 1)]
        N = 1

    layers += [Flatten()]
    N = n_channels * N ** 2
    self.dims += [(N,)]

    for width in linear_sizes:
        if width == 0:
            continue
        layers += [Linear(int(N), int(width)), ReLU(width)]
        N = width
        self.dims += 2 * [(N,)]

    layers += [Linear(N, n_class)]
    self.dims += [(n_class,)]

    self.blocks = Sequential(*layers)
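# Hedged usage sketch (assumptions: myNet inherits the SeqNet forward defined below; the
# widths, kernel sizes and dataset string are illustrative, not a configuration from the source).
import torch

device = torch.device("cpu")
net = myNet(device, "cifar10", conv_widths=[2, 4], kernel_sizes=[3, 3],
            paddings=[1, 1], strides=[2, 2], linear_sizes=[200])
out = net(torch.randn(2, 3, 32, 32))
print(out.shape)  # expected: torch.Size([2, 10])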
def __init__(self, device, args, dataset, trunk_net, input_size, input_channel, n_class, n_branches,
             gate_type, branch_net_names, gate_net_names, evalFn, lossFn):
    super(MyDeepTrunkNet, self).__init__()
    self.dataset = dataset
    self.input_size = input_size
    self.input_channel = input_channel
    self.n_class = n_class
    self.gate_type = gate_type
    self.n_branches = n_branches
    self.trunk_net = trunk_net
    self.evalFn = evalFn
    self.lossFn = lossFn

    assert gate_type in ["entropy", "net"], f"Unknown gate mode: {gate_type:s}"

    self.exit_ids = [-1] + list(range(n_branches))
    self.threshold = {exit_idx: args.gate_threshold for exit_idx in self.exit_ids[1:]}
    self.gate_nets = {}
    self.branch_nets = {}

    if len(branch_net_names) != n_branches:
        print("Number of branches does not match branch net names")
        branch_net_names = n_branches * branch_net_names[0:1]
    if gate_net_names is None:
        gate_net_names = branch_net_names
    elif len(gate_net_names) != n_branches:
        print("Number of branches does not match gate net names")
        gate_net_names = n_branches * gate_net_names[0:1]
    if args.load_branch_model is not None and len(args.load_branch_model) != n_branches:
        args.load_branch_model = n_branches * args.load_branch_model[0:1]
    if args.load_gate_model is not None and len(args.load_gate_model) != n_branches:
        args.load_gate_model = n_branches * args.load_gate_model[0:1]

    for i, branch_net_name in zip(range(n_branches), branch_net_names):
        exit_idx = self.exit_ids[i + 1]
        self.branch_nets[exit_idx] = get_net(device, dataset, branch_net_name, input_size, input_channel, n_class,
                                             load_model=None if args.load_branch_model is None else args.load_branch_model[i],
                                             net_dim=args.cert_net_dim)

        if gate_type == "net":
            self.gate_nets[exit_idx] = get_net(device, dataset, gate_net_names[i], input_size, input_channel, 1,
                                               load_model=None if args.load_gate_model is None else args.load_gate_model[i],
                                               net_dim=args.cert_net_dim)
        else:
            self.gate_nets[exit_idx] = SeqNet(Sequential(*[*self.branch_nets[exit_idx].blocks,
                                                           Entropy(n_class, low_mem=True, neg=True)]))
            self.gate_nets[exit_idx].determine_dims(torch.randn((2, input_channel, input_size, input_size),
                                                                dtype=torch.float).to(device))
        init_slopes(self.gate_nets[exit_idx], device, trainable=False)
        self.add_module("gateNet_{}".format(exit_idx), self.gate_nets[exit_idx])
        self.add_module("branchNet_{}".format(exit_idx), self.branch_nets[exit_idx])

    if args.load_model is not None:
        old_state = self.state_dict()
        load_state = torch.load(args.load_model)

        if args.cert_net_dim is not None and not ("gateNet_0.blocks.layers.1.mean" in load_state.keys()):
            # Only change keys if loading from a non mixed resolution to mixed resolution
            new_dict = {}
            for k in load_state.keys():
                if k.startswith("trunk"):
                    new_k = k
                else:
                    k_match = re.match(r"(^.*\.layers\.)([0-9]+)(\..*$)", k)
                    new_k = "%s%d%s" % (k_match.group(1), int(k_match.group(2)) + 1, k_match.group(3))
                new_dict[new_k] = load_state[k]
            load_state.update(new_dict)

        # LiRPA requires parameters to have zero batch dimension. This makes old models compatible.
        for k, v in load_state.items():
            if k.endswith("mean") or k.endswith("sigma"):
                if k in old_state:
                    load_state.update({k: v.reshape(old_state[k].shape)})

        old_state.update({k: v.view(old_state[k].shape) for k, v in load_state.items()
                          if k in old_state and (
                              (k.startswith("trunk") and args.load_trunk_model is None)
                              or (k.startswith("gate") and args.load_gate_model is None)
                              or (k.startswith("branch") and args.load_branch_model is None))})
        missing_keys, extra_keys = self.load_state_dict(old_state, strict=False)
        assert len([x for x in missing_keys if "gateNet" in x or "branchNet" in x]) == 0
        print("Whole model loaded from %s" % args.load_model)

    ## Trunk and branch nets have to be loaded after the whole model
    if args.load_trunk_model is not None:
        load_net_state(self.trunk_net, args.load_trunk_model)

    if (args.load_model is not None or args.load_gate_model is not None) and args.gate_feature_extraction is not None:
        for i, net in enumerate(self.gate_nets.values()):
            extraction_layer = [ii for ii in range(len(net.blocks)) if isinstance(net.blocks[ii], Linear)]
            extraction_layer = extraction_layer[-min(len(extraction_layer), args.gate_feature_extraction)]
            net.freeze(extraction_layer - 1)

    self.trunk_cnet = None
    self.gate_cnets = {k: None for k in self.gate_nets.keys()}
    self.branch_cnets = {k: None for k in self.branch_nets.keys()}
plt.plot(loss)
plt.show()

ret, _ = model.forward(x_test_1D)
correct = 0
for i in range(ret.shape[0]):
    if np.argmax(y_test[i]) == np.argmax(ret[i]):
        correct += 1
print(correct / ret.shape[0])
'''

if __name__ == '__main__':
    # simple linear data: y = w1*x1 + w2*x2 + ... wi*xi + b
    in_features = 5
    num_samples = 1000
    X = np.random.randn(num_samples, in_features)
    W = np.random.randn(in_features, 1)
    B = np.random.randn(1)
    Y = X @ W + B + 0.01 * np.random.randn(num_samples, 1)

    m = Linear(in_features, 1)
    model = Sequential(m)
    loss = Learner(model, mse_loss, SGDOptimizer(lr=0.01)).fit(X, Y, epochs=100, bs=100)
    plt.plot(loss)
    plt.show()
def __init__(self, device, dataset, n_blocks, n_class=10, input_size=32, input_channel=3, block='basic',
             in_planes=32, net_dim=None, widen_factor=1, pooling="global"):
    super(MyResnet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
    if block == 'basic':
        self.res_block = BasicBlock
    elif block == 'preact':
        self.res_block = PreActBlock
    elif block == 'wide':
        self.res_block = WideBlock
    elif block == 'fixup':
        self.res_block = FixupBasicBlock
    else:
        assert False
    self.n_layers = sum(n_blocks)
    mean, sigma = get_mean_sigma(device, dataset)
    dim = input_size
    k = widen_factor

    layers = [Normalization(mean, sigma),
              Conv2d(input_channel, in_planes, kernel_size=3, stride=1, padding=1, bias=(block == "wide"), dim=dim)]

    if not block == "wide":
        layers += [Bias() if block == 'fixup' else BatchNorm2d(in_planes),
                   ReLU((in_planes, input_size, input_size))]

    strides = [1, 2] + ([2] if len(n_blocks) > 2 else []) + [1] * max(0, (len(n_blocks) - 3))

    n_filters = in_planes
    for n_block, n_stride in zip(n_blocks, strides):
        if n_stride > 1:
            n_filters *= 2
        dim, block_layers = self.get_block(in_planes, n_filters * k, n_block, n_stride, dim=dim)
        in_planes = n_filters * k
        layers += [block_layers]

    if block == 'fixup':
        layers += [Bias()]
    else:
        layers += [BatchNorm2d(n_filters * k)]
    if block == "wide":
        layers += [ReLU((n_filters * k, dim, dim))]

    if pooling == "global":
        layers += [GlobalAvgPool2d()]
        N = n_filters * k
    elif pooling == "None":
        # old networks are missing the pooling layer and won't load otherwise
        N = n_filters * dim * dim * k
    elif isinstance(pooling, int):
        layers += [AvgPool2d(pooling)]
        dim = dim // pooling
        N = n_filters * dim * dim * k

    layers += [Flatten(), ReLU(N)]

    if block == 'fixup':
        layers += [Bias()]

    layers += [Linear(N, n_class)]

    self.blocks = Sequential(*layers)

    # Fixup initialization
    if block == 'fixup':
        for m in self.modules():
            if isinstance(m, FixupBasicBlock):
                conv1, conv2 = m.residual[1].conv, m.residual[5].conv
                nn.init.normal_(conv1.weight, mean=0,
                                std=np.sqrt(2 / (conv1.weight.shape[0] * np.prod(conv1.weight.shape[2:]))) * self.n_layers ** (-0.5))
                nn.init.constant_(conv2.weight, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 0)
                nn.init.constant_(m.bias, 0)
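# Hedged usage sketch (assumptions: MyResnet inherits the SeqNet forward defined below and a
# BasicBlock implementation is available; block counts and dataset string are illustrative).
import torch

device = torch.device("cpu")
resnet = MyResnet(device, "cifar10", n_blocks=[2, 2, 2], block='basic', in_planes=32)
out = resnet(torch.randn(2, 3, 32, 32))
print(out.shape)  # expected: torch.Size([2, 10])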
class SeqNet(nn.Module):
    def __init__(self, blocks=None, net_dim=None):
        super(SeqNet, self).__init__()
        self.is_double = False
        self.dims = []
        self.net_dim = net_dim
        self.transform = None if net_dim is None else Upsample(size=self.net_dim, mode="bilinear",
                                                               consolidate_errors=True)
        self.blocks = [] if self.transform is None else [self.transform]
        if blocks is not None:
            self.blocks += [*blocks]
        self.blocks = Sequential(*self.blocks)

    def forward(self, x, residual=None, input_idx=-1):
        if isinstance(x, torch.Tensor) and self.is_double:
            x = x.to(dtype=torch.float64)
        x = self.forward_between(input_idx + 1, None, x, residual)
        return x

    def verify(self, inputs, targets, eps, domain, threshold_min=0, input_min=0, input_max=1, return_abs=False):
        n_class = self.blocks[-1].out_features
        device = inputs.device

        if self.transform is not None and self.transform.consolidate_errors:
            abs_input = HybridZonotope.construct_from_noise(inputs, eps, "box", data_range=(input_min, input_max))
            abs_input.domain = domain
        else:
            abs_input = HybridZonotope.construct_from_noise(inputs, eps, domain, data_range=(input_min, input_max))

        if domain in ["box", "hbox"] and n_class > 1:
            C = torch.stack([self.get_c_mat(n_class, x, device) for x in targets], dim=0)
            I = (~(targets.unsqueeze(1) == torch.arange(n_class, dtype=torch.float32, device=device).unsqueeze(0)))
            abs_outputs = self.forward_between(0, len(self.blocks) - 1, abs_input)
            abs_outputs = abs_outputs.linear(self.blocks[-1].linear.weight, self.blocks[-1].linear.bias, C)
            threshold_n = abs_outputs.concretize()[0][I].view(targets.size(0), n_class - 1).min(dim=1)[0]
            ver_corr = threshold_n > threshold_min
            ver = ver_corr
        else:
            abs_outputs = self(abs_input)
            ver, ver_corr, threshold_n = abs_outputs.verify(targets, threshold_min=threshold_min,
                                                            corr_only=abs_outputs.size(-1) > 10)

        if return_abs:
            return ver, ver_corr, threshold_n, abs_outputs
        else:
            return ver, ver_corr, threshold_n

    @staticmethod
    def get_c_mat(n_class, target, device):
        c = torch.eye(n_class, dtype=torch.float32, device=device)[target].unsqueeze(dim=0) \
            - torch.eye(n_class, dtype=torch.float32, device=device)
        return c

    def freeze(self, layer_idx):
        for i in range(layer_idx + 1):
            self.blocks[i].requires_grad_(False)
            if isinstance(self.blocks[i], BatchNorm1d) or isinstance(self.blocks[i], BatchNorm2d):
                self.blocks[i].training = False

    def reset_bounds(self):
        for block in self.blocks:
            block.reset_bounds()

    def to_double(self):
        self.is_double = True
        for param_name, param_value in self.named_parameters():
            param_value.data = param_value.data.to(dtype=torch.float64)

    def forward_between(self, i_from, i_to, x, residual=None):
        """Forward from i_from (inclusive) to i_to (exclusive)."""
        if i_to is None:
            i_to = len(self.blocks)
        if i_from is None:
            i_from = 0
        x = self.blocks.forward_between(i_from, i_to, x, residual=residual)
        return x

    def forward_until(self, i, x):
        """Forward until layer i (inclusive)."""
        x = self.forward_between(None, i + 1, x, residual=None)
        return x

    def forward_from(self, i, x):
        """Forward from layer i (exclusive)."""
        x = self.forward_between(i + 1, None, x, residual=None)
        return x

    def temp_freeze(self):
        param_state = {}
        for name, param in self.named_parameters():
            param_state[name] = param.requires_grad
            param.requires_grad = False
        return param_state

    def get_freeze_state(self):
        param_state = {}
        for name, param in self.named_parameters():
            param_state[name] = param.requires_grad
        return param_state

    def restore_freeze(self, param_state):
        for name, param in self.named_parameters():
            param.requires_grad = param_state[name]

    def determine_dims(self, x, force=False, blocks=None):
        if len(self.dims) > 0 and not force:
            return
        if blocks is None:
            blocks = self.blocks
        for layer in blocks:
            if hasattr(layer, "layers"):
                for sub_layers in layer.layers:
                    sub_layers = sub_layers if not hasattr(sub_layers, "residual") else sub_layers.residual
                    x = self.determine_dims(x, force=True, blocks=sub_layers)
            else:
                x = layer(x)
                self.dims += [tuple(x.size()[1:])]
        return x

    def get_subNet_blocks(self, startBlock=0, endBlock=None):
        if endBlock is None:
            endBlock = len(self.blocks)
        assert endBlock <= len(self.blocks)
        return self.blocks[startBlock:endBlock]
from layers import Sequential, Dense
import numpy as np
from utils import load_ionosphere

(X_train, y_train), (X_test, y_test) = load_ionosphere(0.7, normalize=True, shuffled=True)

model = Sequential()
model.add(Dense(15, input_shape=(X_train.shape[1], )))
model.add(Dense(15))
model.add(Dense(y_train.shape[1]))

model.fit(X_train, y_train, reg_factor=0.0, epochs=1000, learning_rate=0.1, batch_size=32,
          validation_data=(X_test, y_test), verbose=True)

model.plot_error()
# model.plot_train_error()
import numpy as np

from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward()
    optim.step()
    print(loss)