def debug_unsafe_point(prop: AcasProp):
    """ Sample concrete points for the given property and report the ratio of safe outputs,
        printing the violation distances of any unsafe points.
        (`dom`, `device`, and `sample_size` are module-level globals here.)
    """
    fpath = prop.applicable_net_paths()[0]
    print('Picked nnet file:', fpath)
    net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
    lb, ub = prop.lbub()

    net = net.to(device)
    lb, ub = lb.to(device), ub.to(device)
    lb = net.normalize_inputs(lb, bound_mins, bound_maxs)
    ub = net.normalize_inputs(ub, bound_mins, bound_maxs)

    for i in range(20):  # run 20 times
        sampled_pts = sample_points(lb, ub, sample_size)
        with torch.no_grad():
            pt_outs = net(sampled_pts)
            # wrap concrete outputs as degenerate intervals so safe_dist() applies
            pt_outs = dom.Ele.by_intvl(pt_outs, pt_outs)
            safe_dist = prop.safe_dist(pt_outs)

        safe_bits = safe_dist == 0.
        safe_ratio = len(safe_bits.nonzero(as_tuple=False)) / float(sample_size)
        print('Iter', i, ': Safe ratio for point outputs:', safe_ratio)

        if safe_ratio != 1.0:
            spurious_bits = ~safe_bits
            spurious_dist = safe_dist[spurious_bits]
            print('\t', f'spurious dists: [ {spurious_dist.min()} ~ {spurious_dist.max()} ]')
            print('\t', 'spurious dists:', spurious_dist)
    return

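# For reference: a minimal sketch of what `sample_points` is assumed to do, based on its
# usage in this file (uniform sampling within per-row [lb, ub] boxes, with the K copies
# stacked in blocks of N rows, cf. test_sample_points() below). This is a hypothetical
# reimplementation for illustration, not the project's actual code.
def _sample_points_sketch(lb: Tensor, ub: Tensor, K: int) -> Tensor:
    """ Uniformly sample K points inside each of the N boxes [lb, ub].
    :param lb: lower bounds, of shape <N x D>
    :param ub: upper bounds, of shape <N x D>
    :return: sampled points of shape <N*K x D>, grouped in K blocks of N rows
    """
    assert (lb <= ub).all()
    base = lb.repeat(K, 1)                       # <N*K x D>
    width = (ub - lb).repeat(K, 1)
    return base + torch.rand_like(base) * width  # uniform within each box
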
def test_abstraction_soundness():
    """ Validate that networks and inputs are abstracted correctly using the implemented
        abstract domains. However, it turns out that the usage of MaxPool1d makes it rather
        easy to trigger unsound violations. See more details in tests/test_maxpool_soundness.py.
    """
    all_fpaths = list(Path(c.COLLISION_DIR).glob('*.rlv'))
    dom = DeeppolyDom()
    net = c.CollisionMPNet.load(all_fpaths[0], dom, device)  # all nets are the same, just use one

    unstable_cnts = 0
    print('Evaluating abstraction correctness for the saved network:')
    for fpath in all_fpaths:
        prop = c.CollisionProp.load(fpath, dom)
        lb, ub = prop.lbub(device)
        pts = sample_points(lb, ub, 100)

        out_conc = net(pts)
        e = dom.Ele.by_intvl(lb, ub)
        out_lb, out_ub = net(e).gamma()

        # soundness requires out_lb <= out_conc <= out_ub for every sampled point
        threshold = 1e-5  # allow some numerical error
        diff_lb = out_lb - out_conc
        diff_ub = out_conc - out_ub
        # print(diff_lb.max())
        # print(diff_ub.max())
        if diff_lb.max() >= threshold or diff_ub.max() >= threshold:
            unstable_cnts += 1
            print(f'Network {fpath.absolute().name} found unsound cases (due to numerical instability because of MaxPool?)')
            # print(diff_lb.max(), diff_ub.max())
    print(f'-- Eventually, {unstable_cnts} networks seem to be unstable.')
    return

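# A toy standalone illustration of the soundness invariant checked above, using plain
# interval arithmetic on a single linear layer. All names below are local to this sketch;
# the actual domains behind `dom` (e.g. DeepPoly) are more precise than raw intervals.
def _demo_interval_soundness():
    w = torch.tensor([[1.0, -2.0]])   # 1 output, 2 inputs
    b = torch.tensor([0.5])
    lb = torch.tensor([[0.0, 0.0]])
    ub = torch.tensor([[1.0, 1.0]])

    # interval propagation through y = x @ w.T + b: split w by sign
    w_pos, w_neg = w.clamp(min=0.), w.clamp(max=0.)
    out_lb = lb @ w_pos.t() + ub @ w_neg.t() + b
    out_ub = ub @ w_pos.t() + lb @ w_neg.t() + b

    # every concrete point in the input box must land inside [out_lb, out_ub]
    pts = lb + torch.rand(100, 2) * (ub - lb)
    out_conc = pts @ w.t() + b
    assert (out_lb <= out_conc).all() and (out_conc <= out_ub).all()
    return
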
def sample_original_data(dom: AbsDom, trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Sample the data from every trained network, serving as training and test sets.
    :param dom: the data preparation does not use abstract domains, although the AcasNet constructor requires one
    """
    for nid in AcasNetID.all_ids():
        fpath = nid.fpath()
        print('\rSampling for network', nid, 'picked nnet file:', fpath, end='')
        net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
        net = net.to(device)

        in_lbs = torch.tensor([bound_mins], device=device)
        in_ubs = torch.tensor([bound_maxs], device=device)
        in_lbs = net.normalize_inputs(in_lbs, bound_mins, bound_maxs)
        in_ubs = net.normalize_inputs(in_ubs, bound_mins, bound_maxs)

        inputs = sample_points(in_lbs, in_ubs, K=trainsize + testsize)
        with torch.no_grad():
            outputs = net(inputs)
            labels = (outputs * -1).argmax(dim=-1)  # in ACAS Xu, the minimum score is the prediction

        # # it seems the prediction scores from the original ACAS Xu network are very close
        # softmax = torch.nn.Softmax(dim=1)
        # loss = torch.nn.CrossEntropyLoss()
        # print(loss(softmax(outputs * -1), labels))

        train_inputs, test_inputs = inputs[:trainsize, ...], inputs[trainsize:, ...]
        train_labels, test_labels = labels[:trainsize, ...], labels[trainsize:, ...]

        torch.save((train_inputs, train_labels), Path(dir, f'{str(nid)}-orig-train.pt'))
        torch.save((test_inputs, test_labels), Path(dir, f'{str(nid)}-orig-test.pt'))
        print('\rSampled for network', nid, 'picked nnet file:', fpath)
    return

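# Sanity note on the labeling convention above: in ACAS Xu the advisory with the *minimum*
# score is the prediction, so negating the scores turns argmax into argmin; the two forms
# are equivalent (toy scores for illustration only):
def _demo_min_score_labels():
    outputs = torch.tensor([[0.3, -1.2, 0.7],
                            [2.0, 0.1, -0.5]])
    assert ((outputs * -1).argmax(dim=-1) == outputs.argmin(dim=-1)).all()
    return
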
def _sample_check(self, lb: Tensor, ub: Tensor, extra: Optional[Tensor],
                  forward_fn: nn.Module) -> Optional[Tensor]:
    """ Sample points from the given input abstractions and check their safety.
        Return the counterexample points if any are found, otherwise None.
    """
    # just sample 1 point per abstraction for now; K > 1 also works if `extra` is duplicated accordingly
    pts = sample_points(lb, ub, K=1)
    outs = forward_fn(pts)
    viol_dist = self.prop.viol_dist_conc(outs, extra)
    viol_bits = viol_dist <= 0.
    if viol_bits.any():
        cex = pts[viol_bits]
        return cex
    return None

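# How the boolean mask above extracts counterexamples: rows whose violation distance is
# <= 0 are selected from the sampled points (toy tensors, for illustration only):
def _demo_cex_masking():
    pts = torch.tensor([[0.1], [0.5], [0.9]])
    viol_dist = torch.tensor([0.3, 0.0, -0.2])  # <= 0 means the property is violated
    cex = pts[viol_dist <= 0.]
    assert torch.equal(cex, torch.tensor([[0.5], [0.9]]))
    return
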
def test_sample_points(nrow=10, ncol=10, K=1000):
    """ Validate that sampled points are within the given ranges. """
    t1t2 = torch.stack((torch.randn(nrow, ncol), torch.randn(nrow, ncol)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)

    outs = sample_points(lb, ub, K)
    assert len(outs) == nrow * K
    for i in range(nrow * K):
        row = i % nrow  # sampled points are grouped in K blocks of nrow rows each
        for j in range(ncol):
            assert lb[row][j] <= outs[i][j] <= ub[row][j]
    return

def test_sample_violation():
    """ It suffices to generate such violations by sampling.
        Having both MaxPool1d and FC2 is necessary to reproduce the bug.
        FC1 must have bias to easily reproduce the bug, while FC2 may have no bias.
        Eps = 1e-4 is the maximal magnitude that reproduces the bug, because the weights
        and input bounds are initialized small.
    """
    dom = DeeppolyDom()

    err_eps = 1e-4
    in_neurons = 1
    fc1_neurons = 3
    kernel_size, stride = 2, 1
    out_neurons = 1
    lb = torch.tensor([[0.1]])
    ub = torch.tensor([[0.12]])

    # fixed by the MaxPool1d output-size formula: L_out = (L_in - kernel_size) / stride + 1
    fc2_neurons = (fc1_neurons - kernel_size) / stride + 1
    assert int(fc2_neurons) == fc2_neurons
    fc2_neurons = int(fc2_neurons)  # if using MaxPool1d
    # fc2_neurons = fc1_neurons  # if not using MaxPool1d

    fc1 = dom.Linear(in_neurons, fc1_neurons, bias=True)
    relu = dom.ReLU()
    mp = dom.MaxPool1d(kernel_size=kernel_size, stride=stride)
    fc2 = dom.Linear(fc2_neurons, out_neurons, bias=False)

    def forward(x):
        x = fc1(x)
        x = relu(x)
        x = x.unsqueeze(dim=1)  # MaxPool1d needs a channel dim
        x = mp(x)
        x = x.squeeze(dim=1)
        x = fc2(x)
        return x

    def reset_params():
        fc1.reset_parameters()
        fc2.reset_parameters()
        return

    k = 0
    while True:
        k += 1
        reset_params()

        pts = sample_points(lb, ub, 10000)
        e = dom.Ele.by_intvl(lb, ub)

        out_conc = forward(pts)
        out_lb, out_ub = forward(e).gamma()

        if (out_lb <= out_conc + err_eps).all():
            continue  # sound on this batch, try another random initialization

        print(f'After {k} resets')
        print('LB <= conc?', (out_lb <= out_conc + err_eps).all())
        print('LB <= conc? detail', out_lb <= out_conc + err_eps)

        bits = out_conc + err_eps <= out_lb
        bits = bits.any(dim=1)  # any dimension violation is sufficient
        idxs = bits.nonzero().squeeze(dim=1)
        idx = idxs[0]  # just pick the 1st one to debug

        viol_in = pts[[idx]]
        viol_out = out_conc[[idx]]
        print('conc in:', viol_in.squeeze().item())
        print('out lb:', out_lb.squeeze().item())
        print('out ub:', out_ub.squeeze().item())
        print('conc out:', viol_out.squeeze().item())

        torch.save([fc1, fc2, viol_in], 'error_ctx.pt')
        break
    return

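# Why forward() needs the unsqueeze/squeeze pair: nn.MaxPool1d consumes
# <batch x channels x length>, while the FC layers produce <batch x features>.
# A plain-PyTorch sketch with the same sizes as the test above:
def _demo_maxpool_shapes():
    x = torch.randn(5, 3)                      # <batch=5 x features=3> from FC1 + ReLU
    mp = nn.MaxPool1d(kernel_size=2, stride=1)
    y = mp(x.unsqueeze(dim=1))                 # <5 x 1 x 3> -> <5 x 1 x 2>
    y = y.squeeze(dim=1)                       # back to <5 x 2> for FC2
    assert y.shape == (5, 2)                   # matches fc2_neurons = (3 - 2) / 1 + 1 = 2
    return
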
def sample_balanced_data(dom: AbsDom, trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Sample the data from every trained network, serving as training and test sets.
        The default original dataset is very imbalanced, so we instead sample a balanced
        dataset in which every category has exactly the same amount of data.
        Note that this applies to N_{1,1} ~ N_{1,6} only. All other networks lack data for
        certain categories; some categories receive < 0.1% of all sampled points.
    """
    assert trainsize % len(AcasOut) == 0 and testsize % len(AcasOut) == 0
    for nid in AcasNetID.balanced_ids():
        fpath = nid.fpath()
        print('Sampling for network', nid, 'picked nnet file:', fpath)
        net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
        net = net.to(device)

        in_lbs = torch.tensor([bound_mins], device=device)
        in_ubs = torch.tensor([bound_maxs], device=device)
        in_lbs = net.normalize_inputs(in_lbs, bound_mins, bound_maxs)
        in_ubs = net.normalize_inputs(in_ubs, bound_mins, bound_maxs)

        res_inputs = [torch.tensor([]) for _ in range(len(AcasOut))]
        res_labels = [torch.tensor([]).long() for _ in range(len(AcasOut))]

        trainsize_cat = int(trainsize / len(AcasOut))
        testsize_cat = int(testsize / len(AcasOut))
        allsize_cat = trainsize_cat + testsize_cat

        # keep sampling until every category has collected its quota
        while True:
            inputs = sample_points(in_lbs, in_ubs, K=trainsize + testsize)
            with torch.no_grad():
                outputs = net(inputs)
                labels = (outputs * -1).argmax(dim=-1)  # in ACAS Xu, the minimum score is the prediction

            all_filled = True
            for category in AcasOut:
                if len(res_inputs[category]) >= allsize_cat:
                    continue

                all_filled = False
                idxs = labels == category
                cat_inputs, cat_labels = inputs[idxs], labels[idxs]
                res_inputs[category] = torch.cat((res_inputs[category], cat_inputs), dim=0)
                res_labels[category] = torch.cat((res_labels[category], cat_labels), dim=0)

            if all_filled:
                break

        empty = torch.tensor([])
        train_inputs, train_labels = empty, empty.long()
        test_inputs, test_labels = empty, empty.long()

        for category in AcasOut:
            cat_inputs, cat_labels = res_inputs[category], res_labels[category]
            train_inputs = torch.cat((train_inputs, cat_inputs[:trainsize_cat, ...]), dim=0)
            train_labels = torch.cat((train_labels, cat_labels[:trainsize_cat, ...]), dim=0)
            test_inputs = torch.cat((test_inputs, cat_inputs[trainsize_cat:trainsize_cat + testsize_cat, ...]), dim=0)
            test_labels = torch.cat((test_labels, cat_labels[trainsize_cat:trainsize_cat + testsize_cat, ...]), dim=0)

        # # it seems the prediction scores from the original ACAS Xu network are very close
        # softmax = torch.nn.Softmax(dim=1)
        # loss = torch.nn.CrossEntropyLoss()
        # print(loss(softmax(outputs * -1), labels))

        with open(Path(dir, f'{str(nid)}-normed-train.pt'), 'wb') as f:
            torch.save((train_inputs, train_labels), f)
        with open(Path(dir, f'{str(nid)}-normed-test.pt'), 'wb') as f:
            torch.save((test_inputs, test_labels), f)
    return

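# The per-category quota arithmetic above, spelled out under the assumption that
# len(AcasOut) == 5 (the five ACAS Xu advisories):
def _demo_balanced_quota():
    trainsize, testsize, ncats = 10000, 5000, 5
    trainsize_cat = trainsize // ncats           # 2000 training points per advisory
    testsize_cat = testsize // ncats             # 1000 test points per advisory
    allsize_cat = trainsize_cat + testsize_cat   # collect 3000 points per advisory in total
    assert (trainsize_cat, testsize_cat, allsize_cat) == (2000, 1000, 3000)
    return
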
def sample_balanced_data_for(dom: AbsDom, nid: AcasNetID, ignore_idxs: List[int],
                             trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Sample balanced data for networks whose original data is severely imbalanced:
        the categories in `ignore_idxs` get no quota and are excluded from balancing.
    """
    assert len(ignore_idxs) != 0, 'Go to the other function.'
    assert all([0 <= i < len(AcasOut) for i in ignore_idxs])
    print('Sampling for', nid, 'ignoring output category', ignore_idxs)

    ncats = len(AcasOut) - len(ignore_idxs)
    train_percat = int(trainsize / ncats)
    test_percat = int(testsize / ncats)

    def trainsize_of(i: AcasOut):
        return 0 if i in ignore_idxs else train_percat

    def testsize_of(i: AcasOut):
        return 0 if i in ignore_idxs else test_percat

    fpath = nid.fpath()
    print('Sampling for network', nid, 'picked nnet file:', fpath)
    net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
    net = net.to(device)

    in_lbs = torch.tensor([bound_mins], device=device)
    in_ubs = torch.tensor([bound_maxs], device=device)
    in_lbs = net.normalize_inputs(in_lbs, bound_mins, bound_maxs)
    in_ubs = net.normalize_inputs(in_ubs, bound_mins, bound_maxs)

    res_inputs = [torch.tensor([]) for _ in range(len(AcasOut))]
    res_labels = [torch.tensor([]).long() for _ in range(len(AcasOut))]

    # keep sampling until every non-ignored category has collected its quota
    while True:
        inputs = sample_points(in_lbs, in_ubs, K=trainsize + testsize)
        with torch.no_grad():
            outputs = net(inputs)
            labels = (outputs * -1).argmax(dim=-1)  # in ACAS Xu, the minimum score is the prediction

        filled_cnt = 0
        for category in AcasOut:
            if len(res_inputs[category]) >= trainsize_of(category) + testsize_of(category):
                filled_cnt += 1
                if category not in ignore_idxs:
                    continue  # quota reached; ignored categories (quota 0) still collect whatever shows up

            idxs = labels == category
            cat_inputs, cat_labels = inputs[idxs], labels[idxs]
            res_inputs[category] = torch.cat((res_inputs[category], cat_inputs), dim=0)
            res_labels[category] = torch.cat((res_labels[category], cat_labels), dim=0)

        if filled_cnt == len(AcasOut):
            break

    empty = torch.tensor([])
    train_inputs, train_labels = empty, empty.long()
    test_inputs, test_labels = empty, empty.long()

    for category in AcasOut:
        cat_inputs, cat_labels = res_inputs[category], res_labels[category]
        if category in ignore_idxs:
            # split the collected points by the global train/test ratio
            amount = len(cat_inputs)
            pivot = int(amount * trainsize / (trainsize + testsize))
            train_inputs = torch.cat((train_inputs, cat_inputs[:pivot, ...]), dim=0)
            train_labels = torch.cat((train_labels, cat_labels[:pivot, ...]), dim=0)
            test_inputs = torch.cat((test_inputs, cat_inputs[pivot:, ...]), dim=0)
            test_labels = torch.cat((test_labels, cat_labels[pivot:, ...]), dim=0)
        else:
            trainsize_cat = trainsize_of(category)
            testsize_cat = testsize_of(category)
            train_inputs = torch.cat((train_inputs, cat_inputs[:trainsize_cat, ...]), dim=0)
            train_labels = torch.cat((train_labels, cat_labels[:trainsize_cat, ...]), dim=0)
            test_inputs = torch.cat((test_inputs, cat_inputs[trainsize_cat:trainsize_cat + testsize_cat, ...]), dim=0)
            test_labels = torch.cat((test_labels, cat_labels[trainsize_cat:trainsize_cat + testsize_cat, ...]), dim=0)

    # # it seems the prediction scores from the original ACAS Xu network are very close
    # softmax = torch.nn.Softmax(dim=1)
    # loss = torch.nn.CrossEntropyLoss()
    # print(loss(softmax(outputs * -1), labels))

    with open(Path(dir, f'{str(nid)}-normed-train.pt'), 'wb') as f:
        torch.save((train_inputs, train_labels), f)
    with open(Path(dir, f'{str(nid)}-normed-test.pt'), 'wb') as f:
        torch.save((test_inputs, test_labels), f)
    return

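# The proportional split used for ignored categories above: whatever points they happened
# to collect are divided by the global train/test ratio (the count below is hypothetical):
def _demo_ignored_split():
    trainsize, testsize = 10000, 5000
    amount = 123                                           # points collected for an ignored category
    pivot = int(amount * trainsize / (trainsize + testsize))
    assert pivot == 82                                     # ~2/3 of 123 goes to the training split
    return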