def inspect_data_for(dom: AbsDom, nid: AcasNetID, dir: str = ACAS_DIR, normed: bool = True):
    """ Load the saved train / test samples of one network and sanity-check them.

    Prints the name of the conjoined properties applicable to the network and the
    number of samples per output category, then asserts that every stored label
    equals the network's own prediction on the corresponding stored input.

    :param dom: abstract domain passed to the property lookup and to the network loader
    :param nid: ID of the network whose samples are inspected
    :param dir: directory holding the saved `.pt` sample files
    :param normed: read the `-normed-` files if True, the `-orig-` files otherwise
    """
    fpath = nid.fpath()
    print('Loading sampled data for network', nid, 'picked nnet file:', fpath)
    props = AndProp(nid.applicable_props(dom))
    print('Shall satisfy', props.name)

    # the input bounds returned by the loader are not needed for inspection
    net, _, _ = AcasNet.load_nnet(fpath, dom)
    net = net.to(device)

    if normed:
        mid = 'normed'
    else:
        mid = 'orig'
    # second positional argument of torch.load is the map_location
    train_inputs, train_labels = torch.load(Path(dir, f'{str(nid)}-{mid}-train.pt'), device)
    test_inputs, test_labels = torch.load(Path(dir, f'{str(nid)}-{mid}-test.pt'), device)

    assert len(train_inputs) == len(train_labels)
    assert len(test_inputs) == len(test_labels)
    print(f'Loaded {len(train_inputs)} training samples, {len(test_inputs)} test samples.')

    for category in AcasOut:
        # occurrences of this category over both splits
        cnt = sum((labels == category).sum().item() for labels in (train_labels, test_labels))
        print(f'Category {category} has {cnt} samples.')
    print()

    with torch.no_grad():
        for inputs, labels in ((train_inputs, train_labels), (test_inputs, test_labels)):
            # because in ACAS Xu, minimum score is the prediction
            predicted = (net(inputs) * -1).argmax(dim=-1)
            assert torch.equal(labels, predicted)
def test_vnn_results():
    """ Spot-check the results loaded by `VNN20Info` against a few hand-copied entries. """
    info = VNN20Info()

    def _check(prop: int, net_id: AcasNetID, for_all: bool, label: str, times: List):
        """ Query one entry and compare its label and its times with the expected ones. """
        got_label, got_times, _ = info.query(prop, net_id, for_all)
        assert got_label == label
        assert len(got_times) == len(times)
        for got, want in zip(got_times, times):
            # compare in the type of the expected value; anything else is compared as-is
            if isinstance(want, float):
                got = float(got)
            elif isinstance(want, int):
                got = int(got)
            assert got == want

    # (property, network id pair, for_all, expected label, expected times)
    cases = [
        (1, (1, 1), True, 'UNSAT', [0.51, '-', 18.58, '-', 1.32, 0.62]),
        (2, (3, 2), True, 'SAT', [0.23, '-', 18.5, '-', 0.82, 8.02]),
        (4, (1, 7), True, 'SAT', [0.16, 2.03, 164.72, 0.07, 0.2982, 1.44]),
        (4, (3, 2), True, 'UNSAT', [0.24, 25.68, 179.99, 0.24, 65.56, 0.96]),
        (10, (4, 5), True, 'UNSAT', [0.7, '-', '-', '-', 130.63, 2.07]),
        (1, (4, 6), False, 'UNSAT', [5.3, '-', 3191.34, '-', 179.98, 5.38]),
        (2, (3, 3), False, 'UNSAT', [7.46, '-', '-', '-', 294.53, 167]),
        (9, (3, 3), False, 'UNSAT', [2.52, 13326.19, 1121.38, '-', 1795.17, 9.21]),
    ]
    for prop, id_pair, for_all, label, times in cases:
        _check(prop, AcasNetID(*id_pair), for_all, label, times)
def sample_original_data(dom: AbsDom, trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Draw labelled samples from every network and save them as train / test files.

    For each network ID, `trainsize + testsize` points are sampled between the
    network's input bounds (after `normalize_inputs`) and labelled with the
    network's own prediction. The first `trainsize` points go to
    `<nid>-orig-train.pt`, the remaining ones to `<nid>-orig-test.pt`.

    :param dom: the data preparation does not use abstraction domains, although
                loading an AcasNet requires one
    :param trainsize: number of leading samples stored as the training set
    :param testsize: number of trailing samples stored as the test set
    :param dir: directory the `.pt` files are written to
    """
    total = trainsize + testsize
    for nid in AcasNetID.all_ids():
        nnet_path = nid.fpath()
        print('\rSampling for network', nid, 'picked nnet file:', nnet_path, end='')

        net, bound_mins, bound_maxs = AcasNet.load_nnet(nnet_path, dom)
        net = net.to(device)

        # the loaded input bounds, as tensors on `device`, passed through the net's normalization
        raw_lbs = torch.tensor([bound_mins], device=device)
        raw_ubs = torch.tensor([bound_maxs], device=device)
        lbs = net.normalize_inputs(raw_lbs, bound_mins, bound_maxs)
        ubs = net.normalize_inputs(raw_ubs, bound_mins, bound_maxs)
        points = sample_points(lbs, ubs, K=total)

        with torch.no_grad():
            scores = net(points)
            # because in ACAS Xu, minimum score is the prediction
            preds = (scores * -1).argmax(dim=-1)

        # Original author's note: the prediction scores from the original ACAS Xu
        # networks seem to be very close to each other.

        train_pair = (points[:trainsize, ...], preds[:trainsize, ...])
        test_pair = (points[trainsize:, ...], preds[trainsize:, ...])
        torch.save(train_pair, Path(dir, f'{str(nid)}-orig-train.pt'))
        torch.save(test_pair, Path(dir, f'{str(nid)}-orig-test.pt'))
        print('\rSampled for network', nid, 'picked nnet file:', nnet_path)
def _verify(nid: acas.AcasNetID, all_props: AndProp, args: Namespace):
    """ Run a verifier on one ACAS network against the conjoined properties.

    `dom` and `device` are not parameters; they are read from the enclosing scope.

    :param nid: ID of the network to load and verify
    :param all_props: conjoined properties; supplies the input bounds and the bitmap
    :param args: `use_new` selects Cluster over Bisecter, `batch_size` is passed to `verify()`
    :return: whatever the selected verifier's `verify()` returns
    """
    net, bound_mins, bound_maxs = acas.AcasNet.load_nnet(nid.fpath(), dom, device)
    # logging.info(net)  # no need to print acas network here, all the same

    if args.use_new:
        verifier = Cluster(dom, all_props)
    else:
        verifier = Bisecter(dom, all_props)

    raw_lb, raw_ub = all_props.lbub(device)
    bitmap = all_props.bitmap(device)
    # bring the property bounds into the network's normalized input space
    norm_lb = net.normalize_inputs(raw_lb, bound_mins, bound_maxs)
    norm_ub = net.normalize_inputs(raw_ub, bound_mins, bound_maxs)

    return verifier.verify(norm_lb, norm_ub, bitmap, net, batch_size=args.batch_size)
def test_andprop_conjoin():
    """ Print every network's individual and conjoined properties for manual validation of AndProp. """
    dom = DeeppolyDom()

    def _go(nid):
        """ Dump the bounds of each applicable property of `nid`, then those of their conjunction. """
        props = nid.applicable_props(dom)
        ap = AndProp(props)

        print('-- For network', nid)
        for p in props:
            print('-- Has', p.name)
            p_lb, p_ub = p.lbub()
            print(' LB:', p_lb)
            print(' UB:', p_ub)

        lb, ub = ap.lbub()
        print('-- All conjoined,', ap.name)
        print(' LB:', lb)
        print(' UB:', ub)
        print(' Labels:', ap.labels)
        print('Cnt:', len(lb))

        for i in range(len(lb)):
            width = ub[i] - lb[i]
            print(' ', i, 'th piece, width:', width, f'area: {total_area(lb[[i]], ub[[i]]) :E}')
        print()

    # <1, 1> is tricky, as it has many props;
    # <1, 9> is special, as it is different from many others;
    # Many others have prop1, prop2, prop3, prop4 would generate 3 pieces, in which prop1 and prop2 merged.
    #
    # To look at a single network only:
    # _go(AcasNetID(1, 1))
    # _go(AcasNetID(1, 9))
    # exit(0)

    for nid in AcasNetID.all_ids():
        _go(nid)

    print('XL: Go manually check the outputs..')
def train_acas(nid: acas.AcasNetID, args: Namespace) -> Tuple[int, float, bool, float]:
    """ The almost completed skeleton of training ACAS networks using ART.

    Every pass of the outer loop first evaluates the current network: when
    abstractions are in use, the safety distances of all current abstractions are
    computed and the network counts as certified iff the largest distance is <= 0;
    the test-set accuracy is recorded as well. The loop ends once the network is
    certified and at least `args.min_epochs` epochs were trained, or once
    `args.max_epochs` is reached. Otherwise one more epoch is trained with Adam on
    concrete points (`args.accuracy_loss`) and / or abstractions (mean safety
    distance), optionally followed by a refinement of the abstractions.

    NOTE(review): the nesting of this function was reconstructed from a flattened
    source; confirm that the abstraction evaluation is meant to be guarded by
    `not args.no_abs`.
    NOTE(review): if both `args.no_pts` and `args.no_abs` are set, `nbatches` is
    never assigned before the batch loop -- presumably callers never set both.

    :param nid: ID of the network to load and train
    :param args: reads `dom`, `reset_params`, `no_pts`, `no_abs`, `no_refine`,
                 `refine_top_k`, `start_abs_cnt`, `max_abs_cnt`, `lr`, `scheduler_fn`,
                 `min_epochs`, `max_epochs`, `batch_size` and `accuracy_loss`
    :return: trained_epochs, train_time, certified, final accuracies
             (the last entry is the test accuracy of the final evaluation only)
    """
    fpath = nid.fpath()
    net, bound_mins, bound_maxs = acas.AcasNet.load_nnet(fpath, args.dom, device)
    if args.reset_params:
        net.reset_parameters()
    logging.info(net)

    all_props = AndProp(nid.applicable_props(args.dom))
    v = Bisecter(args.dom, all_props)

    def run_abs(batch_abs_lb: Tensor, batch_abs_ub: Tensor, batch_abs_bitmap: Tensor) -> Tensor:
        """ Return the safety distances over abstract domain. """
        # lift the interval bounds into abstract elements, push them through the
        # network, and measure how far the abstract outputs are from being safe
        batch_abs_ins = args.dom.Ele.by_intvl(batch_abs_lb, batch_abs_ub)
        batch_abs_outs = net(batch_abs_ins)
        return all_props.safe_dist(batch_abs_outs, batch_abs_bitmap)

    in_lb, in_ub = all_props.lbub(device)
    in_bitmap = all_props.bitmap(device)
    in_lb = net.normalize_inputs(in_lb, bound_mins, bound_maxs)
    in_ub = net.normalize_inputs(in_ub, bound_mins, bound_maxs)

    # already moved to GPU if necessary
    trainset = AcasPoints.load(nid, train=True, device=device)
    testset = AcasPoints.load(nid, train=False, device=device)

    start = timer()

    if args.no_abs or args.no_refine:
        # use the property regions as they are
        curr_abs_lb, curr_abs_ub, curr_abs_bitmap = in_lb, in_ub, in_bitmap
    else:
        # refine it at the very beginning to save some steps in later epochs
        curr_abs_lb, curr_abs_ub, curr_abs_bitmap = v.split(in_lb, in_ub, in_bitmap, net, args.refine_top_k,
                                                            # tiny_width=args.tiny_width,
                                                            stop_on_k_all=args.start_abs_cnt)

    opti = Adam(net.parameters(), lr=args.lr)
    scheduler = args.scheduler_fn(opti)  # could be None

    accuracies = []  # epoch 0: ratio
    certified = False
    epoch = 0

    while True:
        # first, evaluate current model
        logging.info(f'[{utils.time_since(start)}] After epoch {epoch}:')
        if not args.no_pts:
            logging.info(f'Loaded {trainset.real_len()} points for training.')
        if not args.no_abs:
            logging.info(f'Loaded {len(curr_abs_lb)} abstractions for training.')
            with torch.no_grad():
                full_dists = run_abs(curr_abs_lb, curr_abs_ub, curr_abs_bitmap)
            logging.info(f'min loss {full_dists.min()}, max loss {full_dists.max()}.')
            if full_dists.max() <= 0.:
                # no abstraction has a positive safety distance left
                certified = True
                logging.info(f'All {len(curr_abs_lb)} abstractions certified.')
            else:
                _, worst_idx = full_dists.max(dim=0)
                logging.debug(f'Max loss at LB: {curr_abs_lb[worst_idx]}, UB: {curr_abs_ub[worst_idx]}, rule: {curr_abs_bitmap[worst_idx]}.')

        accuracies.append(eval_test(net, testset))
        logging.info(f'Test set accuracy {accuracies[-1]}.')

        # check termination
        if certified and epoch >= args.min_epochs:
            # all safe and sufficiently trained
            break

        if epoch >= args.max_epochs:
            break

        epoch += 1
        # the weights are about to change, so the previous verdict no longer holds
        certified = False
        logging.info(f'\n[{utils.time_since(start)}] Starting epoch {epoch}:')

        absset = exp.AbsIns(curr_abs_lb, curr_abs_ub, curr_abs_bitmap)

        # dataset may have expanded, need to update claimed length to date
        if not args.no_pts:
            trainset.reset_claimed_len()
        if not args.no_abs:
            absset.reset_claimed_len()
        if (not args.no_pts) and (not args.no_abs):
            ''' Might simplify this to just using the amount of abstractions, is it unnecessarily complicated? '''
            # need to enumerate both
            # both datasets claim the larger length, so both loaders below yield the same number of batches
            max_claimed_len = max(trainset.claimed_len, absset.claimed_len)
            trainset.claimed_len = max_claimed_len
            absset.claimed_len = max_claimed_len

        if not args.no_pts:
            conc_loader = data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True)
            nbatches = len(conc_loader)
            conc_loader = iter(conc_loader)
        if not args.no_abs:
            abs_loader = data.DataLoader(absset, batch_size=args.batch_size, shuffle=True)
            nbatches = len(abs_loader)  # doesn't matter rewriting len(conc_loader), they are the same
            abs_loader = iter(abs_loader)

        # accumulates the safety loss only; the accuracy loss is not added to it
        total_loss = 0.
        for i in range(nbatches):
            opti.zero_grad()
            batch_loss = 0.
            if not args.no_pts:
                batch_inputs, batch_labels = next(conc_loader)
                batch_outputs = net(batch_inputs)
                batch_loss += args.accuracy_loss(batch_outputs, batch_labels)
            if not args.no_abs:
                batch_abs_lb, batch_abs_ub, batch_abs_bitmap = next(abs_loader)
                batch_dists = run_abs(batch_abs_lb, batch_abs_ub, batch_abs_bitmap)
                safe_loss = batch_dists.mean()  # L1, need to upgrade to batch_worsts to unlock loss other than L1
                total_loss += safe_loss.item()
                batch_loss += safe_loss
            logging.debug(f'Epoch {epoch}: {i / nbatches * 100 :.2f}%. Batch loss {batch_loss.item()}')
            batch_loss.backward()
            opti.step()

        # inspect the trained weights after another epoch
        # meta.inspect_params(net.state_dict())

        total_loss /= nbatches
        if scheduler is not None:
            # the scheduler is stepped with the epoch's average safety loss
            scheduler.step(total_loss)
        # NOTE(review): the message says "accuracy training loss", but total_loss only sums safe_loss -- confirm wording
        logging.info(f'[{utils.time_since(start)}] At epoch {epoch}: avg accuracy training loss {total_loss}.')

        # Refine abstractions, note that restart from scratch may output much fewer abstractions thus imprecise.
        if (not args.no_refine) and len(curr_abs_lb) < args.max_abs_cnt:
            curr_abs_lb, curr_abs_ub, curr_abs_bitmap = v.split(curr_abs_lb, curr_abs_ub, curr_abs_bitmap, net,
                                                                args.refine_top_k,
                                                                # tiny_width=args.tiny_width,
                                                                stop_on_k_new=args.refine_top_k)
        pass

    # summarize
    train_time = timer() - start
    logging.info(f'Accuracy at every epoch: {accuracies}')
    logging.info(f'After {epoch} epochs / {utils.pp_time(train_time)}, ' +
                 f'eventually the trained network got certified? {certified}, ' +
                 f'with {accuracies[-1]:.4f} accuracy on test set.')
    return epoch, train_time, certified, accuracies[-1]
def sample_balanced_data(dom: AbsDom, trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Sample a class-balanced training and test set from every network in `AcasNetID.balanced_ids()`.

    The default original dataset is very imbalanced, so points are sampled repeatedly
    until every output category has collected `trainsize / #categories` training and
    `testsize / #categories` test points. The points are labelled with the network's
    own prediction and saved to `<nid>-normed-train.pt` / `<nid>-normed-test.pt`.

    Original author's note: this applies to N_{1,1} ~ N_{1,6} only. Other networks all
    lack data for certain categories; some categories hold < 0.1% of all sampled points.

    :param dom: only passed on because loading an AcasNet requires it
    :param trainsize: total number of training points, must be a multiple of `len(AcasOut)`
    :param testsize: total number of test points, must be a multiple of `len(AcasOut)`
    :param dir: directory the `.pt` files are written to
    """
    ncats = len(AcasOut)
    assert trainsize % ncats == 0 and testsize % ncats == 0

    # per-category quotas are the same for every network, compute them once
    trainsize_cat = trainsize // ncats
    testsize_cat = testsize // ncats
    allsize_cat = trainsize_cat + testsize_cat

    for nid in AcasNetID.balanced_ids():
        fpath = nid.fpath()
        print('Sampling for network', nid, 'picked nnet file:', fpath)
        net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
        net = net.to(device)

        in_lbs = torch.tensor([bound_mins], device=device)
        in_ubs = torch.tensor([bound_maxs], device=device)
        in_lbs = net.normalize_inputs(in_lbs, bound_mins, bound_maxs)
        in_ubs = net.normalize_inputs(in_ubs, bound_mins, bound_maxs)

        # Accumulators are created on `device`, like the samples they are concatenated
        # with, so that torch.cat never has to combine CPU and CUDA tensors.
        res_inputs = [torch.tensor([], device=device) for _ in range(ncats)]
        res_labels = [torch.tensor([], device=device).long() for _ in range(ncats)]

        # Check before sampling: no further (discarded) round once every category is filled.
        while any(len(res_inputs[category]) < allsize_cat for category in AcasOut):
            inputs = sample_points(in_lbs, in_ubs, K=trainsize+testsize)
            with torch.no_grad():
                outputs = net(inputs)
                labels = (outputs * -1).argmax(dim=-1)  # because in ACAS Xu, minimum score is the prediction

            for category in AcasOut:
                if len(res_inputs[category]) >= allsize_cat:
                    continue

                idxs = labels == category
                res_inputs[category] = torch.cat((res_inputs[category], inputs[idxs]), dim=0)
                res_labels[category] = torch.cat((res_labels[category], labels[idxs]), dim=0)

        # Per category: the first `trainsize_cat` points go to training, the next `testsize_cat` to test.
        train_stop, test_stop = trainsize_cat, allsize_cat
        train_inputs = torch.cat([res_inputs[c][:train_stop, ...] for c in AcasOut], dim=0)
        train_labels = torch.cat([res_labels[c][:train_stop, ...] for c in AcasOut], dim=0)
        test_inputs = torch.cat([res_inputs[c][train_stop:test_stop, ...] for c in AcasOut], dim=0)
        test_labels = torch.cat([res_labels[c][train_stop:test_stop, ...] for c in AcasOut], dim=0)

        # Original author's note: the prediction scores from the original ACAS Xu
        # networks seem to be very close to each other.

        with open(Path(dir, f'{str(nid)}-normed-train.pt'), 'wb') as f:
            torch.save((train_inputs, train_labels), f)
        with open(Path(dir, f'{str(nid)}-normed-test.pt'), 'wb') as f:
            torch.save((test_inputs, test_labels), f)
    return
def sample_balanced_data_for(dom: AbsDom, nid: AcasNetID, ignore_idxs: List[int],
                             trainsize: int = 10000, testsize: int = 5000, dir: str = ACAS_DIR):
    """ Sample a training and test set for one network, balanced over the non-ignored categories.

    Some networks' original data is so imbalanced that certain output categories
    hardly ever show up. Those categories are listed in `ignore_idxs`: they get no
    quota, but whatever samples of them turn up during sampling are still kept and
    split between training and test set in the ratio `trainsize : testsize`.
    Every other category contributes exactly `trainsize // n` training and
    `testsize // n` test points, `n` being the number of non-ignored categories.
    Results are saved to `<nid>-normed-train.pt` / `<nid>-normed-test.pt`.

    :param dom: only passed on because loading an AcasNet requires it
    :param nid: ID of the network to sample from
    :param ignore_idxs: output category indices without quota; must be non-empty and
                        must not cover every category
    :param trainsize: training points requested over all non-ignored categories
    :param testsize: test points requested over all non-ignored categories
    :param dir: directory the `.pt` files are written to
    """
    assert len(ignore_idxs) != 0, 'Go to the other function.'
    assert all(0 <= i < len(AcasOut) for i in ignore_idxs)
    print('Sampling for', nid, 'ignoring output category', ignore_idxs)

    ncats = len(AcasOut) - len(ignore_idxs)
    # otherwise the per-category quota below is a division by zero
    assert ncats > 0, 'At least one output category must not be ignored.'
    train_percat = trainsize // ncats
    test_percat = testsize // ncats

    def quota_of(category: AcasOut) -> int:
        """ Number of samples a category has to collect, zero for ignored ones. """
        return 0 if category in ignore_idxs else train_percat + test_percat

    fpath = nid.fpath()
    print('Sampling for network', nid, 'picked nnet file:', fpath)
    net, bound_mins, bound_maxs = AcasNet.load_nnet(fpath, dom)
    net = net.to(device)

    in_lbs = torch.tensor([bound_mins], device=device)
    in_ubs = torch.tensor([bound_maxs], device=device)
    in_lbs = net.normalize_inputs(in_lbs, bound_mins, bound_maxs)
    in_ubs = net.normalize_inputs(in_ubs, bound_mins, bound_maxs)

    # Accumulators are created on `device`, like the samples they are concatenated
    # with, so that torch.cat never has to combine CPU and CUDA tensors.
    res_inputs = [torch.tensor([], device=device) for _ in range(len(AcasOut))]
    res_labels = [torch.tensor([], device=device).long() for _ in range(len(AcasOut))]

    while True:
        inputs = sample_points(in_lbs, in_ubs, K=trainsize + testsize)
        with torch.no_grad():
            outputs = net(inputs)
            labels = (outputs * -1).argmax(dim=-1)  # because in ACAS Xu, minimum score is the prediction

        filled_cnt = 0
        for category in AcasOut:
            if len(res_inputs[category]) >= quota_of(category):
                filled_cnt += 1
                if category not in ignore_idxs:
                    # quota reached, stop collecting for this category
                    continue

            # unfilled categories and ignored ones (quota 0) keep whatever shows up
            idxs = labels == category
            res_inputs[category] = torch.cat((res_inputs[category], inputs[idxs]), dim=0)
            res_labels[category] = torch.cat((res_labels[category], labels[idxs]), dim=0)

        # every category had been filled before this round's samples were added
        if filled_cnt == len(AcasOut):
            break

    train_input_parts, train_label_parts = [], []
    test_input_parts, test_label_parts = [], []
    for category in AcasOut:
        cat_inputs, cat_labels = res_inputs[category], res_labels[category]
        if category in ignore_idxs:
            # split everything that was collected in the ratio trainsize : testsize
            amount = len(cat_inputs)
            train_stop = int(amount * trainsize / (trainsize + testsize))
            test_stop = amount
        else:
            # exactly the quota: training points first, test points right after
            train_stop = train_percat
            test_stop = train_percat + test_percat

        train_input_parts.append(cat_inputs[:train_stop, ...])
        train_label_parts.append(cat_labels[:train_stop, ...])
        test_input_parts.append(cat_inputs[train_stop:test_stop, ...])
        test_label_parts.append(cat_labels[train_stop:test_stop, ...])

    train_inputs = torch.cat(train_input_parts, dim=0)
    train_labels = torch.cat(train_label_parts, dim=0)
    test_inputs = torch.cat(test_input_parts, dim=0)
    test_labels = torch.cat(test_label_parts, dim=0)

    # Original author's note: the prediction scores from the original ACAS Xu
    # networks seem to be very close to each other.

    with open(Path(dir, f'{str(nid)}-normed-train.pt'), 'wb') as f:
        torch.save((train_inputs, train_labels), f)
    with open(Path(dir, f'{str(nid)}-normed-test.pt'), 'wb') as f:
        torch.save((test_inputs, test_labels), f)
    return
print(f'Category {category} has {cnt} samples.') print() with torch.no_grad(): # because in ACAS Xu, minimum score is the prediction assert torch.equal(train_labels, (net(train_inputs) * -1).argmax(dim=-1)) assert torch.equal(test_labels, (net(test_inputs) * -1).argmax(dim=-1)) return if __name__ == '__main__': device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') dom = DeeppolyDom() # sample_original_data(dom) # sample_balanced_data(dom) # # Ignore idxs are inspected from load_sampled_data(). Network <1, 1> and <1, 2> are already done. # sample_balanced_data_for(dom, AcasNetID(1, 7), [3, 4]) # sample_balanced_data_for(dom, AcasNetID(1, 9), [1, 2, 3, 4]) # sample_balanced_data_for(dom, AcasNetID(2, 1), [2]) # sample_balanced_data_for(dom, AcasNetID(2, 9), [2, 3, 4]) # sample_balanced_data_for(dom, AcasNetID(3, 3), [1]) # sample_balanced_data_for(dom, AcasNetID(4, 5), [2, 4]) print(len(AcasNetID.all_ids())) for nid in AcasNetID.all_ids(): inspect_data_for(dom, nid, normed=False) print('All prepared ACAS dataset loaded and validated.') pass