def test():
    # Write test-set predictions as an "image_id,labels" CSV, where labels
    # is a space-separated list of "char x y" triples.
    output_file = data_path / out_fname
    data = get_test_data()
    with open(output_file, 'w') as output:
        output.write("image_id,labels\n")
        for sample_index in tqdm(range(len(data))):
            image_id, size, labels, path = data[sample_index]
            image = get_image(path)
            image = threshold(image)
            chars, boxes, char_scores, char_images = get_predictions(image)
            # Boxes come back as (y, x, h, w); the output expects "char x y".
            tokens = []
            for char, box in zip(chars, boxes):
                y, x, _, _ = box
                tokens.append("{c} {x} {y}".format(c=char, x=x, y=y))
            output.write(image_id + "," + " ".join(tokens) + "\n")
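# For reference, each row test() writes pairs an image id with the predicted
# characters and their x/y positions; a row might look like this
# (hypothetical id, labels, and coordinates):
#
#   image_001,U+306F 1231 339 U+304C 275 1652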
def eval():
    # Accumulate true/false positives and false negatives over the
    # validation set, then report precision, recall, and F1.
    data = get_val_data()
    tp, fp, fn = 0, 0, 0
    total_chars_predicted = 0
    total_chars_truth = 0
    for sample_index in tqdm(range(len(data))):
        image_id, size, labels, path = data[sample_index]
        image = get_image(path)
        image = threshold(image)
        chars, boxes, char_scores, char_images = get_predictions(image)
        total_chars_predicted += len(chars)
        total_chars_truth += len(labels)
        _tp, _fp, _fn = eval_prediction(chars, boxes, char_scores, labels)
        tp += _tp
        fp += _fp
        fn += _fn
    f1, precision, recall = compute_f1(tp, fp, fn)
    # num_boxes, box_threshold, and char_image_size are presumably
    # module-level settings.
    print(num_boxes, box_threshold, char_image_size,
          total_chars_predicted, total_chars_truth)
    print("tp:{tp} fp:{fp} fn:{fn} precision:{p} recall:{r} f1:{f1}".format(
        tp=tp, fp=fp, fn=fn, p=precision, r=recall, f1=f1))
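# Hedged sketch of the two helpers eval() relies on; neither is defined in
# these snippets. compute_f1 follows the standard precision/recall/F1
# definitions. eval_prediction assumes a Kuzushiji-style match rule (a
# prediction counts as a true positive when its box origin falls inside an
# unmatched ground-truth box with the same character); the (char, x, y, w, h)
# layout of `labels` is an assumption, not taken from the original code.

def compute_f1(tp, fp, fn):
    # Standard definitions, guarding against division by zero when there
    # are no predictions or no ground truth.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return f1, precision, recall

def eval_prediction(chars, boxes, char_scores, labels):
    # char_scores is accepted for signature parity but unused in this sketch.
    # Greedy matching: each ground-truth entry absorbs at most one prediction.
    matched = set()
    tp = 0
    for char, (y, x, _, _) in zip(chars, boxes):
        for i, (gt_char, gt_x, gt_y, gt_w, gt_h) in enumerate(labels):
            if i in matched:
                continue
            if (gt_char == char
                    and gt_x <= x < gt_x + gt_w
                    and gt_y <= y < gt_y + gt_h):
                matched.add(i)
                tp += 1
                break
    fp = len(chars) - tp
    fn = len(labels) - tp
    return tp, fp, fn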
def get_image(self, path):
    # Load, binarize, resize to the model's input size, and scale to [0, 1].
    img = cv2.imread(str(path))
    img = threshold(img)
    img = cv2.resize(img, (self.img_size, self.img_size))
    img = img / 255.
    return img
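# Hedged sketch: threshold() is called throughout these snippets but never
# defined. A minimal plausible binarization pass using Otsu's method,
# assuming an 8-bit BGR image as loaded by cv2.imread; the real
# preprocessing may differ.
import cv2

def threshold(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu picks the split point automatically from the image histogram.
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary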
lower_avg_bad_train_losses[epsilon_idx] = avg_bad_train_loss
lower_test_losses[epsilon_idx] = test_loss
lower_overall_train_acc[epsilon_idx] = overall_train_acc
lower_good_train_acc[epsilon_idx] = good_train_acc
lower_bad_train_acc[epsilon_idx] = bad_train_acc
lower_test_acc[epsilon_idx] = test_acc
lower_params_norm_sq[epsilon_idx] = params_norm_sq
lower_weight_decays[epsilon_idx] = lower_weight_decay

# Save attack points.
# I think we just need this for mnist and imdb for fig 1?
# Presumably we don't save enron because we don't plot it.
if not ignore_slab:
    if dataset_name in ['imdb']:
        # Threshold and randomly round the poisoned rows, then store them
        # as a sparse matrix stacked under the clean training data.
        X_poison_sparse = sparse.csr_matrix(
            data.rround(data.threshold(X_modified[idx_poison, :])))
        X_modified = sparse.vstack((X_train, X_poison_sparse))
        save_path = datasets.get_int_attack_npz_path(
            dataset_name, epsilon, norm_sq_constraint, percentile)
    else:
        save_path = datasets.get_attack_npz_path(
            dataset_name, epsilon, norm_sq_constraint, percentile)
    if dataset_name in ['dogfish', 'mnist_17', 'imdb']:
        np.savez(
            save_path,
            X_modified=X_modified,
            Y_modified=Y_modified,
            X_test=X_test,
            Y_test=Y_test,
            idx_train=idx_train,
            idx_poison=idx_poison)
elif process_slab:
    attack_save_path = datasets.get_slab_attack_npz_path(
        dataset_name, epsilon, norm_sq_constraint)
elif process_grad:
    attack_save_path = datasets.get_grad_attack_npz_path(
        dataset_name, epsilon, norm_sq_constraint)
elif process_labelflip:
    attack_save_path = datasets.get_labelflip_attack_npz_path(
        dataset_name, epsilon, norm_sq_constraint)
elif process_int:
    attack_save_path = datasets.get_int_attack_npz_path(
        dataset_name, epsilon, norm_sq_constraint)
# We generate the imdb data without integrity constraints and then do the
# randomized rounding after, so we need a separate call to this script with
# the --int flag to fully process its results. To save space, we don't save
# it to disk if it's processing slab/grad/etc.
elif (dataset_name in ['imdb']) and no_process:
    X_poison_sparse = sparse.csr_matrix(
        data.rround(data.threshold(X_modified[idx_poison, :])))
    X_modified = sparse.vstack((X_train, X_poison_sparse))
    attack_save_path = datasets.get_int_attack_npz_path(
        dataset_name, epsilon, norm_sq_constraint, percentile)

if attack_save_path is not None:
    np.savez(
        attack_save_path,
        X_modified=X_modified,
        Y_modified=Y_modified,
        X_test=X_test,
        Y_test=Y_test,
        idx_train=idx_train,
        idx_poison=idx_poison)

if no_process:
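# Hedged sketch: data.threshold and data.rround live in data_utils and are
# not shown in these snippets. Based on the comments above, threshold()
# plausibly clips feature values into the feasible (non-negative) range, and
# rround() performs randomized rounding: each fractional entry is rounded up
# with probability equal to its fractional part, which is unbiased in
# expectation. Both sketches assume dense ndarray input; the real helpers
# may differ.
import numpy as np

def threshold(X, lower=0):
    # Clip entries below the feasible lower bound (e.g., negative counts).
    return np.clip(X, lower, None)

def rround(X, seed=None):
    rng = np.random.RandomState(seed)
    floor = np.floor(X)
    frac = X - floor
    # Round each entry up with probability equal to its fractional part.
    return floor + (rng.random_sample(X.shape) < frac)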
def augment(image):
    # The only augmentation applied here is the binarization pass.
    return [threshold(image)]
import IPython
from scipy import sparse

import data_utils as data
import datasets
import defenses
import defense_testers
import upper_bounds
from upper_bounds import hinge_loss, hinge_grad

### This just thresholds and rounds IMDB.
### Not guaranteed to actually be feasible.

dataset_name = 'imdb'
weight_decay = datasets.DATASET_WEIGHT_DECAYS[dataset_name]
weight_decay = 0.17  ### HACK, need to rerun on proper weight_decay
epsilons = datasets.DATASET_EPSILONS[dataset_name]
norm_sq_constraint = datasets.DATASET_NORM_SQ_CONSTRAINTS[dataset_name]

for epsilon in epsilons:
    if epsilon == 0:
        continue
    attack_npz_path = datasets.get_attack_npz_path(
        dataset_name, weight_decay, epsilon, norm_sq_constraint)
    X_modified, Y_modified, X_test, Y_test, idx_train, idx_poison = \
        datasets.load_attack_npz(dataset_name, attack_npz_path)
    X_modified = sparse.csr_matrix(data.rround(data.threshold(X_modified)))
    IPython.embed()
    break