Example #1
def test():
    output_file = data_path / out_fname
    # Write one CSV row per test image: "image_id,<char> <x> <y> ...".
    with open(output_file, 'w') as output:
        output.write("image_id,labels\n")

        data = get_test_data()
        for sample_index in tqdm(range(len(data))):
            image_id, size, labels, path = data[sample_index]
            image = get_image(path)
            image = threshold(image)
            chars, boxes, char_scores, char_images = get_predictions(image)
            prep_string = ""
            for char, box in zip(chars, boxes):
                y, x, _, _ = box
                prep_string += "{c} {x} {y} ".format(c=char, x=x, y=y)

            prep_string = image_id + "," + prep_string + "\n"
            output.write(prep_string)
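The labels column packs each prediction as a space-separated (character, x, y) triple. As a quick illustration, a hypothetical helper (not part of the example above) could parse such a row back into triples:

def parse_labels(label_string):
    # Split "<char> <x> <y> <char> <x> <y> ..." into (char, x, y) triples.
    tokens = label_string.split()
    return [(tokens[i], int(tokens[i + 1]), int(tokens[i + 2]))
            for i in range(0, len(tokens), 3)]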
Example #2
def eval():
    data = get_val_data()
    tp, fp, fn = 0, 0, 0
    total_chars_predicted = 0
    total_chars_truth = 0
    for sample_index in tqdm(range(len(data))):
        image_id, size, labels, path = data[sample_index]
        image = get_image(path)
        image = threshold(image)
        chars, boxes, char_scores, char_images = get_predictions(image)
        total_chars_predicted += len(chars)
        total_chars_truth += len(labels)
        # Accumulate per-image true/false positives and false negatives.
        _tp, _fp, _fn = eval_prediction(chars, boxes, char_scores, labels)
        tp += _tp
        fp += _fp
        fn += _fn

    f1, precision, recall = compute_f1(tp, fp, fn)
    print(num_boxes, box_threshold, char_image_size, total_chars_predicted, total_chars_truth)
    print("tp:{tp} fp:{fp} fn:{fn} precision:{p} recall:{r} f1:{f1}".format(
        tp=tp, fp=fp, fn=fn, p=precision, r=recall, f1=f1))
Example #3
def get_image(self, path):
    # Read, binarize, resize, and scale pixel values into [0, 1].
    img = cv2.imread(str(path))
    img = threshold(img)
    img = cv2.resize(img, (self.img_size, self.img_size))
    img = img / 255.
    return img
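The threshold function applied throughout these image examples is not shown either. A minimal sketch of a plausible implementation using OpenCV's Otsu binarization (an assumption, not the actual function behind these examples):

import cv2

def threshold(img):
    # Hypothetical: convert to grayscale if needed, then let Otsu's method
    # pick a global threshold automatically and return a binary image.
    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary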
Example #4
    lower_avg_bad_train_losses[epsilon_idx] = avg_bad_train_loss
    lower_test_losses[epsilon_idx] = test_loss
    lower_overall_train_acc[epsilon_idx] = overall_train_acc
    lower_good_train_acc[epsilon_idx] = good_train_acc
    lower_bad_train_acc[epsilon_idx] = bad_train_acc
    lower_test_acc[epsilon_idx] = test_acc
    lower_params_norm_sq[epsilon_idx] = params_norm_sq
    lower_weight_decays[epsilon_idx] = lower_weight_decay

    # Save attack points
    # I think we just need this for mnist and imdb for fig 1?
    # Presumably we don't save enron because we don't plot it
    if not ignore_slab:
        if dataset_name in ['imdb']:
            X_poison_sparse = sparse.csr_matrix(
                data.rround(data.threshold(X_modified[idx_poison, :])))
            X_modified = sparse.vstack((X_train, X_poison_sparse))
            save_path = datasets.get_int_attack_npz_path(
                dataset_name, epsilon, norm_sq_constraint, percentile)
        else:
            save_path = datasets.get_attack_npz_path(dataset_name, epsilon,
                                                     norm_sq_constraint,
                                                     percentile)

        if dataset_name in ['dogfish', 'mnist_17', 'imdb']:
            np.savez(save_path,
                     X_modified=X_modified,
                     Y_modified=Y_modified,
                     X_test=X_test,
                     Y_test=Y_test,
                     idx_train=idx_train,
                     idx_poison=idx_poison)
        elif process_slab:
            attack_save_path = datasets.get_slab_attack_npz_path(dataset_name, epsilon, norm_sq_constraint)
        elif process_grad:
            attack_save_path = datasets.get_grad_attack_npz_path(dataset_name, epsilon, norm_sq_constraint)
        elif process_labelflip:
            attack_save_path = datasets.get_labelflip_attack_npz_path(dataset_name, epsilon, norm_sq_constraint)            
        elif process_int:            
            attack_save_path = datasets.get_int_attack_npz_path(dataset_name, epsilon, norm_sq_constraint)        

    # We generate the imdb data without integrity constraints
    # and then do the randomized rounding after
    # so we need a separate call to this script with the --int flag
    # to fully process its results.
    # To save space, we don't save it to disk if it's processing slab/grad/etc.
    elif (dataset_name in ['imdb']) and (no_process):
        X_poison_sparse = sparse.csr_matrix(data.rround(data.threshold(X_modified[idx_poison, :])))
        X_modified = sparse.vstack((X_train, X_poison_sparse))
        attack_save_path = datasets.get_int_attack_npz_path(dataset_name, epsilon, norm_sq_constraint, percentile)            

    if attack_save_path is not None:
        np.savez(
            attack_save_path,
            X_modified=X_modified,
            Y_modified=Y_modified,
            X_test=X_test,
            Y_test=Y_test,
            idx_train=idx_train,
            idx_poison=idx_poison
            ) 

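Example #4 turns fractional poisoned feature vectors into feasible integer counts via data.threshold and data.rround. A minimal sketch of what these helpers plausibly do (hypothetical stand-ins; the real data_utils module is not shown on this page):

import numpy as np

def threshold(X, upper=1.0):
    # Hypothetical: clip entries into the feasible [0, upper] range.
    return np.clip(X, 0, upper)

def rround(X, rng=None):
    # Hypothetical randomized rounding: round each entry up with probability
    # equal to its fractional part, so the result is integral and unbiased
    # in expectation (E[rround(X)] == X).
    rng = np.random.default_rng() if rng is None else rng
    floor = np.floor(X)
    frac = X - floor
    return floor + (rng.random(X.shape) < frac)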
Example #6
def augment(image):
    return [threshold(image)]
Example #7
import IPython

from scipy import sparse

import data_utils as data
import datasets
import defenses
import defense_testers
import upper_bounds
from upper_bounds import hinge_loss, hinge_grad

### This just thresholds and rounds IMDB
### Not guaranteed to actually be feasible

dataset_name = 'imdb'

weight_decay = datasets.DATASET_WEIGHT_DECAYS[dataset_name]
weight_decay = 0.17 ### HACK, need to rerun on proper weight_decay

epsilons = datasets.DATASET_EPSILONS[dataset_name]
norm_sq_constraint = datasets.DATASET_NORM_SQ_CONSTRAINTS[dataset_name]

for epsilon in epsilons:
    if epsilon == 0: continue

    attack_npz_path = datasets.get_attack_npz_path(dataset_name, weight_decay, epsilon, norm_sq_constraint)
    X_modified, Y_modified, X_test, Y_test, idx_train, idx_poison = datasets.load_attack_npz(dataset_name, attack_npz_path)

    X_modified = sparse.csr_matrix(data.rround(data.threshold(X_modified)))
    IPython.embed()

    break
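Since the rounding is randomized, a short hypothetical sanity check (not in the original script) can confirm that the rounded sparse matrix is integral and non-negative before it is used further:

import numpy as np

# Hypothetical check: after data.rround(data.threshold(...)), every stored
# value in the sparse matrix should be a non-negative integer.
assert (X_modified.data >= 0).all()
assert np.array_equal(X_modified.data, np.round(X_modified.data))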