Exemple #1
0
def diff(clean, error):
    """Summarise how far an error-injected dataset drifts from the clean one.

    Both arguments are passed to ``read_data`` and the loaded values are
    compared element-wise (assumes ``read_data`` returns NumPy arrays of
    matching shape -- TODO confirm).

    Args:
        clean: Identifier of the reference data, forwarded to ``read_data``.
        error: Identifier of the corrupted data, forwarded to ``read_data``.

    Returns:
        A tuple ``(mse, diff_count, diff_max, diff_rel_max)``:
        mean squared difference over all elements; number of elements whose
        absolute difference exceeds ``10e-5`` plus the number of NaN entries
        in the corrupted data; largest absolute difference; largest absolute
        difference relative to the clean value.
    """
    reference = read_data(clean)
    corrupted = read_data(error)

    # Every metric below is derived from the same element-wise difference.
    delta = reference - corrupted
    abs_delta = np.abs(delta)

    mse = np.square(delta).mean(axis=None)

    # Note that 10e-5 is 1e-4. NaN never compares greater than the tolerance,
    # so NaN entries in the corrupted data are counted separately.
    over_tolerance = np.sum(abs_delta > 10e-5)
    nan_entries = np.sum(np.isnan(corrupted))
    diff_count = over_tolerance + nan_entries

    diff_max = np.max(abs_delta)
    diff_rel_max = np.max(np.abs(delta / reference))
    return mse, diff_count, diff_max, diff_rel_max
Exemple #2
0
def diff(clean, error):
    """Compare a clean dataset with a corrupted one and report the damage.

    The injected bit index and the corrupted position are parsed from the
    underscore-separated fields of ``error`` and printed together with the
    clean and corrupted values at that position.

    Args:
        clean: Identifier of the reference data, forwarded to ``read_data``.
        error: Name of the corrupted data; also parsed for bit/x/y/z.

    Returns:
        A tuple ``(sum_error, diff_count, diff_max)``: sum of absolute
        differences, number of differing elements, and largest absolute
        difference.
    """
    reference = read_data(clean)
    corrupted = read_data(error)

    # NOTE(review): this slice takes the four fields preceding the last one,
    # i.e. it expects names shaped like ..._<bit>_<x>_<y>_<z>_<suffix>. A name
    # carrying an extra field before the suffix (e.g. a timestep) would shift
    # every value by one position -- confirm against whatever writes the files.
    bit, x, y, z = [int(field) for field in error.split("_")[-5:-1]]

    print("bit: %s, x: %s, y: %s, z: %s" % (bit, x, y, z))
    print("clean: %s, error: %s" % (reference[x, y, z], corrupted[x, y, z]))

    abs_delta = np.abs(reference - corrupted)
    sum_error = np.sum(abs_delta)
    diff_count = np.sum(reference != corrupted)
    diff_max = np.max(abs_delta)
    return sum_error, diff_count, diff_max
def restart(checkpoint_file, postfix=0, var_name="dens"):
    """Corrupt one value of a checkpoint, rerun FLASH from it and save results.

    The clean checkpoint is copied from ``./clean/`` into the working
    directory, a single value of ``var_name`` is replaced by a bit-flipped
    version, FLASH is restarted from that checkpoint, and both the corrupted
    start checkpoint and the following checkpoint are exported as ``.npy``
    files named ``error_<bit>_<x>_<y>_<z>_<timestep>_{start,end}.npy``.
    Temporary checkpoint, ``.dat``, ``.log`` and plot files are removed
    afterwards.

    Args:
        checkpoint_file: Name of a checkpoint file under ``./clean/``. Its
            last four characters are parsed as the integer checkpoint number.
        postfix: Suffix of the private parameter file ``./flash.par_<postfix>``.
            Any value is accepted; it is converted with ``str``.
        var_name: Name of the HDF5 dataset to corrupt and to export.

    Returns:
        None.
    """
    timestep = int(checkpoint_file[-4:])
    start_checkpoint_file = checkpoint_file
    # The next checkpoint shares the base name with a zero-padded number + 1.
    end_checkpoint_file = start_checkpoint_file[:-4] + str(timestep + 1).zfill(4)
    # str() is required: the default postfix is the int 0, and concatenating
    # it to a str directly raises TypeError.
    par_file = "./flash.par_" + str(postfix)

    os.system("cp ./clean/" + checkpoint_file + " ./" + start_checkpoint_file)

    print(start_checkpoint_file, end_checkpoint_file)

    # 1. Insert an error into the checkpoint file and remember the flipped
    #    bit and the error position (x, y, z).
    x, y, z = get_random_indices()
    # The context manager closes the file even if the injection fails.
    with h5py.File(start_checkpoint_file, "r+") as f:
        original_value = f[var_name][0, z, y, x]
        error, bit = get_flip_error(original_value, 0, 30)
        print(bit, x, y, z, original_value, error)
        f[var_name][0, z, y, x] = error

    # 2. Work on a private copy of flash.par and point checkpointFileNumber
    #    at the corrupted checkpoint so FLASH restarts from it. Other
    #    settings (e.g. basenm, nend) are left as they are in flash.par.
    os.system("cp ./flash.par " + par_file)
    os.system("sed -i 's/.*checkpointFileNumber.*/checkpointFileNumber = " +
              str(timestep) + "/g' " + par_file)

    # 3. Restart the FLASH program.
    os.system("mpirun -np 4 ./flash4 -par_file " + par_file)

    # 4. Export the corrupted start checkpoint and the checkpoint that follows
    #    it, then drop the HDF5 files.
    new_start_name = "error_%s_%s_%s_%s_%s_start" % (bit, x, y, z, timestep)
    new_end_name = "error_%s_%s_%s_%s_%s_end" % (bit, x, y, z, timestep)
    np.save(new_start_name + ".npy",
            read_data(start_checkpoint_file, var_name))
    np.save(new_end_name + ".npy", read_data(end_checkpoint_file, var_name))
    os.system("rm " + start_checkpoint_file)
    os.system("rm " + end_checkpoint_file)

    # 5. Delete files the run leaves behind that are not needed.
    os.system("rm *.dat *.log *_plt_cnt_*")
Exemple #4
0
    elif args.test:
        data_gen = FlashDatasetGenerator(args.data, BATCH_SIZE, args.k == 0,
                                         args.bits)
        model.load_weights(model_file)
        scores = model.predict_generator(generator=data_gen,
                                         use_multiprocessing=False,
                                         workers=1,
                                         verbose=1)
        for threshold in [
                0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99
        ]:
            print("Threshold =", threshold)
            pred = (scores >= threshold)
            compute_metrics(pred, data_gen.labels)
    elif args.detect:
        model.load_weights(model_file)
        error = 0
        # match both for clean checkpoint files and k-delay corrupted files
        files = glob.glob(args.detect + "/*chk_*") + glob.glob(args.detect +
                                                               "/*error*")
        files.sort()
        files = files[0:200]
        for filename in files:
            dens = read_data(filename)
            dens_blocks = np.expand_dims(np.squeeze(split_to_windows(dens)),
                                         -1)
            if detection(model, dens_blocks):
                error += 1
        print("detected %s error samples, total: %s, recall: %s" %
              (error, len(files), error * 1.0 / len(files)))