import os
import glob

import h5py
import numpy as np


def diff(clean, error):
    # Compare a clean checkpoint with a corrupted one and report error metrics.
    d1 = read_data(clean)
    d2 = read_data(error)
    mse = (np.square(d1 - d2)).mean(axis=None)
    # Count cells that differ by more than the tolerance, plus any NaNs
    # introduced into the corrupted data.
    diff_count = np.sum(np.abs(d1 - d2) > 10e-5) + np.sum(np.isnan(d2))
    diff_max = np.max(np.abs(d1 - d2))
    diff_rel_max = np.max(np.abs((d1 - d2) / d1))
    return mse, diff_count, diff_max, diff_rel_max
def diff(clean, error):
    # Variant of diff() that also parses the flipped bit and the error
    # position (x, y, z) out of the corrupted file's name.
    d1 = read_data(clean)
    d2 = read_data(error)
    bit, x, y, z = error.split("_")[-5:-1]
    bit, x, y, z = int(bit), int(x), int(y), int(z)
    print("bit: %s, x: %s, y: %s, z: %s" % (bit, x, y, z))
    print("clean: %s, error: %s" % (d1[x, y, z], d2[x, y, z]))
    sum_error = np.sum(np.abs(d1 - d2))
    diff_count = np.sum(d1 != d2)
    diff_max = np.max(np.abs(d1 - d2))
    return sum_error, diff_count, diff_max
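# Neither diff() variant defines read_data(); the sketch below is only a
# guess at what that helper might look like, assuming the checkpoints are
# FLASH HDF5 files and the variable of interest is a single dataset. The
# squeeze() mirrors the 3-D indexing d1[x, y, z] above; the real helper
# may arrange the axes differently.
def read_data(filename, var_name="dens"):
    with h5py.File(filename, "r") as f:
        return np.squeeze(np.array(f[var_name]))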
def restart(checkpoint_file, postfix=0, var_name="dens"):
    timestep = int(checkpoint_file[-4:])
    start_checkpoint_file = checkpoint_file
    os.system("cp ./clean/" + checkpoint_file + " ./" + start_checkpoint_file)
    end_checkpoint_file = start_checkpoint_file[0:-4] + ("0000" + str(timestep + 1))[-4:]
    print(start_checkpoint_file, end_checkpoint_file)

    # 1. Insert an error into the checkpoint file and
    #    remember the flipped bit and the error position (x, y, z).
    f = h5py.File(start_checkpoint_file, "r+")
    x, y, z = get_random_indices()
    error, bit = get_flip_error(f[var_name][0, z, y, x], 0, 30)
    print(bit, x, y, z, f[var_name][0, z, y, x], error)
    f[var_name][0, z, y, x] = error
    f.close()

    # 2. Copy flash.par and edit it so the run restarts from the corrupted
    #    checkpoint. We need to specify which iteration to restart from
    #    (and optionally the end iteration); sed does this easily.
    os.system("cp ./flash.par ./flash.par_" + str(postfix))
    os.system("sed -i 's/.*checkpointFileNumber.*/checkpointFileNumber = "
              + str(timestep) + "/g' ./flash.par_" + str(postfix))
    # os.system("sed -i 's/.*basenm.*/basenm = " + str(postfix) + "_sod_" + "/g' ./flash.par_" + str(postfix))
    # os.system("sed -i 's/.*nend.*/nend = " + str(timestep + delay) + "/g' ./flash.par_" + str(postfix))

    # 3. Restart the FLASH program.
    os.system("mpirun -np 4 ./flash4 -par_file ./flash.par_" + str(postfix))

    # 4. Save the corrupted start checkpoint and the checkpoint one step later.
    new_start_name = "error_%s_%s_%s_%s_%s_start" % (bit, x, y, z, timestep)
    new_end_name = "error_%s_%s_%s_%s_%s_end" % (bit, x, y, z, timestep)
    np.save(new_start_name + ".npy", read_data(start_checkpoint_file, var_name))
    np.save(new_end_name + ".npy", read_data(end_checkpoint_file, var_name))
    os.system("rm " + start_checkpoint_file)
    os.system("rm " + end_checkpoint_file)

    # 5. Delete unnecessary files.
    os.system("rm *.dat *.log *_plt_cnt_*")
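# restart() also relies on get_random_indices() and get_flip_error(), which
# are not shown here. The sketches below are assumptions rather than the
# original helpers: the block size of 8, the 64-bit IEEE-754 encoding, and
# the reading of (low_bit, high_bit) as the range to draw the flipped bit
# from are all guesses.
import random
import struct


def get_random_indices(n=8):
    # Hypothetical: pick a random (x, y, z) cell inside an n^3 block.
    return random.randrange(n), random.randrange(n), random.randrange(n)


def get_flip_error(value, low_bit=0, high_bit=30):
    # Hypothetical: flip one randomly chosen bit in the 64-bit IEEE-754
    # representation of `value`; return the corrupted value and the bit index.
    bit = random.randint(low_bit, high_bit - 1)
    (as_int,) = struct.unpack("<Q", struct.pack("<d", value))
    as_int ^= 1 << bit
    (corrupted,) = struct.unpack("<d", struct.pack("<Q", as_int))
    return corrupted, bit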
elif args.test:
    data_gen = FlashDatasetGenerator(args.data, BATCH_SIZE, args.k == 0, args.bits)
    model.load_weights(model_file)
    scores = model.predict_generator(generator=data_gen, use_multiprocessing=False,
                                     workers=1, verbose=1)
    for threshold in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99]:
        print("Threshold =", threshold)
        pred = (scores >= threshold)
        compute_metrics(pred, data_gen.labels)
elif args.detect:
    model.load_weights(model_file)
    error = 0
    # Match both clean checkpoint files and k-delay corrupted files.
    files = glob.glob(args.detect + "/*chk_*") + glob.glob(args.detect + "/*error*")
    files.sort()
    files = files[0:200]
    for filename in files:
        dens = read_data(filename)
        dens_blocks = np.expand_dims(np.squeeze(split_to_windows(dens)), -1)
        if detection(model, dens_blocks):
            error += 1
    print("detected %s error samples, total: %s, recall: %s"
          % (error, len(files), error * 1.0 / len(files)))
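# detection() and split_to_windows() are defined elsewhere; the sketch below
# only illustrates one plausible voting rule for detection(), assuming it
# scores every window with the trained classifier and flags the file as
# corrupted when any window exceeds a fixed threshold. The threshold value
# and the "any window" rule are assumptions, not the original code.
def detection(model, blocks, threshold=0.9):
    scores = model.predict(blocks, verbose=0)
    return bool(np.any(scores >= threshold))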