def get_all_shuffled():
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    RANGE = len(ALL_SERIAL_NUMBERS)

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(path, train_val_test_splits=(0.6, 0.2, 0.2))

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    return train_ds, val_ds, test_ds
def get_shuffled_and_windowed_from_pregen_ds():
    from steves_utils.ORACLE.windowed_shuffled_dataset_accessor import Windowed_Shuffled_Dataset_Factory
    from steves_utils import utils

    # Batch size is baked into the dataset
    path = os.path.join(utils.get_datasets_base_path(), "windowed_200k-each-devices_batch-100")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Windowed_Shuffled_Dataset_Factory(path)

    ORIGINAL_BATCH_SIZE = 100
    DESIRED_BATCH_SIZE = 512

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(100 * ORIGINAL_BATCH_SIZE, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(DESIRED_BATCH_SIZE)
    val_ds = val_ds.batch(DESIRED_BATCH_SIZE)
    test_ds = test_ds.batch(DESIRED_BATCH_SIZE)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
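# The loader above re-batches a pregenerated dataset: the files are stored in batches of
# ORIGINAL_BATCH_SIZE, so they are unbatched, shuffled at the example level, and re-batched
# to DESIRED_BATCH_SIZE. The sketch below is an illustration only, not part of the original
# pipeline; the toy dataset and sizes are assumptions chosen just to show the
# unbatch -> shuffle -> batch -> prefetch pattern.
def _demo_rebatch_pattern():
    import tensorflow as tf

    original_batch = 100   # stands in for ORIGINAL_BATCH_SIZE baked into the stored dataset
    desired_batch = 512    # the batch size the model is actually trained with

    toy = tf.data.Dataset.range(10_000).batch(original_batch)   # "pregenerated" batches

    rebatched = (
        toy.unbatch()                                                     # back to single examples
           .shuffle(100 * original_batch, reshuffle_each_iteration=True)  # buffer sized in original-batch multiples
           .batch(desired_batch)                                          # re-batch for training
           .prefetch(tf.data.AUTOTUNE)
    )

    for batch in rebatched.take(1):
        print(batch.shape)  # (512,)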
def get_all_shuffled():
    global RANGE
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    BATCH = 256

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(
        path,
        train_val_test_splits=(0.6, 0.2, 0.2),
        reshuffle_train_each_iteration=False)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)

    return train_ds, val_ds, test_ds
def get_shuffled_and_windowed_from_pregen_ds():
    from steves_utils.ORACLE.windowed_shuffled_dataset_accessor import Windowed_Shuffled_Dataset_Factory
    from steves_utils import utils

    # Batch size is baked into the dataset (batch-100 in the path below);
    # re-batch to the size the model is trained with.
    ORIGINAL_BATCH_SIZE = 100
    DESIRED_BATCH_SIZE = 512

    path = os.path.join(
        utils.get_datasets_base_path(),
        "automated_windower",
        "windowed_EachDevice-200k_batch-100_stride-20_distances-2.8.14.20.26.32.38.44.50.56.62")
    print(path)

    datasets = Windowed_Shuffled_Dataset_Factory(path)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(100 * ORIGINAL_BATCH_SIZE, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(DESIRED_BATCH_SIZE)
    val_ds = val_ds.batch(DESIRED_BATCH_SIZE)
    test_ds = test_ds.batch(DESIRED_BATCH_SIZE)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
def get_all_shuffled_windowed():
    global RANGE
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    DATASET_BATCH_SIZE = 100
    BATCH = 256
    chunk_size = 4 * ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    STRIDE_SIZE = 1
    NUM_REPEATS = math.floor((chunk_size - ORIGINAL_PAPER_SAMPLES_PER_CHUNK) / STRIDE_SIZE) + 1

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled_chunk-512", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(
        path,
        train_val_test_splits=(0.6, 0.2, 0.2),
        reshuffle_train_each_iteration=False)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS))
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS))
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS))

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.map(
        lambda x, y: (
            tf.transpose(
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, STRIDE_SIZE),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)), repeats=NUM_REPEATS, axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    # We aren't really windowing the val and test data, we are just splitting them into
    # 128 sample chunks so that they are the same shape as the train data
    val_ds = val_ds.map(
        lambda x, y: (
            tf.transpose(
                # See, stride == length, meaning we are just splitting the chunks, not really windowing
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x, y: (
            tf.transpose(
                # See, stride == length, meaning we are just splitting the chunks, not really windowing
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS))
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS))
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS))

    train_ds = train_ds.shuffle(DATASET_BATCH_SIZE * NUM_REPEATS * 3, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(BATCH)
    val_ds = val_ds.batch(BATCH)
    test_ds = test_ds.batch(BATCH)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
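# The frame/transpose/repeat mapping above carries some shape bookkeeping that is easy to
# lose track of. The standalone sketch below illustrates it with made-up constants
# (samples_per_chunk and num_classes are assumptions standing in for
# ORIGINAL_PAPER_SAMPLES_PER_CHUNK and RANGE); it is not part of the original pipeline.
def _demo_windowing_shapes():
    import math
    import tensorflow as tf

    samples_per_chunk = 128                  # assumption standing in for ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    chunk_size = 4 * samples_per_chunk
    stride = 1
    num_classes = 16                         # assumption standing in for RANGE
    num_repeats = math.floor((chunk_size - samples_per_chunk) / stride) + 1

    iq = tf.random.normal((2, chunk_size))   # one I/Q chunk: 2 rows (I and Q) x chunk_size samples
    label = tf.one_hot(3, num_classes)       # its one-hot serial-number label

    # frame() slides a length-samples_per_chunk window along the last axis, producing
    # shape (2, num_repeats, samples_per_chunk); the transpose moves the window axis to
    # the front so each window looks like an ordinary (2, samples_per_chunk) example.
    windows = tf.transpose(tf.signal.frame(iq, samples_per_chunk, stride), [1, 0, 2])
    labels = tf.repeat(tf.reshape(label, (1, num_classes)), repeats=num_repeats, axis=0)

    print(windows.shape)  # (385, 2, 128)
    print(labels.shape)   # (385, 16)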
def get_windowed_foxtrot_shuffled():
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    path = os.path.join(utils.get_datasets_base_path(), "foxtrot", "output")

    datasets = Shuffled_Dataset_Factory(path, train_val_test_splits=(0.6, 0.2, 0.2))

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    # count = 0
    # for e in train_ds.concatenate(val_ds).concatenate(test_ds):
    #     count += e["IQ"].shape[0]
    # print(count)
    # sys.exit(1)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    # Chunk size and batch are determined by the shuffled dataset
    chunk_size = 4 * ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    STRIDE_SIZE = 1
    BATCH = 1000
    REBATCH = 500
    NUM_REPEATS = math.floor((chunk_size - ORIGINAL_PAPER_SAMPLES_PER_CHUNK) / STRIDE_SIZE) + 1

    # print(RANGE)
    # sys.exit(1)

    # serial_number_id ranges from [0,15]
    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)

    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] != 13)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] != 13)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] != 13)

    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] < 15)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] < 15)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] < 15)

    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] in target_serials)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] in target_serials)

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.map(
        lambda x, y: (
            tf.transpose(
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, STRIDE_SIZE),  # NUM_REPEATS frames per chunk
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)), repeats=NUM_REPEATS, axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x, y: (
            tf.transpose(
                # stride == length, so this just splits each chunk into non-overlapping frames
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x, y: (
            tf.transpose(
                # stride == length, so this just splits each chunk into non-overlapping frames
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(BATCH * NUM_REPEATS * 4)
    val_ds = val_ds.shuffle(BATCH * NUM_REPEATS * 4)
    test_ds = test_ds.shuffle(BATCH * NUM_REPEATS * 4)

    # for e in test_ds:
    #     print(e[1])
    # sys.exit(1)

    train_ds = train_ds.batch(REBATCH)
    val_ds = val_ds.batch(REBATCH)
    test_ds = test_ds.batch(REBATCH)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
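# Each of the loaders above returns (train_ds, val_ds, test_ds) pipelines that can be fed
# straight to Keras. A minimal usage sketch, not part of the original script: it assumes a
# `model` compiled elsewhere in this file with a single accuracy metric (so evaluate()
# returns [loss, accuracy]); the epoch count is an arbitrary placeholder.
def _demo_train_with_loader(model, epochs=10):
    train_ds, val_ds, test_ds = get_all_shuffled_windowed()
    history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)
    test_loss, test_accuracy = model.evaluate(test_ds, verbose=1)
    return history, (test_loss, test_accuracy)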
with open(results_csv_path, "w") as f:
    f.write("distance,val_loss,val_accuracy,test_loss,test_accuracy")

print("Loading best weights...")
model.load_weights("./best_weights/weights.ckpt")

print("Analyze the model...")
for distance in ALL_DISTANCES_FEET:
    print("Distance", distance)

    # Distance 4 would not generate a windowed dataset for some reason
    if distance == 4:
        continue

    target_dataset_path = "{datasets_base_path}/automated_windower/windowed_EachDevice-200k_batch-100_stride-20_distances-{distance}".format(
        datasets_base_path=utils.get_datasets_base_path(), distance=distance)

    datasets = Windowed_Shuffled_Dataset_Factory(target_dataset_path)

    train_ds, val_ds, test_ds = apply_dataset_pipeline(datasets)

    # Analyze on the test data
    test_results = model.evaluate(
        test_ds,
        verbose=1,
    )

    # Analyze on the val data
    val_results = model.evaluate(
        val_ds,
        verbose=1,
    )
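    # How each row reaches results_csv_path is not shown above. The lines below are a
    # plausible continuation, NOT the original code: they assume model.evaluate() returns
    # [loss, accuracy] (a single accuracy metric) and that rows follow the header written
    # at the top of this block.
    with open(results_csv_path, "a") as f:
        f.write("\n{distance},{val_loss},{val_accuracy},{test_loss},{test_accuracy}".format(
            distance=distance,
            val_loss=val_results[0],
            val_accuracy=val_results[1],
            test_loss=test_results[0],
            test_accuracy=test_results[1],
        ))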