def get_all_shuffled():
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    RANGE = len(ALL_SERIAL_NUMBERS)

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(path, train_val_test_splits=(0.6, 0.2, 0.2))

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    return train_ds, val_ds, test_ds
def get_shuffled_and_windowed_from_pregen_ds():
    from steves_utils.ORACLE.windowed_shuffled_dataset_accessor import Windowed_Shuffled_Dataset_Factory
    from steves_utils import utils

    # Batch size is baked into the dataset
    path = os.path.join(utils.get_datasets_base_path(), "windowed_200k-each-devices_batch-100")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Windowed_Shuffled_Dataset_Factory(path)

    ORIGINAL_BATCH_SIZE = 100
    DESIRED_BATCH_SIZE = 512

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(100 * ORIGINAL_BATCH_SIZE, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(DESIRED_BATCH_SIZE)
    val_ds = val_ds.batch(DESIRED_BATCH_SIZE)
    test_ds = test_ds.batch(DESIRED_BATCH_SIZE)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
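# The loader above re-batches a pregenerated dataset: the files are stored in batches of
# ORIGINAL_BATCH_SIZE, so they are unbatched, shuffled at the example level, and re-batched
# to DESIRED_BATCH_SIZE. The sketch below is an illustration only, not part of the original
# pipeline; the toy dataset and sizes are assumptions chosen just to show the
# unbatch -> shuffle -> batch -> prefetch pattern.
def _demo_rebatch_pattern():
    import tensorflow as tf

    original_batch = 100   # stands in for ORIGINAL_BATCH_SIZE baked into the stored dataset
    desired_batch = 512    # the batch size the model is actually trained with

    toy = tf.data.Dataset.range(10_000).batch(original_batch)   # "pregenerated" batches

    rebatched = (
        toy.unbatch()                                                     # back to single examples
           .shuffle(100 * original_batch, reshuffle_each_iteration=True)  # buffer sized in original-batch multiples
           .batch(desired_batch)                                          # re-batch for training
           .prefetch(tf.data.AUTOTUNE)
    )

    for batch in rebatched.take(1):
        print(batch.shape)  # (512,)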
def get_all_shuffled():
    global RANGE
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    BATCH = 256

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(
        path,
        train_val_test_splits=(0.6, 0.2, 0.2),
        reshuffle_train_each_iteration=False)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS)).batch(BATCH)

    return train_ds, val_ds, test_ds
def get_shuffled_and_windowed_from_pregen_ds():
    from steves_utils.ORACLE.windowed_shuffled_dataset_accessor import Windowed_Shuffled_Dataset_Factory
    from steves_utils import utils

    # Batch size is baked into the dataset (batch-100 in the path below);
    # re-batch to the size the model is trained with.
    ORIGINAL_BATCH_SIZE = 100
    DESIRED_BATCH_SIZE = 512

    path = os.path.join(
        utils.get_datasets_base_path(),
        "automated_windower",
        "windowed_EachDevice-200k_batch-100_stride-20_distances-2.8.14.20.26.32.38.44.50.56.62")
    print(path)

    datasets = Windowed_Shuffled_Dataset_Factory(path)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True
    )

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(100 * ORIGINAL_BATCH_SIZE, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(DESIRED_BATCH_SIZE)
    val_ds = val_ds.batch(DESIRED_BATCH_SIZE)
    test_ds = test_ds.batch(DESIRED_BATCH_SIZE)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
def get_all_shuffled_windowed():
    global RANGE
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    DATASET_BATCH_SIZE = 100
    BATCH = 256
    chunk_size = 4 * ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    STRIDE_SIZE = 1
    NUM_REPEATS = math.floor((chunk_size - ORIGINAL_PAPER_SAMPLES_PER_CHUNK) / STRIDE_SIZE) + 1

    path = os.path.join(utils.get_datasets_base_path(), "all_shuffled_chunk-512", "output")
    print(utils.get_datasets_base_path())
    print(path)

    datasets = Shuffled_Dataset_Factory(
        path,
        train_val_test_splits=(0.6, 0.2, 0.2),
        reshuffle_train_each_iteration=False)

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS))
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS))
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS))

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.map(
        lambda x, y: (
            tf.transpose(
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, STRIDE_SIZE),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)), repeats=NUM_REPEATS, axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    # We aren't really windowing the val and test data, we are just splitting them into
    # 128 sample chunks so that they are the same shape as the train data
    val_ds = val_ds.map(
        lambda x, y: (
            tf.transpose(
                # See, stride == length, meaning we are just splitting the chunks, not really windowing
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x, y: (
            tf.transpose(
                # See, stride == length, meaning we are just splitting the chunks, not really windowing
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch().take(200000 * len(ALL_SERIAL_NUMBERS))
    val_ds = val_ds.unbatch().take(10000 * len(ALL_SERIAL_NUMBERS))
    test_ds = test_ds.unbatch().take(50000 * len(ALL_SERIAL_NUMBERS))

    train_ds = train_ds.shuffle(DATASET_BATCH_SIZE * NUM_REPEATS * 3, reshuffle_each_iteration=True)

    train_ds = train_ds.batch(BATCH)
    val_ds = val_ds.batch(BATCH)
    test_ds = test_ds.batch(BATCH)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
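# The frame/transpose/repeat mapping above carries some shape bookkeeping that is easy to
# lose track of. The standalone sketch below illustrates it with made-up constants
# (samples_per_chunk and num_classes are assumptions standing in for
# ORIGINAL_PAPER_SAMPLES_PER_CHUNK and RANGE); it is not part of the original pipeline.
def _demo_windowing_shapes():
    import math
    import tensorflow as tf

    samples_per_chunk = 128                  # assumption standing in for ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    chunk_size = 4 * samples_per_chunk
    stride = 1
    num_classes = 16                         # assumption standing in for RANGE
    num_repeats = math.floor((chunk_size - samples_per_chunk) / stride) + 1

    iq = tf.random.normal((2, chunk_size))   # one I/Q chunk: 2 rows (I and Q) x chunk_size samples
    label = tf.one_hot(3, num_classes)       # its one-hot serial-number label

    # frame() slides a length-samples_per_chunk window along the last axis, producing
    # shape (2, num_repeats, samples_per_chunk); the transpose moves the window axis to
    # the front so each window looks like an ordinary (2, samples_per_chunk) example.
    windows = tf.transpose(tf.signal.frame(iq, samples_per_chunk, stride), [1, 0, 2])
    labels = tf.repeat(tf.reshape(label, (1, num_classes)), repeats=num_repeats, axis=0)

    print(windows.shape)  # (385, 2, 128)
    print(labels.shape)   # (385, 16)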
def get_windowed_foxtrot_shuffled():
    from steves_utils.ORACLE.shuffled_dataset_accessor import Shuffled_Dataset_Factory
    from steves_utils import utils

    path = os.path.join(utils.get_datasets_base_path(), "foxtrot", "output")

    datasets = Shuffled_Dataset_Factory(path, train_val_test_splits=(0.6, 0.2, 0.2))

    train_ds = datasets["train_ds"]
    val_ds = datasets["val_ds"]
    test_ds = datasets["test_ds"]

    # count = 0
    # for e in train_ds.concatenate(val_ds).concatenate(test_ds):
    #     count += e["IQ"].shape[0]
    # print(count)
    # sys.exit(1)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    # Chunk size and batch are determined by the shuffled dataset
    chunk_size = 4 * ORIGINAL_PAPER_SAMPLES_PER_CHUNK
    STRIDE_SIZE = 1
    BATCH = 1000
    REBATCH = 500
    NUM_REPEATS = math.floor((chunk_size - ORIGINAL_PAPER_SAMPLES_PER_CHUNK) / STRIDE_SIZE) + 1

    # print(RANGE)
    # sys.exit(1)

    # serial_number_id ranges from [0,15]
    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] < 13 or x["serial_number_id"] > 13)

    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] != 13)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] != 13)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] != 13)

    # train_ds = train_ds.filter(lambda x: x["serial_number_id"] < 15)
    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] < 15)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] < 15)

    # val_ds = val_ds.filter(lambda x: x["serial_number_id"] in target_serials)
    # test_ds = test_ds.filter(lambda x: x["serial_number_id"] in target_serials)

    train_ds = train_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x: (x["IQ"], tf.one_hot(x["serial_number_id"], RANGE)),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.map(
        lambda x, y: (
            tf.transpose(
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, STRIDE_SIZE),  # NUM_REPEATS frames per chunk
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)), repeats=NUM_REPEATS, axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    val_ds = val_ds.map(
        lambda x, y: (
            tf.transpose(
                # stride == length, so this just splits each chunk into non-overlapping frames
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    test_ds = test_ds.map(
        lambda x, y: (
            tf.transpose(
                # stride == length, so this just splits each chunk into non-overlapping frames
                tf.signal.frame(x, ORIGINAL_PAPER_SAMPLES_PER_CHUNK, ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                [1, 0, 2]),
            tf.repeat(tf.reshape(y, (1, RANGE)),
                      repeats=math.floor(chunk_size / ORIGINAL_PAPER_SAMPLES_PER_CHUNK),
                      axis=0)  # Repeat our one hot tensor once per frame
        ),
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=True)

    train_ds = train_ds.unbatch()
    val_ds = val_ds.unbatch()
    test_ds = test_ds.unbatch()

    train_ds = train_ds.shuffle(BATCH * NUM_REPEATS * 4)
    val_ds = val_ds.shuffle(BATCH * NUM_REPEATS * 4)
    test_ds = test_ds.shuffle(BATCH * NUM_REPEATS * 4)

    # for e in test_ds:
    #     print(e[1])
    # sys.exit(1)

    train_ds = train_ds.batch(REBATCH)
    val_ds = val_ds.batch(REBATCH)
    test_ds = test_ds.batch(REBATCH)

    train_ds = train_ds.prefetch(100)
    val_ds = val_ds.prefetch(100)
    test_ds = test_ds.prefetch(100)

    return train_ds, val_ds, test_ds
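# Each of the loaders above returns (train_ds, val_ds, test_ds) pipelines that can be fed
# straight to Keras. A minimal usage sketch, not part of the original script: it assumes a
# `model` compiled elsewhere in this file with a single accuracy metric (so evaluate()
# returns [loss, accuracy]); the epoch count is an arbitrary placeholder.
def _demo_train_with_loader(model, epochs=10):
    train_ds, val_ds, test_ds = get_all_shuffled_windowed()
    history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)
    test_loss, test_accuracy = model.evaluate(test_ds, verbose=1)
    return history, (test_loss, test_accuracy)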
with open(results_csv_path, "w") as f:
    f.write("distance,val_loss,val_accuracy,test_loss,test_accuracy")

print("Loading best weights...")
model.load_weights("./best_weights/weights.ckpt")

print("Analyze the model...")
for distance in ALL_DISTANCES_FEET:
    print("Distance", distance)

    # Distance 4 would not generate a windowed dataset for some reason
    if distance == 4:
        continue

    target_dataset_path = "{datasets_base_path}/automated_windower/windowed_EachDevice-200k_batch-100_stride-20_distances-{distance}".format(
        datasets_base_path=utils.get_datasets_base_path(), distance=distance)

    datasets = Windowed_Shuffled_Dataset_Factory(target_dataset_path)

    train_ds, val_ds, test_ds = apply_dataset_pipeline(datasets)

    # Analyze on the test data
    test_results = model.evaluate(
        test_ds,
        verbose=1,
    )

    # Analyze on the val data
    val_results = model.evaluate(
        val_ds,
        verbose=1,
    )
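    # How each row reaches results_csv_path is not shown above. The lines below are a
    # plausible continuation, NOT the original code: they assume model.evaluate() returns
    # [loss, accuracy] (a single accuracy metric) and that rows follow the header written
    # at the top of this block.
    with open(results_csv_path, "a") as f:
        f.write("\n{distance},{val_loss},{val_accuracy},{test_loss},{test_accuracy}".format(
            distance=distance,
            val_loss=val_results[0],
            val_accuracy=val_results[1],
            test_loss=test_results[0],
            test_accuracy=test_results[1],
        ))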