Code example #1
0
def load_dataset_from_classifier(
    classifier_result_dir,
    test_log_file="test_log.txt",
    test_data_file="test_data.txt",
    layer_data_file="outputs_to_labels.txt",
):
    """Assemble a dataset from a classifier run's best-epoch layer dumps.

    The best epoch is picked by minimum logged loss. "X" is the final
    layer's activations and "Y" is the input layer's (layer_0), each
    concatenating the train and test splits. Separator data is copied
    over when the underlying files provide it.
    """
    log_path = os.path.join(classifier_result_dir, test_log_file)
    epoch_dir = os.path.join(classifier_result_dir,
                             "Epoch_" + str(get_best_epoch(log_path)))
    num_layers = len(get_immediate_subdirectories(epoch_dir))

    first_data, _ = load_dataset_from_file(
        os.path.join(epoch_dir, "layer_0", layer_data_file))
    last_data, _ = load_dataset_from_file(
        os.path.join(epoch_dir, "layer_" + str(num_layers - 1),
                     layer_data_file))

    dataset = {
        "X": last_data["train_X"] + last_data["test_X"],
        "Y": first_data["train_X"] + first_data["test_X"],
    }

    # Carry over separator information when present.
    if "separator_X" in last_data:
        dataset["separator"] = last_data["separator_X"]
    test_data, _ = load_dataset_from_file(
        os.path.join(classifier_result_dir, test_data_file))
    if "separator" in test_data:
        dataset["separator_GT"] = test_data["separator"]

    return dataset
Code example #2
0
def l2_tri_class_test(
    separator_data_path=(
        "/scratch/richards/generative_data/test3/data_processed_3.txt"),
    num_s_points=400,
):
    """Run repeated binary-training trials followed by L2 tri-classification.

    Args:
        separator_data_path: File holding the separator dataset. Defaults
            to the previously hard-coded path so existing callers are
            unaffected.
        num_s_points: Number of separator points handed to
            l2_tri_classification (previously the hard-coded 400).
    """
    args = parse_args()
    separator_data, _ = load_dataset_from_file(separator_data_path)
    for trial in range(args.trials):
        trial_output_dir = os.path.join(args.output_dir,
                                        "trial_" + str(trial))
        tp = get_train_params(trial_output_dir,
                              args.input_data,
                              args.depth,
                              prefix="binary")
        train_loop(tp)
        l2_tri_classification(trial_output_dir, tp["train_dataset"],
                              tp["test_dataset"], separator_data,
                              num_s_points, args.depth)
Code example #3
0
def parse_train_test_datasets(dataset_file, train_ratio=0.85):
    """Randomly split the dataset in *dataset_file* into train/test parts.

    A train_ratio fraction of samples (rounded down) is sampled uniformly
    without replacement for the train split; the remaining indices form
    the test split, shuffled. Every key other than "X"/"Y" is copied
    unchanged into both splits.

    Args:
        dataset_file: Path understood by load_dataset_from_file.
        train_ratio: Fraction of samples assigned to the train split.

    Returns:
        [train_dataset, test_dataset]: dicts with "X" and "Y" lists plus
        any auxiliary keys from the source dataset.
    """
    dataset, _ = load_dataset_from_file(dataset_file)
    num_samples = len(dataset["X"])
    num_train = int(train_ratio * num_samples)
    train_samples = random.sample(range(num_samples), num_train)

    # The test split is just the complement of the train indices; a set
    # makes this a single O(n) pass instead of the previous hand-rolled
    # sorted-merge scan.
    train_index_set = set(train_samples)
    test_samples = [i for i in range(num_samples)
                    if i not in train_index_set]
    random.shuffle(test_samples)

    train_dataset = {
        "X": [dataset["X"][i] for i in train_samples],
        "Y": [dataset["Y"][i] for i in train_samples],
    }
    test_dataset = {
        "X": [dataset["X"][i] for i in test_samples],
        "Y": [dataset["Y"][i] for i in test_samples],
    }

    # Propagate auxiliary keys (e.g. headers, separators) to both splits.
    for key in dataset:
        if key not in ("X", "Y"):
            train_dataset[key] = dataset[key]
            test_dataset[key] = dataset[key]
    return [train_dataset, test_dataset]
Code example #4
0
def get_best_epoch(test_log_file):
    """Return the epoch from *test_log_file* with the smallest logged loss."""
    log_data, _ = load_dataset_from_file(test_log_file)
    best_index = np.argmin(np.array(log_data["loss"]))
    return log_data["epoch"][best_index]