import os
import pickle

import numpy as np
from tensorflow import keras as K  # K is assumed to be tensorflow.keras here
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder

# Dataset and ResNet are assumed to be project-local modules providing the
# data loader and the network builder used below.


def grid_search(train_labels: str,
                test_labels: str,
                output: str,
                res: tuple = (120, 160),
                epochs: int = 20,
                register: bool = False):
    """
    Runs a grid search over different batch sizes.

    Params
    ------
        train_labels: str
            Path to training labels
        test_labels: str
            Path to testing labels
        output: str
            Path to output directory
        res: tuple
            Input resolution of network
        epochs: int
            Training epochs
        register: bool
            Whether to attempt registering the images
    """

    print("=> Starting batch size grid search.")
    BATCH_SIZES = [2, 8, 32, 64, 128, 512]

    # Data
    print("=> Loading data.")
    train = Dataset(train_labels, res=res, batch_size=1, register=register)
    test = Dataset(test_labels, res=res, batch_size=1, register=register)

    X_train, y_train = train.get_all()
    X_test, y_test = test.get_all()
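    # get_all() materialises the complete train and test sets as in-memory
    # arrays, so model.fit() below can be driven with an explicit batch_size.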

    # Models
    sub_path = os.path.join(output, "batch_size")
    if not os.path.isdir(sub_path):
        os.mkdir(sub_path)

    with open(os.path.join(sub_path, "report.txt"), "w") as f:
        title = f"##### Evaluation results for batch size grid search #####"
        f.write("#" * len(title) + "\n")
        f.write(title + "\n")
        f.write("#" * len(title) + "\n\n")

    for batch_size in BATCH_SIZES:
        print(f"\n=> Evaluating batch size {batch_size}.")
        name_prefix = f"{batch_size}_batches_"

        # Prepare model
        net = ResNet("fusion",
                     num_classes=train.num_classes(),
                     input_shape=train.shape(),
                     weight_dir=output)
        model = net.get_model()

        optimizer = K.optimizers.Adam(learning_rate=0.000001, epsilon=0.005)
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])

        # Train model
        hist = model.fit(x=X_train,
                         y=y_train,
                         epochs=epochs,
                         batch_size=batch_size,
                         validation_data=(X_test, y_test),
                         verbose=2)

        print("\n=> Saving weights and training history")
        # Save weights
        model.save_weights(os.path.join(sub_path, name_prefix + "weights.h5"))

        # Save history
        with open(os.path.join(sub_path, name_prefix + "history.pickle"),
                  "wb") as f:
            pickle.dump(hist.history, f)

        # Evaluate
        print("\n=> Starting evaluation")
        with open(os.path.join(sub_path, "report.txt"), "a") as f:
            title = f"##### Evaluation results for batch size {batch_size} #####"
            f.write("\n\n" + title + "\n")
            f.write("-" * len(title) + "\n")

            # Test classification report
            f.write("\n##### Test #####\n")
            y_pred = np.argmax(model.predict(X_test), axis=1)
            y_test_ = np.argmax(y_test, axis=1)
            f.write(
                classification_report(y_test_,
                                      y_pred,
                                      target_names=test.class_labels))


# Example 2: k-fold cross-validation variant of the same training pipeline.
def kfold_cv(train_labels: str,
             test_labels: str,
             output: str,
             res: tuple = (120, 160),
             epochs: int = 20,
             n_splits: int = 5,
             register: bool = False):
    """
    Runs stratified k-fold cross-validation.

    Params
    ------
        train_labels: str
            Path to training labels
        test_labels: str
            Path to testing labels
        output: str
            Path to output directory
        res: tuple
            Input resolution of network
        epochs: int
            Training epochs
        n_splits: int
            Number of CV splits
        register: bool
            Whether to attempt registering the images
    """

    print("=> Starting k-fold.")

    # Data
    print("=> Loading data.")
    train = Dataset(train_labels, res=res, batch_size=1, register=register)
    test = Dataset(test_labels, res=res, batch_size=1, register=register)

    X_train, y_train = train.get_all()
    X_test, y_test = test.get_all()

    # Collapse the one-hot targets to integer class indices so StratifiedKFold
    # can stratify on them.
    # (Note: scikit-learn >= 1.2 renames the sparse argument to sparse_output.)
    y_train = np.argmax(y_train, axis=1)[:, None]
    label_encoder = OneHotEncoder(sparse=False)
    label_encoder.fit(y_train)
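    # Fitting the encoder once on the full training label set keeps the
    # one-hot column layout identical across folds, even if a particular
    # fold's training split happens to miss a class.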

    sub_path = os.path.join(output, "kfold")
    if not os.path.isdir(sub_path):
        os.mkdir(sub_path)

    with open(os.path.join(sub_path, "report.txt"), "w") as f:
        title = f"##### Evaluation results for k-fold CV #####"
        f.write("#" * len(title) + "\n")
        f.write(title + "\n")
        f.write("#" * len(title) + "\n\n")

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
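    # StratifiedKFold keeps the class proportions roughly equal in every fold,
    # and the fixed random_state makes the splits reproducible between runs.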
    
    for i, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
        print(f"=> Running Fold {i+1}/{n_splits}")
        name_prefix = f"fold_{i}_"

        X_train_ = X_train[train_index]
        y_train_ = y_train[train_index]
        y_train_ = label_encoder.transform(y_train_)

        # Prepare model
        net = ResNet("fusion", num_classes=train.num_classes(), input_shape=train.shape(), weight_dir=output)
        model = net.get_model()

        optimizer = K.optimizers.Adam(learning_rate=0.000001, epsilon=0.005)
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])

        # Train model
        hist = model.fit(x=X_train_,
                         y=y_train_,
                         epochs=epochs,
                         batch_size=32,
                         validation_data=(X_test, y_test),
                         verbose=2)

        # Save history
        print("\n=> Saving training history")
        with open(os.path.join(sub_path, name_prefix + "history.pickle"), "wb") as f:
            pickle.dump(hist.history, f)

        # Evaluate
        print("\n=> Starting evaluation")
        with open(os.path.join(sub_path, "report.txt"), "a") as f:
            title = f"##### Evaluation results for fold {i} #####"
            f.write("\n\n" + title + "\n")
            f.write("-" * len(title) + "\n")

            # Test classification report
            f.write("\n##### Test #####\n")
            y_pred = np.argmax(model.predict(X_test), axis=1)
            y_test_ = np.argmax(y_test, axis=1)
            f.write(
                classification_report(y_test_,
                                      y_pred,
                                      target_names=test.class_labels))

        # Save classification report
        with open(os.path.join(sub_path, name_prefix + f"report_{i}.pickle"), "wb") as f:
            pickle.dump(classification_report(y_test_, y_pred, target_names=test.class_labels, output_dict=True), f)
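
# A minimal usage sketch for the k-fold routine above. The paths and split
# count below are illustrative assumptions, not part of the original code:
#
#     kfold_cv(train_labels="data/train_labels.csv",
#              test_labels="data/test_labels.csv",
#              output="results",
#              epochs=20,
#              n_splits=5)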