Esempio n. 1
0
    def __init__(self, csv_file_path, image_path, has_target=True):
        """Read the CSV index and prepare encoded targets and image paths.

        Args:
            csv_file_path: Path handed to ``read_csv_file``, which yields the
                image file names and their targets.
            image_path: Directory that each file name is joined onto.
            has_target: Flag stored on the instance as ``self.has_target``.
                It is not consulted here; the targets are always encoded.
        """
        names, raw_target = read_csv_file(csv_file_path)

        # Fit the encoder on the raw targets, then keep only the encoded form.
        self.y_encoder = encoder = OneHotEncoder()
        encoder.fit(raw_target)
        self.target = encoder.transform(raw_target)

        self.has_target = has_target
        self.file_paths = [os.path.join(image_path, name) for name in names]
Esempio n. 2
0
    def __init__(self, csv_file_path, image_path, has_target=True):
        """Load file names and targets from a CSV and set up instance state.

        Args:
            csv_file_path: Path passed to ``read_csv_file`` to obtain the
                image file names and the targets.
            image_path: Directory prefix joined with every file name.
            has_target: Stored unchanged as ``self.has_target``; this
                constructor encodes the targets regardless of its value.
        """
        image_names, labels = read_csv_file(csv_file_path)

        # The encoder is kept on the instance after being fitted on the labels.
        self.y_encoder = fitted = OneHotEncoder()
        fitted.fit(labels)
        self.target = fitted.transform(labels)

        self.has_target = has_target
        self.file_paths = [
            os.path.join(image_path, image_name) for image_name in image_names
        ]
Esempio n. 3
0
def load_image_dataset(csv_file_path, images_path):
    """Load an image dataset whose labels are listed in a CSV file.

    The CSV file should contain two columns named 'File Name' and 'Label'.
    Entries in the first column should match the image file names, including
    their extensions (e.g., .jpg, .png). The CSV is read with
    `read_csv_file` and the images with `read_images`.

    Args:
        csv_file_path: Path to the CSV file.
        images_path: Path to the directory containing all the images.

    Returns:
        x: Four dimensional numpy.ndarray. The channel dimension is the last dimension.
        y: numpy.ndarray of the labels.
    """
    file_names, labels = read_csv_file(csv_file_path)
    images = read_images(file_names, images_path)
    features = np.array(images)
    targets = np.array(labels)
    return features, targets
Esempio n. 4
0
def load_image_dataset(csv_file_path, images_path):
    """Read images from a directory and their labels from a CSV file.

    The CSV file should contain two columns whose names are 'File Name' and
    'Label'. The file names in the first column should match the names of the
    image files, extensions included (e.g., .jpg, .png).

    Args:
        csv_file_path: Path to the CSV file; passed to `read_csv_file`.
        images_path: Directory holding the images; passed to `read_images`
            together with the file names from the CSV.

    Returns:
        x: Four dimensional numpy.ndarray. The channel dimension is the last dimension.
        y: The labels, as a numpy.ndarray.
    """
    names, label_column = read_csv_file(csv_file_path)
    loaded = read_images(names, images_path)
    return np.array(loaded), np.array(label_column)
Esempio n. 5
0
def load_image_dataset(csv_file_path, images_path, parallel=True):
    """Load images from a directory and their labels from a CSV file.

    The CSV file should contain two columns whose names are 'File Name' and
    'Label'. The file names in the first column should match the names of the
    image files, extensions included (e.g., .jpg, .png).

    Args:
        csv_file_path: a string of the path to the CSV file
        images_path: a string of the path to the directory containing the images
        parallel: (Default: True) Forwarded to `read_images`; load the dataset
            using multiprocessing.

    Returns:
        x: Four dimensional numpy.ndarray. The channel dimension is the last dimension.
        y: a numpy.ndarray of the labels for the images
    """
    file_names, labels = read_csv_file(csv_file_path)
    images = read_images(file_names, images_path, parallel)
    features, targets = np.array(images), np.array(labels)
    return features, targets
Esempio n. 6
0
import numpy as np

from autokeras import TextClassifier
from autokeras.utils import read_csv_file


def convert_labels_to_one_hot(labels, num_labels):
    """Encode integer-like class labels as a one-hot matrix.

    Args:
        labels: Iterable of class indices. Each item is converted with
            ``int``, so numeric strings such as ``"2"`` are accepted.
        num_labels: Number of classes, i.e. the number of output columns.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(labels), num_labels)`` with
        a single 1 per row at the column given by that row's label.

    Raises:
        IndexError: If any label lies outside ``[0, num_labels)``.
        ValueError: If a label cannot be converted with ``int``.
    """
    indices = [int(label) for label in labels]

    # NumPy would wrap negative indices around and silently mark the wrong
    # class, so reject anything outside the valid range up front.
    for index in indices:
        if not 0 <= index < num_labels:
            raise IndexError(
                "label {} is out of range for {} classes".format(
                    index, num_labels))

    one_hot = np.zeros((len(indices), num_labels))
    # An explicit integer dtype keeps the empty-input case a valid index array.
    columns = np.asarray(indices, dtype=np.intp)
    one_hot[np.arange(len(indices)), columns] = 1
    return one_hot


if __name__ == "__main__":
    # NOTE: the training and evaluation splits are both read from the same
    # CSV file, so the accuracy printed below is measured on the training data.
    csv_path = "../data/w_train_v3.csv"
    train_texts, train_labels = read_csv_file(csv_path)
    test_texts, test_labels = read_csv_file(csv_path)

    # This dataset is encoded with three classes.
    train_targets = convert_labels_to_one_hot(train_labels, num_labels=3)
    test_targets = convert_labels_to_one_hot(test_labels, num_labels=3)

    classifier = TextClassifier(verbose=True)
    classifier.output_model_file = "../data/v2.h5"
    # Short time limit for a quick run; the original author's note suggests
    # 60*60*15 as the value for a full search.
    classifier.fit(x=train_texts, y=train_targets, time_limit=60)

    accuracy = classifier.evaluate(test_texts, test_targets)
    print("Classification accuracy is: ", 100 * accuracy, "%")
Esempio n. 7
0
import numpy as np

from autokeras import TextClassifier
from autokeras.utils import read_csv_file


def convert_labels_to_one_hot(labels, num_labels):
    """Encode integer-like class labels as a one-hot matrix.

    Args:
        labels: Iterable of class indices. Each item is converted with
            ``int``, so numeric strings such as ``"2"`` are accepted.
        num_labels: Number of classes, i.e. the number of output columns.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(labels), num_labels)`` with
        a single 1 per row at the column given by that row's label.

    Raises:
        IndexError: If any label lies outside ``[0, num_labels)``.
        ValueError: If a label cannot be converted with ``int``.
    """
    indices = [int(label) for label in labels]

    # NumPy would wrap negative indices around and silently mark the wrong
    # class, so reject anything outside the valid range up front.
    for index in indices:
        if not 0 <= index < num_labels:
            raise IndexError(
                "label {} is out of range for {} classes".format(
                    index, num_labels))

    one_hot = np.zeros((len(indices), num_labels))
    # An explicit integer dtype keeps the empty-input case a valid index array.
    columns = np.asarray(indices, dtype=np.intp)
    one_hot[np.arange(len(indices)), columns] = 1
    return one_hot


if __name__ == "__main__":
    # Evaluation-only script: no fit() call is made here.
    csv_path = "../data/tmp_dataset.csv"
    test_texts, test_labels = read_csv_file(csv_path)
    test_targets = convert_labels_to_one_hot(test_labels, num_labels=5)

    classifier = TextClassifier(verbose=True)
    # The label count set on the classifier matches the one-hot width above.
    classifier.num_labels = 5
    # NOTE(review): presumably evaluate() relies on a model already stored at
    # this path, since nothing is trained in this script - confirm.
    classifier.output_model_file = "../data/v1.h5"

    accuracy = classifier.evaluate(test_texts, test_targets)
    print("Classification accuracy is: ", 100 * accuracy, "%")