Example #1
0
    def setUp(self):
        """
            Prepare the instance state: data paths and the galaxy label data set.

            Presumably the unittest fixture hook (the enclosing class is not
            visible from here). Reads the ``VIRTUAL_ENV`` environment variable,
            stores the galaxy CSV path and the image directory on the instance,
            then loads the label data set with ``one_hot=False`` and a
            validation size of 0.2.

            Raises:
                KeyError: if ``VIRTUAL_ENV`` is not set in the environment.
        """
        # Every data path is built relative to the virtual environment root.
        venv_root = os.environ["VIRTUAL_ENV"]
        self.galaxy_csv_file = venv_root + "/data/csv/galaxy/galaxy.csv"
        self.galaxy_images_path = venv_root + "/data/images/"

        # Value handed to the loader as its ``validation_size`` argument.
        holdout_fraction = np.float32(0.2)

        # The context is built with the label strategy and the same strategy
        # is then set explicitly again, exactly as the fixture always did.
        label_strategy = GalaxyDataSetLabelStrategy()
        loader = Context(label_strategy)
        loader.set_strategy(label_strategy)

        self.label_dataset = loader.load_dataset(
            csv_file=self.galaxy_csv_file,
            one_hot=False,
            validation_size=holdout_fraction,
        )
def main():
    """
        Program's entry point.

        Loads data sets through a single ``Context`` whose loading strategy is
        swapped between calls (image, feature, label, then spam strategies),
        discretizes the non-one-hot galaxy feature data set with the supervised
        and then the unsupervised strategy, extracts galaxy features with
        ``GalaxyProcessor`` and writes them to a comma-delimited text file.

        Every path is resolved relative to the ``VIRTUAL_ENV`` environment
        variable.

        Raises:
            KeyError: if ``VIRTUAL_ENV`` is not set in the environment.
    """
    # The desired validation size. This is the single source of truth: every
    # load/discretize call below receives this value.
    validation_size = 0.2
    validation_fraction = np.float32(validation_size)

    # Root directory of the data tree, taken from the environment (read once).
    venv_root = os.environ["VIRTUAL_ENV"]

    # Input files and directories.
    galaxy_csv_file = venv_root + "/data/csv/galaxy/galaxy.csv"
    galaxy_feature_csv_file = venv_root + "/data/csv/galaxy/galaxy_feature_vectors.csv"
    spam_feature_csv_file = venv_root + "/data/csv/spam/spam.csv"
    galaxy_images_path = venv_root + "/data/images/"

    # Export destinations.
    galaxy_feature_vector_export_path = (
        venv_root + "/data/csv/galaxy/exported_personal_galaxy_feature_vectors.csv")
    galaxy_mlp_export_path = venv_root + "/data/models/exports/MLP/my_mlp"

    # Create instance of data set loading strategies.
    galaxy_image_data_set_strategy = GalaxyDataSetImageStrategy()
    galaxy_feature_data_set_strategy = GalaxyDataSetFeatureStrategy()
    galaxy_label_data_set_strategy = GalaxyDataSetLabelStrategy()
    spam_feature_dataset_strategy = SpamDataSetFeatureStrategy()

    # Set the context to galaxy image data set loading strategy.
    context = Context(galaxy_image_data_set_strategy)
    img_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=True,
        validation_size=validation_fraction)

    # Set the context to galaxy feature data set loading strategy.
    # The same feature CSV is loaded twice: one-hot and non-one-hot.
    context.set_strategy(galaxy_feature_data_set_strategy)
    feature_oneHot_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=True,
        validation_size=validation_fraction)

    feature_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=False,
        validation_size=validation_fraction)

    # Set the context to galaxy label data set loading strategy.
    context.set_strategy(galaxy_label_data_set_strategy)
    label_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=False,
        validation_size=validation_fraction)

    # Set the context to spam feature data set loading strategy.
    context.set_strategy(spam_feature_dataset_strategy)
    spam_feature_dataset = context.load_dataset(
        csv_file=spam_feature_csv_file,
        one_hot=False,
        validation_size=validation_fraction)

    # For TP02, set the discretization strategy and discretize data.
    # Both strategies are applied to the non-one-hot feature data set.
    preprocessor_context = DiscretizerContext(
        SupervisedDiscretizationStrategy())

    supervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset, validation_size=validation_fraction)

    preprocessor_context.set_strategy(UnsupervisedDiscretizationStrategy())

    unsupervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset, validation_size=validation_fraction)

    # Process galaxies.
    galaxy_processor = GalaxyProcessor(galaxy_images_path)
    features = galaxy_processor.process_galaxy(label_dataset)

    # Save extracted features to file.
    np.savetxt(galaxy_feature_vector_export_path, features, delimiter=",")
    print("File saved in directory " + galaxy_feature_vector_export_path)