# Example #1
def test_experiment_dataset_formats(data_format):
    """Smoke-test train/evaluate/predict for one tabular dataset format.

    The primary focus is to confirm that no exceptions are raised for the
    given data set format (and its in_memory handling), not to check model
    quality.
    """
    input_features = [numerical_feature(), category_feature()]
    output_features = [category_feature(), numerical_feature()]

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'preprocessing': {},
        'training': {'epochs': 2},
    }

    # unique temporary file name so parallel test runs do not collide
    csv_filename = 'train_{}.csv'.format(uuid.uuid4().hex[:10].upper())

    # generate synthetic training data in the requested format
    raw_data = generate_data(input_features, output_features, csv_filename)

    training_set_metadata = None
    if data_format == 'hdf5':
        # hdf5 requires a preprocessing pass to produce the .hdf5 file
        preprocessed = preprocess_for_training(config, dataset=raw_data)
        training_set, _, _, training_set_metadata = preprocessed
        dataset_to_use = training_set.data_hdf5_fp
    else:
        dataset_to_use = create_data_set_to_use(data_format, raw_data)

    # define and train the Ludwig model
    model = LudwigModel(config=config)
    model.train(
        dataset=dataset_to_use,
        training_set_metadata=training_set_metadata,
        random_seed=default_random_seed,
    )

    # exercise evaluation and prediction with the same data format
    model.evaluate(dataset=dataset_to_use)
    model.predict(dataset=dataset_to_use)

    # remove the temporary data created for this test
    delete_temporary_data(csv_filename)
# Example #2
def test_experiment_image_dataset(train_format, train_in_memory, test_format,
                                  test_in_memory):
    """Smoke-test train/evaluate/predict over image data set formats.

    The primary focus is to confirm that no exceptions are raised for the
    different combinations of data set format and ``in_memory`` setting on
    both the training and the test side, not to check model quality.
    """
    # Image inputs are materialized on disk under this folder
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='stacked_cnn',
                      preprocessing={
                          'in_memory': True,
                          'height': 12,
                          'width': 12,
                          'num_channels': 3,
                          'num_processes': 5
                      },
                      fc_size=16,
                      num_filters=8),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
    ]

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'preprocessing': {},
        'training': {
            'epochs': 2
        }
    }

    # create unique temporary names for train and test data sets
    train_csv_filename = 'train_' + uuid.uuid4().hex[:10].upper() + '.csv'
    test_csv_filename = 'test_' + uuid.uuid4().hex[:10].upper() + '.csv'

    # setup training data format to test
    train_data = generate_data(input_features, output_features,
                               train_csv_filename)
    config['input_features'][0]['preprocessing']['in_memory'] \
        = train_in_memory
    training_set_metadata = None

    if train_format == 'hdf5':
        # hdf5 requires a preprocessing pass to produce the .hdf5 file
        train_set, _, _, training_set_metadata = preprocess_for_training(
            config, dataset=train_data)
        train_dataset_to_use = train_set.data_hdf5_fp
    else:
        train_dataset_to_use = create_data_set_to_use(train_format, train_data)

    # define and train the Ludwig model
    model = LudwigModel(config=config, )
    model.train(dataset=train_dataset_to_use,
                training_set_metadata=training_set_metadata)

    # the test side may use a different in_memory setting than training
    model.config['input_features'][0]['preprocessing']['in_memory'] \
        = test_in_memory

    # setup test data format to test
    test_data = generate_data(input_features, output_features,
                              test_csv_filename)

    if test_format == 'hdf5':
        # create hdf5 test set; its metadata is not needed afterwards
        _, test_set, _, _ = preprocess_for_training(
            model.config, dataset=test_data)
        test_dataset_to_use = test_set.data_hdf5_fp
    else:
        test_dataset_to_use = create_data_set_to_use(test_format, test_data)

    # run functions with the specified data format
    model.evaluate(dataset=test_dataset_to_use)
    model.predict(dataset=test_dataset_to_use)

    # delete the temporary data created
    shutil.rmtree(image_dest_folder)
    delete_temporary_data(train_csv_filename)
    delete_temporary_data(test_csv_filename)
# Example #3
def test_experiment_image_dataset(train_format, train_in_memory, test_format, test_in_memory):
    """Smoke-test train/evaluate/predict over image data set formats.

    The primary focus is to confirm that no exceptions are raised for the
    different combinations of data set format and ``in_memory`` setting on
    both the training and the test side, not to check model quality.
    """
    # Image inputs are materialized on disk under this folder
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="stacked_cnn",
            preprocessing={"in_memory": True, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
            fc_size=16,
            num_filters=8,
        ),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
    ]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "fc_size": 14},
        "preprocessing": {},
        "training": {"epochs": 2},
    }

    # create unique temporary names for train and test data sets
    train_csv_filename = "train_" + uuid.uuid4().hex[:10].upper() + ".csv"
    test_csv_filename = "test_" + uuid.uuid4().hex[:10].upper() + ".csv"

    # setup training data format to test
    train_data = generate_data(input_features, output_features, train_csv_filename)
    config["input_features"][0]["preprocessing"]["in_memory"] = train_in_memory
    training_set_metadata = None

    backend = LocalTestBackend()
    if train_format == "hdf5":
        # hdf5 requires a preprocessing pass to produce the .hdf5 file
        train_set, _, _, training_set_metadata = preprocess_for_training(
            config,
            dataset=train_data,
            backend=backend,
        )
        train_dataset_to_use = train_set.data_hdf5_fp
    else:
        train_dataset_to_use = create_data_set_to_use(train_format, train_data)

    # define and train the Ludwig model
    model = LudwigModel(
        config=config,
        backend=backend,
    )
    model.train(dataset=train_dataset_to_use, training_set_metadata=training_set_metadata)

    # the test side may use a different in_memory setting than training
    model.config["input_features"][0]["preprocessing"]["in_memory"] = test_in_memory

    # setup test data format to test
    test_data = generate_data(input_features, output_features, test_csv_filename)

    if test_format == "hdf5":
        # create hdf5 test set; its metadata is not needed afterwards
        _, test_set, _, _ = preprocess_for_training(
            model.config,
            dataset=test_data,
            backend=backend,
        )
        test_dataset_to_use = test_set.data_hdf5_fp
    else:
        test_dataset_to_use = create_data_set_to_use(test_format, test_data)

    # run functions with the specified data format
    model.evaluate(dataset=test_dataset_to_use)
    model.predict(dataset=test_dataset_to_use)

    # delete the temporary data created
    shutil.rmtree(image_dest_folder)
    delete_temporary_data(train_csv_filename)
    delete_temporary_data(test_csv_filename)