Example #1
def test_train_batch(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = False

    assert len(data.train_batch_data(0).df.ImageId.unique()) == 2
    assert len(data.train_batch_data(1).df.ImageId.unique()) == 1
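
These batch tests use a data fixture that is not shown in the listing; a minimal sketch of what it might look like, assuming the project-level constants from Example #6, an assumed import path, and a fixture set of three images, is:

import pytest

# Import path is an assumption; names mirror the usage in Example #6.
from data import (Data, DataConfig, DataSplitConfig,
                  DEFAULT_DATA_DIR, BAND3, RIO_REGION)

# Hypothetical fixture assumed by the batch tests: a Data object backed by a
# small fixture set of 3 images. Each test overrides split_config, batch_size
# and augment before making assertions.
@pytest.fixture
def data():
    config = DataConfig(DEFAULT_DATA_DIR, BAND3, RIO_REGION)
    return Data(config, DataSplitConfig(0, 0, 0), batch_size=1, augment=False)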
Example #2
def test_train_size(sample_df):
    # 30% training data, 3 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)

    split = GroupDataSplit(sample_df, key='group_id', config=config)
    print(split.train_df())
    assert len(split.train_df()) == 3
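
The sample_df fixture behind the GroupDataSplit size tests (this one and Examples #11 and #12) is not shown; judging by the comments it holds 10 items keyed by a group_id column. A minimal sketch, assuming one row per group, could be:

import pandas as pd
import pytest

# Hypothetical fixture for the GroupDataSplit size tests: 10 rows, one group
# each, so group-wise splits map directly onto the asserted row counts
# (3 train / 5 val / 2 test for DataSplitConfig(0.5, 0.2, 0)).
@pytest.fixture
def sample_df():
    return pd.DataFrame({'group_id': list(range(10)),
                         'value': list(range(10))})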
Example #3
def test_val_batch(data):
    data.split_config = DataSplitConfig(0.99, 0, 0)
    data.batch_size = 2
    data.augment = True  # No-op
    data._split_data = None

    assert len(data.val_batch_data(0).df.ImageId.unique()) == 2
    assert len(data.val_batch_data(1).df.ImageId.unique()) == 1
Example #4
def test_neural_network_input_output_with_augmentation(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = True

    inputs, outputs = data.train_batch_data(0).to_nn((500, 500))

    assert inputs.shape == (2, 500, 500, 3)
    assert outputs.shape == (2, 500, 500, 1)
Example #5
def test_train_batch_count_with_augmentation(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = True

    assert data.train_batch_count() == 3

    # Number of images per batch has been halved since each will have an
    # associated augmented counterpart
    assert len(data.train_batch_data(0).df.ImageId.unique()) == 1
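
The comment in this test captures the batch-count arithmetic; a small standalone sketch of that arithmetic, using a hypothetical helper and assuming the fixture holds three training images (as the unique-ImageId asserts in Examples #1 and #5 suggest), is:

import math

# Hypothetical helper, not part of the library: with augmentation every
# original image also contributes an augmented copy, so a batch of size 2
# holds only one unique ImageId.
def expected_train_batches(num_images, batch_size, augment):
    effective = num_images * (2 if augment else 1)
    return math.ceil(effective / batch_size)

assert expected_train_batches(3, 2, augment=True) == 3   # matches this example
assert expected_train_batches(3, 3, augment=False) == 1  # matches Example #10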
Example #6
def model(args):
    # Use Rio 3band data
    config = DataConfig(DEFAULT_DATA_DIR, BAND3, RIO_REGION)

    # Train (70%), Val (10%), Test (20%), Fixed seed
    split_config = DataSplitConfig(0.1, 0.2, 1337)

    # Create data with batch size
    data = Data(config,
                split_config,
                batch_size=args.max_examples
                if hasattr(args, 'max_examples') else args.batch_size,
                augment=True)

    # Exclude empty polygons
    data.data_filter = lambda df: df.sq_ft > 0

    # Create model
    return UNet(data, (args.size, args.size),
                max_batches=args.max_batches,
                epochs=args.epochs,
                weights_path=args.weights_path,
                gpu_count=args.gpu_count,
                learning_rate=args.learning_rate)
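
A short sketch of how this model() helper might be called, assuming an argparse-style namespace; every value below is a placeholder, and since no max_examples attribute is set, args.batch_size is used:

from argparse import Namespace

# Hypothetical invocation with placeholder values; hasattr(args, 'max_examples')
# is False here, so the batch size falls back to args.batch_size.
args = Namespace(batch_size=8,
                 size=512,
                 max_batches=100,
                 epochs=10,
                 weights_path='weights.h5',
                 gpu_count=1,
                 learning_rate=1e-4)
unet = model(args)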
Example #7
def test_test_batch_count(data):
    data.split_config = DataSplitConfig(0, 0.4, 0)
    data.batch_size = 1
    data.augment = True  # No-op

    assert data.test_batch_count() == 2
Example #8
def test_test_batch(data):
    data.split_config = DataSplitConfig(0, 0.6, 0)
    data.batch_size = 1

    assert len(data.test_batch_data(0).df.ImageId.unique()) == 1
    assert len(data.test_batch_data(1).df.ImageId.unique()) == 1
Example #9
def test_val_batch_count(data):
    data.split_config = DataSplitConfig(0.99, 0, 0)
    data.batch_size = 1
    data.augment = True  # No-op

    assert data.val_batch_count() == 3
Example #10
def test_train_batch_count_with_whole_last_batch(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 3
    data.augment = False

    assert data.train_batch_count() == 1
Example #11
def test_test_size(sample_df):
    # 20% test data, 2 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)

    split = GroupDataSplit(sample_df, key='group_id', config=config)
    assert len(split.test_df) == 2
Example #12
def test_validation_size(sample_df):
    # 50% validation data, 5 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)

    split = GroupDataSplit(sample_df, key='group_id', config=config)
    assert len(split.val_df) == 5