def test_train_batch(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = False

    assert len(data.train_batch_data(0).df.ImageId.unique()) == 2
    assert len(data.train_batch_data(1).df.ImageId.unique()) == 1
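# A minimal sketch of the `data` fixture these batch tests assume (the real
# fixture would normally live in conftest.py). TEST_DATA_DIR and TEST_REGION
# are placeholder names; the assertions above only require that the fixture
# data contain three distinct ImageIds.
import pytest

@pytest.fixture
def data():
    config = DataConfig(TEST_DATA_DIR, BAND3, TEST_REGION)
    return Data(config, DataSplitConfig(0, 0, 0), batch_size=1, augment=False)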
def test_train_size(sample_df):
    # 30% training data, 3 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)
    split = GroupDataSplit(sample_df, key='group_id', config=config)
    assert len(split.train_df()) == 3
def test_val_batch(data):
    data.split_config = DataSplitConfig(0.99, 0, 0)
    data.batch_size = 2
    data.augment = True  # No-op
    data._split_data = None

    assert len(data.val_batch_data(0).df.ImageId.unique()) == 2
    assert len(data.val_batch_data(1).df.ImageId.unique()) == 1
def test_neural_network_input_output_with_augmentation(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = True

    inputs, outputs = data.train_batch_data(0).to_nn((500, 500))
    assert inputs.shape == (2, 500, 500, 3)
    assert outputs.shape == (2, 500, 500, 1)
def test_train_batch_count_with_augmentation(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 2
    data.augment = True

    assert data.train_batch_count() == 3

    # Number of images per batch has been halved since each will have an
    # associated augmented counterpart
    assert len(data.train_batch_data(0).df.ImageId.unique()) == 1
def model(args):
    # Use Rio 3band data
    config = DataConfig(DEFAULT_DATA_DIR, BAND3, RIO_REGION)

    # Train (70%), Val (10%), Test (20%), fixed seed
    split_config = DataSplitConfig(0.1, 0.2, 1337)

    # Create data with batch size
    data = Data(config, split_config,
                batch_size=args.max_examples if hasattr(args, 'max_examples') else args.batch_size,
                augment=True)

    # Exclude empty polygons
    data.data_filter = lambda df: df.sq_ft > 0

    # Create model
    return UNet(data, (args.size, args.size),
                max_batches=args.max_batches,
                epochs=args.epochs,
                weights_path=args.weights_path,
                gpu_count=args.gpu_count,
                learning_rate=args.learning_rate)
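# Example use of the factory above, assuming an argparse-style namespace whose
# attribute names mirror the ones `model()` reads; the values are illustrative
# only.
if __name__ == '__main__':
    from argparse import Namespace

    example_args = Namespace(size=500, batch_size=8, max_batches=100, epochs=10,
                             weights_path='weights/unet.h5', gpu_count=1,
                             learning_rate=1e-4)
    unet = model(example_args)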
def test_test_batch_count(data):
    data.split_config = DataSplitConfig(0, 0.4, 0)
    data.batch_size = 1
    data.augment = True  # No-op

    assert data.test_batch_count() == 2
def test_test_batch(data):
    data.split_config = DataSplitConfig(0, 0.6, 0)
    data.batch_size = 1

    assert len(data.test_batch_data(0).df.ImageId.unique()) == 1
    assert len(data.test_batch_data(1).df.ImageId.unique()) == 1
def test_val_batch_count(data):
    data.split_config = DataSplitConfig(0.99, 0, 0)
    data.batch_size = 1
    data.augment = True  # No-op

    assert data.val_batch_count() == 3
def test_train_batch_count_with_whole_last_batch(data):
    data.split_config = DataSplitConfig(0, 0, 0)
    data.batch_size = 3
    data.augment = False

    assert data.train_batch_count() == 1
def test_test_size(sample_df):
    # 20% test data, 2 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)
    split = GroupDataSplit(sample_df, key='group_id', config=config)
    assert len(split.test_df) == 2
def test_validation_size(sample_df):
    # 50% validation data, 5 of 10 items
    config = DataSplitConfig(0.5, 0.2, 0)
    split = GroupDataSplit(sample_df, key='group_id', config=config)
    assert len(split.val_df) == 5
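# Minimal sketch of the `sample_df` fixture the GroupDataSplit tests assume
# (the real one would normally live in conftest.py): ten rows, each in its own
# group, so the 50/20/30 validation/test/train fractions above map to 5, 2 and
# 3 groups respectively. Column names other than `group_id` are placeholders.
import pandas as pd
import pytest

@pytest.fixture
def sample_df():
    return pd.DataFrame({'group_id': range(10), 'value': range(10)})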