def test_structured_data_input_less_col_name(): (x, _), _1 = utils.dataframe_numpy() with pytest.raises(ValueError) as info: adapter = input_adapter.StructuredDataInputAdapter( column_names=utils.LESS_COLUMN_NAMES_FROM_CSV) adapter.fit_transform(x) assert 'Expect column_names to have length' in str(info.value)
def test_time_series_input_less_col_name(): (x, _), _1 = utils.dataframe_numpy() with pytest.raises(ValueError) as info: adapter = input_adapter.TimeseriesInputAdapter( lookback=2, column_names=utils.LESS_COLUMN_NAMES_FROM_CSV) adapter.transform(x) assert 'Expect column_names to have length' in str(info.value)
def test_partial_column_types(): adapter = input_adapter.StructuredDataInputAdapter( column_names=utils.COLUMN_NAMES_FROM_CSV, column_types=utils.PARTIAL_COLUMN_TYPES_FROM_CSV) (x, y), (val_x, val_y) = utils.dataframe_numpy() dataset = x.values.astype(np.unicode) adapter.transform(dataset) assert adapter.column_types['fare'] == 'categorical'
def test_functional_api(tmp_path): # Prepare the data. num_instances = 80 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw() (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy() image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = structured_data_x[:num_instances] classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. image_input = ak.ImageInput() output = ak.Normalization()(image_input) output = ak.ImageAugmentation()(output) outputs1 = ak.ResNetBlock(version='next')(output) outputs2 = ak.XceptionBlock()(output) image_output = ak.Merge()((outputs1, outputs2)) structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()( (structured_data_output, image_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[image_input, text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED) automodel.fit((image_x, text_x, structured_data_x), (regression_y, classification_y), validation_split=0.2, epochs=1)
def test_text_and_structured_data(tmp_path): # Prepare the data. num_instances = 80 (x_text, y_train), (x_test, y_test) = utils.imdb_raw() (x_structured_data, y_train), (x_test, y_test) = utils.dataframe_numpy() x_text = x_text[:num_instances] x_structured_data = x_structured_data[:num_instances] y_classification = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) y_regression = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()((structured_data_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED) automodel.fit((x_text, x_structured_data), (y_regression, y_classification), validation_split=0.2, epochs=1)