def test_cast_to_int32_return_int32():
    """CastToInt32 should emit int32 elements when given uint8 labels."""
    labels = utils.generate_one_hot_labels(100, 10, "dataset")
    uint8_labels = labels.map(lambda item: tf.cast(item, tf.uint8))
    converted = common.CastToInt32().transform(uint8_labels)

    # Only the first element is inspected; the loop exits right after it.
    for element in converted:
        assert element.dtype == tf.int32
        break
def test_io_api(tmp_path):
    """Fit an AutoModel with three inputs and two heads for a single epoch.

    The model takes image, text and structured-data inputs and produces a
    regression and a classification output. The test has no explicit
    assertions: it passes when ``fit`` completes without raising.

    Args:
        tmp_path: Directory handed to ``ak.AutoModel`` as its ``directory``.
    """
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)

    image_x = image_x[:num_instances]
    # ``np.unicode`` was merely an alias of the builtin ``str`` and has been
    # removed from NumPy (deprecated in 1.20, gone in 1.24); ``str`` is the
    # exact equivalent and works on every NumPy version.
    structured_data_x = (
        pd.read_csv(utils.TRAIN_CSV_PATH)
        .to_numpy()
        .astype(str)[:num_instances]
    )
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    regression_y = utils.generate_data(num_instances=num_instances, shape=(1,))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(
                loss="categorical_crossentropy", metrics=["accuracy"]
            ),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
def test_io_api(tmp_path):
    """Fit a three-input, two-head AutoModel on MNIST/IMDB-backed data.

    There are no explicit assertions; the test passes when ``fit`` finishes
    without raising.

    Args:
        tmp_path: Directory handed to ``ak.AutoModel`` as its ``directory``.
    """
    num_instances = 100

    # Only the training inputs are needed; the remaining splits are discarded.
    (image_x, _), (_, _) = mnist.load_data()
    (text_x, _), (_, _) = utils.imdb_raw(num_instances=num_instances)
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]

    structured_data_x = utils.generate_structured_data(
        num_instances=num_instances
    )
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    regression_y = utils.generate_data(num_instances=num_instances, shape=(1,))

    # Build model and train.
    model_inputs = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    model_outputs = [
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ]
    automodel = ak.AutoModel(
        inputs=model_inputs,
        outputs=model_outputs,
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
    )
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    """StructuredDataClassifier.fit should invoke the patched ``fit``.

    Args:
        fit: Presumably the mock injected by a patch decorator that is not
            visible in this view -- confirm against the decorator.
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=utils.SEED
    )

    auto_model.fit(
        x=utils.generate_structured_data(num_instances=100),
        y=utils.generate_one_hot_labels(num_instances=100, num_classes=3),
    )

    # ``is_called`` is not part of the mock API: accessing it auto-creates a
    # truthy child mock, so the old assertion could never fail. ``called`` is
    # the real flag recording whether the mock was invoked.
    assert fit.called
def test_clf_head_transform_df_to_dataset():
    """fit_transform on DataFrame-wrapped labels returns a tf.data.Dataset."""
    adapter = output_adapter.ClassificationHeadAdapter(name="a")

    one_hot = utils.generate_one_hot_labels(dtype="np", num_classes=10)
    labels_frame = pd.DataFrame(one_hot)
    transformed = adapter.fit_transform(labels_frame)

    assert isinstance(transformed, tf.data.Dataset)
def test_img_clf_fit_call_auto_model_fit(fit, tmp_path):
    """ImageClassifier.fit should invoke the patched ``fit``.

    Args:
        fit: Presumably the mock injected by a patch decorator that is not
            visible in this view -- confirm against the decorator.
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    auto_model = ak.ImageClassifier(directory=tmp_path, seed=utils.SEED)

    auto_model.fit(
        x=utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=utils.generate_one_hot_labels(num_instances=100, num_classes=10),
    )

    # ``is_called`` is not part of the mock API: accessing it auto-creates a
    # truthy child mock, so the old assertion could never fail. ``called`` is
    # the real flag recording whether the mock was invoked.
    assert fit.called
def test_infer_ten_classes():
    """The adapter should report 10 classes after fitting 10-class labels."""
    adapter = output_adapter.ClassificationHeadAdapter(name="a")

    labels = utils.generate_one_hot_labels(dtype="dataset", num_classes=10)
    adapter.fit_transform(labels)

    assert adapter.num_classes == 10
def test_image_classifier(tmp_path):
    """Train, export, evaluate and predict with an ImageClassifier.

    Checks that predictions have one row per input with 10 columns and that
    the exported model is a ``tf.keras.Model``.

    Args:
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    num_classes = 10
    train_x = utils.generate_data(num_instances=100, shape=(32, 32, 3))
    train_y = utils.generate_one_hot_labels(
        num_instances=100, num_classes=num_classes
    )

    clf = ak.ImageClassifier(
        directory=tmp_path, max_trials=2, seed=utils.SEED
    )
    clf.fit(train_x, train_y, epochs=1, validation_split=0.2)
    keras_model = clf.export_model()
    clf.evaluate(train_x, train_y)

    predictions = clf.predict(train_x)
    assert predictions.shape == (len(train_x), num_classes)
    assert isinstance(keras_model, tf.keras.Model)
def test_clf_head_one_hot_shape_error():
    """Fitting 10-class labels on a 9-class adapter raises ValueError."""
    adapter = output_adapter.ClassificationHeadAdapter(
        name="a", num_classes=9
    )

    with pytest.raises(ValueError) as info:
        mismatched = utils.generate_one_hot_labels(dtype="np", num_classes=10)
        adapter.fit_transform(mismatched)

    message = str(info.value)
    assert "Expect one hot encoded labels to have shape" in message
def test_infer_ten_classes():
    """The analyser should report 10 classes after seeing 10-class labels."""
    analyser = output_analysers.ClassificationAnalyser(name="a")
    labels = utils.generate_one_hot_labels(dtype="dataset", num_classes=10)

    # Feed every element to the analyser before finalizing it.
    for element in labels:
        analyser.update(element)
    analyser.finalize()

    assert analyser.num_classes == 10
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    """StructuredDataClassifier.fit on CSV data should invoke the patched ``fit``.

    Args:
        fit: Presumably the mock injected by a patch decorator that is not
            visible in this view -- confirm against the decorator.
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=utils.SEED
    )

    # ``np.unicode`` was merely an alias of the builtin ``str`` and has been
    # removed from NumPy (deprecated in 1.20, gone in 1.24); ``str`` is the
    # exact equivalent.
    auto_model.fit(
        x=pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=utils.generate_one_hot_labels(num_instances=100, num_classes=3),
    )

    # ``is_called`` is not part of the mock API: accessing it auto-creates a
    # truthy child mock, so the old assertion could never fail. ``called`` is
    # the real flag recording whether the mock was invoked.
    assert fit.called
def test_clf_head_one_hot_shape_error():
    """Analysing 10-class labels with a 9-class analyser raises ValueError."""
    analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=9
    )
    one_hot = utils.generate_one_hot_labels(dtype="np", num_classes=10)
    batches = tf.data.Dataset.from_tensor_slices(one_hot).batch(32)

    # Both the updates and finalize() sit inside the raises-context, so the
    # error may come from either step.
    with pytest.raises(ValueError) as info:
        for batch in batches:
            analyser.update(batch)
        analyser.finalize()

    message = str(info.value)
    assert "Expect the target data for a to have shape" in message
def test_functional_api(tmp_path):
    """Build a three-branch graph with the functional API and fit it once.

    Image, structured-data and text branches are merged and fed to a
    regression head and a classification head. There are no explicit
    assertions; the test passes when ``fit`` finishes without raising.

    Args:
        tmp_path: Directory handed to ``ak.AutoModel`` as its ``directory``.
    """
    # Prepare the data.
    num_instances = 80
    # Only the training inputs are used; the other splits are discarded.
    (image_x, _), (_, _) = mnist.load_data()
    (text_x, _), (_, _) = utils.imdb_raw()
    (structured_data_x, _), (_, _) = utils.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    regression_y = utils.generate_data(num_instances=num_instances, shape=(1,))

    # Build model and train.
    # Image branch: normalization and augmentation, then two parallel blocks.
    image_input = ak.ImageInput()
    image_features = ak.Normalization()(image_input)
    image_features = ak.ImageAugmentation()(image_features)
    resnet_output = ak.ResNetBlock(version="next")(image_features)
    xception_output = ak.XceptionBlock()(image_features)
    image_output = ak.Merge()((resnet_output, xception_output))

    # Structured-data branch.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    # Text branch: an int-sequence path and an n-gram path, merged.
    text_input = ak.TextInput()
    sequence_output = ak.TextToIntSequence()(text_input)
    sequence_output = ak.Embedding()(sequence_output)
    sequence_output = ak.ConvBlock(separable=True)(sequence_output)
    sequence_output = ak.SpatialReduction()(sequence_output)
    ngram_output = ak.TextToNgramVector()(text_input)
    ngram_output = ak.DenseBlock()(ngram_output)
    text_output = ak.Merge()((sequence_output, ngram_output))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output)
    )

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    automodel.fit(
        (image_x, text_x, structured_data_x),
        (regression_y, classification_y),
        validation_split=0.2,
        epochs=1,
    )
def test_structured_data_classifier(tmp_path):
    """Train a StructuredDataClassifier on CSV rows and check prediction shape.

    The first 400 rows are used for training (and as validation data); the
    remaining 100 are predicted on and the result must have 3 columns.

    Args:
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    num_data = 500
    num_train = 400
    # ``np.unicode`` was merely an alias of the builtin ``str`` and has been
    # removed from NumPy (deprecated in 1.20, gone in 1.24); ``str`` is the
    # exact equivalent.
    data = pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_data]
    x_train, x_test = data[:num_train], data[num_train:]
    y = utils.generate_one_hot_labels(num_instances=num_data, num_classes=3)
    y_train, y_test = y[:num_train], y[num_train:]

    clf = ak.StructuredDataClassifier(
        directory=tmp_path, max_trials=1, seed=utils.SEED
    )
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    clf.export_model()

    assert clf.predict(x_test).shape == (len(y_test), 3)
def test_structured_data_from_numpy_classifier(tmp_path):
    """Train a StructuredDataClassifier on generated data and check output shape.

    The first 400 samples are used for training (and as validation data);
    predictions on the remaining ones must have 3 columns.

    Args:
        tmp_path: Directory handed to the classifier as its ``directory``.
    """
    num_data = 500
    num_train = 400
    num_classes = 3

    features = utils.generate_structured_data(num_data)
    x_train = features[:num_train]
    x_test = features[num_train:]

    labels = utils.generate_one_hot_labels(
        num_instances=num_data, num_classes=num_classes
    )
    y_train = labels[:num_train]
    y_test = labels[num_train:]

    clf = ak.StructuredDataClassifier(
        directory=tmp_path, max_trials=1, seed=utils.SEED
    )
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    clf.export_model()

    predictions = clf.predict(x_test)
    assert predictions.shape == (len(y_test), num_classes)
def test_structured_classifier(init, fit, tmp_dir):
    """Constructing and fitting the classifier should hit both patched calls.

    Args:
        init: Presumably a mock injected by a patch decorator outside this
            view -- confirm against the decorator.
        fit: Presumably a mock injected the same way.
        tmp_dir: Directory handed to the classifier as its ``directory``.
    """
    num_data = 500
    train_x = utils.generate_structured_data(num_data)
    train_y = utils.generate_one_hot_labels(
        num_instances=num_data, num_classes=3
    )

    clf = structured_data.StructuredDataClassifier(
        column_names=utils.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=utils.SEED,
    )
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))

    assert init.called
    assert fit.called
def test_clf_head_hpps_with_uint8_contain_cast_to_int32():
    """A ClassificationHead configured from uint8 labels includes CastToInt32.

    After the head is configured from an analyser that saw uint8 labels, at
    least one of its hyper-preprocessors must be a
    ``DefaultHyperPreprocessor`` wrapping a ``CastToInt32`` preprocessor.
    """

    def _wraps_cast_to_int32(hpp):
        # True when hpp is a default wrapper around a CastToInt32 step.
        return isinstance(
            hpp, hyper_preprocessors.DefaultHyperPreprocessor
        ) and isinstance(hpp.preprocessor, preprocessors.CastToInt32)

    labels = utils.generate_one_hot_labels(100, 10, "dataset")
    uint8_labels = labels.map(lambda item: tf.cast(item, tf.uint8))

    head = head_module.ClassificationHead(shape=(8,))
    analyser = head.get_analyser()
    for element in uint8_labels:
        analyser.update(element)
    analyser.finalize()
    head.config_from_analyser(analyser)

    assert any(
        _wraps_cast_to_int32(hpp) for hpp in head.get_hyper_preprocessors()
    )
def test_text_and_structured_data(tmp_path):
    """Fit a two-branch (text + structured data) functional model once.

    Both branches are merged and fed to a regression head and a
    classification head. There are no explicit assertions; the test passes
    when ``fit`` finishes without raising.

    Args:
        tmp_path: Directory handed to ``ak.AutoModel`` as its ``directory``.
    """
    # Prepare the data.
    num_instances = 80
    # Only the training text is used; the other splits are discarded.
    (x_text, _), (_, _) = utils.imdb_raw()
    x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH)

    x_text = x_text[:num_instances]
    x_structured_data = x_structured_data[:num_instances]
    y_classification = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    y_regression = utils.generate_data(num_instances=num_instances, shape=(1,))

    # Build model and train.
    # Structured-data branch.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    # Text branch: an int-sequence path and an n-gram path, merged.
    text_input = ak.TextInput()
    sequence_output = ak.TextToIntSequence()(text_input)
    sequence_output = ak.Embedding()(sequence_output)
    sequence_output = ak.ConvBlock(separable=True)(sequence_output)
    sequence_output = ak.SpatialReduction()(sequence_output)
    ngram_output = ak.TextToNgramVector()(text_input)
    ngram_output = ak.DenseBlock()(ngram_output)
    text_output = ak.Merge()((sequence_output, ngram_output))

    merged_outputs = ak.Merge()((structured_data_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
def test_infer_num_classes():
    """The adapter should report 10 classes after fitting the default labels."""
    labels = utils.generate_one_hot_labels(dtype="dataset")
    adapter = output_adapter.ClassificationHeadAdapter(name="a")

    # The value returned by fit() is not needed for this check.
    adapter.fit(labels)

    assert adapter.num_classes == 10
def test_tf_dataset():
    """transform() on already-fitted output should return a tf.data.Dataset."""
    labels = utils.generate_one_hot_labels(dtype="dataset")
    head = output_adapter.ClassificationHeadAdapter(name="a")

    fitted = head.fit_transform(labels)
    result = head.transform(fitted)

    assert isinstance(result, tf.data.Dataset)