def test_json_dataset():
    """Test case for JSON Dataset.

    Loads reference values from an .npz fixture and compares them against
    what JSONDataset yields: default column order, reversed column order,
    and when zipped with the label dataset.
    """
    base = os.path.dirname(os.path.abspath(__file__))
    json_filename = os.path.join(base, "test_json", "json_test.npz")
    with np.load(json_filename) as f:
        (x_test, y_test) = f["x_test"], f["y_test"]
    feature_filename = os.path.join(base, "test_json", "feature.json")
    label_filename = os.path.join(base, "test_json", "label.json")
    feature_dataset = json_io.JSONDataset(feature_filename)
    label_dataset = json_io.JSONDataset(label_filename)
    i = 0
    for j_x in feature_dataset:
        v_x = x_test[i]
        # BUG FIX: np.alltrue was deprecated in NumPy 1.25 and removed in
        # NumPy 2.0; np.all is the equivalent supported spelling.
        assert np.all(v_x == j_x.numpy())
        i += 1
    assert i == len(y_test)
    ## Test of the reverse order of the columns
    feature_list = ["integerfeature", "floatfeature"]
    feature_dataset = json_io.JSONDataset(feature_filename, feature_list)
    i = 0
    for j_x in feature_dataset:
        # Reversed column order must yield each record's values flipped.
        v_x = np.flip(x_test[i])
        assert np.all(v_x == j_x.numpy())
        i += 1
    assert i == len(y_test)
    i = 0
    for j_y in label_dataset:
        v_y = y_test[i]
        assert np.all(v_y == j_y.numpy())
        i += 1
    assert i == len(y_test)
    # Zipping the (reversed-order) feature dataset with the label dataset
    # must keep records aligned row by row.
    dataset = tf.data.Dataset.zip((
        feature_dataset,
        label_dataset
    ))
    i = 0
    for (j_x, j_y) in dataset:
        v_x = np.flip(x_test[i])
        v_y = y_test[i]
        assert np.all(v_y == j_y.numpy())
        assert np.all(v_x == j_x.numpy())
        i += 1
    assert i == len(y_test)
def test_json_dataset():
    """Test case for JSON Dataset.

    Checks column listing, per-column reads, and zipped/unbatched
    iteration against hard-coded expected feature/label values.
    """
    x_test = [[1.1, 2], [2.1, 3]]
    y_test = [[2.2, 3], [1.2, 3]]
    base = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_json")
    feature_filename = "file://" + os.path.join(base, "feature.json")
    label_filename = "file://" + os.path.join(base, "label.json")

    # Column discovery must report the expected dtypes.
    feature_cols = json_io.list_json_columns(feature_filename)
    assert feature_cols["floatfeature"].dtype == tf.float64
    assert feature_cols["integerfeature"].dtype == tf.int64
    label_cols = json_io.list_json_columns(label_filename)
    assert label_cols["floatlabel"].dtype == tf.float64
    assert label_cols["integerlabel"].dtype == tf.int64

    # Eager per-column reads.
    float_feature = json_io.read_json(feature_filename, feature_cols["floatfeature"])
    integer_feature = json_io.read_json(feature_filename, feature_cols["integerfeature"])
    float_label = json_io.read_json(label_filename, label_cols["floatlabel"])
    integer_label = json_io.read_json(label_filename, label_cols["integerlabel"])
    for row, (expected_x, expected_y) in enumerate(zip(x_test, y_test)):
        assert expected_x[0] == float_feature[row].numpy()
        assert expected_x[1] == integer_feature[row].numpy()
        assert expected_y[0] == float_label[row].numpy()
        assert expected_y[1] == integer_label[row].numpy()

    # Dataset path: zip per-column datasets, then unbatch to records.
    feature_dataset = tf.compat.v2.data.Dataset.zip(
        (json_io.JSONDataset(feature_filename, "floatfeature"),
         json_io.JSONDataset(feature_filename, "integerfeature"))).apply(
             tf.data.experimental.unbatch())
    label_dataset = tf.compat.v2.data.Dataset.zip(
        (json_io.JSONDataset(label_filename, "floatlabel"),
         json_io.JSONDataset(label_filename, "integerlabel"))).apply(
             tf.data.experimental.unbatch())
    dataset = tf.data.Dataset.zip((feature_dataset, label_dataset))
    count = 0
    for (got_x, got_y) in dataset:
        for col, value in enumerate(got_x):
            assert x_test[count][col] == value.numpy()
        for col, value in enumerate(got_y):
            assert y_test[count][col] == value.numpy()
        count += 1
    assert count == len(y_test)
def test_json():
    """test_json: smoke-test fitting a keras model on zipped JSON datasets."""
    here = os.path.dirname(os.path.abspath(__file__))
    d_train_feature = json_io.JSONDataset(
        os.path.join(here, "test_json", "feature.json"), )
    d_train_label = json_io.JSONDataset(
        os.path.join(here, "test_json", "label.json"), )
    d_train = tf.data.Dataset.zip((d_train_feature, d_train_label))

    # Minimal single-layer model; we only care that fit() runs end-to-end.
    model = tf.keras.models.Sequential(
        [tf.keras.layers.Dense(2, input_shape=(1, ))])
    model.compile(optimizer='sgd', loss='mse', metrics=['accuracy'])
    model.fit(d_train, epochs=5)
def test_json_dataset(self):
    """Test case for JSONDataset: repeated iteration yields the fixture
    rows in order and then raises OutOfRangeError."""
    filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "test_json", "feature.json")
    num_repeats = 2
    dataset = json_io.JSONDataset(
        filename,
        columns=['floatfeature', 'integerfeature'],
        dtypes=(dtypes.float64, dtypes.int64)).repeat(num_repeats)
    iterator = data.make_initializable_iterator(dataset)
    get_next = iterator.get_next()
    expected_rows = [(1.1, 2), (2.1, 3)]
    with self.test_session() as sess:
        sess.run(iterator.initializer)
        for _ in range(num_repeats):
            for floatf, intf in expected_rows:
                self.assertAllClose((floatf, intf), sess.run(get_next))
        # Dataset is exhausted after num_repeats passes.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)
def test_json_dataset():
    """Test case for JSONDataset.

    Iterates the repeated, unbatched dataset in a TF1 compat session and
    checks each record against the fixture values, then verifies
    exhaustion raises OutOfRangeError.
    """
    filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "test_json", "feature.json")
    filename = "file://" + filename
    num_repeats = 2
    dataset = json_io.JSONDataset(
        filename, ["floatfeature", "integerfeature"],
        dtype=[tf.float64, tf.int64]
    ).repeat(num_repeats).apply(tf.data.experimental.unbatch())
    iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
    init_op = iterator.initializer
    get_next = iterator.get_next()
    test_json = [(1.1, 2), (2.1, 3)]
    with tf.compat.v1.Session() as sess:
        sess.run(init_op)
        for _ in range(num_repeats):
            for i in range(2):
                (floatf, intf) = test_json[i]
                vv = sess.run(get_next)
                # BUG FIX: np.allclose returns a bool; the original call
                # discarded it, so the comparison could never fail the test.
                assert np.allclose((floatf, intf), vv)
        with pytest.raises(tf.errors.OutOfRangeError):
            sess.run(get_next)
def test_json_keras():
    """Test case for JSONDataset with keras: fit a small classifier on the
    iris fixture read through batched JSON datasets."""
    base = os.path.dirname(os.path.abspath(__file__))
    feature_filename = "file://" + os.path.join(base, "test_json", "iris.json")
    label_filename = "file://" + os.path.join(base, "test_json", "species.json")

    feature_dataset = json_io.JSONDataset(
        feature_filename,
        ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'],
        [tf.float64, tf.float64, tf.float64, tf.float64],
        batch=32)
    label_dataset = json_io.JSONDataset(
        label_filename, ["species"], [tf.int64], batch=32)

    def pack_features_vector(features, labels):
        """Pack the features into a single array."""
        return tf.stack(list(features), axis=1), labels

    dataset = tf.data.Dataset.zip(
        (feature_dataset, label_dataset)).map(pack_features_vector)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation=tf.nn.relu,
                              input_shape=(4,)),  # input shape required
        tf.keras.layers.Dense(10, activation=tf.nn.relu),
        tf.keras.layers.Dense(3)
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(dataset, epochs=5)
def test_json_keras():
    """Test case for JSONDataset with keras.

    Discovers the columns of the iris/species fixtures, builds one
    JSONDataset per column, zips them, and fits a small model.
    """
    base = os.path.dirname(os.path.abspath(__file__))
    feature_filename = "file://" + os.path.join(base, "test_json", "iris.json")
    label_filename = "file://" + os.path.join(base, "test_json", "species.json")

    # One dataset per discovered column (dict iteration order preserved).
    feature_tensors = [
        json_io.JSONDataset(feature_filename, name)
        for name in json_io.list_json_columns(feature_filename)]
    label_tensors = [
        json_io.JSONDataset(label_filename, name)
        for name in json_io.list_json_columns(label_filename)]

    feature_dataset = tf.compat.v2.data.Dataset.zip(tuple(feature_tensors))
    label_dataset = tf.compat.v2.data.Dataset.zip(tuple(label_tensors))
    dataset = tf.data.Dataset.zip((feature_dataset, label_dataset))

    def pack_features_vector(features, labels):
        """Pack the features into a single array."""
        return tf.stack(list(features), axis=1), labels

    dataset = dataset.map(pack_features_vector)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation=tf.nn.relu,
                              input_shape=(4, )),  # input shape required
        tf.keras.layers.Dense(10, activation=tf.nn.relu),
        tf.keras.layers.Dense(3)
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(dataset, epochs=5)
import os # Make sure to import these libs with python3 -m pip filename = "JSONData/testData.json" cols = tf_json_io.list_json_columns(filename) # List of functions for each element of the map # https://stackoverflow.com/questions/38381887/how-to-read-json-files-in-tensorflow # https://www.tensorflow.org/tutorials/keras/regression feature_cols = ["floatfeature"] feature_dataset = tf_json_io.JSONDataset(filename, feature_cols) datasetpd = pd.read_json(filename, "records") dataset = datasetpd.copy() print(dataset) linedefsDataset = datasetpd.copy() linedefsDatasetTrain = linedefsDataset.sample(frac=0.8, random_state=0) linedefsDatasetTest = linedefsDataset.drop(linedefsDatasetTrain.index) linedefTrainLabels = linedefsDatasetTrain.pop('floatfeature') linedefsStats = linedefsDatasetTrain.describe() linedefsStats = linedefsStats.transpose() def norm(x): #Normailisation function
def test_json_dataset():
    """Test case for JSON Dataset.

    Compares typed JSONDataset records against hard-coded expected
    values, including reversed column order and zipped iteration.
    """
    x_test = [[1.1, 2], [2.1, 3]]
    y_test = [[2.2, 3], [1.2, 3]]
    base = os.path.dirname(os.path.abspath(__file__))
    feature_filename = "file://" + os.path.join(base, "test_json", "feature.json")
    label_filename = "file://" + os.path.join(base, "test_json", "label.json")

    label_dataset = json_io.JSONDataset(
        label_filename, ["floatlabel", "integerlabel"],
        [tf.float64, tf.int64])

    def check_records(ds, expected_rows):
        """Iterate ds and compare each record against expected_rows."""
        count = 0
        for record in ds:
            for col, val in enumerate(record):
                assert expected_rows[count][col] == val.numpy()
            count += 1
        assert count == len(expected_rows)

    check_records(
        json_io.JSONDataset(
            feature_filename, ["floatfeature", "integerfeature"],
            [tf.float64, tf.int64]),
        x_test)

    ## Test of the reverse order of the columns
    reversed_features = json_io.JSONDataset(
        feature_filename, ["integerfeature", "floatfeature"],
        [tf.int64, tf.float64])
    check_records(reversed_features, [np.flip(row) for row in x_test])

    check_records(label_dataset, y_test)

    # Zip the reversed-order feature dataset with the label dataset and
    # verify the rows stay aligned.
    zipped = tf.data.Dataset.zip((reversed_features, label_dataset))
    seen = 0
    for (j_x, j_y) in zipped:
        flipped = np.flip(x_test[seen])
        for col, x in enumerate(j_x):
            assert flipped[col] == x.numpy()
        for col, y in enumerate(j_y):
            assert y_test[seen][col] == y.numpy()
        seen += 1
    assert seen == len(y_test)