Example #1
    def test_recordio_data_reader(self):
        num_records = 128
        with tempfile.TemporaryDirectory() as temp_dir_name:
            shard_name = create_recordio_file(num_records,
                                              DatasetName.TEST_MODULE,
                                              1,
                                              temp_dir=temp_dir_name)

            # Test shards creation
            expected_shards = [(shard_name, 0, num_records)]
            reader = RecordIODataReader(data_dir=temp_dir_name)
            self.assertEqual(expected_shards, reader.create_shards())

            # Test records reading
            records = list(
                reader.read_records(
                    _Task(shard_name, 0, num_records,
                          elasticai_api_pb2.TRAINING)))
            self.assertEqual(len(records), num_records)
            for record in records:
                parsed_record = tf.io.parse_single_example(
                    record,
                    {
                        "x": tf.io.FixedLenFeature([1], tf.float32),
                        "y": tf.io.FixedLenFeature([1], tf.float32),
                    },
                )
                for k, v in parsed_record.items():
                    self.assertEqual(len(v.numpy()), 1)
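
A minimal driver sketch for the same API, assuming the RecordIODataReader, _Task, and elasticai_api_pb2 names used above; the iterate_records helper itself is hypothetical:

import tensorflow as tf

def iterate_records(data_dir):
    # Walk every shard the reader reports and decode each record.
    reader = RecordIODataReader(data_dir=data_dir)
    feature_spec = {
        "x": tf.io.FixedLenFeature([1], tf.float32),
        "y": tf.io.FixedLenFeature([1], tf.float32),
    }
    for shard_name, start, count in reader.create_shards():
        task = _Task(shard_name, start, count, elasticai_api_pb2.TRAINING)
        for record in reader.read_records(task):
            yield tf.io.parse_single_example(record, feature_spec)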
Example #2
    def test_odps_data_reader_records_reading(self):
        records = list(
            self.reader.read_records(
                _Task(
                    self.test_table,
                    0,
                    2,
                    elasticai_api_pb2.TRAINING,
                )))
        records = np.array(records, dtype="float").tolist()
        self.assertEqual([[6.4, 2.8, 5.6, 2.2, 2], [5.0, 2.3, 3.3, 1.0, 1]],
                         records)
        self.assertEqual(self.reader.metadata.column_names,
                         IRIS_TABLE_COLUMN_NAMES)
        self.assertListEqual(
            list(self.reader.metadata.column_dtypes.values()),
            [
                odps.types.double,
                odps.types.double,
                odps.types.double,
                odps.types.double,
                odps.types.bigint,
            ],
        )
        self.assertEqual(
            self.reader.metadata.get_tf_dtype_from_maxcompute_column(
                self.reader.metadata.column_names[0]),
            tf.float64,
        )
        self.assertEqual(
            self.reader.metadata.get_tf_dtype_from_maxcompute_column(
                self.reader.metadata.column_names[-1]),
            tf.int64,
        )
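
The metadata object already maps each MaxCompute column to a TensorFlow dtype, so a per-column dtype list can be derived in one pass. A hedged sketch; column_tf_dtypes is a hypothetical helper built only on the metadata calls asserted above:

def column_tf_dtypes(reader):
    # One TensorFlow dtype per column, in table order.
    meta = reader.metadata
    return [
        meta.get_tf_dtype_from_maxcompute_column(name)
        for name in meta.column_names
    ]

# For the Iris table above this returns
# [tf.float64, tf.float64, tf.float64, tf.float64, tf.int64].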
Example #3
    def _gen():
        for data in self.reader.read_records(
                _Task(
                    self.test_table,
                    0,
                    num_records,
                    elasticai_api_pb2.TRAINING,
                )):
            if data is not None:
                yield data
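
A generator like _gen is typically handed to tf.data. A minimal sketch, assuming each yielded record is a flat row of five float-convertible values as in the Iris examples above; the output_signature is an assumption, not part of the reader's API:

import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    _gen,
    output_signature=tf.TensorSpec(shape=(5,), dtype=tf.float64),
)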
Example #4
    def test_csv_data_reader(self):
        with tempfile.TemporaryDirectory() as temp_dir_name:
            num_records = 128
            columns = [
                "sepal_length",
                "sepal_width",
                "petal_length",
                "petal_width",
                "class",
            ]
            iris_file_name = create_iris_csv_file(size=num_records,
                                                  columns=columns,
                                                  temp_dir=temp_dir_name)
            csv_data_reader = TextDataReader(filename=iris_file_name,
                                             records_per_task=20)
            shards = csv_data_reader.create_shards()
            # ceil(128 / 20) = 7 shards: six of 20 records and one of 8.
            self.assertEqual(len(shards), 7)
            task = _Task(iris_file_name, 0, 20, elasticai_api_pb2.TRAINING)
            record_count = 0
            for record in csv_data_reader.read_records(task):
                record_count += 1
            self.assertEqual(csv_data_reader.get_size(), num_records)
            self.assertEqual(record_count, 20)

    def setUp(self):
        self._master_client = Mock()
        self._master_client.get_task = MagicMock(
            return_value=_Task("test_file", 0, 1, elasticai_api_pb2.TRAINING))
        self._master_client.report_task_result = MagicMock(return_value=True)
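
These mocks stand in for the master's task protocol. A rough sketch of the worker loop they simulate, assuming get_task returns a falsy value once work runs out and that report_task_result takes the task plus a success flag; both argument shapes are assumptions:

def run_worker(master_client, data_reader):
    # Pull tasks until the master has none left, reading each shard fully.
    while True:
        task = master_client.get_task()
        if not task:
            break
        for record in data_reader.read_records(task):
            pass  # hand each record off to the training loop
        master_client.report_task_result(task, True)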