Example #1
0
 def test_process_csv_file(self):
     """Process the test CSV through Dataset.Builder and check the result.

     Verifies that all 10 rows of the fixture CSV are parsed and that the
     categorical column 'col_0' accumulated 4 distinct categories.
     """
     schema = Schema.from_csv(csv_path=self.test_csv_file_path)
     input = Input(schema)
     input.add_categorical_column('col_0')
     rows = Dataset.Builder(input=input,
                            name="test",
                            root_dir=self.test_dir,
                            parallelism_level=2)._process_csv_files()
     self.assertEqual(len(rows), 10)
     for column in input.columns:
         if column.name == 'col_0':
             # Bug fix: assertTrue(x, msg) treats the second argument as the
             # failure message, so the old check passed for ANY non-empty
             # category list. The intent is an equality check on the count.
             self.assertEqual(len(column.metadata.categories), 4)
Example #2
0
def create_test_dataset(test_dir,
                        test_csv_file_path,
                        dataset_name,
                        header=False,
                        is_related_path=False):
    """Build and return a test Dataset from a CSV fixture.

    Args:
        test_dir: root directory the dataset is built under.
        test_csv_file_path: path to the source CSV file.
        dataset_name: name given to the built dataset.
        header: if True the CSV has a header row and the '_h'-suffixed
            column names are used.
        is_related_path: forwarded to Img2DColumn; presumably controls
            whether image paths are resolved relative to the CSV —
            confirm against Img2DColumn.

    Returns:
        The Dataset produced by Dataset.Builder(...).build().
    """
    # Column names differ when the CSV carries a header row.
    if header:
        col_0, col_1, col_5 = 'col_0_h', 'col_1_h', 'col_5_h'
    else:
        col_0, col_1, col_5 = 'col_0', 'col_1', 'col_5'
    schema = Schema.from_csv(csv_path=test_csv_file_path, header=header)
    # Collapse CSV columns 2..4 into a single vector column.
    schema.merge_columns_in_range('col_vector', (2, 4))
    # Renamed from `input` to avoid shadowing the builtin.
    net_input = Input(schema)
    net_input.add_categorical_column(col_0)
    net_input.add_numeric_column(col_1)
    net_input.add_vector_column('col_vector')
    net_input.add_column(col_5, Img2DColumn(is_related_path=is_related_path))
    return Dataset.Builder(net_input, dataset_name, test_dir,
                           parallelism_level=2).build()
Example #3
0
 def test_write_read_record_raw_img_false(self):
     """Round-trip one CSV row through the HDF5 record writer/reader and
     verify the deserialized image equals the original image file."""
     schema = Schema.from_csv(csv_path=self.test_csv_file_path)
     schema.merge_columns_in_range('col_vector', (2, 4))
     input = Input(schema)
     input.add_categorical_column('col_0')
     # Pre-seed the categorical metadata from the module-level `categories`.
     for col in input.columns:
         if col.name == 'col_0':
             meta = CategoricalColumnMetadata()
             meta._categories = categories
             col.metadata = meta
     input.add_numeric_column('col_1')
     input.add_vector_column('col_vector')
     input.add_column(
         "col_5",
         Img2DColumn(pre_transforms=[], post_transforms=[],
                     is_raw_img=False))
     os.makedirs(os.path.join(self.test_dir, Dataset.DATA_DIR_NAME))
     writer = RecordWriter.factory('HDF5', self.test_dir, input.columns)
     # Take the first row of the fixture CSV and strip whitespace.
     first_row = Schema.read_n_rows(csv_file_path=self.test_csv_file_path,
                                    delimiter=",",
                                    rows_number=1)[0]
     csv_row = [ent.strip() for ent in first_row]
     processed_row = {col.name: col.process_on_write(csv_row)
                      for col in input.columns}
     writer.write(processed_row, 0)
     # Read the record back and decode every column.
     reader = RecordReader.factory('HDF5', self.test_dir)
     record = reader.read(0)
     data = {col.name: col.process_on_read(record)
             for col in input.columns}
     img_deserialized = data['col_5']
     img_original = skimgio.imread(self.test_img_file_path)
     self.assertTrue(np.array_equal(img_deserialized, img_original))
Example #4
0
    import os
    import glob
    # Reuse a previously built test dataset on disk if one exists;
    # otherwise build a fresh one from the bundled CSV fixture.
    datasets_base_path = app_flask.config['DATASETS_BASE_PATH']
    lstDB = glob.glob('%s/test-*' % datasets_base_path)
    numDB = len(lstDB)
    if numDB < 1:
        # Relative fixture path — assumes a specific working directory;
        # TODO confirm this runs from the script's own directory.
        path_csv = '../../../../data-test/dataset-image2d/simple4c_test/test-csv-v1.csv'
        if not os.path.isfile(path_csv):
            raise Exception('Cant find file [%s]' % path_csv)
        schema = Schema.from_csv(path_csv, header=True, delimiter=',')
        # Collapse CSV columns 2..4 into a single vector column.
        schema.merge_columns_in_range('col_vector', (2, 4))
        schema.print_data()
        # Presumably renames the 'path' column to 'image' so the Img2D
        # column below can be attached — verify Schema.__setitem__.
        schema['path'] = 'image'
        schema.print_columns()
        input = Input(schema=schema)
        input.add_categorical_column("label")
        input.add_vector_column('col_vector')
        # is_related_path=True: image paths presumably resolved relative
        # to the CSV location — confirm in Img2DColumn.
        img2d = Img2DColumn(is_related_path=True)
        input.add_column("image", img2d)
        dataset = Dataset.Builder(input,
                                  "test",
                                  datasets_base_path,
                                  parallelism_level=2).build()
    else:
        # Load the first matching dataset found on disk.
        dataset = Dataset.load(lstDB[0])
    dataShapes = dataset.shapes()
    data = dataset.get_train_batch(5)
    # Python 2 print statement: dump the shape of every batch entry.
    for k in data._data.keys():
        print '%s : %s' % (k, data[k].shape)