Exemple #1
0
    def test_fixed_string_field_importer(self):
        """Import the fixture and compare the stored 'patient_id' values."""
        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=self.chunk_row_size,
        )

        with h5py.File(buffer, 'r') as hf:
            # Materialise the dataset while the file is still open.
            actual_ids = list(hf['schema_key']['patient_id']['values'][:])

        self.assertEqual(actual_ids, [b'E1', b'E123', b'E234', b'', b'E456'])
Exemple #2
0
 def test_numeric_importer_with_non_empty_valid_value_in_strict_mode(self):
     """Check that 'id' is imported and that no 'id_valid' field is created."""
     buffer = BytesIO()
     importer.import_with_schema(
         self.ts, buffer, self.schema, self.files, False, {}, {},
         chunk_row_size=self.chunk_row_size,
     )
     with h5py.File(buffer, 'r') as hf:
         # Snapshot the field names so the checks do not need the open file.
         field_names = set(hf['schema_key'].keys())

     self.assertIn('id', field_names)
     self.assertNotIn('id_valid', field_names)
Exemple #3
0
 def test_numeric_importer_in_allow_empty_mode(self):
     """Check which of the 'age' / 'weight_change' fields get a '_valid' companion."""
     buffer = BytesIO()
     importer.import_with_schema(
         self.ts, buffer, self.schema, self.files, False, {}, {},
     )
     with h5py.File(buffer, 'r') as hf:
         field_names = set(hf['schema_key'].keys())

     self.assertIn('age', field_names)
     self.assertIn('age_valid', field_names)
     self.assertIn('weight_change', field_names)
     self.assertNotIn('weight_change_valid', field_names)
Exemple #4
0
 def test_date_importer_without_create_day_field(self):
     """Check the 'birthday' field is imported and no 'birthday_day' field exists."""
     buffer = BytesIO()
     importer.import_with_schema(
         self.ts, buffer, self.schema, self.files, False, {}, {},
         chunk_row_size=self.chunk_row_size,
     )
     with h5py.File(buffer, 'r') as hf:
         table = hf['schema_key']
         field_names = set(table.keys())

         self.assertIn('birthday', field_names)
         # The stored value is passed to datetime.fromtimestamp as-is.
         second_birthday = datetime.fromtimestamp(table['birthday']['values'][1])
         self.assertEqual(second_birthday.strftime("%Y-%m-%d"), '1980-03-04')

         self.assertNotIn('birthday_day', field_names)
Exemple #5
0
    def test_importer_with_wrong_arg_include(self):
        """Check the error raised when 'include' names a table with no input file."""
        buffer = BytesIO()
        include = {'schema_wrong_key': ['id', 'name']}
        exclude = {}
        expected_message = (
            "-n/--include: the following include table(s) are not part of any input files: {'schema_wrong_key'}"
        )

        with self.assertRaises(Exception) as raised:
            importer.import_with_schema(
                self.ts, buffer, self.schema, self.files, False, include, exclude,
                chunk_row_size=self.chunk_row_size,
            )

        self.assertEqual(str(raised.exception), expected_message)
Exemple #6
0
 def test_numeric_importer_in_relaxed_mode(self):
     """Check which 'height' / 'BMI' related fields are present after import."""
     buffer = BytesIO()
     importer.import_with_schema(
         self.ts, buffer, self.schema, self.files, False, {}, {},
         chunk_row_size=self.chunk_row_size,
     )
     with h5py.File(buffer, 'r') as hf:
         field_names = set(hf['schema_key'].keys())

     self.assertIn('height', field_names)
     self.assertNotIn('height_valid', field_names)
     self.assertIn('height_valid_test', field_names)
     self.assertIn('BMI', field_names)
     self.assertIn('BMI_valid', field_names)
Exemple #7
0
    def test_datetime_importer_with_create_day_field_True(self):
        """Check 'updated_at' and its 'updated_at_day' companion field after import."""
        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=self.chunk_row_size,
        )
        with h5py.File(buffer, 'r') as hf:
            table = hf['schema_key']
            field_names = set(table.keys())

            self.assertIn('updated_at', field_names)
            second_timestamp = table['updated_at']['values'][1]
            formatted = datetime.fromtimestamp(second_timestamp).strftime("%Y-%m-%d %H:%M:%S")
            self.assertEqual(formatted, '2020-05-13 01:00:00')

            self.assertIn('updated_at_day', field_names)
            self.assertEqual(table['updated_at_day']['values'][1], b'2020-05-13')
Exemple #8
0
    def test_importer_with_arg_exclude(self):
        """Check that a field listed in 'exclude' is absent from the imported table."""
        buffer = BytesIO()
        include = {}
        exclude = {'schema_key': ['updated_at']}

        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, include, exclude,
            chunk_row_size=self.chunk_row_size,
        )
        with h5py.File(buffer, 'r') as hf:
            self.assertListEqual(list(hf.keys()), ['schema_key'])
            table = hf['schema_key']
            self.assertNotIn('updated_at', set(table.keys()))
            # Non-excluded fields still carry every row.
            self.assertEqual(table['id']['values'].shape[0], 5)
Exemple #9
0
    def test_leaky_categorical_field_importer(self):
        """Check the 'degree' values and the index/values of 'degree_freetext'."""
        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=self.chunk_row_size,
        )

        # (field, dataset) -> expected contents, checked in this order.
        expectations = (
            ('degree', 'values', [1, 2, 0, -1, 3]),
            ('degree_freetext', 'index', [0, 0, 0, 0, 4, 4]),
            ('degree_freetext', 'values', list(np.frombuffer(b'prof', dtype = np.uint8))),
        )
        with h5py.File(buffer, 'r') as hf:
            for field_name, dataset_name, expected in expectations:
                actual = list(hf['schema_key'][field_name][dataset_name][:])
                self.assertEqual(actual, expected)
Exemple #10
0
    def test_indexed_string_importer_with_small_chunk_size(self):
        """Check the 'name' index/values datasets when importing with a small chunk size."""
        small_chunk_row_size = 20  # chunk_row_size * column_count < total_bytes

        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=small_chunk_row_size,
        )
        with h5py.File(buffer, 'r') as hf:
            name_field = hf['schema_key']['name']
            offsets = name_field['index'][:]
            raw_bytes = name_field['values'][:]

        self.assertListEqual(list(offsets), [0,1,3,6,10,15])
        # Consecutive offsets delimit one string inside the flat values array.
        first_name = raw_bytes[offsets[0]:offsets[1]].tobytes()
        fourth_name = raw_bytes[offsets[3]:offsets[4]].tobytes()
        self.assertEqual(first_name, b'a')
        self.assertEqual(fourth_name, b'dddd')
Exemple #11
0
    def test_categorical_field_importer_with_small_chunk_size(self):
        """Check 'postcode' values, key names and key values with a small chunk size."""
        small_chunk_row_size = 20  # chunk_row_size * column_count < total_bytes

        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=small_chunk_row_size,
        )

        # Dataset name -> expected contents, checked in this order.
        expected_by_dataset = {
            'values': [1, 3, 2, 0, 4],
            'key_names': [b'', b'NW1', b'E1', b'SW1P', b'NW3'],
            'key_values': [0,1,2,3,4],
        }
        with h5py.File(buffer, 'r') as hf:
            for dataset_name, expected in expected_by_dataset.items():
                actual = list(hf['schema_key']['postcode'][dataset_name][:])
                self.assertEqual(actual, expected)
Exemple #12
0
    def test_numeric_importer_with_non_empty_valid_value_in_strict_mode(self):
        """Import into a temporary HDF5 file and check 'id' has no 'id_valid' field.

        The HDF5 handle is opened through a context manager and the temporary
        file is deleted afterwards, so nothing leaks when an assertion fails.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')

        try:
            importer.import_with_schema(ts, dest_file_name,
                                        self.schema_file_name, self.files,
                                        False, {}, {})
            with h5py.File(dest_file_name, 'r') as f:
                self.assertListEqual(list(f.keys()), ['schema_key'])
                field_names = set(f['schema_key'].keys())
                self.assertIn('id', field_names)
                self.assertNotIn('id_valid', field_names)
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #13
0
    def test_numeric_importer_with_default_value(self):
        """Import into a temporary HDF5 file and check the 'height' values.

        The HDF5 handle is opened through a context manager and the temporary
        file is deleted afterwards, so nothing leaks when an assertion fails.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')

        try:
            importer.import_with_schema(ts, dest_file_name,
                                        self.schema_file_name, self.files,
                                        False, {}, {})
            with h5py.File(dest_file_name, 'r') as f:
                self.assertListEqual(list(f.keys()), ['schema_key'])
                height_values = f['schema_key']['height']['values']
                self.assertEqual(height_values.shape[0], 3)
                self.assertEqual(height_values[2], 160.5)
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #14
0
    def test_importer_with_create_day_field_True(self):
        """Import into a temporary HDF5 file and check 'updated_at' and 'updated_at_day'.

        The HDF5 handle is opened through a context manager and the temporary
        file is deleted afterwards, so nothing leaks when an assertion fails.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')

        try:
            importer.import_with_schema(ts, dest_file_name,
                                        self.schema_file_name, self.files,
                                        False, {}, {})
            with h5py.File(dest_file_name, 'r') as f:
                self.assertListEqual(list(f.keys()), ['schema_key'])
                field_names = set(f['schema_key'].keys())
                self.assertIn('updated_at', field_names)
                self.assertIn('updated_at_day', field_names)
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #15
0
    def test_importer_with_arg_exclude(self):
        """Import with an 'exclude' mapping and check the excluded field is absent.

        The HDF5 handle is opened through a context manager and the temporary
        file is deleted afterwards, so nothing leaks when an assertion fails.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')
        include, exclude = {}, {'schema_key': ['updated_at']}

        try:
            importer.import_with_schema(ts, dest_file_name,
                                        self.schema_file_name, self.files,
                                        False, include, exclude)
            with h5py.File(dest_file_name, 'r') as f:
                self.assertListEqual(list(f.keys()), ['schema_key'])
                self.assertNotIn('updated_at', set(f['schema_key'].keys()))
                self.assertEqual(f['schema_key']['id']['values'].shape[0], 3)
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #16
0
    def test_numeric_field_importer_with_small_chunk_size(self):
        """Check the imported 'age', 'height' and 'weight_change' values."""
        buffer = BytesIO()
        importer.import_with_schema(
            self.ts, buffer, self.schema, self.files, False, {}, {},
            chunk_row_size=self.chunk_row_size,
        )

        float32_min = NewDataSchema._get_min_max('float32')[0]
        # Field name -> expected values, checked in this order.
        expected_by_field = {
            # numeric int field
            'age': list(np.array([30,40,50,60,70], dtype = np.int32 )),
            # numeric float field with default value
            'height': list(np.array([170.9,180.2,160.5,160.5,161.0], dtype = np.float32)),
            # numeric float field with min_default_value
            'weight_change': list(np.array([21.2, float32_min, -17.5, -17.5, 2.5], dtype = np.float32)),
        }

        with h5py.File(buffer, 'r') as hf:
            for field_name, expected_values in expected_by_field.items():
                actual_values = list(hf['schema_key'][field_name]['values'][:])
                self.assertListEqual(actual_values, expected_values)
Exemple #17
0
    def test_numeric_importer_with_min_default_value(self):
        """Import into a temporary HDF5 file and check the 'weight_change' values.

        The second value is compared against the float32 minimum returned by
        ``NewDataSchema._get_min_max``. The HDF5 handle is opened through a
        context manager and the temporary file is deleted afterwards, so
        nothing leaks when an assertion fails.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')

        try:
            importer.import_with_schema(ts, dest_file_name,
                                        self.schema_file_name, self.files,
                                        False, {}, {})
            with h5py.File(dest_file_name, 'r') as f:
                self.assertListEqual(list(f.keys()), ['schema_key'])
                weight_values = f['schema_key']['weight_change']['values']
                self.assertEqual(weight_values.shape[0], 3)
                self.assertEqual(weight_values[1],
                                 NewDataSchema._get_min_max('float32')[0])
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #18
0
    def test_importer_with_wrong_arg_include(self):
        """Check the error raised when 'include' names a table with no input file.

        ``assertRaises`` is used so the test fails when no exception is raised
        at all; a bare try/except would pass silently in that case. The
        temporary destination file is removed afterwards.
        """
        ts = str(datetime.now(timezone.utc))
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')
        include, exclude = {'schema_wrong_key': ['id', 'name']}, {}

        try:
            with self.assertRaises(Exception) as context:
                importer.import_with_schema(ts, dest_file_name,
                                            self.schema_file_name, self.files,
                                            False, include, exclude)
            self.assertEqual(
                str(context.exception),
                "-n/--include: the following include table(s) are not part of any input files: {'schema_wrong_key'}"
            )
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_dest)
            os.remove(dest_file_name)
Exemple #19
0
    def test_numeric_importer_with_non_numeric_value_in_strict_mode(self):
        """Check that importing a CSV with a non-numeric 'id' raises ValueError.

        The temporary CSV is closed and removed in a ``finally`` clause so it
        is cleaned up even when the assertion fails.
        """
        TEST_CSV_CONTENTS_EMPTY_VALUE = '\n'.join((
            'name, id',
            'a,     1',
            'c,     5@'
        ))

        fd_csv, csv_file_name = tempfile.mkstemp(suffix='.csv')
        try:
            with open(csv_file_name, 'w') as fcsv:
                fcsv.write(TEST_CSV_CONTENTS_EMPTY_VALUE)

            files = {'schema_key': csv_file_name}

            bio = BytesIO()
            expected = ("Field 'id' contains values that cannot "
                        "be converted to float in 'strict' mode")
            # NOTE(review): `msg` only customises the text shown when this
            # assertion fails; it is not compared with the exception message.
            # Consider assertRaisesRegex once the exact message is confirmed.
            with self.assertRaises(ValueError, msg=expected):
                importer.import_with_schema(self.ts, bio, self.schema, files, False, {}, {},
                                            chunk_row_size=self.chunk_row_size)
        finally:
            # mkstemp leaves both the descriptor and the file to the caller.
            os.close(fd_csv)
            os.remove(csv_file_name)
Exemple #20
0
    def test_numeric_importer_with_non_numeric_value_in_strict_mode(self):
        """Check the error raised when the 'id' column holds a non-numeric value.

        ``assertRaises`` is used so the test fails when no exception is raised
        at all; a bare try/except would pass silently in that case. Both
        temporary files (input CSV and destination HDF5) are closed and
        removed afterwards.
        """
        TEST_CSV_CONTENTS_EMPTY_VALUE = '\n'.join(
            ('name, id', 'a,     1', 'c,     5@'))

        fd_csv, csv_file_name = tempfile.mkstemp(suffix='.csv')
        fd_dest, dest_file_name = tempfile.mkstemp(suffix='.hdf5')

        try:
            with open(csv_file_name, 'w') as fcsv:
                fcsv.write(TEST_CSV_CONTENTS_EMPTY_VALUE)

            files = {'schema_key': csv_file_name}
            ts = str(datetime.now(timezone.utc))

            with self.assertRaises(Exception) as context:
                importer.import_with_schema(ts, dest_file_name,
                                            self.schema_file_name, files, False,
                                            {}, {})
            self.assertEqual(
                str(context.exception),
                "The following numeric value in the field 'id' can not be parsed:5@"
            )
        finally:
            # mkstemp leaves both the descriptors and the files to the caller.
            for fd, path in ((fd_csv, csv_file_name), (fd_dest, dest_file_name)):
                os.close(fd)
                os.remove(path)