예제 #1
0
    def tearDown(self):
        # For each of the two datasets used by these tests, issue three
        # "drop table if exists" statements: the default table, its "work"
        # variant, and the table inside the "valid" stage database.
        drop_prefix = "drop table if exists "
        for dataset in ("household_power_consumption", "small"):
            md = reg.get_metadata(dataset)
            self.t_hive._query(drop_prefix + '.'.join((reg.db_name(md), reg.db_table(md))))
            self.t_hive._query(drop_prefix + '.'.join((reg.db_name(md), reg.db_table(md, type="work"))))
            self.t_hive._query(drop_prefix + '.'.join((reg.db_name(md, stage="valid"), reg.db_table(md))))
예제 #2
0
 def test_register_valid(self):
     # The value returned by registry.register_valid must be accepted by
     # registry.validate_uuid.
     md = registry.get_metadata('sample.txt')
     returned = registry.register_valid(md, uuid4(), 'sample.txt', 123, 'validation query')
     self.assertTrue(registry.validate_uuid(returned))
예제 #3
0
 def test_sandbox_nodelim(self):
     # Create the "no_delim" table while passing a semicolon-separated header
     # and check that describing it yields one string column named column1.
     md = reg.get_metadata("no_delim")
     header_line = (
         'Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;'
         'Sub_metering_1;Sub_metering_2;Sub_metering_3'
     )
     self.t_hive.create_hive_table(md, reset=True, header=header_line)
     qualified_name = '.'.join((reg.db_name(md), reg.db_table(md)))
     described = self.t_hive._query('describe ' + qualified_name)
     self.assertEqual(described, [('column1', 'string', '')])
예제 #4
0
 def test_raw_hive_work(self):
     # Create the "work" table for the "small" dataset and verify that
     # describing it reports three string columns from the deserializer.
     md = reg.get_metadata("small")
     self.t_hive.create_hive_table(md, reset=True, type="work")
     work_table = '.'.join((reg.db_name(md), reg.db_table(md, type="work")))
     described = self.t_hive._query('describe ' + work_table)
     expected = [
         (column, 'string', 'from deserializer')
         for column in ('date_field', 'time_field', 'globalactivepower')
     ]
     self.assertEqual(described, expected)
예제 #5
0
 def test_RegisteredFile(self):
     # registry.get_metadata('sampleFile.db') is expected to return exactly
     # the record below. The metadata is fetched once so that the dump shows
     # the very object the assertion compares.
     metadata = registry.get_metadata('sampleFile.db')
     # A parenthesised single-argument print behaves the same as the Python 2
     # print statement and is also valid Python 3 syntax.
     print(json.dumps(metadata, indent=4))
     self.assertEqual(metadata,
                      {
                          "fields": [],
                          "file": {
                              "dataPartition": "none",
                              "technical": {
                                  "tableName": "sample"
                              },
                              "subjectArea": "test",
                              "deleted": "true",
                              "key": "sample",
                              "guid": "7e1a614c-9570-42a6-9bc7-315f2b6218be"
                          }
                      }
                      )
예제 #6
0
 def test_z_copy_compare_append(self):
     # z in the name so this test runs last in Pycharm
     # Runs copy_and_compare for the append dataset and then logs the raw
     # work / raw regular file paths. There are no assertions here, so the
     # test only fails if one of these calls raises.
     md = reg.get_metadata("append_power_consumption")
     self.t_hive.copy_and_compare(md, uuid4(), "append_power_consumption")
     archive_name = "/household_power_consumption_50.txt.gz"
     work_path = reg.file_path(md, stage="raw", type='work') + archive_name
     regular_path = reg.file_path(md, stage="raw") + archive_name
     for label, path in (("raw_work : ", work_path), ("raw regular : ", regular_path)):
         logging.info(label + path)
예제 #7
0
    def test_z_copy_compare_full(self):
        # z in the name so this test runs last in Pycharm
        # Logs the valid-stage path and the raw file path, then runs
        # copy_and_compare. There are no assertions here, so the test only
        # fails if one of these calls raises.
        md = reg.get_metadata("household_power_consumption")
        regular_path = reg.file_path(md, stage="raw") + "/household_power_consumption_50.txt.gz"
        valid_path = reg.file_path(md, stage="valid")

        for label, path in (("valid work : ", valid_path), ("raw regular : ", regular_path)):
            logging.info(label + path)
        self.t_hive.copy_and_compare(md, uuid4(), "household_power_consumption")
예제 #8
0
 def test_valid_hive(self):
     # Create the "valid"-stage table for the "small" dataset and compare the
     # full output of "describe", including the partition-information rows
     # that repeat time_field and date_field.
     md = reg.get_metadata("small")
     self.t_hive.create_hive_table(md, reset=True, stage="valid")
     described = self.t_hive._query(
         'describe ' + '.'.join((reg.db_name(md, stage="valid"), reg.db_table(md))))

     blank_row = ('', None, None)
     partition_columns = [('time_field', 'string', ''), ('date_field', 'string', '')]
     expected = [
         ('globalactivepower', 'decimal(8,3)', ''),
         ('instance_guid', 'string', ''),
     ] + partition_columns + [
         blank_row,
         ('# Partition Information', None, None),
         ('# col_name            ', 'data_type           ', 'comment             '),
         blank_row,
     ] + partition_columns
     self.assertEqual(described, expected)
예제 #9
0
 def test_sandbox_full_MD(self):
     # Create the "fullsand" table and check that describing it lists the
     # nine expected columns, each reported as a string from the deserializer.
     md = reg.get_metadata("fullsand")
     self.t_hive.create_hive_table(md, reset=True)
     described = self.t_hive._query('describe ' + '.'.join((reg.db_name(md), reg.db_table(md))))
     column_names = (
         'date_field',
         'time_field',
         'globalactivepower',
         'globalreactivepower',
         'voltage',
         'globalintensity',
         'submetering1',
         'submetering2',
         'submetering3',
     )
     expected = [(name, 'string', 'from deserializer') for name in column_names]
     self.assertEqual(described, expected)
예제 #10
0
 def test_valid_hive_work(self):
     # Create the work/valid table for the "small" dataset and check the
     # column list that "describe" reports for it.
     metadata = reg.get_metadata("small")
     self.t_hive.create_hive_table(metadata, reset=True, type="work", stage="valid")
     try:
         self.assertEqual(
             self.t_hive._query('describe ' + reg.db_name(metadata, type="work", stage="valid") + '.' +
                                reg.db_table(metadata, type="work", stage="valid")),
             [
                 ("date_field", "string", ""),
                 ("time_field", "string", ""),
                 ("globalactivepower", "decimal(8,3)", ""),
                 ("instance_guid", "string", "")
             ]
             )
     finally:
         # Run the cleanup even when the assertion above fails; previously a
         # failing assertion skipped this drop.
         # NOTE(review): this drops the default db/table name, not the
         # work/valid one created above - confirm that is intended.
         self.t_hive._query("drop table if exists " + reg.db_name(metadata) + '.' + reg.db_table(metadata))
예제 #11
0
 def test_sandbox_header_delim(self):
     # Create the "sandbox" table while passing a semicolon-separated header
     # and check that describing it lists the nine expected columns, each
     # reported as a string from the deserializer.
     md = reg.get_metadata("sandbox")
     header_line = (
         'Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;'
         'Sub_metering_1;Sub_metering_2;Sub_metering_3'
     )
     self.t_hive.create_hive_table(md, reset=True, header=header_line)
     described = self.t_hive._query('describe ' + '.'.join((reg.db_name(md), reg.db_table(md))))
     column_names = (
         'date_field',
         'time_field',
         'globalactivepower',
         'globalreactivepower',
         'voltage',
         'globalintensity',
         'submetering1',
         'submetering2',
         'submetering3',
     )
     expected = [(name, 'string', 'from deserializer') for name in column_names]
     self.assertEqual(described, expected)
예제 #12
0
 def test_db_name_refined(self):
     # db_name with stage='refined' must return 'dev_none_test' for the
     # 'sample.txt' metadata.
     md = registry.get_metadata('sample.txt')
     actual = registry.db_name(md, stage='refined')
     self.assertEqual(actual, 'dev_none_test', "Incorrect db returned")
예제 #13
0
 def test_file_path_stage(self):
     # file_path with stage='valid' must point below registry.data_root at
     # /none/test/valid/sample.
     md = registry.get_metadata('sample')
     actual = registry.file_path(md, stage='valid')
     expected = registry.data_root + '/none/test/valid/sample'
     self.assertEqual(actual, expected, "incorrect file path")
예제 #14
0
 def test_file_path_work(self):
     # file_path with type='work' must point below registry.data_root at
     # /none/test/raw_work/sample. The path is looked up once for the log
     # line and once more for the assertion.
     def work_path():
         return registry.file_path(registry.get_metadata('sample'), type='work')

     logger.info(work_path())
     expected = registry.data_root + '/none/test/raw_work/sample'
     self.assertEqual(work_path(), expected, "incorrect file path")
예제 #15
0
 def test_file_path_work_sandbox(self):
     # For the 'sandbox' metadata, file_path with type='work' must return the
     # fixed absolute path below. The path is looked up once for the log line
     # and once more for the assertion.
     def work_path():
         return registry.file_path(registry.get_metadata('sandbox'), type='work')

     logger.info(work_path())
     expected = '/user/cloudera/data/sandbox/bria644/household_electric_power_consumption_work'
     self.assertEqual(work_path(), expected)
예제 #16
0
 def test_db_name_test(self):
     # db_name with env='test' must return 'test_none_test_raw' for the
     # 'sample.txt' metadata.
     md = registry.get_metadata('sample.txt')
     actual = registry.db_name(md, env='test')
     self.assertEqual(actual, 'test_none_test_raw')
예제 #17
0
 def test_db_name(self):
     # db_name called with only the 'sample' metadata must return
     # 'dev_none_test_raw'.
     md = registry.get_metadata('sample')
     actual = registry.db_name(md)
     self.assertEqual(actual, 'dev_none_test_raw')
예제 #18
0
 def test_key_matched(self):
     # key_matched must return 'sample' for the 'sample' metadata. The
     # failure message now describes the matched key; the previous text
     # ("Didn't get UUID") did not match what this assertion compares.
     self.assertEqual(registry.key_matched(registry.get_metadata('sample')), 'sample', "Wrong matched key")
예제 #19
0
 def test_db_name_valid(self):
     # db_name with stage='valid' must return 'dev_none_test' for the
     # 'sampleFile.txt' metadata.
     md = registry.get_metadata('sampleFile.txt')
     actual = registry.db_name(md, stage='valid')
     self.assertEqual(actual, 'dev_none_test')
예제 #20
0
 def test_file_path_sandbox(self):
     # Expected layout: service_account/data/sandbox/uid/table_name
     md = registry.get_metadata('sandbox')
     actual = registry.file_path(md)
     expected = registry.data_root + '/sandbox/bria644/household_electric_power_consumption'
     self.assertEqual(actual, expected)
예제 #21
0
 def test_register_raw(self):
     # The value returned by registry.register_raw must be accepted by
     # registry.validate_uuid.
     file_name = 'household_power_consumption_50.txt.gz'
     md = registry.get_metadata(file_name)
     returned = registry.register_raw(md, file_name, 'raw', 123)
     self.assertTrue(registry.validate_uuid(returned), "Didn't get UUID")
예제 #22
0
 def test_NotRegisteredFile(self):
     # get_metadata('badfile.db') must return the empty metadata structure:
     # no fields and an empty file section.
     empty_metadata = {u'fields': [], u'file': {}}
     self.assertEqual(registry.get_metadata('badfile.db'), empty_metadata)
예제 #23
0
 def test_db_table(self):
     # db_table called with only the 'sample' metadata must return 'sample'.
     md = registry.get_metadata('sample')
     actual = registry.db_table(md)
     self.assertEqual(actual, 'sample', "wrong db table")
예제 #24
0
 def test_db_table_work(self):
     # db_table with type='work' must return 'sample_work'. The name is
     # looked up once for the log line and once more for the assertion.
     def work_table():
         return registry.db_table(registry.get_metadata('sample'), type='work')

     logger.info(work_table())
     self.assertEqual(work_table(), 'sample_work', "Bad Work Table")
예제 #25
0
 def test_register_invalid_multireason(self):
     # The value returned by registry.register_invalid, called with a dict
     # holding two reason counts, must be accepted by registry.validate_uuid.
     md = registry.get_metadata('sample.txt')
     returned = registry.register_invalid(
         md,
         uuid4(),
         'sample.txt',
         {"datatypeMismatch": 3, "rowCountMismatch": 12},
         123,
         'validation_query',
     )
     self.assertTrue(registry.validate_uuid(returned))
예제 #26
0
 def test_file_path(self):
     # file_path called with only the 'sample' metadata must point below
     # registry.data_root at /none/test/raw/sample.
     md = registry.get_metadata('sample')
     actual = registry.file_path(md)
     expected = registry.data_root + '/none/test/raw/sample'
     self.assertEqual(actual, expected)
예제 #27
0
 def setUp(self):
     # Build a fresh validator.Hive wrapper and (re)create two tables for the
     # household_power_consumption dataset: first the "work" table, then the
     # "valid"-stage table.
     self.t_hive = validator.Hive()
     md = reg.get_metadata("household_power_consumption")
     for table_kwargs in ({"type": "work"}, {"stage": "valid"}):
         self.t_hive.create_hive_table(md, reset=True, **table_kwargs)
예제 #28
0
 def test_template_guid(self):
     # template_guid must return the fixed guid below for the 'sample'
     # metadata.
     md = registry.get_metadata('sample')
     actual = registry.template_guid(md)
     self.assertEqual(actual, '7e1a614c-9570-42a6-9bc7-315f2b6218be', "Didn't get UUID")
예제 #29
0
 def test_delta(self):
     # Create the work/valid table and then run delta on the same metadata.
     # There are no assertions here, so the test only fails if one of these
     # calls raises.
     md = reg.get_metadata("householdElectricPowerConsumption")
     hive = self.t_hive
     hive.create_hive_table(md, stage="valid", type='work')
     hive.delta(md)
예제 #30
0
 def test_db_name_work(self):
     # db_name with type='work' must return 'dev_none_test_raw' for the
     # 'sample.txt' metadata.
     md = registry.get_metadata('sample.txt')
     actual = registry.db_name(md, type='work')
     self.assertEqual(actual, 'dev_none_test_raw')