def test_bf_03_single_col(self):
    """Check build_freq on a single column with sampling method 'non'.

    Overrides ``self.columns`` with ``[1]`` and expects: no truncation,
    counts that sum to 1000, and four distinct keys whose first element
    is one of A1-A4.
    """
    method = 'non'  # presumably the "no sampling" mode; confirm in mod.build_freq
    rate = None
    self.columns = [1]

    freq, was_truncated = mod.build_freq(
        self.files, self.dialect, self.columns, self.number, method, rate
    )

    assert not was_truncated
    assert sum(freq.values()) == 1000
    # Fewer than four distinct entries is possible, but extremely unlikely.
    assert len(freq) == 4
    allowed_values = ('A1', 'A2', 'A3', 'A4')
    for key in freq:
        assert key[0] in allowed_values
def test_bf_01_multicol(self):
    """Check build_freq on the fixture's columns with sampling method 'non'.

    Expects: no truncation, counts that sum to 1000, and eight distinct
    keys whose first element is one of A1-A4 and whose second element is
    B1 or B2.
    """
    method = 'non'  # presumably the "no sampling" mode; confirm in mod.build_freq
    rate = None

    freq, was_truncated = mod.build_freq(
        self.files, self.dialect, self.columns, self.number, method, rate
    )

    assert not was_truncated
    assert sum(freq.values()) == 1000
    assert len(freq) == 8
    first_col_values = ('A1', 'A2', 'A3', 'A4')
    second_col_values = ('B1', 'B2')
    for key in freq:
        assert key[0] in first_col_values
        assert key[1] in second_col_values
def test_bf_02_multicol_and_truncation(self):
    """Check that build_freq reports truncation when ``self.number`` is 4.

    Sets ``self.number`` to 4 and expects the truncated flag to be true,
    exactly four entries in the result, and every key to hold one of
    A1-A4 followed by B1 or B2.
    """
    method = 'non'  # presumably the "no sampling" mode; confirm in mod.build_freq
    rate = None
    self.number = 4

    freq, was_truncated = mod.build_freq(
        self.files, self.dialect, self.columns, self.number, method, rate
    )

    assert was_truncated
    # Fewer than four entries is possible, but extremely unlikely.
    assert len(freq) == 4
    first_col_values = ('A1', 'A2', 'A3', 'A4')
    second_col_values = ('B1', 'B2')
    for key in freq:
        assert key[0] in first_col_values
        assert key[1] in second_col_values
def test_bf_03_interval_sampling(self):
    """Check build_freq with sampling method 'interval' and a rate of 10.

    Expects: no truncation, counts that sum to 100, and eight distinct
    keys whose first element is one of A1-A4 and whose second element is
    B1 or B2.
    """
    method = 'interval'
    rate = 10  # presumably "keep every 10th record"; confirm in mod.build_freq

    freq, was_truncated = mod.build_freq(
        self.files, self.dialect, self.columns, self.number, method, rate
    )

    assert not was_truncated
    assert sum(freq.values()) == 100
    # Fewer than eight distinct entries is possible, but unlikely.
    assert len(freq) == 8
    first_col_values = ('A1', 'A2', 'A3', 'A4')
    second_col_values = ('B1', 'B2')
    for key in freq:
        assert key[0] in first_col_values
        assert key[1] in second_col_values