def make_random_bin_table(num_rows, num_cols, key_col1, key_col2, delim, dst_path):
    """Write a randomly generated delimited table to `dst_path` (latin-1).

    Args:
        num_rows: Number of rows to generate.
        num_cols: Number of fields in every row.
        key_col1: Zero-based index of the column filled only with word keys
            ('alpha' .. 'zeta').
        key_col2: Zero-based index of the column filled only with numeric
            string keys ('0' .. '19').
        delim: Field separator; it is also excluded from random cell content.
        dst_path: Path of the file to create or overwrite.

    Every other cell has a 1-in-20 chance of holding a word key, a 1-in-20
    chance of holding a numeric key, and otherwise holds a random entry of
    0 to 20 characters from `make_random_csv_entry`.
    """
    # Characters that would break the row/field structure of the output file.
    restricted_chars = ['\r', '\n', delim]
    good_keys1 = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta']
    good_keys2 = [str(v) for v in range(20)]

    result_table = []
    for _ in rbql.xrange6(num_rows):
        row = []
        for c in rbql.xrange6(num_cols):
            if c == key_col1:
                row.append(random.choice(good_keys1))
            elif c == key_col2:
                row.append(random.choice(good_keys2))
            else:
                # Occasionally plant key values in ordinary columns as well.
                dice = random.randint(1, 20)
                if dice == 1:
                    row.append(random.choice(good_keys1))
                elif dice == 2:
                    row.append(random.choice(good_keys2))
                else:
                    row.append(make_random_csv_entry(0, 20, restricted_chars))
        result_table.append(row)

    with codecs.open(dst_path, 'w', encoding='latin-1') as f:
        for row in result_table:
            f.write(delim.join(row))
            # Mix line endings: roughly one line in three ends with CRLF.
            f.write('\r\n' if random.randint(0, 2) == 0 else '\n')
def generate_random_scenario(max_num_rows, max_num_cols, delims):
    """Build a random table, a filter query on it and the expected result.

    Args:
        max_num_rows: Upper bound (inclusive) for the number of rows; at
            least one row is always generated.
        max_num_cols: Upper bound (inclusive) for the number of columns; at
            least one column is always generated.
        delims: Candidate delimiters; one is picked at random.

    Returns:
        A tuple `(input_table, query, output_table, delim)` where
        `input_table` is a list of rows (lists of strings), `query` is a
        `select * where aN <op> "<key>"` string with a 1-based column number,
        and `output_table` holds the rows of `input_table` that satisfy it.
    """
    num_rows = random.randint(1, max_num_rows)
    num_cols = random.randint(1, max_num_cols)
    delim = random.choice(delims)
    restricted_chars = ['\r', '\n', delim]
    key_col = random.randint(0, num_cols - 1)
    good_keys = [
        'Hello',
        'Avada Kedavra ',
        ' ??????',
        '128',
        '3q295 fa#(@*$*)',
        ' abcdefg ',
        'NR',
        'a1',
        'a2',
    ]

    input_table = []
    for _ in rbql.xrange6(num_rows):
        row = []
        for col in rbql.xrange6(num_cols):
            if col == key_col:
                # The key column only ever holds one of the known keys.
                row.append(random.choice(good_keys))
            else:
                row.append(make_random_csv_entry(0, 20, restricted_chars))
        input_table.append(row)

    target_key = random.choice(good_keys)
    negate = random.choice([True, False])
    sql_op = '!=' if negate else '=='
    # Keep a row when "differs from target" agrees with the chosen operator.
    output_table = [
        row for row in input_table
        if (row[key_col] != target_key) == negate
    ]

    query = 'select * where a{} {} "{}"'.format(key_col + 1, sql_op, target_key)
    return (input_table, query, output_table, delim)
def make_random_csv_entry(min_len, max_len, restricted_chars):
    """Return a random string of latin-1 characters.

    Args:
        min_len: Minimum length of the result (inclusive).
        max_len: Maximum length of the result (inclusive).
        restricted_chars: Iterable of single characters that must not
            appear in the result.

    Returns:
        A string whose characters are drawn uniformly from code points
        0..255, excluding the restricted ones.
    """
    strlen = random.randint(min_len, max_len)
    forbidden_codes = set(ord(ch) for ch in restricted_chars)
    allowed_codes = [code for code in range(256) if code not in forbidden_codes]
    picked_codes = [random.choice(allowed_codes) for _ in rbql.xrange6(strlen)]
    # latin-1 maps each byte value 0..255 to the code point of the same number.
    return bytearray(picked_codes).decode('latin-1')
def test_random_bin_tables(self):
    """Check 50 random scenarios against both conversion runners.

    Each scenario comes from `generate_random_scenario` with at most 12 rows
    and 12 columns and a tab, comma or semicolon delimiter. The table returned
    by `run_conversion_test_py` and then by `run_conversion_test_js`
    (presumably the Python and JavaScript backends -- confirm against their
    definitions) must equal the scenario's expected table.
    """
    test_name = 'test_random_bin_tables'
    delims = ['\t', ',', ';']
    for _ in rbql.xrange6(50):
        scenario = generate_random_scenario(12, 12, delims)
        input_table, query, canonic_table, delim = scenario

        py_table = run_conversion_test_py(query, input_table, test_name, delim=delim)
        self.compare_tables(canonic_table, py_table)

        js_table = run_conversion_test_js(query, input_table, test_name, delim=delim)
        self.compare_tables(canonic_table, js_table)
def compare_tables(self, canonic_table, test_table):
    """Assert that `test_table` equals `canonic_table`, row by row.

    Args:
        canonic_table: Expected table, a sequence of rows.
        test_table: Table produced by the code under test.

    Row counts are compared first, then each pair of rows is checked for
    equal length and equal content, so a failure points at the first
    differing row. The final whole-table comparison is a catch-all.
    """
    self.assertEqual(len(canonic_table), len(test_table))
    # The assertion above is expected to abort on a row-count mismatch, so
    # zip() pairs every row of both tables here.
    for canonic_row, test_row in zip(canonic_table, test_table):
        self.assertEqual(len(canonic_row), len(test_row))
        self.assertEqual(canonic_row, test_row)
    self.assertEqual(canonic_table, test_table)