def test_model_custom_sink(self):
    """Run ``snippets.model_custom_sink`` against a file-backed fake store.

    The fake store (``SimpleKV``) keeps every table as one file inside a
    temporary directory.  The snippet is handed 100 ``('keyN', 'valueN')``
    pairs and two final table names.  Afterwards, for each final table
    name, the files whose names start with that name must together contain
    exactly the lines ``keyN:valueN`` for N in 0..99 (order is ignored).

    The temporary directory is removed when the test finishes, whether it
    passes or fails.
    """
    # Local import: only needed for the temp-dir cleanup in ``finally``.
    import shutil

    tempdir_name = tempfile.mkdtemp()

    class SimpleKV(object):
        """Fake key-value store that maps each table to a file on disk."""

        def __init__(self, tmp_dir):
            self._dummy_token = 'dummy_token'
            self._tmp_dir = tmp_dir

        def _table_path(self, table_name):
            # Every table lives directly inside the temp directory.
            return os.path.join(self._tmp_dir, table_name)

        def connect(self, url):
            # The URL is ignored; every caller receives the same token.
            return self._dummy_token

        def open_table(self, access_token, table_name):
            assert access_token == self._dummy_token
            file_name = self._table_path(table_name)
            # A table may only be opened once: its file must not exist yet.
            assert not os.path.exists(file_name)
            open(file_name, 'wb').close()
            return table_name

        def write_to_table(self, access_token, table_name, key, value):
            assert access_token == self._dummy_token
            file_name = self._table_path(table_name)
            # Writing requires that the table was opened beforehand.
            assert os.path.exists(file_name)
            with open(file_name, 'ab') as f:
                f.write(key + ':' + value + os.linesep)

        def rename_table(self, access_token, old_name, new_name):
            assert access_token == self._dummy_token
            old_file_name = self._table_path(old_name)
            new_file_name = self._table_path(new_name)
            assert os.path.isfile(old_file_name)
            # Renaming must never overwrite an existing table.
            assert not os.path.exists(new_file_name)
            os.rename(old_file_name, new_file_name)

    final_table_names = ('final_table_no_ptransform',
                         'final_table_with_ptransform')

    try:
        snippets.model_custom_sink(
            SimpleKV(tempdir_name),
            [('key' + str(i), 'value' + str(i)) for i in range(100)],
            final_table_names[0],
            final_table_names[1])

        expected_output = [
            'key' + str(i) + ':' + 'value' + str(i) for i in range(100)]

        # Both final tables are checked against the same expected records.
        for table_prefix in final_table_names:
            glob_pattern = os.path.join(tempdir_name, table_prefix + '*')
            output_files = glob.glob(glob_pattern)
            self.assertTrue(
                output_files,
                'no output files match %r' % glob_pattern)

            received_output = []
            for file_name in output_files:
                with open(file_name) as f:
                    received_output.extend(
                        line.rstrip(os.linesep) for line in f)

            self.assertItemsEqual(expected_output, received_output)
    finally:
        # Best-effort cleanup so a cleanup problem cannot mask the real
        # test outcome.
        shutil.rmtree(tempdir_name, ignore_errors=True)
def test_model_custom_sink(self):
    """Run ``snippets.model_custom_sink`` against a file-backed fake store.

    The fake store (``SimpleKV``) keeps every table as one file inside a
    temporary directory.  The snippet is handed 100 ``('keyN', 'valueN')``
    pairs and two final table names.  Afterwards, for each final table
    name, the files whose names start with that name must together contain
    exactly the lines ``keyN:valueN`` for N in 0..99 (order is ignored).

    The temporary directory is removed when the test finishes, whether it
    passes or fails.
    """
    # Local import: only needed for the temp-dir cleanup in ``finally``.
    import shutil

    tempdir_name = tempfile.mkdtemp()

    class SimpleKV(object):
        """Fake key-value store that maps each table to a file on disk."""

        def __init__(self, tmp_dir):
            self._dummy_token = 'dummy_token'
            self._tmp_dir = tmp_dir

        def _table_path(self, table_name):
            # Every table lives directly inside the temp directory.
            return os.path.join(self._tmp_dir, table_name)

        def connect(self, url):
            # The URL is ignored; every caller receives the same token.
            return self._dummy_token

        def open_table(self, access_token, table_name):
            assert access_token == self._dummy_token
            file_name = self._table_path(table_name)
            # A table may only be opened once: its file must not exist yet.
            assert not os.path.exists(file_name)
            open(file_name, 'wb').close()
            return table_name

        def write_to_table(self, access_token, table_name, key, value):
            assert access_token == self._dummy_token
            file_name = self._table_path(table_name)
            # Writing requires that the table was opened beforehand.
            assert os.path.exists(file_name)
            with open(file_name, 'ab') as f:
                f.write(key + ':' + value + os.linesep)

        def rename_table(self, access_token, old_name, new_name):
            assert access_token == self._dummy_token
            old_file_name = self._table_path(old_name)
            new_file_name = self._table_path(new_name)
            assert os.path.isfile(old_file_name)
            # Renaming must never overwrite an existing table.
            assert not os.path.exists(new_file_name)
            os.rename(old_file_name, new_file_name)

    final_table_names = ('final_table_no_ptransform',
                         'final_table_with_ptransform')

    try:
        snippets.model_custom_sink(
            SimpleKV(tempdir_name),
            [('key' + str(i), 'value' + str(i)) for i in range(100)],
            final_table_names[0],
            final_table_names[1])

        expected_output = [
            'key' + str(i) + ':' + 'value' + str(i) for i in range(100)]

        # Both final tables are checked against the same expected records.
        for table_prefix in final_table_names:
            glob_pattern = os.path.join(tempdir_name, table_prefix + '*')
            output_files = glob.glob(glob_pattern)
            self.assertTrue(
                output_files,
                'no output files match %r' % glob_pattern)

            received_output = []
            for file_name in output_files:
                with open(file_name) as f:
                    received_output.extend(
                        line.rstrip(os.linesep) for line in f)

            self.assertItemsEqual(expected_output, received_output)
    finally:
        # Best-effort cleanup so a cleanup problem cannot mask the real
        # test outcome.
        shutil.rmtree(tempdir_name, ignore_errors=True)