def test_single_item(self):
        """
        test writing a single event and reading it back
        """
        granularity = 5
        header = b"aaa"
        data = b"bbb"

        # we expect the completed directory to be empty
        completed_list = os.listdir(_output_complete_dir)
        self.assertEqual(len(completed_list), 0, completed_list)

        writer = LogStreamWriter(_test_prefix,
                                 _test_suffix,
                                 granularity, 
                                 _output_work_dir, 
                                 _output_complete_dir)

        writer.write(header, data)

        # wait for the current file to roll over
        time.sleep(granularity + 1)
        writer.check_for_rollover()

        # we expect a single file in the completed directory
        completed_list = os.listdir(_output_complete_dir)
        self.assertEqual(len(completed_list), 1, completed_list)

        stream_file_path = os.path.join(_output_complete_dir, 
                                        completed_list[0]) 
        log_stream = generate_log_stream_from_file(stream_file_path)

        read_header, read_data = next(log_stream)
        self.assertEqual(read_header, header)
        self.assertEqual(read_data, data)

        self.assertRaises(StopIteration, next, log_stream)


def _iterate_timestamp_content(work_dir,
                               keep_header_pred,
                               keep_content_pred,
                               timestamp_key_dict):
    """
    generator: for each timestamp in ascending order, retrieve that
    timestamp's keys to local files, de-dupe the log entries they contain,
    and yield the data blocks that pass keep_header_pred and keep_content_pred
    """

    # put the retrieved timestamps in order
    timestamps = sorted(timestamp_key_dict.keys())

    for timestamp in timestamps:
        _log.info("timestamp {0}".format(timestamp))

        header_list = list()
        data_file_paths = list()

        for index, key in enumerate(timestamp_key_dict[timestamp]):
            _log.info("    key {0}".format(key.name))
            retrieve_path = os.path.join(work_dir, key.name)

            # retrieve the key from nimbus.io to a disk file
            with open(retrieve_path, "wb") as output_file:
                key.get_contents_to_file(output_file)
                     
            # write uncompressed data blocks to a file while maintaining 
            # a sortable list of headers
            data_file_name = "{0:08}".format(index)
            data_file_path = os.path.join(work_dir, data_file_name)
            data_file_paths.append(data_file_path)
            with open(data_file_path, "wb") as data_file:
                for header_json, data in \
                        generate_log_stream_from_file(retrieve_path):
                    header = json.loads(header_json.decode("utf-8"))

                    if not keep_header_pred(header):
                        continue

                    header["data_file_path"] = data_file_path
                    header["data_offset"] = data_file.tell()
                    header["data_size"] = len(data)
                    header_list.append(header)
                    data_file.write(data)

            # we don't need the retrieved file anymore
            os.unlink(retrieve_path)

        # sort the combined header_list on timestamp and uuid
        header_list.sort(key=_header_key_function)

        # de-dupe the headers and retrieve the data
        data_files = dict()
        for _, group in groupby(header_list, key=_header_key_function):
            group_list = list(group)
            header = group_list[0]
            if header["data_file_path"] not in data_files:
                # the data files were written in binary mode, so read bytes
                data_files[header["data_file_path"]] = \
                    open(header["data_file_path"], "rb")
            data_file = data_files[header["data_file_path"]]
            data_file.seek(header["data_offset"])
            data = data_file.read(header["data_size"])
            if not keep_content_pred(data):
                continue
            yield data

        for data_file in data_files.values():
            data_file.close()

        for data_file_path in data_file_paths:
            os.unlink(data_file_path)
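

# Illustrative only, not part of this module: a minimal sketch of driving
# _iterate_timestamp_content with predicates that keep every header and every
# data block. It assumes timestamp_key_dict has already been built, mapping
# each timestamp to the list of keys retrieved for that interval.
def _example_collect_all_content(work_dir, timestamp_key_dict):
    def keep_everything(_item):
        return True
    return list(_iterate_timestamp_content(work_dir,
                                           keep_everything,
                                           keep_everything,
                                           timestamp_key_dict))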