Example #1
0
 def summarize_file_thread(final_path):
     """Summarize the file at `final_path`, gzip the result and send it as a reply.

     The head/tail line counts, maximum line length and truncation text are
     read from the free variable `args`; the compressed bytes are handed to
     the free variable `reply_fn`.
     """
     summary_text = summarize_file(
         final_path,
         args['num_head_lines'],
         args['num_tail_lines'],
         args['max_line_length'],
         args['truncation_text'],
     )
     # summarize_file's result is encoded to bytes before compression.
     compressed = gzip_bytestring(summary_text.encode())
     reply_fn(None, {}, compressed)
Example #2
0
    def summarize_file(
        self, target, num_head_lines, num_tail_lines, max_line_length, truncation_text, gzipped
    ):
        """
        Build a bytes summary of the file that `target` points to in a bundle.

        The summary holds the requested number of lines from the start and
        from the end of the file; where content had to be dropped,
        truncation_text marks the cut.

        When the target is not available locally, the summary is requested
        from the worker running the bundle over a temporarily allocated
        socket. The returned bytes are gzip-compressed when gzipped is true
        and plain otherwise.
        """
        if not self._is_available_locally(target):
            request = {
                'type': 'summarize_file',
                'num_head_lines': num_head_lines,
                'num_tail_lines': num_tail_lines,
                'max_line_length': max_line_length,
                'truncation_text': truncation_text,
            }
            worker = self._bundle_model.get_bundle_worker(target.bundle_uuid)
            socket_id = self._worker_model.allocate_socket(
                worker['user_id'], worker['worker_id']
            )
            try:
                self._send_read_message(worker, socket_id, target, request)
                payload = self._get_read_response(socket_id)
            finally:
                # Always release the socket, even if the exchange failed.
                self._worker_model.deallocate_socket(socket_id)

            # Note: all data from the worker is gzipped (see `local_reader.py`),
            # so it only needs decompressing when the caller wants plain bytes.
            return payload if gzipped else file_util.un_gzip_bytestring(payload)

        local_path = self._get_target_path(target)
        # file_util.summarize_file returns a string; encode it so that both
        # branches hand back bytes.
        raw = file_util.summarize_file(
            local_path, num_head_lines, num_tail_lines, max_line_length, truncation_text
        ).encode()
        return file_util.gzip_bytestring(raw) if gzipped else raw
 def test_summarize_file(self):
     """Check summarize_file on a two-line file for head/tail/line-length combinations."""
     marker = "...."
     # Each case: (num_head_lines, num_tail_lines, max_line_length, expected summary)
     cases = [
         (1, 0, 4, "aaa\n"),
         (0, 1, 4, "bbb\n"),
         (1, 1, 4, "aaa\nbbb\n"),
         # Should not recognize a line if max_line_length is smaller than the actual line length (4)
         (1, 0, 3, ""),
         (0, 1, 3, ""),
         (1, 1, 3, "...."),
     ]
     with tempfile.NamedTemporaryFile() as f:
         f.write("aaa\nbbb\n".encode())
         # Flush so the content is on disk before it is read back by name.
         f.flush()
         for head_count, tail_count, line_limit, expected in cases:
             actual = summarize_file(
                 f.name,
                 num_head_lines=head_count,
                 num_tail_lines=tail_count,
                 max_line_length=line_limit,
                 truncation_text=marker,
             )
             self.assertEqual(actual, expected)