def summarize_file_thread(final_path):
    """
    Summarizes the file at final_path and hands the gzipped result to reply_fn.

    The summary options (num_head_lines, num_tail_lines, max_line_length,
    truncation_text) are read from `args`. Neither `args` nor `reply_fn` is
    defined in this function; presumably both are captured from the
    enclosing scope -- confirm against the caller.
    """
    summary_text = summarize_file(
        final_path,
        args['num_head_lines'],
        args['num_tail_lines'],
        args['max_line_length'],
        args['truncation_text'],
    )
    # The summary is encoded to bytes first, then gzipped before being sent.
    compressed_summary = gzip_bytestring(summary_text.encode())
    reply_fn(None, {}, compressed_summary)
def summarize_file(
    self, target, num_head_lines, num_tail_lines, max_line_length, truncation_text, gzipped
):
    """
    Summarizes the file at the given path in the bundle, returning bytes
    containing the given numbers of lines from beginning and end of the file.
    If the file needs to be truncated, places truncation_text at the
    truncation point. The return value is gzipped if gzipped is True.
    """
    if self._is_available_locally(target):
        # Local case: summarize straight from disk. The file_util helper
        # returns a string, so encode it to keep the return type as bytes.
        summary = file_util.summarize_file(
            self._get_target_path(target),
            num_head_lines,
            num_tail_lines,
            max_line_length,
            truncation_text,
        ).encode()
        return file_util.gzip_bytestring(summary) if gzipped else summary

    # Remote case: ask the worker running the bundle to produce the summary.
    worker = self._bundle_model.get_bundle_worker(target.bundle_uuid)
    socket_id = self._worker_model.allocate_socket(worker['user_id'], worker['worker_id'])
    request = {
        'type': 'summarize_file',
        'num_head_lines': num_head_lines,
        'num_tail_lines': num_tail_lines,
        'max_line_length': max_line_length,
        'truncation_text': truncation_text,
    }
    try:
        self._send_read_message(worker, socket_id, target, request)
        payload = self._get_read_response(socket_id)
    finally:
        # Release the socket whether or not the exchange succeeded.
        self._worker_model.deallocate_socket(socket_id)

    # Note: all data from the worker is gzipped (see `local_reader.py`),
    # so it only needs decompressing when the caller wants plain bytes.
    if gzipped:
        return payload
    return file_util.un_gzip_bytestring(payload)
def test_summarize_file(self):
    """
    Checks summarize_file against a two-line temporary file ("aaa" / "bbb")
    for several combinations of head/tail line counts and line-length limits.
    """
    file_contents = "aaa\nbbb\n"
    # Each row: (num_head_lines, num_tail_lines, max_line_length, expected summary)
    scenarios = [
        (1, 0, 4, "aaa\n"),
        (0, 1, 4, "bbb\n"),
        (1, 1, 4, "aaa\nbbb\n"),
        # Should not recognize a line if max_line_length is smaller than the
        # actual line length (4)
        (1, 0, 3, ""),
        (0, 1, 3, ""),
        (1, 1, 3, "...."),
    ]
    with tempfile.NamedTemporaryFile() as f:
        f.write(file_contents.encode())
        # Flush so the data is on disk before summarize_file reopens it by name.
        f.flush()
        for head_count, tail_count, line_limit, expected in scenarios:
            self.assertEqual(
                summarize_file(
                    f.name,
                    num_head_lines=head_count,
                    num_tail_lines=tail_count,
                    max_line_length=line_limit,
                    truncation_text="....",
                ),
                expected,
            )