def read_textual_reel_header(fh, encoding):
    """Read the SEG Y card image header, also known as the textual header.

    The file is rewound to its first byte before reading, so the position of
    fh on entry does not influence which bytes are read.

    Args:
        fh: A file-like object open in binary mode. It must support seek().

        encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.

    Returns:
        A tuple of Unicode strings containing the transcoded header data, one
        string per CARD_LENGTH-byte card (forty for a standard header).

    Raises:
        EOFError: If fewer than TEXTUAL_HEADER_NUM_BYTES bytes could be read.
    """
    fh.seek(0)
    header_bytes = fh.read(TEXTUAL_HEADER_NUM_BYTES)

    bytes_available = len(header_bytes)
    if bytes_available < TEXTUAL_HEADER_NUM_BYTES:
        message = "Only {} bytes of {} byte textual reel header could be read".format(
            bytes_available, TEXTUAL_HEADER_NUM_BYTES)
        raise EOFError(message)

    # Each batch is one card image; rebuild it as bytes so it can be decoded.
    cards = batched(header_bytes, CARD_LENGTH)
    return tuple(bytes(card).decode(encoding) for card in cards)
def test_pad_contents(self, items, batch_size, pad):
    """The tail of a short final batch consists solely of the padding value."""
    assume(len(items) > 0)
    assume(0 < batch_size < 1000)

    remainder = len(items) % batch_size
    pad_length = 0 if remainder == 0 else batch_size - remainder
    # Only cases where padding is actually required are interesting here.
    assume(pad_length != 0)

    final_batch = list(batched(items, batch_size, padding=pad))[-1]
    expected_padding = [pad] * pad_length
    self.assertEqual(final_batch[batch_size - pad_length:], expected_padding)
def test_pad_contents(self, items, batch_size, pad):
    """Check that the padded portion of the last batch is filled with pad."""
    assume(len(items) > 0)
    assume(0 < batch_size < 1000)

    left_over = len(items) % batch_size
    if left_over != 0:
        pad_length = batch_size - left_over
    else:
        pad_length = 0
    # Discard examples which divide evenly and therefore need no padding.
    assume(pad_length != 0)

    all_batches = list(batched(items, batch_size, padding=pad))
    padded_tail = all_batches[-1][batch_size - pad_length:]
    self.assertEqual(padded_tail, [pad] * pad_length)
def spaced_ranges(min_num_ranges, max_num_ranges, min_interval, max_interval):
    """A Hypothesis strategy to produce separated, non-overlapping ranges.

    Twice as many interval lengths as ranges are drawn. Their running totals
    are taken as boundaries and consecutive boundaries are paired up into
    (start, stop) arguments for range, so the lengths alternate between a
    space and a range.

    Args:
        min_num_ranges: The minimum number of ranges to produce. TODO: Correct?
        max_num_ranges: The maximum number of ranges to produce.
        min_interval: The minimum interval used for the lengths of the
            alternating ranges and spaces.
        max_interval: The maximum interval used for the lengths of the
            alternating ranges and spaces.
    """
    def double(num_ranges):
        # Every range needs two lengths: the space before it and its own.
        return 2*num_ranges

    def interval_lengths(num_intervals):
        return (integers_in_range(min_interval, max_interval),) * num_intervals

    def boundaries(lengths):
        return list(accumulate(lengths))

    def pair_up(bounds):
        return list(batched(bounds, 2))

    def to_ranges(pairs):
        return list(starmap(range, pairs))

    num_ranges = strategy(integers_in_range(min_num_ranges, max_num_ranges))
    lengths = num_ranges.map(double).flatmap(interval_lengths).map(list)
    return lengths.map(boundaries).map(pair_up).map(to_ranges)
def spaced_ranges(min_num_ranges, max_num_ranges, min_interval, max_interval):
    """A Hypothesis strategy to produce separated, non-overlapping ranges.

    Twice as many interval lengths as ranges are drawn. Their running totals
    are taken as boundaries and consecutive boundaries are paired up into
    (start, stop) arguments for range, so the lengths alternate between a
    space and a range.

    Args:
        min_num_ranges: The minimum number of ranges to produce. TODO: Correct?
        max_num_ranges: The maximum number of ranges to produce.
        min_interval: The minimum interval used for the lengths of the
            alternating ranges and spaces.
        max_interval: The maximum interval used for the lengths of the
            alternating ranges and spaces.
    """
    def double(num_ranges):
        # Every range needs two lengths: the space before it and its own.
        return 2*num_ranges

    def interval_lengths(num_intervals):
        one_length = integers(min_interval, max_interval)
        return lists(one_length, min_size=num_intervals, max_size=num_intervals)

    def boundaries(lengths):
        return list(accumulate(lengths))

    def pair_up(bounds):
        return list(batched(bounds, 2))

    def to_ranges(pairs):
        return list(starmap(range, pairs))

    num_ranges = integers(min_num_ranges, max_num_ranges)
    lengths = num_ranges.map(double).flatmap(interval_lengths).map(list)
    return lengths.map(boundaries).map(pair_up).map(to_ranges)
def read_textual_reel_header(fh, encoding):
    """Read the SEG Y card image header, also known as the textual header.

    Reading starts at the current position of fh; no seek is performed.

    Args:
        fh: A file-like object open in binary mode positioned such that the
            beginning of the textual header will be the next byte to read.

        encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.

    Returns:
        A tuple of Unicode strings containing the transcoded header data, one
        string per CARD_LENGTH-byte card (forty for a standard header).

    Raises:
        EOFError: If fewer than TEXTUAL_HEADER_NUM_BYTES bytes could be read.
    """
    header_bytes = fh.read(TEXTUAL_HEADER_NUM_BYTES)

    bytes_available = len(header_bytes)
    if bytes_available < TEXTUAL_HEADER_NUM_BYTES:
        message = "Only {} bytes of {} byte textual reel header could be read".format(
            bytes_available, TEXTUAL_HEADER_NUM_BYTES)
        raise EOFError(message)

    # Each batch is one card image; rebuild it as bytes so it can be decoded.
    cards = batched(header_bytes, CARD_LENGTH)
    return tuple(bytes(card).decode(encoding) for card in cards)
def format_extended_textual_header(text, encoding, include_text_stop=False):
    """Format a string into pages and lines suitable for an extended textual header.

    Args:
        text: An arbitrary text string. It is split on universal newlines and
            every resulting line, including a blank one, occupies at least one
            header line. Lines too long for a single header line are split
            across several consecutive header lines.

        encoding: Either ASCII ('ascii') or EBCDIC ('cp037'). It is validated
            here, but the returned lines are not encoded by this function.

        include_text_stop: If True, a text stop stanza header will be appended,
            otherwise not.

    Returns:
        A list of pages. Each page is a batch of forty header lines as produced
        by pad_and_terminate_header_line; the last page of text is filled up
        with blank header lines.

    Raises:
        UnsupportedEncodingError: If encoding is not a supported encoding.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Extended textual header", encoding)

    # According to the standard: "The Extended Textual File Header consists of one or more 3200-byte records, each
    # record containing 40 lines of textual card-image text." It goes on "... Each line in an Extended Textual File
    # Header ends in carriage return and linefeed (EBCDIC 0D25 or ASCII 0D0A)." Given that we're dealing with fixed-
    # length (80 byte) lines, this implies that we have 78 bytes of space into which we can encode the content of each
    # line, which must be left-justified and padded with spaces.
    width = CARD_LENGTH - len(HEADER_NEWLINE)

    # Split overly long lines (i.e. > 78) and pad too-short lines with spaces
    lines = []
    for original_line in text.splitlines():
        # A blank line has length zero, which would give an empty range and
        # silently drop the line; max(..., 1) keeps it as one all-space line.
        span = max(len(original_line), 1)
        lines.extend(pad_and_terminate_header_line(original_line[i:i + width], width)
                     for i in range(0, span, width))

    pages = list(batched(lines, 40, pad_and_terminate_header_line('', width)))

    if include_text_stop:
        # The stop stanza gets a page of its own, formatted by the same rules.
        stop_page = format_extended_textual_header(END_TEXT_STANZA, encoding)[0]
        pages.append(stop_page)

    return pages
def format_extended_textual_header(text, encoding, include_text_stop=False):
    """Format a string into pages and lines suitable for an extended textual header.

    Args:
        text: An arbitrary text string. It is split on universal newlines and
            every resulting line, including a blank one, occupies at least one
            header line. Lines too long for a single header line are split
            across several consecutive header lines.

        encoding: Either ASCII ('ascii') or EBCDIC ('cp037'). It is validated
            here, but the returned lines are not encoded by this function.

        include_text_stop: If True, a text stop stanza header will be appended,
            otherwise not.

    Returns:
        A list of pages. Each page is a batch of forty header lines as produced
        by pad_and_terminate_header_line; the last page of text is filled up
        with blank header lines.

    Raises:
        UnsupportedEncodingError: If encoding is not a supported encoding.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Extended textual header", encoding)

    # According to the standard: "The Extended Textual File Header consists of one or more 3200-byte records, each
    # record containing 40 lines of textual card-image text." It goes on "... Each line in an Extended Textual File
    # Header ends in carriage return and linefeed (EBCDIC 0D25 or ASCII 0D0A)." Given that we're dealing with fixed-
    # length (80 byte) lines, this implies that we have 78 bytes of space into which we can encode the content of each
    # line, which must be left-justified and padded with spaces.
    width = CARD_LENGTH - len(HEADER_NEWLINE)

    # Split overly long lines (i.e. > 78) and pad too-short lines with spaces
    lines = []
    for original_line in text.splitlines():
        # A blank line has length zero, which would give an empty range and
        # silently drop the line; max(..., 1) keeps it as one all-space line.
        span = max(len(original_line), 1)
        lines.extend(pad_and_terminate_header_line(original_line[i:i + width], width)
                     for i in range(0, span, width))

    pages = list(batched(lines, 40, pad_and_terminate_header_line('', width)))

    if include_text_stop:
        # The stop stanza gets a page of its own, formatted by the same rules.
        stop_page = format_extended_textual_header(END_TEXT_STANZA, encoding)[0]
        pages.append(stop_page)

    return pages
def test_batch_size_less_than_one_raises_value_error(self):
    """Calling batched with a batch size of zero must raise ValueError."""
    too_small_batch_size = 0
    with raises(ValueError):
        batched([1, 2, 3], too_small_batch_size)
def test_pad(self):
    """A short final batch is filled out with the padding value."""
    final_batch = list(batched([0, 0], 3, 42))[-1]
    assert final_batch == [0, 0, 42]
def test_batch_sizes_unpadded(self, items, batch_size):
    """Without padding, every batch before the last has exactly batch_size items."""
    assume(batch_size > 0)

    all_batches = list(batched(items, batch_size))
    # The final batch may legitimately be short, so it is excluded.
    leading_batches = all_batches[:-1]
    sizes_match = all(len(batch) == batch_size for batch in leading_batches)
    self.assertTrue(sizes_match)
def test_pad(self):
    """Two items batched in threes are completed with a single pad value."""
    padded_batches = list(batched([0, 0], 3, 42))
    final_batch = padded_batches[-1]
    self.assertEqual(final_batch, [0, 0, 42])
def test_final_batch_sizes(self, items, batch_size):
    """The last batch never holds more than batch_size items."""
    assume(len(items) > 0)
    assume(batch_size > 0)

    final_batch = list(batched(items, batch_size))[-1]
    within_limit = len(final_batch) <= batch_size
    self.assertTrue(within_limit)
def test_batch_sizes_padded(self, items, batch_size, pad):
    """With padding, every batch, including the last, has exactly batch_size items."""
    assume(batch_size > 0)

    padded_batches = list(batched(items, batch_size, padding=pad))
    sizes_match = all(len(batch) == batch_size for batch in padded_batches)
    self.assertTrue(sizes_match)
def format_standard_textual_header(revision, **kwargs):
    """Produce a standard SEG Y textual header.

    The text of textual_reel_header.TEMPLATE is copied, with each placeholder
    replaced by the matching keyword argument value converted with str().

    Args:
        revision: The SEG Y revision. Used to look up the default end marker
            in textual_reel_header.END_MARKERS.

        **kwargs: Named arguments corresponding to the values in the
            textual_reel_header.TEMPLATE_FIELD_NAMES dictionary, which in turn
            correspond to the placeholders in the textual_reel_header.TEMPLATE
            string. Any omitted arguments will result in placeholders being
            replaced by spaces. If the end_marker argument is not supplied, an
            appropriate end marker will be selected based on the SEG Y
            revision. For standard end markers consider using
            textual_reel_header.END_TEXTUAL_HEADER or
            textual_reel_header.END_EBCDIC. Any values which are longer than
            their placeholder will be truncated to the placeholder length;
            shorter values are left-justified and padded with spaces.

    Returns:
        A list of forty Unicode strings.

    Raises:
        TypeError: If any keyword argument does not correspond to a template
            placeholder.

    Usage:
        header = format_standard_textual_header(1,
                                            client="Lundin",
                                            company="Western Geco",
                                            crew_number=123,
                                            processing1="Sixty North AS",
                                            sweep_start_hz=10,
                                            sweep_end_hz=1000,
                                            sweep_length_ms=10000,
                                            sweep_channel_number=3,
                                            sweep_type='spread')
    """
    # Consider making the template module an argument with a default.

    default_end_marker = textual_reel_header.END_MARKERS[revision]
    kwargs.setdefault('end_marker', default_end_marker)

    template = textual_reel_header.TEMPLATE
    placeholder_slices = parse_template(template)
    background_slices = complementary_intervals(list(placeholder_slices.values()), 0, len(template))

    # Interleave the fixed template text with the substituted field values.
    pieces = []
    for background, field in zip_longest(background_slices, placeholder_slices.items()):
        if background is not None:
            pieces.append(template[background])

        if field is None:
            continue

        field_name, field_slice = field
        arg_name = textual_reel_header.TEMPLATE_FIELD_NAMES[field_name]
        # Popping consumes the argument, so whatever remains afterwards was unrecognised.
        value = kwargs.pop(arg_name, '')
        field_width = field_slice.stop - field_slice.start
        pieces.append(str(value)[:field_width].ljust(field_width, ' '))

    if kwargs:
        raise TypeError("The following keyword arguments did not correspond to template placeholders: {!r}"
                        .format(list(kwargs.keys())))

    header_text = ''.join(pieces)
    return [''.join(card) for card in batched(header_text, CARD_LENGTH)]