def vote_results_csv(request):
    ctx = request.context
    user_id = authenticated_userid(request)
    if not user_id:
        raise HTTPUnauthorized()
    histogram = request.GET.get('histogram', None)
    if histogram:
        try:
            histogram = int(histogram)
        except ValueError as e:
            raise HTTPBadRequest(e)
        if histogram > 25:
            raise HTTPBadRequest(
                "Please select at most 25 bins in the histogram.")
    widget = ctx._instance.widget
    if widget.activity_state != "ended":
        permissions = ctx.get_permissions()
        if P_ADMIN_DISC not in permissions:
            raise HTTPUnauthorized()
    output = BytesIO()
    output_utf8 = TextIOWrapper(output, encoding='utf-8')
    ctx._instance.csv_results(output_utf8, histogram)
    output_utf8.detach()
    output.seek(0)
    return Response(body_file=output, content_type='text/csv',
                    charset="utf-8")
@contextmanager
def csv_text_io_wrapper(buf):
    """IO wrapper to use the csv reader/writer on a byte stream."""
    w = TextIOWrapper(buf, encoding='utf-8-sig', newline='')
    try:
        yield w
    finally:
        # detach so the underlying byte stream stays open for the caller
        w.detach()
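# A minimal usage sketch for the context manager above (assumes the
# contextlib.contextmanager decorator shown, with a throwaway BytesIO):
import csv
from io import BytesIO

buf = BytesIO()
with csv_text_io_wrapper(buf) as w:
    csv.writer(w).writerow(["a", "b"])
# The stream survives the with-block because of detach(); note that
# utf-8-sig prepends a BOM when writing.
assert not buf.closed
assert buf.getvalue() == b'\xef\xbb\xbfa,b\r\n'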
def test_header_check_files(self):
    """Determine if files with no header are properly detected."""
    from itertools import islice

    # add some more files to the list to test the header detection
    # these files have some first lines which are not the header
    for input_file in self.input_file_names:
        with open(input_file['path'],
                  encoding=input_file['encoding']) as csvfile:
            data_as_str = ''.join(list(islice(csvfile, 5)))
        header_line = CSVData._guess_header_row(data_as_str,
                                                input_file['delimiter'])
        self.assertIn(header_line, input_file['has_header'],
                      input_file['path'])

    for input_buf in self.buffer_list:
        # BytesIO is wrapped so that it is fed into _guess_header_row
        # the same way it would be internally
        buffer = input_buf['path']
        if isinstance(input_buf['path'], BytesIO):
            buffer = TextIOWrapper(input_buf['path'],
                                   encoding=input_buf['encoding'])
        data_as_str = ''.join(list(islice(buffer, 5)))
        header_line = CSVData._guess_header_row(data_as_str,
                                                input_buf['delimiter'])
        self.assertIn(header_line, input_buf['has_header'],
                      input_buf['path'])
        # since the BytesIO was wrapped, it now has to be detached
        if isinstance(buffer, TextIOWrapper):
            buffer.detach()
def print_pdf_tex(self, filename, *args, **kwargs):
    _dpi = 72
    rcParams.update({"svg.fonttype": 'none'})
    with cbook.open_file_cm(filename, "w", encoding="utf-8") as fh:
        filename = getattr(fh, 'name', '')
        if not isinstance(filename, str):
            filename = ''
        if cbook.file_requires_unicode(fh):
            detach = False
        else:
            fh = TextIOWrapper(fh, 'utf-8')
            detach = True
        _dpi = self._print_pdftex(filename, fh, **kwargs)
        # Detach underlying stream from wrapper so that it remains open in
        # the caller.
        if detach:
            fh.detach()
    subprocess.run([
        "inkscape",
        "--export-filename={}.pdf".format('.'.join(filename.split('.')[:-1])),
        filename,
        "--export-dpi={}".format(int(_dpi)),
        "--export-latex",
    ])
def get_headers(self):
    """Returns the column headers from the csv as a list."""
    logger.debug("Retrieving headers from {}".format(self.csv_file))

    # set up a csv reader
    csv_reader = csv.reader(self.csv_file, delimiter=self.delimiter)

    try:
        # Pop the headers
        headers = next(csv_reader)
    except csv.Error:
        # this error is thrown in Python 3 when the file is in binary mode
        # first, rewind the file
        self.csv_file.seek(0)

        # take the user-defined encoding, or assume utf-8
        encoding = self.encoding or 'utf-8'

        # wrap the binary file...
        text_file = TextIOWrapper(self.csv_file, encoding=encoding)

        # ...so the csv reader can treat it as text
        csv_reader = csv.reader(text_file, delimiter=self.delimiter)

        # now pop the headers
        headers = next(csv_reader)

        # detach the open csv_file so it will stay open
        text_file.detach()

    # Move back to the top of the file
    self.csv_file.seek(0)

    return headers
def convert_file(column_widths, column_names, input_file, input_encoding,
                 output_file, output_encoding):
    '''
    Converts a file from fixed width to delimited.

    column_widths is the width of each field.
    column_names is the headers to use for the delimited file; None means
    don't write a header.
    input_file/input_encoding is the file (bytes) and encoding to use for
    the input.
    output_file/output_encoding is the file (bytes) and encoding to use for
    the output.
    '''
    input_reader = TextIOWrapper(input_file, encoding=input_encoding)
    output_writer = TextIOWrapper(output_file, encoding=output_encoding)
    try:
        line_processor = LineProcessor(column_widths)
        if column_names:
            delimited_line = line_processor.render_delimited_line(column_names)
            output_writer.write(delimited_line)
            output_writer.write('\n')
        for line in input_reader:
            delimited_line = line_processor.convert_line(line.rstrip('\n'))
            output_writer.write(delimited_line)
            output_writer.write('\n')
    finally:
        # Python has some weird and inconsistent behaviour around file/stream
        # wrappers taking ownership and closing the underlying files; we want
        # to opt out of that behaviour in this case.
        input_reader.detach()
        output_writer.detach()
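# Hypothetical round trip for convert_file above; LineProcessor and its
# delimiter come from the surrounding module, so only the detach()
# behaviour is asserted here:
from io import BytesIO

src = BytesIO(b"AA111\nBB222\n")
dst = BytesIO()
convert_file([2, 3], ["code", "num"], src, "utf-8", dst, "utf-8")
# detach() in the finally block left both streams open for the caller
assert not src.closed and not dst.closed
print(dst.getvalue())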
def _get_timeseries_without_moving_file_position(self, datastream):
    original_position = datastream.tell()
    wrapped_datastream = TextIOWrapper(datastream, encoding="utf-8",
                                       newline="\n")
    result = HTimeseries.read(wrapped_datastream)
    wrapped_datastream.detach()  # If we don't do this the datastream will be closed
    datastream.seek(original_position)
    return result
class Formatter(BaseFormatter):
    @classmethod
    def get_file_extension(cls) -> str:
        return "csv"

    def _generate_fieldnames(self) -> List[str]:
        fields = []
        for field in self.executor.query.select:
            fields.append(field.column)
        return fields

    def open(self):
        super().open()
        self._wrapped_stream = TextIOWrapper(self.stream, encoding="utf-8",
                                             write_through=True, newline="")
        self.out = csv.DictWriter(self._wrapped_stream,
                                  fieldnames=self._generate_fieldnames())
        self.out.writeheader()

    def close(self):
        self._wrapped_stream.detach()

    def writerow(self, row: Dict[str, Any]):
        self.out.writerow(row)
def close(self):
    wrapped = TextIOWrapper(self.stream, encoding="utf-8", write_through=True)
    json.dump(self._rows, wrapped, sort_keys=True, indent=4)
    wrapped.detach()
    super().close()
def df_to_csv(df: pd.DataFrame) -> BytesIO:
    bio = BytesIO()
    wrap = TextIOWrapper(bio, encoding="utf-8", write_through=True)
    df.to_csv(wrap, index=False)
    wrap.detach()
    bio.seek(0)
    return bio
def reformat_stream_or_path(
    in_stream_or_path: Union[BinaryIO, Path],
    out_stream_or_path: Union[None, BinaryIO, Path],
    *,
    force_write: bool = False,
    options: Options,
) -> bool:
    with open_stream_or_path(in_stream_or_path, "rb") as in_stream:
        src_contents, encoding, existing_newline = decode_stream(in_stream)

    newline = NEWLINE_FROM_OPTION.get(options.newline, existing_newline)
    newline_changed = newline != existing_newline

    content_changed = True
    try:
        dst_contents = format_file_contents(src_contents, options=options)
    except NothingChanged:
        content_changed = False
        dst_contents = src_contents

    will_write = force_write or content_changed or newline_changed
    if will_write and out_stream_or_path is not None:
        with open_stream_or_path(out_stream_or_path, "wb") as out_stream:
            tiow = TextIOWrapper(out_stream, encoding=encoding, newline=newline)
            tiow.write(dst_contents)
            # Ensures that the underlying stream is not closed when the
            # TextIOWrapper is garbage collected. We don't want to close a
            # stream that was passed to us.
            tiow.detach()

    return content_changed or newline_changed
def wrapper(*args, **kwargs):
    args = list(args)
    if arg is not None:
        try:
            _file = args[arg[1]]

            def update_arg(new_val):
                args[arg[1]] = new_val
        except IndexError:
            _file = kwargs[arg[0]]

            def update_arg(new_val):
                kwargs[arg[0]] = new_val
    else:
        _file = args[0]

        def update_arg(new_val):
            args[0] = new_val

    cleanup_textio = False
    try:
        if 'r' in mode and hasattr(_file, 'read'):
            # Check if opened in the correct mode
            # and wrap in conversion layer if not
            if _file.read(0) != '' and 'b' not in mode:
                _file = TextIOWrapper(_file)
                cleanup_textio = True
            elif _file.read(0) != b'' and 'b' in mode:
                raise NotImplementedError("Cannot convert a text file"
                                          " handle to binary mode")
            update_arg(_file)
            return func(*args, **kwargs)
        elif 'w' in mode and hasattr(_file, 'write'):
            if 'b' not in mode:
                try:
                    _file.write('')
                except TypeError:
                    _file = TextIOWrapper(_file)
                    cleanup_textio = True
            else:
                try:
                    _file.write(b'')
                except TypeError:
                    raise NotImplementedError("Cannot convert a text file"
                                              " handle to binary mode")
            update_arg(_file)
            return func(*args, **kwargs)
    finally:
        # TextIOWrapper closes the underlying stream unless detached
        if cleanup_textio:
            _file.detach()

    # This is a path
    _open = open
    if compression:
        _open = open_compressed
    with _open(_file, mode) as f_handle:
        update_arg(f_handle)
        return func(*args, **kwargs)
def read_csv_df(file_path, delimiter, header, selected_columns=[],
                read_in_string=False, encoding='utf-8'):
    """
    Reads a CSV file in chunks and returns a dataframe in the form of iterator.

    :param file_path: path to the CSV file.
    :type file_path: str
    :param delimiter: character used to separate csv values.
    :type delimiter: str
    :param header: the header row in the csv file.
    :type header: int
    :param selected_columns: a list of columns to be processed
    :type selected_columns: list(str)
    :param read_in_string: if True, all the values in dataframe will be
        converted to string
    :type read_in_string: bool
    :return: Iterator
    :rtype: pd.DataFrame
    """
    args = {
        'delimiter': delimiter,
        'header': header,
        'iterator': True,
        'dtype': 'object',
        'keep_default_na': False,
        'encoding': encoding,
    }

    # If a header can be identified, don't skip blanks
    if header is not None:
        args.update({'skip_blank_lines': False})

    if read_in_string:
        args['dtype'] = str

    if len(selected_columns) > 0:
        args['usecols'] = selected_columns

    # account for py3.6 requirement for pandas; can remove if >= py3.7
    is_buf_wrapped = False
    if isinstance(file_path, BytesIO):
        # a BytesIO stream has to be wrapped in order to properly be detached;
        # in 3.6 this avoids read_csv wrapping the stream and closing too early
        file_path = TextIOWrapper(file_path, encoding=encoding)
        is_buf_wrapped = True

    fo = pd.read_csv(file_path, **args)
    data = fo.read()

    # if the buffer was wrapped, detach it before returning
    if is_buf_wrapped:
        file_path.detach()
    fo.close()

    return data
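# A sketch of the BytesIO branch above: the stream is wrapped for pandas,
# then detached so the caller's buffer remains open (example data only).
from io import BytesIO

buf = BytesIO(b"a,b\n1,2\n")
df = read_csv_df(buf, ",", 0)
assert not buf.closed  # detach() kept the caller's stream usable
print(df)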
def writeData(self, fp, image):
    """Write the image data part of the file (second part)."""
    wrapper = TextIOWrapper(fp, encoding='utf-8')
    for i, l in enumerate(image.T):
        fmtstr = '\t'.join(['%d'] * (1 + len(l))) + '\n'
        wrapper.write(fmtstr % ((i, ) + tuple(l.tolist())))
    wrapper.detach()
    fp.flush()
def global_vote_results_csv(request):
    ctx = request.context
    user_id = request.authenticated_userid
    if not user_id:
        raise HTTPUnauthorized()
    widget = ctx._instance
    if widget.activity_state != "ended":
        permissions = ctx.get_permissions()
        if P_ADMIN_DISC not in permissions:
            raise HTTPUnauthorized()
    user_prefs = LanguagePreferenceCollection.getCurrent()
    # first fetch the ideas voted on
    ideas = widget.db.query(Idea
        ).join(AbstractIdeaVote, AbstractIdeaVote.idea_id == Idea.id
        ).join(AbstractVoteSpecification
        ).filter(AbstractVoteSpecification.widget_id == widget.id
        ).distinct().all()
    idea_ids = [i.id for i in ideas]
    titles = [(idea.safe_title(user_prefs, request.localizer), idea.id)
              for idea in ideas]
    titles.sort()
    q = widget.db.query(Idea.id).filter(Idea.id.in_(idea_ids))
    # then get the vote specs
    specs = [(spec.title.best_lang(user_prefs).value
              if spec.title else str(spec.id), spec)
             for spec in widget.vote_specifications]
    specs.sort()
    # construct a query with each votespec creating two columns:
    # sum of vote values, and count of votes.
    # Ideas are rows (and Idea.id is column 0)
    for (t, spec) in specs:
        a = aliased(spec.get_vote_class(), name="votes_%d" % spec.id)
        q = q.outerjoin(a, (a.idea_id == Idea.id) & (a.vote_spec_id == spec.id))
        q = q.add_columns(func.sum(a.vote_value).label('vsum_%d' % spec.id),
                          func.count(a.id).label('vcount_%d' % spec.id))
    q = q.group_by(Idea.id)
    r = q.all()
    r = {x[0]: x for x in r}
    output = BytesIO()
    output_utf8 = TextIOWrapper(output, encoding='utf-8')
    csvw = csv.writer(output_utf8)
    # The TextIOWrapper handles the encoding; writing pre-encoded bytes
    # here would emit b'...' reprs under Python 3.
    csvw.writerow([""] + [t for (t, spec) in specs])
    for title, idea_id in titles:
        row = [title]
        sourcerow = r[idea_id][1:]
        for i, (t, spec) in enumerate(specs):
            num = sourcerow[1 + i * 2]
            if num:
                if isinstance(spec, TokenVoteSpecification):
                    # we want total number of tokens
                    num = 1
                # otherwise we want average vote value
                row.append(sourcerow[i * 2] / num)
            else:
                row.append("")
        csvw.writerow(row)
    output_utf8.detach()
    output.seek(0)
    return Response(body_file=output, content_type='text/csv',
                    charset="utf-8")
def _get_timeseries_without_moving_file_position(self, datastream):
    original_position = datastream.tell()
    wrapped_datastream = TextIOWrapper(datastream, encoding="utf-8",
                                       newline="\n")
    result = self._read_timeseries_from_stream(wrapped_datastream)
    wrapped_datastream.detach()  # If we don't do this the datastream will be closed
    datastream.seek(original_position)
    return result
def content_to_csv_bytes(content: Union[bytes, str, pd.DataFrame]) -> BytesIO:
    bio = BytesIO()
    wrap = TextIOWrapper(bio, encoding="utf-8", write_through=True)
    if isinstance(content, pd.DataFrame):
        content.to_csv(wrap, index=False)
    elif isinstance(content, bytes):
        wrap.write(content.decode("utf-8"))
    else:
        wrap.write(content)
    wrap.detach()
    bio.seek(0)
    return bio
@contextmanager
def _wrap_binary_stream(io_buffer):
    """Access the given stream with the correct format.

    Usually when an `io.TextIOWrapper` is destroyed, the underlying stream
    is closed. We prevent this here, because we do not control the given
    stream.
    """
    # note: iconv-style suffixes such as 'latin1//TRANSLIT' are not valid
    # Python codec names, so the plain codec name is used
    wrapper = TextIOWrapper(io_buffer, encoding='latin1', newline='')
    try:
        yield wrapper
    finally:
        wrapper.detach()
        del wrapper
def parse(self, fp, headersonly=False):
    """Create a message structure from the data in a binary file.

    Reads all the data from the file and returns the root of the message
    structure.  Optional headersonly is a flag specifying whether to stop
    parsing after reading the headers or not.  The default is False,
    meaning it parses the entire contents of the file.
    """
    fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
    try:
        return self.parser.parse(fp, headersonly)
    finally:
        fp.detach()
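# The method above follows the stdlib email.parser.BytesParser.parse
# pattern: wrap the binary file as ascii with surrogateescape so arbitrary
# bytes survive the text layer, then detach so the caller's file stays
# open. A self-contained sketch of that pattern (example data only):
from io import BytesIO, TextIOWrapper

raw = BytesIO(b"Subject: hi\r\n\r\nbody\r\n")
text = TextIOWrapper(raw, encoding='ascii', errors='surrogateescape')
try:
    print(text.read())
finally:
    text.detach()  # raw remains open and re-readable by the caller
assert not raw.closed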
def print_svg(self, filename, *args, **kwargs):
    """
    Parameters
    ----------
    filename : str or path-like or file-like
        Output target; if a string, a file will be opened for writing.

    metadata : Dict[str, Any], optional
        Metadata in the SVG file defined as key-value pairs of strings,
        datetimes, or lists of strings, e.g., ``{'Creator': 'My software',
        'Contributor': ['Me', 'My Friend'], 'Title': 'Awesome'}``.

        The standard keys and their value types are:

        * *str*: ``'Coverage'``, ``'Description'``, ``'Format'``,
          ``'Identifier'``, ``'Language'``, ``'Relation'``, ``'Source'``,
          ``'Title'``, and ``'Type'``.
        * *str* or *list of str*: ``'Contributor'``, ``'Creator'``,
          ``'Keywords'``, ``'Publisher'``, and ``'Rights'``.
        * *str*, *date*, *datetime*, or *tuple* of same: ``'Date'``. If a
          non-*str*, then it will be formatted as ISO 8601.

        Values have been predefined for ``'Creator'``, ``'Date'``,
        ``'Format'``, and ``'Type'``. They can be removed by setting them
        to `None`.

        Information is encoded as `Dublin Core Metadata`__.

        .. _DC: https://www.dublincore.org/specifications/dublin-core/

        __ DC_
    """
    with cbook.open_file_cm(filename, "w", encoding="utf-8") as fh:
        filename = getattr(fh, 'name', '')
        if not isinstance(filename, str):
            filename = ''
        if cbook.file_requires_unicode(fh):
            detach = False
        else:
            fh = TextIOWrapper(fh, 'utf-8')
            detach = True
        self._print_svg(filename, fh, **kwargs)
        # Detach underlying stream from wrapper so that it remains open in
        # the caller.
        if detach:
            fh.detach()
def detect_format(stream) -> str:
    """
    Detects the file format inside a stream.

    :exception UnicodeDecodeError
        When stream content is not valid utf-8
    """
    # utf-8-sig: Skip BOM
    ss = TextIOWrapper(stream, encoding='utf-8-sig')
    if ss.readline().startswith("<?xml"):
        ss.detach()
        return "sbml"
    else:
        ss.detach()
        return "opt"
def pipe_call(call, cwd=".", break_str=None):
    wrapper = TextIOWrapper(
        subprocess.Popen(
            call, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd
        ).stdout,
        encoding="utf-8",
    )
    for line in wrapper:
        print(line, end="")
        if break_str and break_str in line:
            # stop early; detach so the pipe itself stays open
            wrapper.detach()
            return wrapper
def process_recording(f, dbsession):
    f.seek(0)
    ft = TextIOWrapper(f, encoding='ascii', errors='replace')
    csvreader = reader(ft, delimiter='\t')
    try:
        header = next(csvreader)
    except:
        return metadataError("File not understood.")
    metadata = {"error": False}
    version = "0"
    # check header
    if header[0].startswith("@"):
        # version
        version = header[0].lstrip("@")
        headerfunc = "version_%s_header" % version
        if headerfunc in globals():
            metadata = globals()[headerfunc](header, metadata, dbsession)
        else:
            return metadataError("Unsupported recording version")
    else:
        return metadataError("Unsupported recording version")
    # process rest:
    rowsfunc = "version_%s_rows" % version
    if rowsfunc in globals():
        metadata = globals()[rowsfunc](csvreader, metadata, dbsession)
    else:
        return metadataError("Rows parser missing.")
    metadata["f"] = ft.detach()
    return metadata
def _prepare_json(cls, fname: str, data: dict):
    tio = TextIOWrapper(BytesIO(), "utf-8")
    kwargs = dict(cls=FlexJSONEncoder, allow_nan=False, skipkeys=False)
    json.dump(data, tio, **kwargs)
    bio = tio.detach()
    info = cls._get_tarinfo_from_bytesio(fname, bio)
    return info, bio
class bom_open():
    """Context manager to open a file or stdin/stdout.

    Encoding can be detected with chardet. Pass additional arguments to
    `open()`. Python writes BOM for utf-8-sig, utf-16, or utf-32. BOM is
    not written when endianness is specified.

    If `file=None` or `'-'`, open stdin (for reading) or stdout (for
    writing). If `encoding=None` and `mode='r'` or `'w+'`, file encoding
    will be detected using chardet."""

    def __init__(self, file, mode='r', buffering=-1, encoding=None,
                 *args, **kwargs):
        if file == '-':
            self.file = None
        else:
            self.file = file
        self.mode = mode
        self.buffering = buffering
        self.encoding = encoding
        self.args = args
        self.kwargs = kwargs

    def __enter__(self):
        if self.file:
            self._f = open(self.file, self.mode, self.buffering,
                           self.encoding, *self.args, **self.kwargs)
        elif self.mode == 'r':
            self._f = sys.stdin
        elif self.mode == 'w':
            if self.encoding:
                sys.stdout = open(sys.stdout.fileno(), 'w',
                                  encoding=self.encoding, buffering=1)
            self._f = sys.stdout
        else:
            raise StdIOError('No file specified, and mode not appropriate '
                             'for stdin (r) or stdout (w)')

        if (self.encoding is None
                and 'b' not in self.mode
                and ('r' in self.mode or '+' in self.mode)):
            # run chardet on buffer without advancing file position
            peek = self._f.buffer.peek()
            detected = chardet.detect(peek)
            self.encoding = detected['encoding']

            # re-attach file with detected encoding
            if self._f.encoding.lower() != self.encoding.lower():
                self._f = TextIOWrapper(self._f.detach(),
                                        encoding=self.encoding)

        return self._f

    def __exit__(self, type, value, traceback):
        self._f.close()
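# Possible usage of bom_open above (hypothetical file name; requires
# chardet for the encoding-detection step):
with bom_open('data.csv') as f:
    for line in f:
        print(line, end='')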
def main(args=None):
    parser = create_parser()
    args = parser.parse_args(args)

    if args.filename == '-':  # read from stdin
        if PY2:
            data = getreader(args.encoding)(sys.stdin).read()
        else:
            wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding)
            try:
                data = wrapper.read()
            finally:
                wrapper.detach()
    else:
        try:
            with open(args.filename, 'r', args.encoding) as f:
                data = ''.join(f.readlines())
        except IOError as e:
            return _error(
                u'Failed to read {0}: {1}'.format(args.filename, e))

    close_stream = False
    if args.outfile:
        try:
            stream = open(args.outfile, 'w', args.encoding)
            close_stream = True
        except IOError as e:
            return _error(u'Failed to open {0}: {1}'.format(args.outfile, e))
    else:
        stream = sys.stdout

    formatter_opts = vars(args)
    try:
        formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
    except SQLParseError as e:
        return _error(u'Invalid options: {0}'.format(e))

    s = sqlparse.format(data, **formatter_opts)
    stream.write(s)
    stream.flush()
    if close_stream:
        stream.close()
    return 0
def writeMetaInformation(self, fp, title="Device snapshot",
                         update_headerinfo=None):
    """Utility method for writing a standard NICOS header, to be used by
    derived sinks."""
    bycategory = self._collectMetaInformation(update_headerinfo)
    wrapper = TextIOWrapper(fp, encoding='utf-8')
    wrapper.write('### NICOS %s V2.0\n' % title)
    for category, catname in INFO_CATEGORIES:
        if category not in bycategory:
            continue
        wrapper.write('### %s\n' % catname)
        for key, value in sorted(bycategory[category]):
            wrapper.write('%25s : %s\n' % (key, value))
    # to ease interpreting the data...
    # note: arraydesc exists only for ImageSinks
    if hasattr(self, '_arraydesc'):
        wrapper.write('\n%r' % self._arraydesc)
    wrapper.write('\n')
    wrapper.detach()
def func_wrapper(fobj, *args, **kwargs):
    close_file = False
    if not hasattr(fobj, 'read'):
        fobj = open(fobj, 'r', encoding='utf-8')
        fobj_text = fobj
        close_file = True
    elif is_binary_stream(fobj):
        fobj_text = TextIOWrapper(fobj, encoding='utf-8')
    else:
        fobj_text = fobj
    try:
        retvals = func(fobj_text, *args, **kwargs)
    finally:
        if is_binary_stream(fobj):
            fobj_text.detach()
            fobj_text = fobj
        if close_file:
            fobj_text.close()
    return retvals
def test_io_wrapper(self):
    content = "vive l'été\n"
    with tempfile.TemporaryFile() as temp, \
            File(temp, name='something.txt') as test_file:
        test_file.write(content.encode())
        test_file.seek(0)
        wrapper = TextIOWrapper(test_file, 'utf-8', newline='\n')
        self.assertEqual(wrapper.read(), content)
        wrapper.write(content)
        wrapper.seek(0)
        self.assertEqual(wrapper.read(), content * 2)
        test_file = wrapper.detach()
        test_file.seek(0)
        self.assertEqual(test_file.read(), (content * 2).encode())
def writeHeader(self, fp, metainfo, image):
    fp.seek(0)
    wrapper = TextIOWrapper(fp, encoding='utf-8')
    wrapper.write('\n%s PUMA Polarisation File Header V2.0\n' %
                  (self.sink.commentchar * 3))
    # XXX(dataapi): add a utility function to convert metainfo to old
    # by-category format
    bycategory = {}
    for (device, key), (_, val, unit, category) in metainfo.items():
        if category:
            bycategory.setdefault(category, []).append(
                ('%s_%s' % (device, key), (val + ' ' + unit).strip()))
    for category, catname in INFO_CATEGORIES:
        if category not in bycategory:
            continue
        wrapper.write('%s %s\n' % (self.sink.commentchar * 3, catname))
        for key, value in sorted(bycategory[category]):
            wrapper.write('%25s : %s\n' % (key, value))
    # to ease interpreting the data...
    # wrapper.write('\n%r' % self._arraydesc)
    wrapper.write('\n')
    wrapper.detach()
    fp.flush()
def test_io_wrapper(self):
    content = "vive l'été\n"
    with tempfile.TemporaryFile() as temp, \
            File(temp, name='something.txt') as test_file:
        test_file.write(content.encode('utf-8'))
        test_file.seek(0)
        wrapper = TextIOWrapper(test_file, 'utf-8', newline='\n')
        self.assertEqual(wrapper.read(), content)
        # The following seek() call is required on Windows Python 2 when
        # switching from reading to writing.
        wrapper.seek(0, 2)
        wrapper.write(content)
        wrapper.seek(0)
        self.assertEqual(wrapper.read(), content * 2)
        test_file = wrapper.detach()
        test_file.seek(0)
        self.assertEqual(test_file.read(), (content * 2).encode('utf-8'))
def main(freqs, selection=None, *, synonyms=None):
    deadline = monotonic() + 1
    midline = False
    tree = dict()  # {subname: ..., ...}
    if selection:
        prev = None
        with open(selection, "rt") as reader:
            for plant in reader:
                plant = plant.rstrip(" \r\n")
                key = list()
                for word in plant.split(" "):
                    abbr = word
                    if abbr.endswith("."):
                        abbr = abbr[:-1]
                    if abbr in db.abbr:
                        continue
                    if not key:
                        if word.istitle():
                            word = word.lower()
                        else:
                            msg = "Genus {!r} is not in title case"
                            print(msg.format(word), file=stderr)
                    if word.endswith("."):
                        if prev is None:
                            msg = "No previous entry to expand {!r} from"
                            print(msg.format(plant), file=stderr)
                        elif len(prev) > len(key) \
                                and prev[:len(key)] == key \
                                and prev[len(key)].startswith(word[:-1]):
                            word = prev[len(key)]
                        else:
                            print("Abbreviated {!r} does not match "
                                  "previous entry".format(plant), file=stderr)
                    key.append(word)
                prev = key
                [children, remainder] = lookup_tree(tree, key)
                if remainder:
                    if children or children is tree:
                        add_tree(children, remainder)
                    else:
                        msg = "Supertaxon of {} already listed".format(plant)
                        print(msg, file=stderr)
                else:
                    if children:
                        while children:
                            [subname, _] = children.popitem()
                            msg = "{} subtaxon {} already listed"
                            print(msg.format(plant, subname), file=stderr)
                    else:
                        msg = "{} equivalent already listed".format(plant)
                        print(msg, file=stderr)
        parse_synonyms(synonyms, tree)

    selected = set()
    evcs = list()  # [(evc, desc, {name: freq for each plant}) for each EVC]
    max_freqs = list()  # [max(freq) for each EVC]
    with closing(FreqExcelReader(freqs)) as freqs:
        total = format(len(freqs))
        last_evc = None
        for [i, plant] in enumerate(freqs):
            if stderr:
                now = monotonic()
                if now >= deadline:
                    if midline:
                        stderr.write("\r")
                    msg = "Record {:{}}/{}".format(i + 1, len(total), total)
                    stderr.write(msg)
                    stderr.flush()
                    midline = True
                    deadline = now + 0.1
            if plant["EVC"] != last_evc:
                last_evc = plant["EVC"]
                last_desc = plant["EVC_DESC"]
                plant_freqs = dict()
                evcs.append((last_evc, last_desc, plant_freqs))
                max_freqs.append(plant["Frequency"])
            else:
                max_freqs[-1] = max(max_freqs[-1], plant["Frequency"])
                if plant["EVC_DESC"] != last_desc:
                    msg = "EVC {} EVC_DESC inconsistent between {!r} and " \
                        "{!r}".format(last_evc, last_desc, plant["EVC_DESC"])
                    print(msg, file=stderr)
                    last_desc = plant["EVC_DESC"]
            name = plant["NAME"]
            if selection:
                key = list(n[0] for n in db.plant_key(name))
                if not key[-1]:
                    key.pop()
                [children, remainder] = lookup_tree(tree, key)
                if remainder and children:
                    continue
                selected.add(name)
            if name in plant_freqs:
                msg = "Duplicate record for {NAME} in {EVC}"
                print(msg.format_map(plant), file=stderr)
            plant_freqs[name] = plant_freqs.get(name, 0) + plant["Frequency"]
    if stderr and midline:
        stderr.write("\x1B[1K\r")
        stderr.flush()

    out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
                        newline="", line_buffering=stdout.line_buffering)
    try:
        writer = csv.writer(out)
        writer.writerow(("EVC", "EVC_DESC", "max(Frequency)"))
        for [[evc, desc, _], max_freq] in zip(evcs, max_freqs):
            writer.writerow((evc, desc, max_freq))
        writer.writerow(("NAME",) + tuple(evc for [evc, _, _] in evcs))
        for plant in sorted(selected, key=db.plant_key):
            row = [plant]
            for [[_, _, freqs], max_freq] in zip(evcs, max_freqs):
                freq = freqs.get(plant)
                if freq is None:
                    row.append(None)
                    continue
                found = True
                row.append(format(freq / max_freq, ".2f"))
            writer.writerow(row)
            if selection:
                # Prune any non-branching paths leading to this entry
                key = list(n[0] for n in db.plant_key(plant))
                if not key[-1]:
                    key.pop()
                node = tree
                for subkey in key:
                    if len(node) > 1:
                        branch_node = node
                        branch_name = subkey
                    try:
                        node = node[subkey]
                    except LookupError:
                        break
                if not node:
                    del branch_node[branch_name]
    finally:
        # detach so stdout.buffer is not closed with the wrapper
        out.detach()
    if selection:
        for path in walk_tree(tree):
            msg = "No records matching {}"
            print(msg.format(" ".join(path).capitalize()), file=stderr)
def main(file):
    with open(file, "rb") as file:
        ole = OleFileIO(file)
        doc = ole.openstream("WordDocument")
        base = FibBase.unpack(doc.read(FibBase.size))
        [wIdent, _, _, _, _, bits_fm, _, _, _, _] = base
        assert wIdent == WORD_BINARY_FILE
        fWhichTblStm = bits_fm >> WHICH_TBL_STM_BIT & 1
        [csw] = unsigned2.unpack(doc.read(2))
        doc.seek(csw * 2, SEEK_CUR)
        [cslw] = unsigned2.unpack(doc.read(2))
        doc.seek(cslw * 4, SEEK_CUR)
        [cbRgFcLcb] = unsigned2.unpack(doc.read(2))
        cbRgFcLcb *= 8
        assert cbRgFcLcb >= FibRgFcLcb97.size
        fibRgFcLcb97 = FibRgFcLcb97.unpack(doc.read(FibRgFcLcb97.size))
        [fcPlcfBtePapx, lcbPlcfBtePapx, fcClx, lcbClx] = fibRgFcLcb97
        table = ole.openstream("{}Table".format(fWhichTblStm))

        out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
                            newline="", line_buffering=stdout.line_buffering)
        try:
            writer = csv.writer(out)
            row = list()
            cell = None
            pieces = Pieces(doc, table, fcClx, lcbClx)
            i = 0
            while i < len(pieces):  # For each piece starting a paragraph
                piece = pieces[i]
                paras = iter_paras_from(doc, ole, table, fcPlcfBtePapx,
                                        lcbPlcfBtePapx, piece.byte_offset)
                while True:  # For each paragraph in the current piece
                    # Scan ahead to find how many pieces span this paragraph
                    j = i
                    scan_piece = piece
                    while True:
                        [end, in_table, is_ttp] = next(paras)
                        end -= scan_piece.byte_offset
                        if end <= scan_piece.bytes_remaining:
                            break
                        while True:  # For each piece without paragraph info
                            j += 1
                            piece = pieces[j]
                            paras = iter_paras_from(doc, table, fcPlcfBtePapx,
                                                    lcbPlcfBtePapx,
                                                    scan_piece.byte_offset)
                            if paras is not None:
                                break
                    # Found a paragraph spanning pieces i-j
                    if is_ttp:
                        writer.writerow(row)
                        row.clear()
                    if in_table and not is_ttp:
                        if not cell:
                            cell = StringIO()
                        while i < j:
                            copyfileobj(piece.get_reader(), cell)
                            i += 1
                            piece = pieces[i]
                        assert end
                        reader = piece.get_reader(end - piece.code_size)
                        copyfileobj(reader, cell)
                        mark = piece.get_reader(piece.code_size).read()
                        if mark == "\x07":
                            row.append(cell.getvalue())
                            cell = None
                        else:
                            cell.write(mark)
                    else:
                        assert not row
                        assert not cell
                        if i < j:
                            i = j
                            piece = pieces[i]
                        piece.skip(end)
                    if not piece.bytes_remaining:
                        break
                i += 1
            assert not row
            assert not cell
        finally:
            out.detach()
        for [exctype, msg] in ole.parsing_issues:
            print("{}: {}".format(exctype.__name__, msg), file=stderr)
# change encoding layer
f = TextIOWrapper(f.buffer, encoding='latin-1')
print(f)
# the original top layer is destroyed and the underlying file is closed
# with it, so writing through it now fails:
# f.write('Hello')  # ValueError: I/O operation on closed file

print('\n!---SECTION---\n')

# detach()
# disconnects the topmost layer of a file and returns the next lower
# layer; the top layer will no longer be usable
f = open('sample.txt', 'w')
print(f)
b = f.detach()
print(b)
# f.write('hello')  # ValueError: underlying buffer has been detached

print('\n!---SECTION---\n')

# change top layer
f = TextIOWrapper(b, encoding='latin')
print(f)

# change line handling, error policy, & other aspects of file handling
# sys.stdout = TextIOWrapper(sys.stdout.detach(), encoding='ascii',
#                            errors='xmlcharrefreplace')
# print('Jalape\u00f1o')  # for terminals that cannot handle non-ASCII
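# A small self-contained demonstration of the detach() semantics shown in
# the notes above, using BytesIO instead of a real file (a sketch, not part
# of the original notes):
from io import BytesIO, TextIOWrapper

raw = BytesIO()
text = TextIOWrapper(raw, encoding='utf-8')
text.write('Jalape\u00f1o')
inner = text.detach()        # flushes, then returns the buffer layer
assert inner is raw and not raw.closed
print(raw.getvalue())        # b'Jalape\xc3\xb1o'
# text is now unusable:
# text.write('x')  # ValueError: underlying buffer has been detached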