# NOTE(review): whitespace-collapsed fragment of the xlsx plugin module.
# It begins MID-DEFINITION: everything up to the first `yield StreamFooter()`
# is the tail of the pull-side `read` generator, whose `def` header and the
# `it` row iterator it consumes are not visible in this chunk, so the
# fragment cannot be safely re-indented or restyled here.
# What the visible code shows:
#   * read tail: takes `fields` from kwargs or from the first worksheet row,
#     yields a StreamHeader, then yields one normalized namedtuple row per
#     sheet row, stopping early at the first all-empty row, and ends with a
#     StreamFooter.
#   * write: push plugin — builds an openpyxl Workbook(optimized_write=True),
#     appends the header fields then one row per stream record until a
#     StreamFooter arrives, and saves to `outfile`.
#   * module tail: registers the pull ('read', need_seek=True) and push
#     ('write') plugins with BabeBase under the 'xlsx' format.
# TODO(review): recover the original indentation/def header before editing.
fields = kwargs.get('fields', None) if not fields: fields = [cell.internal_value for cell in it.next()] metainfo = StreamHeader(**dict(kwargs, fields=fields)) yield metainfo # it brings a new method: iter_rows() for row in it: ## stop at the first row with "none" nrow = map(valuenormalize, row) if not any(nrow): break yield metainfo.t._make(nrow) yield StreamFooter() def write(format, metainfo, instream, outfile, encoding, **kwargs): from openpyxl import Workbook wb = Workbook(optimized_write=True) ws = wb.create_sheet() ws.append(metainfo.fields) for k in instream: if isinstance(k, StreamFooter): break else: ws.append(list(k)) wb.save(outfile) BabeBase.addPullPlugin('xlsx', ['xlsx'], read, need_seek=True) BabeBase.addPushPlugin('xlsx', ['xlsx'], write)
# NOTE(review): whitespace-collapsed fragment of the xlsx plugin module,
# near-duplicate of an adjacent chunk but starting one statement earlier
# (`it = ws.iter_rows()`). It still begins MID-DEFINITION: the enclosing
# `read` generator's `def` header and the `ws` worksheet object are not
# visible, so the fragment cannot be safely re-indented or restyled here.
# Visible behavior: iterate worksheet rows, take `fields` from kwargs or the
# first row's cell values, yield a StreamHeader, then yield normalized rows
# until the first all-empty row, then a StreamFooter; `write` is the push
# side (optimized-write openpyxl workbook, header row + one row per record,
# stop at StreamFooter, save); the tail registers both plugins for 'xlsx'.
# TODO(review): recover the original indentation/def header before editing.
it = ws.iter_rows() fields = kwargs.get('fields', None) if not fields: fields = [cell.internal_value for cell in it.next()] metainfo = StreamHeader(**dict(kwargs, fields=fields)) yield metainfo # it brings a new method: iter_rows() for row in it: ## stop at the first row with "none" nrow = map(valuenormalize, row) if not any(nrow): break yield metainfo.t._make(nrow) yield StreamFooter() def write(format, metainfo, instream, outfile, encoding, **kwargs): from openpyxl import Workbook wb = Workbook(optimized_write=True) ws = wb.create_sheet() ws.append(metainfo.fields) for k in instream: if isinstance(k, StreamFooter): break else: ws.append(list(k)) wb.save(outfile) BabeBase.addPullPlugin('xlsx', ['xlsx'], read, need_seek=True) BabeBase.addPushPlugin('xlsx', ['xlsx'], write)
# NOTE(review): whitespace-collapsed fragment of the csv plugin module.
# It begins MID-DEFINITION: the leading attribute assignments (`delimiter`,
# `doublequote`, `escapechar`, `quoting`, `quotechar`) are the tail of a
# csv.Dialect subclass whose `class` header is not visible in this chunk, so
# the fragment cannot be safely re-indented or restyled here.
# Visible behavior of `push`: defaults encoding to "utf8", picks the dialect
# from kwargs (falling back to the module's default dialect), writes the
# header fields then one row per record via UnicodeCSVWriter until a
# StreamFooter arrives; the tail registers pull/push for csv/tsv/txt.
# NOTE(review): `dialect.delimiter = delimiter` appears to assign onto the
# dialect CLASS (kwargs default is the class itself, not an instance), so a
# custom delimiter would persist across calls — verify against the full
# module before relying on this.
# TODO(review): recover the original indentation/class header before editing.
delimiter = ',' doublequote = False escapechar = '\\' quoting = csv.QUOTE_MINIMAL quotechar = '"' def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs): if not encoding: encoding = "utf8" dialect = kwargs.get('dialect', default_dialect) if delimiter: dialect.delimiter = delimiter writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding) writer.writerow(metainfo.fields) for k in instream: if isinstance(k, StreamFooter): break else: writer.writerow(k) BabeBase.addPullPlugin('csv', ['csv', 'tsv', 'txt'], pull) BabeBase.addPushPlugin('csv', ['csv', 'tsv', 'txt'], push)
def write(format, header, instream, outfile, encoding, **kwargs):
    """HTML push plugin: render a Babe stream as a single <table>.

    Emits an <h2> title (the stream name), an optional italicized
    description, a header row of <th> cells for ``header.fields``, then one
    <tr> per record until a StreamFooter is seen, which closes the table.
    Cell text is produced by the module's ``write_value`` helper.
    """
    if not encoding:
        encoding = "utf-8"
    emit = outfile.write
    # Title and optional description above the table.
    emit("<h2>")
    emit(header.get_stream_name())
    emit("</h2>")
    if header.description:
        emit("<p><i>")
        emit(header.description)
        emit("</i></p>")
    # Header row: one <th> per declared field.
    emit('<table>\n<tr>')
    for field in header.fields:
        emit("<th>")
        emit(write_value(field, encoding))
        emit("</th>")
    emit("</tr>\n")
    # Body rows; the footer marker closes the table and ends consumption.
    for row in instream:
        if isinstance(row, StreamFooter):
            emit("</table>\n")
            break
        emit("<tr>")
        for cell in row:
            emit("<td>")
            emit(write_value(cell, encoding))
            emit("</td>")
        emit("</tr>\n")


BabeBase.addPushPlugin('html', ['html', 'htm'], write)
def pull(format, stream, kwargs):
    """JSON pull plugin: parse line-delimited JSON (one object per line).

    Yields a StreamHeader whenever the field list changes from the previous
    line, then one row (namedtuple) per JSON object, and finally a
    StreamFooter.
    """
    from collections import OrderedDict
    stream = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)
    previous_fields = None
    metainfo = None
    for line in stream:
        # Fix: decode into an OrderedDict so fields keep the document's key
        # order. With a plain dict the column order was nondeterministic and
        # identical consecutive objects could spuriously re-emit headers.
        data = json.loads(line, object_pairs_hook=OrderedDict)
        fields = list(data.keys())
        if previous_fields != fields:
            metainfo = StreamHeader(**dict(kwargs, fields=fields))
            previous_fields = fields
            yield metainfo
        # values() iterates in the same (now stable) order as keys().
        yield metainfo.t._make(list(data.values()))
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    """JSON push plugin: serialize each row as one JSON object per line.

    A StreamHeader arriving mid-stream replaces the field names used for
    subsequent rows; a StreamFooter stops consumption.
    """
    outstream = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        elif isinstance(row, StreamHeader):
            metainfo = row
        else:
            outstream.write(json.dumps(dict(zip(metainfo.fields, row))))
            outstream.write("\n")
            outstream.flush()


BabeBase.addPullPlugin('json', ['json'], pull)
BabeBase.addPushPlugin('json', ['json'], push)
import codecs

from base import StreamHeader, BabeBase, StreamFooter


def pull(format, stream, kwargs):
    """Text pull plugin: one row per input line.

    Wraps the byte stream in a codecs reader (encoding from kwargs, default
    'utf8'), yields a StreamHeader whose fields come from kwargs (default
    ['text']), then one single-cell row per line, then a StreamFooter.
    Lines keep whatever line terminator the reader yields.
    """
    reader = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)
    header = StreamHeader(**dict(kwargs, fields=kwargs.get('fields', ['text'])))
    yield header
    for line in reader:
        yield header.t._make([line])
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    """Text push plugin: write every cell of every row verbatim.

    Stops at the first StreamFooter. No separators are added, so this is
    the inverse of pull() only when cells already carry their newlines.
    """
    writer = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        for cell in row:
            writer.write(cell)
    # NOTE(review): the source chunk's indentation was lost; the flush is
    # placed after the row loop — confirm against the original file.
    writer.flush()


BabeBase.addPullPlugin('txt', ['txt'], pull)
BabeBase.addPushPlugin('txt', ['txt'], push)
# NOTE(review): whitespace-collapsed fragment of the csv plugin module.
# It begins MID-DEFINITION: the opening `for row in csvpull(...)` /
# `yield row` is the tail of the pull-side generator whose `def` header is
# not visible in this chunk, so the fragment cannot be safely re-indented or
# restyled here.
# What the visible code shows:
#   * default_dialect: csv.Dialect subclass — '\n' line terminator, ','
#     delimiter, backslash escaping, minimal '"' quoting.
#   * push: defaults encoding to "utf8", picks the dialect from kwargs
#     (falling back to default_dialect), writes header fields then one row
#     per record via UnicodeCSVWriter until a StreamFooter arrives.
#   * module tail: registers pull/push for the csv/tsv/txt extensions.
# NOTE(review): `dialect.delimiter = delimiter` assigns onto the dialect
# CLASS when the default is used, so a custom delimiter would persist across
# calls — verify against the full module.
# TODO(review): recover the original indentation/def header before editing.
for row in csvpull(stream, dialect, kwargs): yield row class default_dialect(csv.Dialect): lineterminator = '\n' delimiter = ',' doublequote = False escapechar = '\\' quoting = csv.QUOTE_MINIMAL quotechar = '"' def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs): if not encoding: encoding = "utf8" dialect = kwargs.get('dialect', default_dialect) if delimiter: dialect.delimiter = delimiter writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding) writer.writerow(metainfo.fields) for k in instream: if isinstance(k, StreamFooter): break else: writer.writerow(k) BabeBase.addPullPlugin('csv', ['csv', 'tsv', 'txt'], pull) BabeBase.addPushPlugin('csv', ['csv', 'tsv', 'txt'], push)
def pull(format, stream, kwargs):
    """JSON pull plugin: parse line-delimited JSON (one object per line).

    Yields a StreamHeader whenever the field list changes from the previous
    line, then one row (namedtuple) per JSON object, and finally a
    StreamFooter.
    """
    from collections import OrderedDict
    stream = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)
    previous_fields = None
    metainfo = None
    for line in stream:
        # Fix: decode into an OrderedDict so fields keep the document's key
        # order. With a plain dict the column order was nondeterministic and
        # identical consecutive objects could spuriously re-emit headers.
        data = json.loads(line, object_pairs_hook=OrderedDict)
        fields = list(data.keys())
        if previous_fields != fields:
            metainfo = StreamHeader(**dict(kwargs, fields=fields))
            previous_fields = fields
            yield metainfo
        # values() iterates in the same (now stable) order as keys().
        yield metainfo.t._make(list(data.values()))
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    """JSON push plugin: serialize each row as one JSON object per line.

    A StreamHeader arriving mid-stream replaces the field names used for
    subsequent rows; a StreamFooter stops consumption.
    """
    outstream = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        elif isinstance(row, StreamHeader):
            metainfo = row
        else:
            outstream.write(json.dumps(dict(zip(metainfo.fields, row))))
            outstream.write("\n")
            outstream.flush()


BabeBase.addPullPlugin('json', ['json'], pull)
BabeBase.addPushPlugin('json', ['json'], push)