fields = kwargs['fields'] table = kwargs['table'] header = StreamHeader(fields=fields, table=table) yield header prefix = "INSERT INTO `%s` VALUES " % table try: for line in stream: if not line.startswith(prefix): continue pos = len(prefix) while pos < len(line): (elts, pos) = parse_tuple(pos, line) yield header.t(*elts) if line[pos] == ',': pos = pos + 1 continue elif line[pos] == ';': break else: raise Exception("ParseError pos %u " % pos) except TypeError, e: print len(elts), elts raise e yield StreamFooter() BabeBase.addPullPlugin("sql", ["sql"], pull) if __name__ == "__main__": for line in sys.stdin: print parse_tuple(0, line)
def read(format, stream, kwargs):
    # NOTE(review): the opening of this generator (its 'def' header and the
    # openpyxl workbook/worksheet setup binding 'it') is missing from this
    # chunk; it is reconstructed here as the usual iterator-mode read of the
    # first worksheet -- confirm against the original file.
    """Pull rows from the first worksheet of an .xlsx workbook.

    Yields a StreamHeader (fields from kwargs, or from the first worksheet
    row), then one row per data row -- stopping at the first row whose cells
    are all empty -- then a StreamFooter.
    """
    from openpyxl import load_workbook
    wb = load_workbook(filename=stream, use_iterators=True)
    ws = wb.worksheets[0]
    it = ws.iter_rows()
    fields = kwargs.get('fields', None)
    if not fields:
        # No explicit field list: the first row is the header row.
        fields = [cell.internal_value for cell in it.next()]
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    for row in it:
        normalized = [valuenormalize(cell) for cell in row]
        # Stop at the first row whose cells are all empty/falsy.
        if not any(normalized):
            break
        yield metainfo.t._make(normalized)
    yield StreamFooter()


def write(format, metainfo, instream, outfile, encoding, **kwargs):
    """Write the stream to 'outfile' as .xlsx via openpyxl's optimized writer."""
    from openpyxl import Workbook
    workbook = Workbook(optimized_write=True)
    sheet = workbook.create_sheet()
    sheet.append(metainfo.fields)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        sheet.append(list(row))
    workbook.save(outfile)


BabeBase.addPullPlugin('xlsx', ['xlsx'], read, need_seek=True)
BabeBase.addPushPlugin('xlsx', ['xlsx'], write)
class default_dialect(csv.Dialect):
    """Default CSV dialect: comma-separated, backslash-escaped, minimal quoting."""
    # NOTE(review): the class header and the 'lineterminator' attribute are
    # not visible in this chunk; both are reconstructed from the duplicate
    # copy of this code later in the file -- confirm against the original.
    lineterminator = '\n'
    delimiter = ','
    doublequote = False
    escapechar = '\\'
    quoting = csv.QUOTE_MINIMAL
    quotechar = '"'


def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs):
    """Write the stream to 'outfile' as CSV.

    'delimiter', when given, overrides the dialect's delimiter for this call
    only.  'kwargs' may supply 'dialect' (a csv.Dialect subclass); otherwise
    default_dialect is used.  Encoding defaults to UTF-8.
    """
    if not encoding:
        encoding = "utf8"
    dialect = kwargs.get('dialect', default_dialect)
    if delimiter:
        # BUG FIX: the original did 'dialect.delimiter = delimiter', mutating
        # the shared dialect class in place, so the override leaked into every
        # subsequent push.  Derive a throwaway subclass instead.
        class _call_dialect(dialect):
            pass
        _call_dialect.delimiter = delimiter
        dialect = _call_dialect
    writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding)
    writer.writerow(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            break
        writer.writerow(k)


BabeBase.addPullPlugin('csv', ['csv', 'tsv', 'txt'], pull)
BabeBase.addPushPlugin('csv', ['csv', 'tsv', 'txt'], push)
table = kwargs['table'] header = StreamHeader(fields=fields, table=table) yield header prefix = "INSERT INTO `%s` VALUES " % table try: for line in stream: if not line.startswith(prefix): continue pos = len(prefix) while pos < len(line): (elts, pos) = parse_tuple(pos, line) yield header.t(*elts) if line[pos] == ',': pos = pos+1 continue elif line[pos] == ';': break else: raise Exception("ParseError pos %u " % pos) except TypeError, e: print len(elts), elts raise e yield StreamFooter() BabeBase.addPullPlugin("sql", ["sql"], pull) if __name__ == "__main__": for line in sys.stdin: print parse_tuple(0, line)
from base import StreamHeader, BabeBase, StreamFooter


def valuenormalize(cell):
    """Return the raw value held by an xlrd cell."""
    return cell.value


def read(format, stream, kwargs):
    """Pull rows from the first sheet of an .xls workbook.

    Yields a StreamHeader (fields from kwargs, or read from row 0 of the
    sheet), then one row per remaining sheet row, then a StreamFooter.
    An 'encoding' kwarg, when present, overrides the workbook encoding.
    """
    import xlrd
    workbook = xlrd.open_workbook(file_contents=stream.read(),
                                  encoding_override=kwargs.get('encoding', None))
    sheet = workbook.sheet_by_index(0)
    fields = kwargs.get('fields', None)
    if fields:
        first_data_row = 0   # explicit fields: every sheet row is data
    else:
        first_data_row = 1   # row 0 supplies the field names
        fields = [cell.value for cell in sheet.row(0)]
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    for row_index in xrange(first_data_row, sheet.nrows):
        cells = sheet.row(row_index)
        yield metainfo.t._make([valuenormalize(cell) for cell in cells])
    yield StreamFooter()


BabeBase.addPullPlugin('xls', ['xls'], read, need_seek=False)
def read(format, stream, kwargs):
    # NOTE(review): the 'def' header and the openpyxl workbook setup that
    # binds 'ws' are missing from this chunk; both are reconstructed here as
    # the usual iterator-mode read of the first worksheet -- confirm against
    # the original file.
    """Pull rows from the first worksheet of an .xlsx workbook.

    Yields a StreamHeader (fields from kwargs, or from the first worksheet
    row), then one row per data row -- stopping at the first row whose cells
    are all empty -- then a StreamFooter.
    """
    from openpyxl import load_workbook
    wb = load_workbook(filename=stream, use_iterators=True)
    ws = wb.worksheets[0]
    it = ws.iter_rows()
    fields = kwargs.get('fields', None)
    if not fields:
        # No explicit field list: consume the first row as the header.
        fields = [cell.internal_value for cell in it.next()]
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    for row in it:
        normalized = [valuenormalize(cell) for cell in row]
        # An all-empty row marks the end of the data region.
        if not any(normalized):
            break
        yield metainfo.t._make(normalized)
    yield StreamFooter()


def write(format, metainfo, instream, outfile, encoding, **kwargs):
    """Write the stream to 'outfile' as .xlsx via openpyxl's optimized writer."""
    from openpyxl import Workbook
    workbook = Workbook(optimized_write=True)
    sheet = workbook.create_sheet()
    sheet.append(metainfo.fields)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        sheet.append(list(row))
    workbook.save(outfile)


BabeBase.addPullPlugin('xlsx', ['xlsx'], read, need_seek=True)
BabeBase.addPushPlugin('xlsx', ['xlsx'], write)
def pull(format, stream, kwargs):
    """Pull rows from a JSON-lines stream (one JSON object per line).

    A new StreamHeader is emitted whenever the key set of the incoming
    objects changes; each object's values are yielded as one row.
    """
    reader = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)
    previous_fields = None
    metainfo = None
    for line in reader:
        record = json.loads(line)
        fields = record.keys()
        if fields != previous_fields:
            # First record, or the key set changed: start a new header.
            metainfo = StreamHeader(**dict(kwargs, fields=fields))
            previous_fields = fields
            yield metainfo
        yield metainfo.t._make(record.values())
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    """Write the stream to 'outfile' as JSON lines, one object per row."""
    writer = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for item in instream:
        if isinstance(item, StreamFooter):
            break
        if isinstance(item, StreamHeader):
            # Remember the current field names for the rows that follow.
            metainfo = item
            continue
        writer.write(json.dumps(dict(zip(metainfo.fields, item))))
        writer.write("\n")
    writer.flush()


BabeBase.addPullPlugin('json', ['json'], pull)
BabeBase.addPushPlugin('json', ['json'], push)
import codecs

from base import StreamHeader, BabeBase, StreamFooter


def pull(format, stream, kwargs):
    """Yield every line of a text stream as a single-field ('text') row."""
    reader = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)
    fields = kwargs.get('fields', ['text'])
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    for line in reader:
        yield metainfo.t._make([line])
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    """Write every cell of every row verbatim to the output stream."""
    writer = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        for cell in row:
            writer.write(cell)
    writer.flush()


BabeBase.addPullPlugin('txt', ['txt'], pull)
BabeBase.addPushPlugin('txt', ['txt'], push)
def pull(format, stream, kwargs):
    # NOTE(review): the opening of this generator is missing from this chunk;
    # only the delegation loop over csvpull() is original.  The header and
    # the dialect lookup are reconstructed -- confirm against the original.
    """Pull rows from a CSV stream by delegating to csvpull()."""
    dialect = kwargs.get('dialect', default_dialect)
    for row in csvpull(stream, dialect, kwargs):
        yield row


class default_dialect(csv.Dialect):
    """Default CSV dialect: comma-separated, backslash-escaped, minimal quoting."""
    lineterminator = '\n'
    delimiter = ','
    doublequote = False
    escapechar = '\\'
    quoting = csv.QUOTE_MINIMAL
    quotechar = '"'


def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs):
    """Write the stream to 'outfile' as CSV.

    'delimiter', when given, overrides the dialect's delimiter for this call
    only.  'kwargs' may supply 'dialect' (a csv.Dialect subclass); otherwise
    default_dialect is used.  Encoding defaults to UTF-8.
    """
    if not encoding:
        encoding = "utf8"
    dialect = kwargs.get('dialect', default_dialect)
    if delimiter:
        # BUG FIX: the original did 'dialect.delimiter = delimiter', mutating
        # the shared dialect class in place, so the override leaked into every
        # subsequent push.  Derive a throwaway subclass instead.
        class _call_dialect(dialect):
            pass
        _call_dialect.delimiter = delimiter
        dialect = _call_dialect
    writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding)
    writer.writerow(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            break
        writer.writerow(k)


BabeBase.addPullPlugin('csv', ['csv', 'tsv', 'txt'], pull)
BabeBase.addPushPlugin('csv', ['csv', 'tsv', 'txt'], push)