Example #1

import sys
from base import StreamHeader, BabeBase, StreamFooter


# The scraped snippet is truncated: the full source also defines
# parse_tuple(pos, line) (a hypothetical stand-in is sketched after this
# example), and the pull() signature below is reconstructed from the
# registration call at the bottom.
def pull(format, stream, kwargs):
    fields = kwargs['fields']
    table = kwargs['table']
    header = StreamHeader(fields=fields, table=table)
    yield header
    prefix = "INSERT INTO `%s` VALUES " % table
    try:
        for line in stream:
            if not line.startswith(prefix):
                continue
            pos = len(prefix)
            while pos < len(line):
                (elts, pos) = parse_tuple(pos, line)
                yield header.t(*elts)
                if line[pos] == ',':
                    pos = pos + 1
                    continue
                elif line[pos] == ';':
                    break
                else:
                    raise Exception("ParseError at pos %u" % pos)
    except TypeError, e:
        # A TypeError here usually means a parsed tuple's arity does not
        # match the declared fields; dump it before re-raising.
        print len(elts), elts
        raise
    yield StreamFooter()

BabeBase.addPullPlugin("sql", ["sql"], pull)

if __name__ == "__main__":
    for line in sys.stdin:
        print parse_tuple(0, line)
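
The call site above pins down parse_tuple's contract even though its definition is cut off by the snippet: it consumes one parenthesized value tuple starting at pos and returns (values, position just past the closing parenthesis), leaving the caller to dispatch on ',' or ';'. A minimal, hypothetical stand-in that honors that contract (unlike the real parser, it has no handling of quoted ')' or escapes):

def parse_tuple(pos, line):
    # Hypothetical sketch: parse one "(v1,v2,...)" group starting at pos.
    assert line[pos] == '('
    end = line.index(')', pos)  # naive: breaks on ')' inside quoted strings
    elts = [v.strip() for v in line[pos + 1:end].split(',')]
    return (elts, end + 1)

print parse_tuple(0, "(1,'a'),(2,'b');")  # (['1', "'a'"], 7)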
Example #2

def read(format, stream, kwargs):
    # Truncated in the scrape: the full source first loads the workbook from
    # `stream` with openpyxl and obtains a worksheet `ws` (assumed here).
    it = ws.iter_rows()
    fields = kwargs.get('fields', None)
    if not fields:
        # No explicit fields: take the column names from the first row.
        fields = [cell.internal_value for cell in it.next()]
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    # iter_rows() streams rows lazily from openpyxl's optimized reader.
    for row in it:
        # Stop at the first completely empty row.
        nrow = map(valuenormalize, row)
        if not any(nrow):
            break
        yield metainfo.t._make(nrow)
    yield StreamFooter()


def write(format, metainfo, instream, outfile, encoding, **kwargs):
    from openpyxl import Workbook
    wb = Workbook(optimized_write=True)
    ws = wb.create_sheet()
    ws.append(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            break
        else:
            ws.append(list(k))
    wb.save(outfile)


BabeBase.addPullPlugin('xlsx', ['xlsx'], read, need_seek=True)
BabeBase.addPushPlugin('xlsx', ['xlsx'], write)
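
write() only needs metainfo.fields plus an iterator that yields rows until a StreamFooter sentinel, so it can be exercised outside the BabeBase pipeline. A sketch, with a hypothetical namedtuple standing in for the real StreamHeader:

from collections import namedtuple

Meta = namedtuple('Meta', ['fields'])  # hypothetical stand-in for StreamHeader

rows = [(1, 'a'), (2, 'b')]
instream = iter(rows + [StreamFooter()])  # the footer terminates write()'s loop
write('xlsx', Meta(fields=['id', 'name']), instream, 'out.xlsx', None)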
Example #3

import csv

# Truncated in the scrape: the full source also defines the pull() registered
# at the bottom (it delegates to a csvpull helper) and the UnicodeCSVWriter
# used by push().


class default_dialect(csv.Dialect):
    lineterminator = '\n'
    delimiter = ','
    doublequote = False
    escapechar = '\\'
    quoting = csv.QUOTE_MINIMAL
    quotechar = '"'


def push(format,
         metainfo,
         instream,
         outfile,
         encoding,
         delimiter=None,
         **kwargs):
    if not encoding:
        encoding = "utf8"
    dialect = kwargs.get('dialect', default_dialect)
    if delimiter:
        # Caveat: default_dialect is a class, so this rebinds a shared class
        # attribute (see the sketch after this example).
        dialect.delimiter = delimiter
    writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding)
    writer.writerow(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            break
        else:
            writer.writerow(k)


BabeBase.addPullPlugin('csv', ['csv', 'tsv', 'txt'], pull)
BabeBase.addPushPlugin('csv', ['csv', 'tsv', 'txt'], push)
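
Because default_dialect is passed around as a class, the `dialect.delimiter = delimiter` assignment in push() rebinds a class attribute, so a custom delimiter from one call leaks into every later one. A hedged alternative that builds a throwaway subclass per call (an assumption about the desired fix, not part of the original):

def make_dialect(base, delimiter=None):
    # Hypothetical helper: override attributes on a fresh subclass so the
    # shared base dialect is never mutated.
    attrs = {'delimiter': delimiter} if delimiter else {}
    return type('_per_call_dialect', (base,), attrs)

# inside push(): dialect = make_dialect(kwargs.get('dialect', default_dialect), delimiter)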
Example #4

from base import StreamHeader, BabeBase, StreamFooter


def valuenormalize(cell):
    return cell.value


def read(format, stream, kwargs):
    import xlrd
    wb = xlrd.open_workbook(file_contents=stream.read(),
                            encoding_override=kwargs.get('encoding', None))
    ws = wb.sheet_by_index(0)
    nrows = ws.nrows
    fields = kwargs.get('fields', None)
    if not fields:
        b = 1  # data starts at row 1: row 0 supplied the field names
        fields = [cell.value for cell in ws.row(0)]
    else:
        b = 0  # explicit fields given, so every sheet row is data
    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo
    for i in xrange(b, nrows):
        cells = ws.row(i)
        yield metainfo.t._make(map(valuenormalize, cells))
    yield StreamFooter()


BabeBase.addPullPlugin('xls', ['xls'], read, need_seek=False)
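
valuenormalize returns cell.value unchanged, which means xls date cells arrive as raw Excel serial floats. If dates matter, xlrd can convert them via the workbook's datemode; a sketch of a fuller normalizer (an optional extension, not part of the original):

import datetime
import xlrd

def valuenormalize_dates(cell, datemode):
    # Hypothetical variant: convert Excel date serials to datetime objects.
    if cell.ctype == xlrd.XL_CELL_DATE:
        return datetime.datetime(*xlrd.xldate_as_tuple(cell.value, datemode))
    return cell.value

# usage inside read(): [valuenormalize_dates(c, wb.datemode) for c in cells]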
Example #5

import codecs
import json
from base import StreamHeader, BabeBase, StreamFooter

def pull(format, stream, kwargs):
    stream = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)

    previous_fields = None
    for line in stream:
        data = json.loads(line)
        fields = data.keys()
        if previous_fields != fields:
            # Key set changed (or first line): emit a header for the new shape.
            metainfo = StreamHeader(**dict(kwargs, fields=fields))
            previous_fields = fields
            yield metainfo
        yield metainfo.t._make(data.values())
    yield StreamFooter()


def push(format, metainfo, instream, outfile, encoding, **kwargs):
    outstream = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        elif isinstance(row, StreamHeader):
            metainfo = row
        else:
            outstream.write(json.dumps(dict(zip(metainfo.fields, row))))
            outstream.write("\n")
    outstream.flush()

BabeBase.addPullPlugin('json', ['json'], pull)
BabeBase.addPushPlugin('json', ['json'], push)
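
Since pull() compares each line's key list against the previous one, a stream whose object shape changes mid-file yields a fresh StreamHeader before the first row of the new shape. A small driver sketch (StringIO standing in for a real file; hypothetical):

import StringIO

sample = '{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n{"c": 5}\n'
for item in pull('json', StringIO.StringIO(sample), {}):
    print item
# Two headers are emitted: one for the {a, b} shape and one for {c}.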
Example #6

import codecs
from base import StreamHeader, BabeBase, StreamFooter

def pull(format, stream, kwargs):
    stream = codecs.getreader(kwargs.get('encoding', 'utf8'))(stream)

    fields = kwargs.get('fields', ['text'])

    metainfo = StreamHeader(**dict(kwargs, fields=fields))
    yield metainfo

    # One row per input line; the default single field is named "text".
    for line in stream:
        yield metainfo.t._make([line])
    yield StreamFooter()

def push(format, metainfo, instream, outfile, encoding, **kwargs):
    outstream = codecs.getwriter(kwargs.get('encoding', 'utf8'))(outfile)
    for row in instream:
        if isinstance(row, StreamFooter):
            break
        else:
            # Rows from this file's pull() keep their trailing newline, so
            # writing cells verbatim reproduces the original text.
            for cell in row:
                outstream.write(cell)
    outstream.flush()

BabeBase.addPullPlugin('txt', ['txt'], pull)
BabeBase.addPushPlugin('txt', ['txt'], push)
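
The txt pair is nearly an identity transport: pull() wraps each input line, newline included, in a one-field row, and push() writes the cells back verbatim. A round-trip sketch (StringIO standing in for real files; hypothetical):

import StringIO

src = StringIO.StringIO('hello\nworld\n')
out = StringIO.StringIO()
stream = pull('txt', src, {})
metainfo = stream.next()  # first item is the StreamHeader
push('txt', metainfo, stream, out, None)
print out.getvalue() == 'hello\nworld\n'  # True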