Esempio n. 1
0
def outputData(diter, schema, connection, *args, **formatArgs):
    """Write the rows produced by ``diter`` to a destination in a chosen format.

    Parameters:
        diter: iterable of rows. In the ``split`` variants the first element
            of every row is used as the partition key.
        schema: sequence of column descriptors; ``h[0]`` of each entry is used
            as the column name and, in ``db`` mode, the second element as the
            column type (``None`` means "no declared type").
        connection: only used in ``db`` mode, to read the default
            ``page_size`` through ``connection.cursor()``.
        *args: optional; ``args[0]`` is the destination path.
        **formatArgs: recognised keys are ``file``, ``mode``, ``header``,
            ``compression``, ``compressiontype``, ``orderby``,
            ``orderbydesc``, ``append``, ``split``, ``pagesize`` and
            ``tablename``. In ``csv``/``tsv`` mode whatever is left after the
            handled keys are removed is forwarded to ``writer``.

    Raises:
        functions.OperatorError: when no destination is given, when the mode
            is ``html`` or when the mode is unknown.

    A ``StopIteration`` escaping from the output phase is swallowed.
    """
    ### Parameter handling ###
    where = None
    if len(args) > 0:
        where = args[0]
    elif 'file' in formatArgs:
        where = formatArgs['file']
    else:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No destination provided")

    if 'file' in formatArgs:
        del formatArgs['file']

    # Without an explicit mode, derive it from the destination via autotype().
    if 'mode' not in formatArgs:
        formatArgs['mode'] = autotype(where, {'csv': 'csv', 'tsv': 'tsv', 'xls': 'tsv', 'db': 'db', 'json': 'json'})

    if 'header' not in formatArgs:
        header = False
    else:
        header = formatArgs['header']
        del formatArgs['header']

    if 'compression' not in formatArgs:
        formatArgs['compression'] = False
    if 'compressiontype' not in formatArgs:
        formatArgs['compressiontype'] = 'gz'

    # 'orderbydesc' wins over 'orderby' when both are supplied.
    orderby = None
    if 'orderby' in formatArgs:
        orderby = formatArgs['orderby']
        del formatArgs['orderby']

    if 'orderbydesc' in formatArgs:
        orderby = formatArgs['orderbydesc'] + ' desc'
        del formatArgs['orderbydesc']

    append = False
    if 'append' in formatArgs:
        append = formatArgs['append']
        del formatArgs['append']

    type2ext = {'csv': 'csv', 'tsv': 'xls', 'plain': 'txt', 'db': 'db', 'json': 'json'}

    where = autoext(where, formatArgs['mode'], type2ext)
    filename, ext = os.path.splitext(os.path.basename(where))
    fullpath = os.path.split(where)[0]
    # os.path.split() yields '' for a bare filename and os.makedirs('') raises,
    # so only create the directory when there actually is a directory part.
    if fullpath and not os.path.exists(fullpath):
        os.makedirs(fullpath)

    # 'db' and split-'json' open their own files; every other mode writes
    # through the stream returned by getoutput().
    if not (formatArgs['mode'] == 'db' or (formatArgs['mode'] == 'json' and 'split' in formatArgs)):
        fileIter = getoutput(where, append, formatArgs['compression'], formatArgs['compressiontype'])

    del formatArgs['compressiontype']
    del formatArgs['compression']
    try:

        if formatArgs['mode'] == 'json':
            del formatArgs['mode']
            import json
            je = json.JSONEncoder(separators=(',', ':'), ensure_ascii=True, check_circular=False).encode

            if 'split' in formatArgs:
                def cjs():
                    # defaultdict factory: opens the part file for the current
                    # value of the enclosing ``key`` and writes its schema line.
                    unikey = unicode(key)
                    t = open(os.path.join(fullpath, filename + '.' + unikey + ext), 'w')
                    print >> t, je({'schema': schema[1:]})
                    splitkeys[unikey] = t
                    jsfiles[key] = t
                    # Case for number as key
                    if unikey != key:
                        splitkeys[key] = splitkeys[unikey]
                    return splitkeys[key]

                jsfiles = {}
                splitkeys = defaultdict(cjs)

                gc.disable()
                try:
                    for row in diter:
                        key = row[0]
                        print >> splitkeys[key], je(row[1:])
                finally:
                    # Re-enable the collector even if the iterator or a write fails.
                    gc.enable()

                # Create other parts
                maxparts = 1
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 1

                if maxparts > 1:
                    for i in xrange(0, maxparts):
                        if i not in splitkeys:
                            key = i
                            # The lookup alone triggers cjs(), which creates the part file.
                            splitkeys[key]

                for f in jsfiles.values():
                    if f is not None:
                        f.close()
            else:
                fileIter.write(je({'schema': schema}) + '\n')

                for row in diter:
                    print >> fileIter, je(row)

        elif formatArgs['mode'] == 'csv':
            del formatArgs['mode']
            csvprinter = writer(fileIter, 'excel', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])

            for row in diter:
                csvprinter.writerow(row)

        elif formatArgs['mode'] == 'tsv':
            del formatArgs['mode']
            csvprinter = writer(fileIter, 'excel-tab', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])

            # Tabs inside text values are replaced by four spaces before writing.
            for row in diter:
                csvprinter.writerow(
                    [x.replace('\t', '    ') if type(x) is str or type(x) is unicode else x for x in row])

        elif formatArgs['mode'] == 'gtable':
            vtoutpugtformat(fileIter, diter, simplejson=False)

        elif formatArgs['mode'] == 'gjson':
            vtoutpugtformat(fileIter, diter, simplejson=True)

        elif formatArgs['mode'] == 'html':
            raise functions.OperatorError(__name__.rsplit('.')[-1], "HTML format not available yet")

        elif formatArgs['mode'] == 'plain':
            for row in diter:
                fileIter.write(((''.join([unicode(x) for x in row])) + '\n').encode('utf-8'))

        elif formatArgs['mode'] == 'db':
            def createdb(where, tname, schema, page_size=16384):
                # Opens the database at ``where``, creates the target table and
                # returns (connection, cursor, insert statement). The create
                # statement ends with 'begin exclusive', so callers must commit.
                c = apsw.Connection(where)
                cursor = c.cursor()
                if not append:
                    cursor.execute('DROP TABLE IF EXISTS ' + tname)
                list(cursor.execute('pragma page_size=' + str(
                    page_size) + ';pragma cache_size=-1000;pragma legacy_file_format=false;pragma synchronous=0;pragma journal_mode=OFF;PRAGMA locking_mode = EXCLUSIVE'))
                if orderby:
                    # Rows are staged in a temp table named '_<tname>'.
                    # NOTE(review): only the numbered-split path below copies the
                    # staged rows into the real table; confirm the other paths.
                    tname = '_' + tname
                    create_schema = 'create temp table ' + tname + '('
                else:
                    create_schema = 'create table ' + tname + '('
                create_schema += '`' + unicode(schema[0][0]) + '`' + (
                    ' ' + unicode(schema[0][1]) if schema[0][1] != None else '')
                for colname, coltype in schema[1:]:
                    create_schema += ',`' + unicode(colname) + '`' + (' ' + unicode(coltype) if coltype != None else '')
                create_schema += '); begin exclusive;'
                list(cursor.execute(create_schema))
                insertquery = "insert into " + tname + ' values(' + ','.join(['?'] * len(schema)) + ')'
                return c, cursor, insertquery

            if 'pagesize' in formatArgs:
                page_size = int(formatArgs['pagesize'])
            else:
                page_size = list(connection.cursor().execute('pragma page_size'))[0][0]

            tablename = filename
            if 'tablename' in formatArgs:
                tablename = formatArgs['tablename']

            if 'split' in formatArgs:
                maxparts = 0
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 0

                # If not split parts is defined
                if maxparts == 0:
                    # A function object is used as a cheap mutable namespace so
                    # cdb() can publish the insert statement to the loop below.
                    ns = lambda x: x

                    def cdb():
                        # defaultdict factory: creates the database for the
                        # current value of the enclosing ``key``.
                        unikey = unicode(key)
                        t = createdb(os.path.join(fullpath, filename + '.' + unikey + ext), tablename, schema[1:],
                                     page_size)
                        splitkeys[unikey] = t[1].execute
                        ns.insertqueryw = t[2]
                        dbcon[key] = t[0], t[1]
                        # Case for number as key
                        if unikey != key:
                            splitkeys[key] = splitkeys[unikey]
                        return splitkeys[key]

                    dbcon = {}
                    splitkeys = defaultdict(cdb)

                    gc.disable()
                    try:
                        for row in diter:
                            key = row[0]
                            splitkeys[key](ns.insertqueryw, row[1:])
                    finally:
                        # Re-enable the collector even if an insert fails.
                        gc.enable()

                    for c, cursor in dbcon.values():
                        if c != None:
                            cursor.execute('commit')
                            c.close()
                else:
                    # Splitparts defined
                    cursors = []
                    dbcon = []
                    if "MSPW" in functions.apsw_version:
                        iters = []
                        senders = []
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:],
                                         page_size)
                            it = t[1].executesplit(t[2])
                            iters.append(it)
                            senders.append(it.send)
                            # Prime the coroutine before real rows are sent.
                            it.send(None)
                            dbcon.append((t[0], t[1]))
                        senders = tuple(senders)

                        # NOTE(review): the whole row (key included) is sent here,
                        # presumably executesplit drops the key itself - confirm.
                        for row in diter:
                            senders[hash(row[0]) % maxparts](row)

                        for it in iters:
                            it.close()
                    else:
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:],
                                         page_size)
                            cursors.append(t[1].execute)
                            dbcon.append((t[0], t[1]))
                            insertqueryw = t[2]
                        cursors = tuple(cursors)

                        # Rows are routed to a part by the hash of their key.
                        for row in diter:
                            cursors[hash(row[0]) % maxparts](insertqueryw, row[1:])

                    for c, cursor in dbcon:
                        if c != None:
                            if orderby:
                                cursor.execute('pragma cache_size=-' + str(
                                    100000) + ';create table ' + tablename + ' as select * from _' + tablename + ' order by ' + orderby)
                            cursor.execute('commit')
                            c.close()
            else:
                # Write to db without split
                c, cursor, insertquery = createdb(where, tablename, schema, page_size)

                gc.disable()
                try:
                    cursor.executemany(insertquery, diter)
                finally:
                    # Re-enable the collector even if the bulk insert fails.
                    gc.enable()

                list(cursor.execute('commit'))
                c.close()
        else:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "Unknown mode value")

    except StopIteration:
        pass
def buildrawprinter(separator):
    """Return a ``writer`` bound to standard output.

    The writer is configured with a fresh ``mtermoutput()`` dialect and uses
    ``separator`` (coerced with ``str``) as its delimiter.
    """
    dialect = mtermoutput()
    delimiter = str(separator)
    return writer(sys.stdout, dialect=dialect, delimiter=delimiter)
Esempio n. 3
0
def outputData(diter, schema, connection, *args, **formatArgs):
    """Write the rows produced by ``diter`` to a destination in a chosen format.

    Parameters:
        diter: iterable of rows. In the ``split`` variants the first element
            of every row is used as the partition key.
        schema: sequence of column descriptors; ``h[0]`` of each entry is used
            as the column name and, in ``db`` mode, the second element as the
            column type (``None`` means "no declared type").
        connection: only used in ``db`` mode, to read the default
            ``page_size`` through ``connection.cursor()``.
        *args: optional; ``args[0]`` is the destination path.
        **formatArgs: recognised keys are ``file``, ``mode``, ``header``,
            ``compression``, ``compressiontype``, ``orderby``,
            ``orderbydesc``, ``append``, ``split``, ``pagesize`` and
            ``tablename``. In ``csv``/``tsv`` mode whatever is left after the
            handled keys are removed is forwarded to ``writer``.

    Raises:
        functions.OperatorError: when no destination is given, when the mode
            is ``html`` or when the mode is unknown.

    A ``StopIteration`` escaping from the output phase is swallowed.
    """
    ### Parameter handling ###
    where = None
    if len(args) > 0:
        where = args[0]
    elif 'file' in formatArgs:
        where = formatArgs['file']
    else:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No destination provided")

    if 'file' in formatArgs:
        del formatArgs['file']

    # Without an explicit mode, derive it from the destination via autotype().
    if 'mode' not in formatArgs:
        formatArgs['mode'] = autotype(where, {'csv': 'csv', 'tsv': 'tsv', 'xls': 'tsv', 'db': 'db', 'json': 'json'})

    if 'header' not in formatArgs:
        header = False
    else:
        header = formatArgs['header']
        del formatArgs['header']

    if 'compression' not in formatArgs:
        formatArgs['compression'] = False
    if 'compressiontype' not in formatArgs:
        formatArgs['compressiontype'] = 'gz'

    # 'orderbydesc' wins over 'orderby' when both are supplied.
    orderby = None
    if 'orderby' in formatArgs:
        orderby = formatArgs['orderby']
        del formatArgs['orderby']

    if 'orderbydesc' in formatArgs:
        orderby = formatArgs['orderbydesc'] + ' desc'
        del formatArgs['orderbydesc']

    # 'append' is only forwarded to getoutput() in this version.
    append = False
    if 'append' in formatArgs:
        append = formatArgs['append']
        del formatArgs['append']

    type2ext = {'csv': 'csv', 'tsv': 'xls', 'plain': 'txt', 'db': 'db', 'json': 'json'}

    where = autoext(where, formatArgs['mode'], type2ext)
    filename, ext = os.path.splitext(os.path.basename(where))
    fullpath = os.path.split(where)[0]

    # 'db' and split-'json' open their own files; every other mode writes
    # through the stream returned by getoutput().
    if not (formatArgs['mode'] == 'db' or (formatArgs['mode'] == 'json' and 'split' in formatArgs)):
        fileIter = getoutput(where, append, formatArgs['compression'], formatArgs['compressiontype'])

    del formatArgs['compressiontype']
    del formatArgs['compression']
    try:

        if formatArgs['mode'] == 'json':
            del formatArgs['mode']
            import json
            je = json.JSONEncoder(separators=(',', ':'), ensure_ascii=True, check_circular=False).encode

            if 'split' in formatArgs:
                def cjs():
                    # defaultdict factory: opens the part file for the current
                    # value of the enclosing ``key`` and writes its schema line.
                    unikey = unicode(key)
                    t = open(os.path.join(fullpath, filename + '.' + unikey + ext), 'w')
                    print >> t, je({'schema': schema[1:]})
                    splitkeys[unikey] = t
                    jsfiles[key] = t
                    # Case for number as key
                    if unikey != key:
                        splitkeys[key] = splitkeys[unikey]
                    return splitkeys[key]

                jsfiles = {}
                splitkeys = defaultdict(cjs)

                gc.disable()
                try:
                    for row in diter:
                        key = row[0]
                        print >> splitkeys[key], je(row[1:])
                finally:
                    # Re-enable the collector even if the iterator or a write fails.
                    gc.enable()

                # Create other parts
                maxparts = 1
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 1

                if maxparts > 1:
                    for i in xrange(0, maxparts):
                        if i not in splitkeys:
                            key = i
                            # The lookup alone triggers cjs(), which creates the part file.
                            splitkeys[key]

                for f in jsfiles.values():
                    if f is not None:
                        f.close()
            else:
                fileIter.write(je({'schema': schema}) + '\n')

                for row in diter:
                    print >> fileIter, je(row)

        elif formatArgs['mode'] == 'csv':
            del formatArgs['mode']
            csvprinter = writer(fileIter, 'excel', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])

            for row in diter:
                csvprinter.writerow(row)

        elif formatArgs['mode'] == 'tsv':
            del formatArgs['mode']
            csvprinter = writer(fileIter, 'excel-tab', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])

            # Tabs inside text values are replaced by four spaces before writing.
            for row in diter:
                csvprinter.writerow([x.replace('\t', '    ') if type(x) is str or type(x) is unicode else x for x in row])

        elif formatArgs['mode'] == 'gtable':
            vtoutpugtformat(fileIter, diter, simplejson=False)

        elif formatArgs['mode'] == 'gjson':
            vtoutpugtformat(fileIter, diter, simplejson=True)

        elif formatArgs['mode'] == 'html':
            raise functions.OperatorError(__name__.rsplit('.')[-1], "HTML format not available yet")

        elif formatArgs['mode'] == 'plain':
            for row in diter:
                fileIter.write(((''.join([unicode(x) for x in row])) + '\n').encode('utf-8'))

        elif formatArgs['mode'] == 'db':
            def createdb(where, tname, schema, page_size=16384):
                # Opens the database at ``where``, creates the target table and
                # returns (connection, cursor, insert statement). The create
                # statement ends with 'begin exclusive', so callers must commit.
                c = apsw.Connection(where)
                cursor = c.cursor()
                list(cursor.execute('pragma page_size=' + str(page_size) + ';pragma cache_size=-1000;pragma legacy_file_format=false;pragma synchronous=0;pragma journal_mode=OFF;PRAGMA locking_mode = EXCLUSIVE'))
                if orderby:
                    # Rows are staged in a temp table named '_<tname>'.
                    # NOTE(review): only the numbered-split path below copies the
                    # staged rows into the real table; confirm the other paths.
                    tname = '_' + tname
                    create_schema = 'create temp table ' + tname + '('
                else:
                    create_schema = 'create table ' + tname + '('
                create_schema += '`' + unicode(schema[0][0]) + '`' + (' ' + unicode(schema[0][1]) if schema[0][1] != None else '')
                for colname, coltype in schema[1:]:
                    create_schema += ',`' + unicode(colname) + '`' + (' ' + unicode(coltype) if coltype != None else '')
                create_schema += '); begin exclusive;'
                list(cursor.execute(create_schema))
                insertquery = "insert into " + tname + ' values(' + ','.join(['?'] * len(schema)) + ')'
                return c, cursor, insertquery

            if 'pagesize' in formatArgs:
                page_size = int(formatArgs['pagesize'])
            else:
                page_size = list(connection.cursor().execute('pragma page_size'))[0][0]

            tablename = filename
            if 'tablename' in formatArgs:
                tablename = formatArgs['tablename']

            if 'split' in formatArgs:
                maxparts = 0
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 0

                # If not split parts is defined
                if maxparts == 0:
                    # A function object is used as a cheap mutable namespace so
                    # cdb() can publish the insert statement to the loop below.
                    ns = lambda x: x

                    def cdb():
                        # defaultdict factory: creates the database for the
                        # current value of the enclosing ``key``.
                        unikey = unicode(key)
                        t = createdb(os.path.join(fullpath, filename + '.' + unikey + ext), tablename, schema[1:], page_size)
                        splitkeys[unikey] = t[1].execute
                        ns.insertqueryw = t[2]
                        dbcon[key] = t[0], t[1]
                        # Case for number as key
                        if unikey != key:
                            splitkeys[key] = splitkeys[unikey]
                        return splitkeys[key]

                    dbcon = {}
                    splitkeys = defaultdict(cdb)

                    gc.disable()
                    try:
                        for row in diter:
                            key = row[0]
                            splitkeys[key](ns.insertqueryw, row[1:])
                    finally:
                        # Re-enable the collector even if an insert fails.
                        gc.enable()

                    for c, cursor in dbcon.values():
                        if c != None:
                            cursor.execute('commit')
                            c.close()
                else:
                    # Splitparts defined
                    cursors = []
                    dbcon = []
                    if "MSPW" in functions.apsw_version:
                        iters = []
                        senders = []
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:], page_size)
                            it = t[1].executesplit(t[2])
                            iters.append(it)
                            senders.append(it.send)
                            # Prime the coroutine before real rows are sent.
                            it.send(None)
                            dbcon.append((t[0], t[1]))
                        senders = tuple(senders)

                        # NOTE(review): the whole row (key included) is sent here,
                        # presumably executesplit drops the key itself - confirm.
                        for row in diter:
                            senders[hash(row[0]) % maxparts](row)

                        for it in iters:
                            it.close()
                    else:
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:], page_size)
                            cursors.append(t[1].execute)
                            dbcon.append((t[0], t[1]))
                            insertqueryw = t[2]
                        cursors = tuple(cursors)

                        # Rows are routed to a part by the hash of their key.
                        for row in diter:
                            cursors[hash(row[0]) % maxparts](insertqueryw, row[1:])

                    for c, cursor in dbcon:
                        if c != None:
                            if orderby:
                                cursor.execute('pragma cache_size=-' + str(100000) + ';create table ' + tablename + ' as select * from _' + tablename + ' order by ' + orderby)
                            cursor.execute('commit')
                            c.close()
            else:
                # Write to db without split
                c, cursor, insertquery = createdb(where, tablename, schema, page_size)

                gc.disable()
                try:
                    cursor.executemany(insertquery, diter)
                finally:
                    # Re-enable the collector even if the bulk insert fails.
                    gc.enable()

                list(cursor.execute('commit'))
                c.close()
        else:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "Unknown mode value")

    except StopIteration:
        pass
Esempio n. 4
0
def buildrawprinter(separator):
    """Return a ``writer`` bound to standard output.

    The writer is configured with a fresh ``mtermoutput()`` dialect and uses
    ``separator`` (coerced with ``str``) as its delimiter.
    """
    dialect = mtermoutput()
    delimiter = str(separator)
    return writer(sys.stdout, dialect=dialect, delimiter=delimiter)
            sargs.append(args[i])
    try:
        if params:
            largs, kargs = argsparse.parse(args,csvargs.boolargs,csvargs.nonstringargs,csvargs.needsescape)
        else:
            largs=[]
            kargs=dict()
    except Exception,e:
        raise functions.MadisError(e)
    if len(largs)>0:
        raise functions.OperatorError("strjoin","Unknown argument %s" %(''.join(largs)))
    if 'dialect' not in kargs:
        kargs['dialect']=csvargs.defaultcsv()
    f=StringIO.StringIO()
    try:
        csvprinter=writer(f,**kargs)
    except Exception,e:
        raise functions.MadisError(e)
    csvprinter.writerow(sargs)
    f.seek(0)
    s=f.read()
    return s


# Flag strjoin as registered by setting an attribute on the function object
# (presumably read by the code that loads these functions - TODO confirm).
strjoin.registered=True


def dateformat(*args):

    """
    .. function:: dateformat(date[,inpformat,outformat])