def strsplitv(*args): ###splits the first arguments """ .. function:: strsplitv(str[,formatting options]) -> [C1] Splits in rows *str* according to *formatting options*, default behavior is to split on space. It is a multiset operator that returns one column but many rows. :ref:`Formatting options<formattingopts>` are like in :func:`strsplit` function. .. seealso:: * :ref:`tutmultiset` functions >>> sql("select strsplitv('First,Second,Third','dialect:csv')") C1 ------ First Second Third >>> sql("select strsplitv('First Second Third')") C1 ------ First Second Third """ if len(args)<1: raise functions.OperatorError("strsplitv","strsplit operator: no input") arg=args[0] args=args[1:] try: largs, kargs = argsparse.parse(args,csvargs.boolargs,csvargs.nonstringargs,csvargs.needsescape) except Exception,e: raise functions.MadisError(e)
def __init__(self, tblist, boolargs, nonstringargs, needsescape, *args, **envars): # envars tablename, auto , OPTIONAL [] self.delayedexception = None self.tblist = tblist self.auto = False self.first = True self.schema = "create table %s('Error')" % (envars['tablename']) self.tablename = envars['tablename'] self.description = None self.consdict = {} self.coldata = [] self.rowids = [] self.kdindex = None self.lastcalculatedidx = None self.ordered = False self.envarsdb = envars['db'] self.innerjoin = True self.query = None self.keepcursor = True largs, kargs = [], dict() try: largs, kargs = argsparse.parse(args, boolargs, nonstringargs, needsescape) except Exception, e: raise functions.MadisError(e)
def maincode(args, boolargs, nonstringargs, needsescape, notsplit, db, func, retalways, connectionhandler): autostring = 'automatic_vtable' try: largs, kargs = argsparse.parse(args, boolargs, nonstringargs, needsescape, notsplit) except Exception, e: raise functions.MadisError(e)
def run(self): c = self.connection.cursor() try: cexec = c.execute(self.query, parse=False) try: schema = c.getdescriptionsafe() except functions.ExecutionCompleteError: raise functions.DynamicSchemaWithEmptyResultError( "got empty input") if self.passconnection: try: self.func(cexec, schema, self.connection, *self.args, **self.kargs) except apsw.AbortError: cexec = c.execute(self.query, parse=False) self.func(cexec, schema, self.connection, *self.args, **self.kargs) else: try: self.func(cexec, schema, *self.args, **self.kargs) except apsw.AbortError: cexec = c.execute(self.query, parse=False) self.func(cexec, schema, *self.args, **self.kargs) ret = True except Exception as e: if functions.settings['logging']: lg = logging.LoggerAdapter( logging.getLogger(__name__), {"flowname": functions.variables.flowname}) lg.exception(e) if self.returnalways: return False else: if functions.settings['tracing']: import traceback print("---Deep Execution traceback--") print(traceback.print_exc()) raise functions.MadisError(e) finally: try: c.close() except: pass return ret
def strjoin(*args): """ .. function:: strjoin(value1,value2,....[,'params',formatting options]) Returns a string with *value1,value2,....* formatted according to :ref:`formatting options<formattingopts>`. Literal parameter 'params' must precede *formating options*. Default formatting is comma separated values. Examples: >>> sql("select strjoin('First','Second','Third',100) as joinedvals") joinedvals ----------------------- First,Second,Third,100 >>> sql("select strjoin('First','Second','Third',100,'params','delimiter: ') as joinedvals") joinedvals ----------------------- First Second Third 100 >>> sql("select strjoin('lola',2,10,'llalallall','params','delimiter:%','quoting:QUOTE_ALL') as joinedvals") joinedvals ----------------------------- "lola"%"2"%"10"%"llalallall" >>> sql("select strjoin('value, with delimiter ',2,10,'new','params','dialect:csv') as joinedvals") joinedvals ---------------------------------- "value, with delimiter ",2,10,new """ if None in args: return None sargs=[] params=False for i in xrange(len(args)): if args[i]=='params': args=args[(i+1):] params=True break else: sargs.append(args[i]) try: if params: largs, kargs = argsparse.parse(args,csvargs.boolargs,csvargs.nonstringargs,csvargs.needsescape) else: largs=[] kargs=dict() except Exception,e: raise functions.MadisError(e)
def maincode(args, boolargs, nonstringargs, needsescape, notsplit, db, func, retalways, connectionhandler): autostring = 'automatic_vtable' try: largs, kargs = argsparse.parse(args, boolargs, nonstringargs, needsescape, notsplit) except Exception as e: raise functions.MadisError(e) if 'query' not in kargs: raise functions.OperatorError( func.__globals__['__name__'].rsplit('.')[-1], "needs query argument ") query = kargs['query'] del kargs['query'] if autostring in kargs: del kargs[autostring] return doall(query, db, func, retalways, connectionhandler, *largs, **kargs)
def rectransform(self, s, s_orig=None): if not (re.search( '(?i)(select|' + '|'.join([x for x in self.vtables]) + '|' + '|'.join(self.multiset_functions) + '|' + '|'.join(self.row_functions) + ')', str(s), re.UNICODE)): return str(s), [], self.direct_exec out_vtables = [] if s_orig is None: s_orig = s query = None # Expand functions with spaces between them and their parenthesis for t in s_orig.tokens: tfm = re.match('(\w+)\s\(', str(t), re.UNICODE) if isinstance(t, sqlparse.sql.Function) and tfm and ( tfm.groups()[0] in self.vtables or tfm.groups()[0] in self.row_functions): tidx = s_orig.token_index(t) s_orig.tokens[tidx:tidx + 1] = t.tokens fs = [x for x in expand_tokens(s)] # Process external_query VTs tmatch = re.match(r'\s*(\w+)\s+(.*|$)', str(s), re.DOTALL | re.UNICODE) if tmatch is not None and tmatch.groups()[0].lower() in self.vtables: op_for_inv = tmatch.groups()[0].lower() if hasattr(self.vtables[op_for_inv], 'external_query'): rest = tmatch.groups()[1] op_for_inv = str(op_for_inv) params, preposition, subq = break_inversion_subquery.match( rest).groups() if subq != '': paramslist = [format_query(subq)] else: paramslist = [] paramslist += [ format_param(''.join(x)) for x in re.findall( r"'([^']*?)'|(\w+:[^\s]+)", params, re.UNICODE) ] inv_s = ','.join(paramslist) vname = vt_name(op_for_inv) self.direct_exec += [(op_for_inv, paramslist, subq)] s_orig.tokens[s_orig.token_index(s.tokens[0]):s_orig. token_index(s.tokens[-1]) + 1] = [ sqlparse.sql.Token( Token.Keyword, 'select * from ' + vname + ' ') ] return str(s), vt_distinct([(vname, op_for_inv, inv_s) ]), self.direct_exec # Process internal parenthesis for t in fs: if type(t) is sqlparse.sql.Parenthesis: subq = find_parenthesis.match(str(t)) if subq != None: subq = subq.groups()[0] t.tokens = sqlparse.parse(subq)[0].tokens out_vtables += self.rectransform(t)[1] t.tokens[0:0] = [ sqlparse.sql.Token(Token.Punctuation, '(') ] t.tokens.append(sqlparse.sql.Token(Token.Punctuation, ')')) # Process Inversions #Process direct row inversion t = re.match(r'\s*(\w+)(\s+.*|$)', str(s), re.DOTALL | re.UNICODE) if t != None and t.groups()[0].lower() in self.row_functions: op_for_inv = t.groups()[0] rest = t.groups()[1] params, preposition, subq = break_inversion_subquery.match( rest).groups() paramslist = [ format_param(''.join(x)) for x in re.findall( r"'([^']*?)'|(\w+:[^\s]+)", params, re.UNICODE) ] if subq != '': if len(preposition) > 0: subq, v, dv = self.rectransform(sqlparse.parse(subq)[0]) out_vtables += v paramslist += ['(' + subq + ')'] else: paramslist += [format_param(subq)] inv_s = 'SELECT ' + op_for_inv + '(' + ','.join(paramslist) + ')' subs = sqlparse.parse(inv_s)[0] s_orig.tokens[s_orig.token_index(s.tokens[0]):s_orig. token_index(s.tokens[-1]) + 1] = subs.tokens s = subs fs = [x for x in expand_tokens(s)] # Process vtable inversion for t in fs: if t.ttype == Token.Keyword.DML: break strt = str(t).lower() if strt in self.vtables: #print "FOUND INVERSION:", strt, fs tindex = fs.index(t) # Break if '.' exists before vtable if tindex > 0 and str(fs[tindex - 1]) == '.': break op_for_inv = strt try: rest = ''.join([str(x) for x in fs[tindex + 1:]]) except KeyboardInterrupt: raise except: rest = '' params, preposition, subq = break_inversion_subquery.match( rest).groups() orig_subq = subq if subq != '': subq, v, dv = self.rectransform(sqlparse.parse(subq)[0]) out_vtables += v if not hasattr(self.vtables[strt], 'external_stream'): if subq != '': paramslist = [format_query(subq)] else: paramslist = [] paramslist += [ format_param(''.join(x)) for x in re.findall( r"'([^']*?)'|(\w+:[^\s]+)", params, re.UNICODE) ] inv_s = ''.join([ str(x) for x in fs[:fs.index(t)] ]) + 'SELECT * FROM ' + op_for_inv + '(' + ','.join( paramslist) + ')' else: paramslist = [ format_param(''.join(x)) for x in re.findall( r"'([^']*?)'|(\w+:[^\s]+)", params, re.UNICODE) ] inv_s = ''.join([ str(x) for x in fs[:fs.index(t)] ]) + 'SELECT * FROM ' + op_for_inv + '(' + ','.join( paramslist) + ') ' + subq subs = sqlparse.parse(inv_s)[0] self.direct_exec += [(op_for_inv, paramslist, orig_subq)] s_orig.tokens[s_orig.token_index(s.tokens[0]):s_orig. token_index(s.tokens[-1]) + 1] = subs.tokens s = subs break # find first select s_start = s.token_next_match(0, Token.Keyword.DML, r'(?i)select', True) if s_start is not None: # find keyword that ends substatement s_end = s.token_next_match( s.token_index(s_start), Token.Keyword, (r'(?i)union', r'(?i)order', r'(?i)limit', r'(?i)intersect', r'(?i)except', r'(?i)having'), True) if len(s.tokens) < 3: return str(s), vt_distinct(out_vtables), self.direct_exec if s_end is None: if s.tokens[-1].value == ')': s_end = s.tokens[-2] else: s_end = s.tokens[-1] else: if s.token_index(s_end) + 1 >= len(s.tokens): raise functions.MadisError( "'" + str(s_end).upper() + "' should be followed by something") out_vtables += self.rectransform( sqlparse.sql.Statement( s.tokens_between(s.tokens[s.token_index(s_end) + 1], s.tokens[-1])), s)[1] s_end = s.tokens[s.token_index(s_end) - 1] query = sqlparse.sql.Statement(s.tokens_between(s_start, s_end)) else: return str(s), vt_distinct(out_vtables), self.direct_exec # find from and select_parameters range from_range = None from_start = query.token_next_match(0, Token.Keyword, r'(?i)from', True) # process virtual tables in from range if from_start is not None: from_end = query.token_next_by_instance( query.token_index(from_start), sqlparse.sql.Where) if from_start == query.tokens[-1]: raise functions.MadisError("Error in FROM range of: '" + str(query) + "'") if from_end is None: from_end = query.tokens[-1] from_range = sqlparse.sql.Statement( query.tokens_between( query.tokens[query.token_index(from_start) + 1], from_end)) else: from_range = sqlparse.sql.Statement( query.tokens_between( query.tokens[query.token_index(from_start) + 1], from_end, exclude_end=True)) for t in [ x for x in expand_type(from_range, ( sqlparse.sql.Identifier, sqlparse.sql.IdentifierList)) ]: if str(t).lower() in ('group', 'order'): break if type(t) is sqlparse.sql.Function: vname = vt_name(str(t)) fname = t.tokens[0].get_real_name().lower() if fname in self.vtables: out_vtables += [(vname, fname, str(t.tokens[1])[1:-1])] t.tokens = [sqlparse.sql.Token(Token.Keyword, vname)] else: raise functions.MadisError("Virtual table '" + fname + "' does not exist") if from_start is not None: select_range = sqlparse.sql.Statement( query.tokens_between(query.tokens[1], from_start, exclude_end=True)) else: select_range = sqlparse.sql.Statement( query.tokens_between(query.tokens[1], query.tokens[-1])) # Process EXPAND functions for t in flatten_with_type(select_range, sqlparse.sql.Function): if hasattr(t.tokens[0], 'get_real_name'): fname = t.tokens[0].get_real_name() else: fname = str(t.tokens[0]) fname = fname.lower().strip() if fname in self.multiset_functions: t = s_orig.group_tokens(sqlparse.sql.Parenthesis, s_orig.tokens_between(s_start, s_end)) vname = vt_name(str(t)) out_vtables += [(vname, 'expand', format_query(t))] s_orig.tokens[s_orig.token_index(t)] = sqlparse.sql.Token( Token.Keyword, 'select * from ' + vname + ' ') break return str(s), vt_distinct(out_vtables), self.direct_exec
def strsplit(*args): ###splits the first arguments """ .. function:: strsplit(str[,formatting options]) -> [C1,C2,....] Splits *str* according to *formatting options*, default behavior is to split on space. It is a multiset operator that returns one row. .. seealso:: * :ref:`tutmultiset` functions .. _formattingopts: Formatting options: :dialect: *tsv/csv* Formats field as tab/comma separated values with minimal quoting :delimiter: A string used to separate fields. It defaults to ' ' :doublequote: *t/f* Controls how instances of quotechar appearing inside a field should be themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. It defaults to True. On output, if doublequote is False and no escapechar is set, Error is raised if a quotechar is found in a field :escapechar: A one-character string used by the writer to escape the delimiter if quoting is set to QUOTE_NONE and the quotechar if doublequote is False. On reading, the escapechar removes any special meaning from the following character. It defaults to None, which disables escaping. :quotechar: A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '"'. :quoting: Controls when quotes should be generated by the writer and recognized by the reader. It can take on any of the QUOTE_* constants and defaults to QUOTE_MINIMAL. Possible values are QUOTE_ALL, QUOTE_NONE, QUOTE_MINIMAL, QUOTE_NONNUMERIC :skipinitialspace: *t/f* When True, whitespace immediately following the delimiter is ignored. The default is False :Returned multiset schema: - *C1,C2...* Text values produced from splitting. Examples: >>> sql("select strsplit('First,Second,Third', 'dialect:csv')") C1 | C2 | C3 ---------------------- First | Second | Third >>> sql("select strsplit('-First-%Second%-Third-','delimiter:%','quotechar:-')") C1 | C2 | C3 ---------------------- First | Second | Third .. doctest:: :hide: >>> sql("select strsplit('-First-%Second%-Third-','quotechar:-p')") #doctest:+ELLIPSIS +NORMALIZE_WHITESPACE Traceback (most recent call last): ... MadisError: Madis SQLError: "quotechar" must be an 1-character string """ if len(args)<1: raise functions.OperatorError("strsplit"," no input") arg=args[0] args=args[1:] try: largs, kargs = argsparse.parse(args,csvargs.boolargs,csvargs.nonstringargs,csvargs.needsescape) except Exception,e: raise functions.MadisError(e)
raise functions.MadisError(e) if 'dialect' not in kargs: kargs['dialect']=csvargs.defaultcsv() if 'delimiter' not in kargs: kargs['delimiter']=' ' if len(largs)>0: raise functions.OperatorError("strsplit","strsplit operator: Unknown argument %s" %(''.join(largs))) f=StringIO.StringIO() f.write(arg.encode('utf-8')) f.seek(0) try: r=reader(f,**kargs) except Exception,e: raise functions.MadisError(e) first=True for row in r: if first: first=False yield tuple("C"+str(i) for i in xrange(1, len(row) + 1)) yield row if first: first=False yield ["C1"] f.close() strsplit.registered=True