def jngrams(*args): """ .. function:: jngrams(n,text) -> jpack Converts multiple input arguments into a jpack of ngrams. Examples: >>> sql("select jngrams(1,'This is a test phrase')") jngrams(1,'This is a test phrase') ------------------------------------------- [["This"],["is"],["a"],["test"],["phrase"]] >>> sql("select jngrams(2,'This is a test phrase')") jngrams(2,'This is a test phrase') --------------------------------------------------------- [["This","is"],["is","a"],["a","test"],["test","phrase"]] """ if type(args[0]) == int: n = args[0] text = args[1] else: n = 1 text = args[0] g = text.split(' ') listofngrams = [] for i in xrange(len(g) - n + 1): listofngrams.append(g[i:i + n]) return jopts.toj(listofngrams)
def jfilterempty(*args): """ .. function:: jfilterempty(jpacks.) -> jpack Removes from input jpacks all empty elements. Examples: >>> sql("select jfilterempty('a', '', '[]')") jfilterempty('a', '', '[]') --------------------------- a >>> sql("select jfilterempty('a','[null]',3)") jfilterempty('a','[null]',3) ---------------------------- ["a",3] >>> sql("select jfilterempty('[3]', jpack('b', ''))") jfilterempty('[3]', jpack('b', '')) ----------------------------------- [3,"b"] """ return jopts.toj([x for x in jopts.fromj(*args) if x!='' and x!=[] and x!=None])
def jngrams(*args): """ .. function:: jngrams(n,text) -> jpack Converts multiple input arguments into a jpack of ngrams. Examples: >>> sql("select jngrams(1,'This is a test phrase')") jngrams(1,'This is a test phrase') ------------------------------------------- [["This"],["is"],["a"],["test"],["phrase"]] >>> sql("select jngrams(2,'This is a test phrase')") jngrams(2,'This is a test phrase') --------------------------------------------------------- [["This","is"],["is","a"],["a","test"],["test","phrase"]] """ if type(args[0]) == int: n = args[0] text = args[1] else: n = 1 text = args[0] g = text.split(' ') listofngrams = [] for i in xrange(len(g)-n+1): listofngrams.append(g[i:i+n]) return jopts.toj(listofngrams)
def jfilterempty(*args): """ .. function:: jfilterempty(jpacks.) -> jpack Removes from input jpacks all empty elements. Examples: >>> sql("select jfilterempty('a', '', '[]')") jfilterempty('a', '', '[]') --------------------------- a >>> sql("select jfilterempty('a','[null]',3)") jfilterempty('a','[null]',3) ---------------------------- ["a",3] >>> sql("select jfilterempty('[3]', jpack('b', ''))") jfilterempty('[3]', jpack('b', '')) ----------------------------------- [3,"b"] """ return jopts.toj( [x for x in jopts.fromj(*args) if x != '' and x != [] and x != None])
def jexcept(*args): """ .. function:: jexcept(jpackA, jpackB) -> jpack Returns the items of jpackA except the items that appear on jpackB. Examples: >>> sql("select jexcept('[1,2,3]', '[1,2,3]')") # doctest: +NORMALIZE_WHITESPACE jexcept('[1,2,3]', '[1,2,3]') ----------------------------- [] >>> sql("select jexcept('[1,2,3]', '[1,3]')") # doctest: +NORMALIZE_WHITESPACE jexcept('[1,2,3]', '[1,3]') --------------------------- 2 """ if len(args) < 2: raise functions.OperatorError("jexcept","operator needs at least two inputs") b = set(jopts.fromj(args[1])) return jopts.toj([x for x in jopts.fromj(args[0]) if x not in b])
def jsplitv(*args): """ .. function:: jsplitv(jpacks) -> [C1] Splits vertically a jpack. Examples: >>> sql("select jsplitv(jmerge('[1,2,3]', '[1,2,3]', 'b', 'a', 3 ))") # doctest: +NORMALIZE_WHITESPACE C1 -- 1 2 3 1 2 3 b a 3 """ yield ('C1', ) for j1 in jopts.fromj(*args): yield [jopts.toj(j1)]
def jmergeregexpnamed(*args): """ .. function:: jmergeregexpnamed(jpacks) -> jpack Creates a regular expression that matches all of the jpack's contents with named groups. If the number of named groups in a regular expression is greater than 99, then the output will be a jpack of regular expressions. Examples: >>> sql(''' select jmergeregexpnamed('["abc", "def"]') ''') # doctest: +NORMALIZE_WHITESPACE jmergeregexpnamed('["abc", "def"]') ----------------------------------- (abc)|(def) """ inp = jopts.fromj(*args) inp.sort() out = [] for g in xrange(0, len(inp), 99): out.append('|'.join('('+x+')' for x in inp[g:g+99])) return jopts.toj(out)
def jpack(*args): """ .. function:: jpack(args...) -> jpack Converts multiple input arguments into a single string. Jpacks preserve the types of their inputs and are based on JSON encoding. Single values are represented as themselves where possible. Examples: >>> sql("select jpack('a')") jpack('a') ---------- a >>> sql("select jpack('a','b',3)") jpack('a','b',3) ---------------- ["a","b",3] >>> sql("select jpack('a', jpack('b',3))") jpack('a', jpack('b',3)) ------------------------ ["a",["b",3]] """ return jopts.toj(jopts.elemfromj(*args))
def final(self): yield tuple('c'+str(i) for i in xrange(1,self.numofargs)) for groupkey, sumcols in self.groupsdict.iteritems(): cols = list(groupkey) for col in sumcols: try: cols.append(sum(col)) except TypeError: cols.append(jopts.toj(sorted(set( jopts.fromj(*col) )))) yield cols
def jdictgroupkey(*args): """ .. function:: jdictgroupkey(list_of_jdicts, groupkey1, groupkey2, ...) It groups an array of jdicts into a hierarchical structure. The grouping is done first by groupkey1 then by groupkey2 and so on. If no groupkeys are provided, then the first key of array's first jdict is used as a groupkey. Examples: >>> sql('''select jdictgroupkey('[{"gkey":"v1", "b":1},{"gkey":"v1","b":2},{"gkey":"v2","b":1, "c":2}]') as j''') j --------------------------------------------- {"v1":[{"b":1},{"b":2}],"v2":[{"b":1,"c":2}]} >>> sql('''select jdictgroupkey('[{"gkey":"v1", "b":1},{"gkey":"v1","b":2},{"gkey":"v2","b":1, "c":2}]', "gkey") as j''') j --------------------------------------------- {"v1":[{"b":1},{"b":2}],"v2":[{"b":1,"c":2}]} >>> sql('''select jdictgroupkey('[{"gkey":"v1", "gkey2":"f1", "b":1},{"gkey":"v1", "gkey2":"f2", "b":2},{"gkey":"v1", "gkey2":"f2", "b":1, "c":2}]', "gkey", "gkey2") as j''') j -------------------------------------------------------------- {"v1":{"gkey2":{"f1":[{"b":1}],"f2":[{"b":2},{"b":1,"c":2}]}}} """ def recgroupkey(jdict, gkeys): outdict=OrderedDict() for d in jdict: if d[gkeys[0]] not in outdict: outdict[d[gkeys[0]]] = [d] else: outdict[d[gkeys[0]]].append(d) del(d[gkeys[0]]) if len(gkeys)>1: outdict = OrderedDict([(x, recgroupkey(y, gkeys[1:])) for x,y in outdict.iteritems()]) return {gkeys[0]:outdict} outdict=OrderedDict() dlist=json.loads(args[0], object_pairs_hook=OrderedDict) if len(args) == 1: groupkeys = [iter(dlist[0]).next()] else: groupkeys = args[1:] outdict = recgroupkey(dlist, groupkeys) return jopts.toj(outdict[groupkeys[0]])
def nl2j(*args): """ .. function:: nl2j(text) -> jpack Converts a text with newlines to a jpack. """ fj = [] for t in args: fj += [x for x in t.split('\n')] return jopts.toj(fj)
def jdictvals(*args): """ .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys). If key values are also provided, it returns only the keys that have been provided. Examples: >>> sql(''' select jdictvals('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE jdictvals('{"k1":1,"k2":2}') ---------------------------- [1,2] >>> sql(''' select jdictvals('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE jdictvals('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ------------------------------------------------------ [3,1,null] >>> sql(''' select jdictvals('{"k1":1}') ''') # doctest: +NORMALIZE_WHITESPACE jdictvals('{"k1":1}') --------------------- 1 >>> sql(''' select jdictvals('{"k1":1}') ''') # doctest: +NORMALIZE_WHITESPACE jdictvals('{"k1":1}') --------------------- 1 >>> sql(''' select jdictvals(1) ''') # doctest: +NORMALIZE_WHITESPACE jdictvals(1) ------------ 1 """ if type(args[0]) in (int,float) or args[0][0]!='{' or args[0][-1]!='}': return args[0] d=json.loads(args[0]) if len(args)==1: d=d.items() d.sort(key=operator.itemgetter(1,0)) vals=[x[1] for x in d] else: vals=[] for i in args[1:]: try: vals.append(d[i]) except KeyboardInterrupt: raise except: vals.append(None) return jopts.toj( vals )
def jdictsplit(*args): """ .. function:: jdictvals(jdict, [key1, key2,..]) -> columns If only the first argument (jdict) is provided, it returns a row containing the values of input jdict (sorted by the jdict keys). If key values are also provided, it returns only the columns of which the keys have been provided. Examples: >>> sql(''' select jdictsplit('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE k1 | k2 ------- 1 | 2 >>> sql(''' select jdictsplit('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE k3 | k1 | k4 -------------- 3 | 1 | None """ d=json.loads(args[0]) if len(args)==1: d=d.items() d.sort(key=operator.itemgetter(1,0)) yield tuple([x[0] for x in d]) yield [jopts.toj(x[1]) for x in d] else: vals=[] yield tuple(args[1:]) for i in args[1:]: try: vals.append(jopts.toj(d[i])) except KeyboardInterrupt: raise except: vals.append(None) yield vals
def jdictsplit(*args): """ .. function:: jdictsplit(jdict, [key1, key2,..]) -> columns If only the first argument (jdict) is provided, it returns a row containing the values of input jdict (sorted by the jdict keys). If key values are also provided, it returns only the columns of which the keys have been provided. Examples: >>> sql(''' select jdictsplit('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE k1 | k2 ------- 1 | 2 >>> sql(''' select jdictsplit('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE k3 | k1 | k4 -------------- 3 | 1 | None """ d=json.loads(args[0]) if len(args)==1: d=sorted(d.items()) yield tuple([x[0] for x in d]) yield [jopts.toj(x[1]) for x in d] else: vals=[] yield tuple(args[1:]) for i in args[1:]: try: vals.append(jopts.toj(d[i])) except KeyboardInterrupt: raise except: vals.append(None) yield vals
def regexpr(*args): """ .. function:: regexp(pattern,expression[,replacestr]) This function returns a match to the first parenthesis of *pattern* or replaces the matches of *pattern* in *expression* with *replacestr*. `Pattern Syntax <http://docs.python.org/library/re.html#re-syntax>`_ is according to python's re module. Examples use `inversion`. Examples: >>> table1(''' ... 25 ... ''') >>> sql("regexpr 'start\s(\w+)\send' 'start otherword end' ") regexpr('start\s(\w+)\send','start otherword end') -------------------------------------------------- otherword >>> sql("regexpr '\W+' '@#$%@$#% tobereplaced @#$%@#$%' 'nonword' ") regexpr('\W+','@#$%@$#% tobereplaced @#$%@#$%','nonword') --------------------------------------------------------- nonwordtobereplacednonword >>> sql("select regexpr('(\w+).*?(\w+)', 'one two three')") regexpr('(\w+).*?(\w+)', 'one two three') ----------------------------------------- ["one","two"] """ if len(args) < 2: return if len(args) == 2: a = re.search(args[0], unicode(args[1]), re.UNICODE) if a != None: if len(a.groups()) > 0: return jopts.toj(a.groups()) else: return True else: return None if len(args) == 3: try: return re.sub(args[0], args[2], args[1], flags=re.UNICODE) except TypeError: return re.sub(args[0], args[2], args[1])
def jdictsplitv(*args): """ .. function:: jdictsplitv(jdict, [key1, key2,..]) -> columns If only the first argument (jdict) is provided, it returns rows containing the values of input jdict. If key values are also provided, it returns only the columns of which the keys have been provided. Examples: >>> sql(''' select jdictsplitv('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE key | val --------- k1 | 1 k2 | 2 >>> sql(''' select jdictsplitv('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE key | val --------- k3 | 3 k1 | 1 """ yield ('key', 'val') if len(args) == 1: dlist = json.loads(args[0], object_pairs_hook=OrderedDict) for k, v in dlist.iteritems(): yield [k, jopts.toj(v)] else: dlist = json.loads(args[0]) for k in args[1:]: try: yield k, jopts.toj(dlist[k]) except KeyError: pass
def jpermutations(*args): """ .. function:: jpermutations(jpack, r) -> multiset Returns all length r permutations of jpack. Examples: >>> sql('''select jpermutations('["t1","t2","t3"]',2)''') C1 | C2 ------- t1 | t2 t1 | t3 t2 | t1 t2 | t3 t3 | t1 t3 | t2 >>> sql('''select jpermutations('["t1","t2",["t3","t4"]]',2)''') C1 | C2 ------------------------- t1 | t2 t1 | ["t3","t4"] t2 | t1 t2 | ["t3","t4"] ["t3","t4"] | t1 ["t3","t4"] | t2 >>> sql('''select jpermutations(null,2)''') C1 | C2 ----- >>> sql('''select jpermutations('["t1","t2","t3","t4"]')''') C1 -- t1 t2 t3 t4 """ r=1 if len(args)==2: r=args[1] yield tuple(('C'+str(x) for x in xrange(1,r+1))) for p in itertools.permutations(jopts.fromj(args[0]), r): yield [jopts.toj(x) for x in p]
def jset(*args): """ .. function:: jset(jpacks) -> jpack Returns a set representation of a jpack, unifying duplicate items. Examples: >>> sql("select jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 ) ---------------------------------------- [1,2,3,"a","b"] """ return jopts.toj(sorted(set(jopts.fromj(*args))))
def jsort(*args): """ .. function:: jsort(jpacks) -> jpack Sorts the input jpacks. Examples: >>> sql("select jsort('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE jsort('[1,2,3]', '[1,2,3]', 'b', 'a', 3 ) ----------------------------------------- [1,1,2,2,3,3,3,"a","b"] """ return jopts.toj(sorted(jopts.fromj(*args)))
def jmerge(*args): """ .. function:: jmerge(jpacks) -> jpack Merges multiple jpacks into one jpack. Examples: >>> sql("select jmerge('[1,2,3]', '[1,2,3]', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE jmerge('[1,2,3]', '[1,2,3]', 'a', 3 ) ------------------------------------- [1,2,3,1,2,3,"a",3] """ return jopts.toj(jopts.fromj(*args))
def jpermutations(*args): """ .. function:: jpermutations(jpack, r) -> multiset Returns all length r permutations of jpack. Examples: >>> sql('''select jpermutations('["t1","t2","t3"]',2)''') C1 | C2 ------- t1 | t2 t1 | t3 t2 | t1 t2 | t3 t3 | t1 t3 | t2 >>> sql('''select jpermutations('["t1","t2",["t3","t4"]]',2)''') C1 | C2 ------------------------- t1 | t2 t1 | ["t3","t4"] t2 | t1 t2 | ["t3","t4"] ["t3","t4"] | t1 ["t3","t4"] | t2 >>> sql('''select jpermutations(null,2)''') >>> sql('''select jpermutations('["t1","t2","t3","t4"]')''') C1 -- t1 t2 t3 t4 """ r=1 if len(args)==2: r=args[1] yield tuple(('C'+str(x) for x in xrange(1,r+1))) for p in itertools.permutations(jopts.fromj(args[0]), r): yield [jopts.toj(x) for x in p]
def jrange(num): """ .. function:: jrange(num) -> jrange Returns a jrange of integer numbers. Examples: >>> sql("select jrange(5)") jrange('a') ----------------- ["0","1","2","3"] """ jran = [None] * num for i in xrange(num): jran[i] = str(i) return jopts.toj(jran)
def jfrequentwords(*args): """ .. function:: jfrequentwords(args...) -> jpack Returns the frequent words of a text in a jpack """ wordslist = args[0].split(' ') setwords = set(wordslist) c = dict.fromkeys(setwords, 0) for w in wordslist: c[w] += 1 lenwords = len(setwords) extremevals = int(math.ceil(lenwords * 3 * 1.0 / 100)) frequences = sorted(c.values())[extremevals:(lenwords - extremevals)] avgfrequency = math.ceil(sum(frequences) * 1.0 / len(frequences)) return jopts.toj([k for k, v in c.iteritems() if v >= avgfrequency])
def s2j(*args): """ .. function:: s2j(tabpack) -> jpack Converts a space separated pack to a jpack. Examples: >>> sql("select s2j('1 2 3 ')") # doctest: +NORMALIZE_WHITESPACE s2j('1 2 3 ') -------------- ["1","2","3"] """ fj = [] for t in args: fj += [x for x in t.split(' ') if x != ''] return jopts.toj(fj)
def jdict(*args): """ .. function:: jdict(key, value, key1, value1) -> jdict Returns a jdict of the keys and value pairs. Examples: >>> sql(''' select jdict('key1', 'val1', 'key2', 'val2') ''') # doctest: +NORMALIZE_WHITESPACE jdict('key1', 'val1', 'key2', 'val2') ------------------------------------- {"key1":"val1","key2":"val2"} >>> sql(''' select jdict('key', '{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE jdict('key', '{"k1":1,"k2":2}') ------------------------------- {"key":{"k1":1,"k2":2}} >>> sql(''' select jdict('key', '["val1", "val2"]') ''') # doctest: +NORMALIZE_WHITESPACE jdict('key', '["val1", "val2"]') -------------------------------- {"key":["val1","val2"]} >>> sql(''' select jdict('1') ''') # doctest: +NORMALIZE_WHITESPACE Traceback (most recent call last): ... OperatorError: Madis SQLError: Operator JDICT: At least two arguments required """ if len(args)==1: raise functions.OperatorError('jdict',"At least two arguments required") result = OrderedDict() for i in xrange(0, len(args), 2): result[args[i]] = jopts.fromjsingle(args[i+1]) return jopts.toj( result )
def jflatten(*args): """ .. function:: jflattten(jpacks) -> jpack Flattens all nested sub-jpacks. Examples: >>> sql(''' select jflatten('1', '[2]') ''') # doctest: +NORMALIZE_WHITESPACE jflatten('1', '[2]') -------------------- ["1",2] >>> sql(''' select jflatten('[["word1", 1], ["word2", 1], [["word3", 2], ["word4", 2]], 3]') ''') # doctest: +NORMALIZE_WHITESPACE jflatten('[["word1", 1], ["word2", 1], [["word3", 2], ["word4", 2]], 3]') ------------------------------------------------------------------------- ["word1",1,"word2",1,"word3",2,"word4",2,3] """ return jopts.toj(jopts.flatten(jopts.elemfromj(*args)))
def t2j(*args): """ .. function:: t2j(tabpack) -> jpack Converts a tab separated pack to a jpack. Examples: >>> sql("select t2j(j2t('[1,2,3]'))") # doctest: +NORMALIZE_WHITESPACE t2j(j2t('[1,2,3]')) ------------------- ["1","2","3"] """ fj=[] for t in args: fj+=t.split('\t') return jopts.toj(fj)
def jsplit(*args): """ .. function:: jsplit(jpacks) -> [C1, C2, ...] Splits horizontally a jpack. Examples: >>> sql("select jsplit('[1,2,3]', '[3,4,5]')") # doctest: +NORMALIZE_WHITESPACE C1 | C2 | C3 | C4 | C5 | C6 --------------------------- 1 | 2 | 3 | 3 | 4 | 5 """ fj = [jopts.toj(x) for x in jopts.fromj(*args)] if fj == []: yield ('C1',) yield tuple(['C' + str(x) for x in xrange(1, len(fj) + 1)]) yield fj
def jintersection(*args): """ .. function:: jintersection(jpackA, jpackB) -> jpack Returns the items of jpackA except the items that appear on jpackB. Examples: >>> sql("select jintersection('[1,2,3]', '[1,2,3]')") # doctest: +NORMALIZE_WHITESPACE jintersection('[1,2,3]', '[1,2,3]') ----------------------------------- [1,2,3] >>> sql("select jintersection('[1,2,3]', '[1,3]', 1)") # doctest: +NORMALIZE_WHITESPACE jintersection('[1,2,3]', '[1,3]', 1) ------------------------------------ 1 """ if len(args) < 2: raise functions.OperatorError("jintersection","operator needs at least two inputs") return jopts.toj(sorted(set.intersection(*[set(jopts.fromj(x)) for x in args])))
def final(self): return jopts.toj(self.outgroup)
def final(self): yield tuple(['C'+str(i) for i in xrange(1, self.lenargs)] ) yield [jopts.toj(list(i)) for i in self.vals]
""" wordslist = args[0].split(' ') setwords = set(wordslist) c = dict.fromkeys(setwords, 0) for w in wordslist: c[w]+=1 lenwords = len(setwords) extremevals = int(math.ceil(lenwords * 3 * 1.0/100)) frequences = sorted(c.values())[extremevals:(lenwords-extremevals)] try: avgfrequency = math.ceil(sum(frequences)*1.0/len(frequences)) except Exception,e: avgfrequency = 0.0 return jopts.toj([k for k,v in c.iteritems() if v >= avgfrequency]) jfrequentwords.registered=True def jsonstrict(*args): """ .. function:: jsonstrict(args...) -> json string Sometimes we wish to process json lists from another application. Jsonstrict function tries to always create json compatible lists. So it always returns json lists. Examples: >>> sql("select jsonstrict('a')") jsonstrict('a')
i=args[0] try: if i[0]=='{' and i[-1]=='}': keys=[x for x in json.loads(i, object_pairs_hook=OrderedDict).iterkeys()] except TypeError,e: pass else: keys=OrderedDict() for i in args: try: if i[0]=='{' and i[-1]=='}': keys.update([(x,None) for x in json.loads(i, object_pairs_hook=OrderedDict).iterkeys()]) except TypeError,e: pass keys=list(keys) return jopts.toj( keys ) jdictkeys.registered=True def jdictvals(*args): """ .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys). If key values are also provided, it returns only the keys that have been provided. Examples: >>> sql(''' select jdictvals('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
def jsonpath(*args): """ .. function:: jsonpath(JSON, jsonpathexpr1, jsonpathexpr2) -> multiset Uses jsonpath expressions to pick values from inside a JSON input. If the outputs of all JSONpath expressions have the same number of elements in them, it splits the output into multiple rows. .. note:: For more on JSONpath see: http://goessner.net/articles/JsonPath/ Examples: >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1') ''') C1 ------------------------------------------------------- [{"name":"n1","value":"v1"},{"name":"n2","value":"v2"}] >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name') ''') C1 -- n1 n2 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].value') ''') C1 | C2 ------- n1 | v1 n2 | v2 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].nonexisting') ''') C1 | C2 --------- n1 | None n2 | None >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2"}]}', '$.d1[*].name', '$.d1[*].value') ''') C1 | C2 ---------------- ["n1","n2"] | v1 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.nonexisting') ''') """ j = json.loads(args[0]) yield tuple( ('C'+str(x)for x in xrange( 1,len(args) ) ) ) output=[libjsonpath(j, jp, use_eval=False) for jp in args[1:]] l=0 lchanges=0 for i in output: try: if len(i)!=l: l=len(i) lchanges+=1 except TypeError: pass if l==0: return if lchanges>1: yield [jopts.toj(x) if type(x)!=bool else None for x in output] else: for i in xrange(l): yield [jopts.toj(x[i]) if type(x)!=bool else None for x in output]
def jsonpath(*args): """ .. function:: jsonpath(JSON, jsonpathexpr1, jsonpathexpr2) -> multiset Uses jsonpath expressions to pick values from inside a JSON input. If the outputs of all JSONpath expressions have the same number of elements in them, it splits the output into multiple rows. .. note:: For more on JSONpath see: http://goessner.net/articles/JsonPath/ Examples: >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1') ''') C1 ------------------------------------------------------- [{"name":"n1","value":"v1"},{"name":"n2","value":"v2"}] >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name') ''') C1 -- n1 n2 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].value') ''') C1 | C2 ------- n1 | v1 n2 | v2 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].nonexisting') ''') C1 | C2 --------- n1 | None n2 | None >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2"}]}', '$.d1[*].name', '$.d1[*].value') ''') C1 | C2 ---------------- ["n1","n2"] | v1 >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.nonexisting') ''') """ try: j = json.loads(args[0]) except: try: j = json.loads(utf8clean(args[0])) except: import sys sys.stderr.write(args[0]) error = 'Error in input line: ' + args[0] raise yield tuple(('C' + str(x) for x in xrange(1, len(args)))) output = [libjsonpath(j, jp, use_eval=False) for jp in args[1:]] l = 0 lchanges = 0 for i in output: try: if len(i) != l: l = len(i) lchanges += 1 except TypeError: pass if l == 0: return try: if lchanges > 1: yield [jopts.toj(x) if type(x) != bool else None for x in output] else: for i in xrange(l): yield [ jopts.toj(x[i]) if type(x) != bool else None for x in output ] except: import sys sys.stderr.write(args[0]) error = 'Error in input line: ' + args[0] raise Exception(error)
def jsplice(*args): """ .. function:: jsplice(jpack, range1_start, range1_end, ...) -> jpack Splices input jpack. If only a single range argument is provided, it returns input jpack's element in provided position. If defined position index is positive, then it starts counting from the beginning of input jpack. If defined position is negative it starts counting from the end of input jpack. If more than one range arguments are provided, then the arguments are assumed to be provided in pairs (start, end) that define ranges inside the input jpack that should be joined together in output jpack. Examples: >>> sql(''' select jsplice('[1,2,3,4,5]',0) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',0) ------------------------ 1 >>> sql(''' select jsplice('[1,2,3,4,5]',-1) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',-1) ------------------------- 5 >>> sql(''' select jsplice('[1,2,3,4,5]',10) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',10) ------------------------- None >>> sql(''' select jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ---------------------------------- [1,2,3,1,2] >>> sql(''' select jsplice('[1,2,3,4,5]', 2, -1) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]', 2, -1) ----------------------------- [3,4] """ largs = len(args) if largs == 1: return args[0] fj = jopts.fromj(args[0]) if largs == 2: try: return jopts.toj(fj[args[1]]) except KeyboardInterrupt: raise except: return None outj = [] for i in xrange(1, largs, 2): try: outj += fj[args[i]:args[i + 1]] except KeyboardInterrupt: raise except: pass return jopts.toj(outj)
x for x in json.loads( i, object_pairs_hook=OrderedDict).iterkeys() ] except TypeError, e: pass else: keys = OrderedDict() for i in args: try: if i[0] == '{' and i[-1] == '}': keys.update([(x, None) for x in json.loads( i, object_pairs_hook=OrderedDict).iterkeys()]) except TypeError, e: pass keys = list(keys) return jopts.toj(keys) jdictkeys.registered = True def jdictvals(*args): """ .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys). If key values are also provided, it returns only the keys that have been provided. Examples:
def jsplice(*args): """ .. function:: jsplice(jpack, range1_start, range1_end, ...) -> jpack Splices input jpack. If only a single range argument is provided, it returns input jpack's element in provided position. If defined position index is positive, then it starts counting from the beginning of input jpack. If defined position is negative it starts counting from the end of input jpack. If more than one range arguments are provided, then the arguments are assumed to be provided in pairs (start, end) that define ranges inside the input jpack that should be joined together in output jpack. Examples: >>> sql(''' select jsplice('[1,2,3,4,5]',0) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',0) ------------------------ 1 >>> sql(''' select jsplice('[1,2,3,4,5]',-1) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',-1) ------------------------- 5 >>> sql(''' select jsplice('[1,2,3,4,5]',10) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]',10) ------------------------- None >>> sql(''' select jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ---------------------------------- [1,2,3,1,2] >>> sql(''' select jsplice('[1,2,3,4,5]', 2, -1) ''') # doctest: +NORMALIZE_WHITESPACE jsplice('[1,2,3,4,5]', 2, -1) ----------------------------- [3,4] """ largs=len(args) if largs==1: return args[0] fj=jopts.fromj(args[0]) if largs==2: try: return jopts.toj(fj[args[1]]) except KeyboardInterrupt: raise except: return None outj=[] for i in xrange(1,largs,2): try: outj+=fj[args[i]:args[i+1]] except KeyboardInterrupt: raise except: pass return jopts.toj(outj)