Esempio n. 1
0
def jngrams(*args):
    """
    .. function:: jngrams(n,text) -> jpack

    Converts multiple input arguments into a jpack of ngrams.

    Examples:

    >>> sql("select jngrams(1,'This is a test phrase')")
    jngrams(1,'This is a test phrase')
    -------------------------------------------
    [["This"],["is"],["a"],["test"],["phrase"]]

    >>> sql("select jngrams(2,'This is a test phrase')")
    jngrams(2,'This is a test phrase')
    ---------------------------------------------------------
    [["This","is"],["is","a"],["a","test"],["test","phrase"]]


    """
    if type(args[0]) == int:
        n = args[0]
        text = args[1]
    else:
        n = 1
        text = args[0]
    g = text.split(' ')
    listofngrams = []
    for i in xrange(len(g) - n + 1):
        listofngrams.append(g[i:i + n])
    return jopts.toj(listofngrams)
Esempio n. 2
0
def jfilterempty(*args):
    """
    .. function:: jfilterempty(jpacks.) -> jpack

    Removes from input jpacks all empty elements.

    Examples:

    >>> sql("select jfilterempty('a', '', '[]')")
    jfilterempty('a', '', '[]')
    ---------------------------
    a

    >>> sql("select jfilterempty('a','[null]',3)")
    jfilterempty('a','[null]',3)
    ----------------------------
    ["a",3]

    >>> sql("select jfilterempty('[3]', jpack('b', ''))")
    jfilterempty('[3]', jpack('b', ''))
    -----------------------------------
    [3,"b"]

    """

    return jopts.toj([x for x in jopts.fromj(*args) if x!='' and x!=[] and x!=None])
Esempio n. 3
0
def jngrams(*args):

    """
    .. function:: jngrams(n,text) -> jpack

    Converts multiple input arguments into a jpack of ngrams.

    Examples:

    >>> sql("select jngrams(1,'This is a test phrase')")
    jngrams(1,'This is a test phrase')
    -------------------------------------------
    [["This"],["is"],["a"],["test"],["phrase"]]

    >>> sql("select jngrams(2,'This is a test phrase')")
    jngrams(2,'This is a test phrase')
    ---------------------------------------------------------
    [["This","is"],["is","a"],["a","test"],["test","phrase"]]


    """
    if type(args[0]) == int:
        n = args[0]
        text = args[1]
    else:
        n = 1
        text = args[0]
    g = text.split(' ')
    listofngrams = []
    for i in xrange(len(g)-n+1):
        listofngrams.append(g[i:i+n])
    return jopts.toj(listofngrams)
Esempio n. 4
0
def jfilterempty(*args):
    """
    .. function:: jfilterempty(jpacks.) -> jpack

    Removes from input jpacks all empty elements.

    Examples:

    >>> sql("select jfilterempty('a', '', '[]')")
    jfilterempty('a', '', '[]')
    ---------------------------
    a

    >>> sql("select jfilterempty('a','[null]',3)")
    jfilterempty('a','[null]',3)
    ----------------------------
    ["a",3]

    >>> sql("select jfilterempty('[3]', jpack('b', ''))")
    jfilterempty('[3]', jpack('b', ''))
    -----------------------------------
    [3,"b"]

    """

    return jopts.toj(
        [x for x in jopts.fromj(*args) if x != '' and x != [] and x != None])
Esempio n. 5
0
def jexcept(*args):
    """
    .. function:: jexcept(jpackA, jpackB) -> jpack

    Returns the items of jpackA except the items that appear on jpackB.

    Examples:

    >>> sql("select jexcept('[1,2,3]', '[1,2,3]')") # doctest: +NORMALIZE_WHITESPACE
    jexcept('[1,2,3]', '[1,2,3]')
    -----------------------------
    []

    >>> sql("select jexcept('[1,2,3]', '[1,3]')") # doctest: +NORMALIZE_WHITESPACE
    jexcept('[1,2,3]', '[1,3]')
    ---------------------------
    2

    """

    if len(args) < 2:
        raise functions.OperatorError("jexcept","operator needs at least two inputs")

    b = set(jopts.fromj(args[1]))
    return jopts.toj([x for x in jopts.fromj(args[0]) if x not in b])
Esempio n. 6
0
def jsplitv(*args):

    """
    .. function:: jsplitv(jpacks) -> [C1]

    Splits vertically a jpack.

    Examples:

    >>> sql("select jsplitv(jmerge('[1,2,3]', '[1,2,3]', 'b', 'a', 3 ))") # doctest: +NORMALIZE_WHITESPACE
    C1
    --
    1
    2
    3
    1
    2
    3
    b
    a
    3

    """

    yield ('C1', )

    for j1 in jopts.fromj(*args):
        yield [jopts.toj(j1)]
Esempio n. 7
0
def jmergeregexpnamed(*args):

    """
    .. function:: jmergeregexpnamed(jpacks) -> jpack

    Creates a regular expression that matches all of the jpack's contents with named groups. If the number of
    named groups in a regular expression is greater than 99, then the output will be a jpack of regular expressions.

    Examples:

    >>> sql(''' select jmergeregexpnamed('["abc", "def"]') ''') # doctest: +NORMALIZE_WHITESPACE
    jmergeregexpnamed('["abc", "def"]')
    -----------------------------------
    (abc)|(def)

    """

    inp = jopts.fromj(*args)
    inp.sort()

    out = []
    for g in xrange(0, len(inp), 99):
        out.append('|'.join('('+x+')' for x in inp[g:g+99]))

    return jopts.toj(out)
Esempio n. 8
0
def jpack(*args):
    """
    .. function:: jpack(args...) -> jpack

    Converts multiple input arguments into a single string. Jpacks preserve the types
    of their inputs and are based on JSON encoding. Single values are represented as
    themselves where possible.

    Examples:

    >>> sql("select jpack('a')")
    jpack('a')
    ----------
    a

    >>> sql("select jpack('a','b',3)")
    jpack('a','b',3)
    ----------------
    ["a","b",3]

    >>> sql("select jpack('a', jpack('b',3))")
    jpack('a', jpack('b',3))
    ------------------------
    ["a",["b",3]]

    """

    return jopts.toj(jopts.elemfromj(*args))
Esempio n. 9
0
def jpack(*args):
    """
    .. function:: jpack(args...) -> jpack

    Converts multiple input arguments into a single string. Jpacks preserve the types
    of their inputs and are based on JSON encoding. Single values are represented as
    themselves where possible.

    Examples:

    >>> sql("select jpack('a')")
    jpack('a')
    ----------
    a

    >>> sql("select jpack('a','b',3)")
    jpack('a','b',3)
    ----------------
    ["a","b",3]

    >>> sql("select jpack('a', jpack('b',3))")
    jpack('a', jpack('b',3))
    ------------------------
    ["a",["b",3]]

    """

    return jopts.toj(jopts.elemfromj(*args))
Esempio n. 10
0
 def final(self):
     yield tuple('c'+str(i) for i in xrange(1,self.numofargs))
     for groupkey, sumcols in self.groupsdict.iteritems():
         cols = list(groupkey)
         for col in sumcols:
             try:
                 cols.append(sum(col))
             except TypeError:
                 cols.append(jopts.toj(sorted(set( jopts.fromj(*col) ))))
         yield cols
Esempio n. 11
0
def jdictgroupkey(*args):
    """
    .. function:: jdictgroupkey(list_of_jdicts, groupkey1, groupkey2, ...)

    It groups an array of jdicts into a hierarchical structure. The grouping is done
    first by groupkey1 then by groupkey2 and so on.

    If no groupkeys are provided, then the first key of array's first jdict is used as a groupkey.

    Examples:

    >>> sql('''select jdictgroupkey('[{"gkey":"v1", "b":1},{"gkey":"v1","b":2},{"gkey":"v2","b":1, "c":2}]') as j''')
    j
    ---------------------------------------------
    {"v1":[{"b":1},{"b":2}],"v2":[{"b":1,"c":2}]}

    >>> sql('''select jdictgroupkey('[{"gkey":"v1", "b":1},{"gkey":"v1","b":2},{"gkey":"v2","b":1, "c":2}]', "gkey") as j''')
    j
    ---------------------------------------------
    {"v1":[{"b":1},{"b":2}],"v2":[{"b":1,"c":2}]}

    >>> sql('''select jdictgroupkey('[{"gkey":"v1", "gkey2":"f1", "b":1},{"gkey":"v1", "gkey2":"f2", "b":2},{"gkey":"v1", "gkey2":"f2", "b":1, "c":2}]', "gkey", "gkey2") as j''')
    j
    --------------------------------------------------------------
    {"v1":{"gkey2":{"f1":[{"b":1}],"f2":[{"b":2},{"b":1,"c":2}]}}}

    """

    def recgroupkey(jdict, gkeys):
        outdict=OrderedDict()

        for d in jdict:
            if d[gkeys[0]] not in outdict:
                outdict[d[gkeys[0]]] = [d]
            else:
                outdict[d[gkeys[0]]].append(d)
            del(d[gkeys[0]])

        if len(gkeys)>1:
            outdict = OrderedDict([(x, recgroupkey(y, gkeys[1:])) for x,y in outdict.iteritems()])

        return {gkeys[0]:outdict}

    outdict=OrderedDict()
    dlist=json.loads(args[0], object_pairs_hook=OrderedDict)

    if len(args) == 1:
        groupkeys = [iter(dlist[0]).next()]
    else:
        groupkeys = args[1:]

    outdict = recgroupkey(dlist, groupkeys)

    return jopts.toj(outdict[groupkeys[0]])
Esempio n. 12
0
def nl2j(*args):
    """
    .. function:: nl2j(text) -> jpack

    Converts a text with newlines to a jpack.
    """

    fj = []
    for t in args:
        fj += [x for x in t.split('\n')]

    return jopts.toj(fj)
Esempio n. 13
0
def jdictvals(*args):

    """
    .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack

    If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys).

    If key values are also provided, it returns only the keys that have been provided.

    Examples:

    >>> sql(''' select jdictvals('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    jdictvals('{"k1":1,"k2":2}')
    ----------------------------
    [1,2]

    >>> sql(''' select jdictvals('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE
    jdictvals('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4')
    ------------------------------------------------------
    [3,1,null]
    >>> sql(''' select jdictvals('{"k1":1}') ''') # doctest: +NORMALIZE_WHITESPACE
    jdictvals('{"k1":1}')
    ---------------------
    1
    >>> sql(''' select jdictvals('{"k1":1}') ''') # doctest: +NORMALIZE_WHITESPACE
    jdictvals('{"k1":1}')
    ---------------------
    1
    >>> sql(''' select jdictvals(1) ''') # doctest: +NORMALIZE_WHITESPACE
    jdictvals(1)
    ------------
    1

    """

    if type(args[0]) in (int,float) or args[0][0]!='{' or args[0][-1]!='}':
        return args[0]
    d=json.loads(args[0])
    if len(args)==1:
        d=d.items()
        d.sort(key=operator.itemgetter(1,0))
        vals=[x[1] for x in d]
    else:
        vals=[]
        for i in args[1:]:
            try:
                vals.append(d[i])
            except KeyboardInterrupt:
                raise
            except:
                vals.append(None)
        
    return jopts.toj( vals )
Esempio n. 14
0
def nl2j(*args):
    """
    .. function:: nl2j(text) -> jpack

    Converts a text with newlines to a jpack.
    """

    fj = []
    for t in args:
        fj += [x for x in t.split('\n')]

    return jopts.toj(fj)
def jdictsplit(*args):

    """
    .. function:: jdictvals(jdict, [key1, key2,..]) -> columns

    If only the first argument (jdict) is provided, it returns a row containing the values of input jdict (sorted by the jdict keys).

    If key values are also provided, it returns only the columns of which the keys have been provided.

    Examples:

    >>> sql(''' select jdictsplit('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    k1 | k2
    -------
    1  | 2

    >>> sql(''' select jdictsplit('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE
    k3 | k1 | k4
    --------------
    3  | 1  | None

    """

    d=json.loads(args[0])
    if len(args)==1:
        d=d.items()
        d.sort(key=operator.itemgetter(1,0))
        yield tuple([x[0] for x in d])
        yield [jopts.toj(x[1]) for x in d]
    else:
        vals=[]
        yield tuple(args[1:])
        for i in args[1:]:
            try:
                vals.append(jopts.toj(d[i]))
            except KeyboardInterrupt:
                raise    
            except:
                vals.append(None)
        yield vals
Esempio n. 16
0
def jdictsplit(*args):

    """
    .. function:: jdictsplit(jdict, [key1, key2,..]) -> columns

    If only the first argument (jdict) is provided, it returns a row containing the values of input jdict (sorted by the jdict keys).

    If key values are also provided, it returns only the columns of which the keys have been provided.

    Examples:

    >>> sql(''' select jdictsplit('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    k1 | k2
    -------
    1  | 2

    >>> sql(''' select jdictsplit('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE
    k3 | k1 | k4
    --------------
    3  | 1  | None

    """

    d=json.loads(args[0])
    if len(args)==1:
        d=sorted(d.items())
        yield tuple([x[0] for x in d])
        yield [jopts.toj(x[1]) for x in d]
    else:
        vals=[]
        yield tuple(args[1:])
        for i in args[1:]:
            try:
                vals.append(jopts.toj(d[i]))
            except KeyboardInterrupt:
                raise    
            except:
                vals.append(None)
        yield vals
Esempio n. 17
0
def regexpr(*args):

    """
    .. function:: regexp(pattern,expression[,replacestr])

    This function returns a match to the first parenthesis of *pattern*
    or replaces the matches of *pattern* in *expression* with *replacestr*.
    `Pattern Syntax <http://docs.python.org/library/re.html#re-syntax>`_ is
    according to python's re module.

    Examples use `inversion`.
    
    Examples:

    >>> table1('''
    ... 25
    ... ''')
    
    >>> sql("regexpr 'start\s(\w+)\send' 'start otherword end'  ")
    regexpr('start\s(\w+)\send','start otherword end')
    --------------------------------------------------
    otherword

    >>> sql("regexpr '\W+' '@#$%@$#% tobereplaced @#$%@#$%' 'nonword'  ")
    regexpr('\W+','@#$%@$#% tobereplaced @#$%@#$%','nonword')
    ---------------------------------------------------------
    nonwordtobereplacednonword

    >>> sql("select regexpr('(\w+).*?(\w+)', 'one two three')")
    regexpr('(\w+).*?(\w+)', 'one two three')
    -----------------------------------------
    ["one","two"]
    """
    if len(args) < 2:
        return

    if len(args) == 2:
        a = re.search(args[0], unicode(args[1]), re.UNICODE)
        if a != None:
            if len(a.groups()) > 0:
                return jopts.toj(a.groups())
            else:
                return True
        else:
            return None

    if len(args) == 3:
        try:
            return re.sub(args[0], args[2], args[1], flags=re.UNICODE)
        except TypeError:
            return re.sub(args[0], args[2], args[1])
Esempio n. 18
0
def jdictsplitv(*args):

    """
    .. function:: jdictsplitv(jdict, [key1, key2,..]) -> columns

    If only the first argument (jdict) is provided, it returns rows containing the values of input jdict.

    If key values are also provided, it returns only the columns of which the keys have been provided.

    Examples:

    >>> sql(''' select jdictsplitv('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    key | val
    ---------
    k1  | 1
    k2  | 2

    >>> sql(''' select jdictsplitv('{"k1":1,"k2":2, "k3":3}', 'k3', 'k1', 'k4') ''') # doctest: +NORMALIZE_WHITESPACE
    key | val
    ---------
    k3  | 3
    k1  | 1

    """

    yield ('key', 'val')
    if len(args) == 1:
        dlist = json.loads(args[0], object_pairs_hook=OrderedDict)
        for k, v in dlist.iteritems():
            yield [k, jopts.toj(v)]
    else:
        dlist = json.loads(args[0])
        for k in args[1:]:
            try:
                yield k, jopts.toj(dlist[k])
            except KeyError:
                pass
def jpermutations(*args):

    """
    .. function:: jpermutations(jpack, r) -> multiset

    Returns all length r permutations of jpack.

    Examples:

    >>> sql('''select jpermutations('["t1","t2","t3"]',2)''')
    C1 | C2
    -------
    t1 | t2
    t1 | t3
    t2 | t1
    t2 | t3
    t3 | t1
    t3 | t2

    >>> sql('''select jpermutations('["t1","t2",["t3","t4"]]',2)''')
    C1          | C2
    -------------------------
    t1          | t2
    t1          | ["t3","t4"]
    t2          | t1
    t2          | ["t3","t4"]
    ["t3","t4"] | t1
    ["t3","t4"] | t2

    >>> sql('''select jpermutations(null,2)''')
    C1 | C2
    -----

    >>> sql('''select jpermutations('["t1","t2","t3","t4"]')''')
    C1
    --
    t1
    t2
    t3
    t4
    """

    r=1
    if len(args)==2:
        r=args[1]

    yield tuple(('C'+str(x) for x in xrange(1,r+1)))
    for p in itertools.permutations(jopts.fromj(args[0]), r):
        yield [jopts.toj(x) for x in p]
Esempio n. 20
0
def jset(*args):
    """
    .. function:: jset(jpacks) -> jpack

    Returns a set representation of a jpack, unifying duplicate items.

    Examples:

    >>> sql("select jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE
    jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )
    ----------------------------------------
    [1,2,3,"a","b"]

    """

    return jopts.toj(sorted(set(jopts.fromj(*args))))
Esempio n. 21
0
def jsort(*args):
    """
    .. function:: jsort(jpacks) -> jpack

    Sorts the input jpacks.

    Examples:

    >>> sql("select jsort('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE
    jsort('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )
    -----------------------------------------
    [1,1,2,2,3,3,3,"a","b"]

    """

    return jopts.toj(sorted(jopts.fromj(*args)))
Esempio n. 22
0
def jmerge(*args):
    """
    .. function:: jmerge(jpacks) -> jpack

    Merges multiple jpacks into one jpack.

    Examples:

    >>> sql("select jmerge('[1,2,3]', '[1,2,3]', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE
    jmerge('[1,2,3]', '[1,2,3]', 'a', 3 )
    -------------------------------------
    [1,2,3,1,2,3,"a",3]

    """

    return jopts.toj(jopts.fromj(*args))
Esempio n. 23
0
def jmerge(*args):
    """
    .. function:: jmerge(jpacks) -> jpack

    Merges multiple jpacks into one jpack.

    Examples:

    >>> sql("select jmerge('[1,2,3]', '[1,2,3]', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE
    jmerge('[1,2,3]', '[1,2,3]', 'a', 3 )
    -------------------------------------
    [1,2,3,1,2,3,"a",3]

    """

    return jopts.toj(jopts.fromj(*args))
Esempio n. 24
0
def jset(*args):
    """
    .. function:: jset(jpacks) -> jpack

    Returns a set representation of a jpack, unifying duplicate items.

    Examples:

    >>> sql("select jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )") # doctest: +NORMALIZE_WHITESPACE
    jset('[1,2,3]', '[1,2,3]', 'b', 'a', 3 )
    ----------------------------------------
    [1,2,3,"a","b"]

    """

    return jopts.toj(sorted(set(jopts.fromj(*args))))
Esempio n. 25
0
def jpermutations(*args):

    """
    .. function:: jpermutations(jpack, r) -> multiset

    Returns all length r permutations of jpack.

    Examples:

    >>> sql('''select jpermutations('["t1","t2","t3"]',2)''')
    C1 | C2
    -------
    t1 | t2
    t1 | t3
    t2 | t1
    t2 | t3
    t3 | t1
    t3 | t2

    >>> sql('''select jpermutations('["t1","t2",["t3","t4"]]',2)''')
    C1          | C2
    -------------------------
    t1          | t2
    t1          | ["t3","t4"]
    t2          | t1
    t2          | ["t3","t4"]
    ["t3","t4"] | t1
    ["t3","t4"] | t2

    >>> sql('''select jpermutations(null,2)''')

    >>> sql('''select jpermutations('["t1","t2","t3","t4"]')''')
    C1
    --
    t1
    t2
    t3
    t4
    """

    r=1
    if len(args)==2:
        r=args[1]

    yield tuple(('C'+str(x) for x in xrange(1,r+1)))
    for p in itertools.permutations(jopts.fromj(args[0]), r):
        yield [jopts.toj(x) for x in p]
Esempio n. 26
0
def jrange(num):
    """
    .. function:: jrange(num) -> jrange

    Returns a jrange of integer numbers.

    Examples:

    >>> sql("select jrange(5)")
    jrange('a')
    -----------------
    ["0","1","2","3"]

    """
    jran = [None] * num
    for i in xrange(num):
        jran[i] = str(i)
    return jopts.toj(jran)
Esempio n. 27
0
def jfrequentwords(*args):
    """
    .. function:: jfrequentwords(args...) -> jpack

    Returns the frequent words of a text in a jpack

    """
    wordslist = args[0].split(' ')
    setwords = set(wordslist)
    c = dict.fromkeys(setwords, 0)
    for w in wordslist:
        c[w] += 1
    lenwords = len(setwords)
    extremevals = int(math.ceil(lenwords * 3 * 1.0 / 100))
    frequences = sorted(c.values())[extremevals:(lenwords - extremevals)]
    avgfrequency = math.ceil(sum(frequences) * 1.0 / len(frequences))

    return jopts.toj([k for k, v in c.iteritems() if v >= avgfrequency])
Esempio n. 28
0
def jrange(num):
    """
    .. function:: jrange(num) -> jrange

    Returns a jrange of integer numbers.

    Examples:

    >>> sql("select jrange(5)")
    jrange('a')
    -----------------
    ["0","1","2","3"]

    """
    jran = [None] * num
    for i in xrange(num):
        jran[i] = str(i)
    return jopts.toj(jran)
Esempio n. 29
0
def s2j(*args):
    """
    .. function:: s2j(tabpack) -> jpack

    Converts a space separated pack to a jpack.

    Examples:

    >>> sql("select s2j('1  2 3 ')") # doctest: +NORMALIZE_WHITESPACE
    s2j('1  2 3 ')
    --------------
    ["1","2","3"]
    """

    fj = []
    for t in args:
        fj += [x for x in t.split(' ') if x != '']

    return jopts.toj(fj)
Esempio n. 30
0
def s2j(*args):
    """
    .. function:: s2j(tabpack) -> jpack

    Converts a space separated pack to a jpack.

    Examples:

    >>> sql("select s2j('1  2 3 ')") # doctest: +NORMALIZE_WHITESPACE
    s2j('1  2 3 ')
    --------------
    ["1","2","3"]
    """

    fj = []
    for t in args:
        fj += [x for x in t.split(' ') if x != '']

    return jopts.toj(fj)
Esempio n. 31
0
def jdict(*args):

    """
    .. function:: jdict(key, value, key1, value1) -> jdict

    Returns a jdict of the keys and value pairs.

    Examples:

    >>> sql(''' select jdict('key1', 'val1', 'key2', 'val2') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key1', 'val1', 'key2', 'val2')
    -------------------------------------
    {"key1":"val1","key2":"val2"}

    >>> sql(''' select jdict('key', '{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key', '{"k1":1,"k2":2}')
    -------------------------------
    {"key":{"k1":1,"k2":2}}

    >>> sql(''' select jdict('key', '["val1", "val2"]') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key', '["val1", "val2"]')
    --------------------------------
    {"key":["val1","val2"]}

    >>> sql(''' select jdict('1') ''') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    OperatorError: Madis SQLError:
    Operator JDICT: At least two arguments required

    """

    if len(args)==1:
        raise functions.OperatorError('jdict',"At least two arguments required")

    result = OrderedDict()
    
    for i in xrange(0, len(args), 2):
        result[args[i]] = jopts.fromjsingle(args[i+1])

    return jopts.toj( result )
Esempio n. 32
0
def jflatten(*args):
    """
    .. function:: jflattten(jpacks) -> jpack

    Flattens all nested sub-jpacks.

    Examples:

    >>> sql(''' select jflatten('1', '[2]') ''') # doctest: +NORMALIZE_WHITESPACE
    jflatten('1', '[2]')
    --------------------
    ["1",2]

    >>> sql(''' select jflatten('[["word1", 1], ["word2", 1], [["word3", 2], ["word4", 2]], 3]') ''') # doctest: +NORMALIZE_WHITESPACE
    jflatten('[["word1", 1], ["word2", 1], [["word3", 2], ["word4", 2]], 3]')
    -------------------------------------------------------------------------
    ["word1",1,"word2",1,"word3",2,"word4",2,3]

    """

    return jopts.toj(jopts.flatten(jopts.elemfromj(*args)))
def t2j(*args):

    """
    .. function:: t2j(tabpack) -> jpack

    Converts a tab separated pack to a jpack.

    Examples:

    >>> sql("select t2j(j2t('[1,2,3]'))") # doctest: +NORMALIZE_WHITESPACE
    t2j(j2t('[1,2,3]'))
    -------------------
    ["1","2","3"]

    """
    
    fj=[]
    for t in args:
        fj+=t.split('\t')

    return jopts.toj(fj)
Esempio n. 34
0
def jsplit(*args):
    """
    .. function:: jsplit(jpacks) -> [C1, C2, ...]

    Splits horizontally a jpack.

    Examples:

    >>> sql("select jsplit('[1,2,3]', '[3,4,5]')") # doctest: +NORMALIZE_WHITESPACE
    C1 | C2 | C3 | C4 | C5 | C6
    ---------------------------
    1  | 2  | 3  | 3  | 4  | 5

    """

    fj = [jopts.toj(x) for x in jopts.fromj(*args)]

    if fj == []:
        yield ('C1',)

    yield tuple(['C' + str(x) for x in xrange(1, len(fj) + 1)])
    yield fj
Esempio n. 35
0
def jintersection(*args):
    """
    .. function:: jintersection(jpackA, jpackB) -> jpack

    Returns the items of jpackA except the items that appear on jpackB.

    Examples:

    >>> sql("select jintersection('[1,2,3]', '[1,2,3]')") # doctest: +NORMALIZE_WHITESPACE
    jintersection('[1,2,3]', '[1,2,3]')
    -----------------------------------
    [1,2,3]

    >>> sql("select jintersection('[1,2,3]', '[1,3]', 1)") # doctest: +NORMALIZE_WHITESPACE
    jintersection('[1,2,3]', '[1,3]', 1)
    ------------------------------------
    1

    """

    if len(args) < 2:
        raise functions.OperatorError("jintersection","operator needs at least two inputs")

    return jopts.toj(sorted(set.intersection(*[set(jopts.fromj(x)) for x in args])))
Esempio n. 36
0
 def final(self):
     return jopts.toj(self.outgroup)
Esempio n. 37
0
 def final(self):
     yield tuple(['C'+str(i) for i in xrange(1, self.lenargs)] )
     yield [jopts.toj(list(i)) for i in self.vals]
Esempio n. 38
0
    """
    wordslist = args[0].split(' ')
    setwords = set(wordslist)
    c = dict.fromkeys(setwords, 0)
    for w in wordslist:
        c[w]+=1
    lenwords = len(setwords)
    extremevals = int(math.ceil(lenwords * 3 * 1.0/100))
    frequences = sorted(c.values())[extremevals:(lenwords-extremevals)]
    try:
        avgfrequency = math.ceil(sum(frequences)*1.0/len(frequences))
    except Exception,e:
        avgfrequency = 0.0

    return jopts.toj([k for k,v in c.iteritems() if v >= avgfrequency])

jfrequentwords.registered=True

def jsonstrict(*args):

    """
    .. function:: jsonstrict(args...) -> json string

    Sometimes we wish to process json lists from another application. Jsonstrict function
    tries to always create json compatible lists. So it always returns json lists.

    Examples:

    >>> sql("select jsonstrict('a')")
    jsonstrict('a')
Esempio n. 39
0
        i=args[0]
        try:
            if i[0]=='{' and i[-1]=='}':
                keys=[x for x in json.loads(i, object_pairs_hook=OrderedDict).iterkeys()]
        except TypeError,e:
            pass
    else:
        keys=OrderedDict()
        for i in args:
            try:
                if i[0]=='{' and i[-1]=='}':
                    keys.update([(x,None) for x in json.loads(i, object_pairs_hook=OrderedDict).iterkeys()])
            except TypeError,e:
                pass
        keys=list(keys)
    return jopts.toj( keys )

jdictkeys.registered=True

def jdictvals(*args):

    """
    .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack

    If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys).

    If key values are also provided, it returns only the keys that have been provided.

    Examples:

    >>> sql(''' select jdictvals('{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
Esempio n. 40
0
def jsonpath(*args):

    """
    .. function:: jsonpath(JSON, jsonpathexpr1, jsonpathexpr2) -> multiset

    Uses jsonpath expressions to pick values from inside a JSON input. If the outputs of all JSONpath expressions
    have the same number of elements in them, it splits the output into multiple rows.

    .. note::

        For more on JSONpath see: http://goessner.net/articles/JsonPath/

    Examples:

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1') ''')
    C1
    -------------------------------------------------------
    [{"name":"n1","value":"v1"},{"name":"n2","value":"v2"}]

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name') ''')
    C1
    --
    n1
    n2

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].value') ''')
    C1 | C2
    -------
    n1 | v1
    n2 | v2

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].nonexisting') ''')
    C1 | C2
    ---------
    n1 | None
    n2 | None

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2"}]}', '$.d1[*].name', '$.d1[*].value') ''')
    C1          | C2
    ----------------
    ["n1","n2"] | v1
    
    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.nonexisting') ''')


    """

    j = json.loads(args[0])

    yield tuple( ('C'+str(x)for x in xrange( 1,len(args) ) )   )
    output=[libjsonpath(j, jp, use_eval=False) for jp in args[1:]]

    l=0
    lchanges=0
    for i in output:
        try:
            if len(i)!=l:
                l=len(i)
                lchanges+=1
        except TypeError:
            pass

    if l==0:
        return

    if lchanges>1:
        yield [jopts.toj(x) if type(x)!=bool else None for x in output]
    else:
        for i in xrange(l):
            yield [jopts.toj(x[i]) if type(x)!=bool else None for x in output]
Esempio n. 41
0
def jsonpath(*args):
    """
    .. function:: jsonpath(JSON, jsonpathexpr1, jsonpathexpr2) -> multiset

    Uses jsonpath expressions to pick values from inside a JSON input. If the outputs of all JSONpath expressions
    have the same number of elements in them, it splits the output into multiple rows.

    .. note::

        For more on JSONpath see: http://goessner.net/articles/JsonPath/

    Examples:

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1') ''')
    C1
    -------------------------------------------------------
    [{"name":"n1","value":"v1"},{"name":"n2","value":"v2"}]

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name') ''')
    C1
    --
    n1
    n2

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].value') ''')
    C1 | C2
    -------
    n1 | v1
    n2 | v2

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.d1[*].name', '$.d1[*].nonexisting') ''')
    C1 | C2
    ---------
    n1 | None
    n2 | None

    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2"}]}', '$.d1[*].name', '$.d1[*].value') ''')
    C1          | C2
    ----------------
    ["n1","n2"] | v1
    
    >>> sql('''select jsonpath('{"d1":[{"name":"n1", "value":"v1"}, {"name":"n2", "value":"v2"}]}', '$.nonexisting') ''')


    """
    try:
        j = json.loads(args[0])
    except:
        try:
            j = json.loads(utf8clean(args[0]))
        except:
            import sys
            sys.stderr.write(args[0])
            error = 'Error in input line: ' + args[0]
            raise

    yield tuple(('C' + str(x) for x in xrange(1, len(args))))
    output = [libjsonpath(j, jp, use_eval=False) for jp in args[1:]]

    l = 0
    lchanges = 0
    for i in output:
        try:
            if len(i) != l:
                l = len(i)
                lchanges += 1
        except TypeError:
            pass

    if l == 0:
        return
    try:
        if lchanges > 1:
            yield [jopts.toj(x) if type(x) != bool else None for x in output]
        else:
            for i in xrange(l):
                yield [
                    jopts.toj(x[i]) if type(x) != bool else None
                    for x in output
                ]
    except:
        import sys
        sys.stderr.write(args[0])
        error = 'Error in input line: ' + args[0]
        raise Exception(error)
Esempio n. 42
0
def jsplice(*args):
    """
    .. function:: jsplice(jpack, range1_start, range1_end, ...) -> jpack

    Splices input jpack. If only a single range argument is provided, it returns input jpack's element in provided position. If defined position
    index is positive, then it starts counting from the beginning of input jpack. If defined position is negative it starts counting from the
    end of input jpack.

    If more than one range arguments are provided, then the arguments are assumed to be provided in pairs (start, end) that define ranges inside
    the input jpack that should be joined together in output jpack.

    Examples:

    >>> sql(''' select jsplice('[1,2,3,4,5]',0) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',0)
    ------------------------
    1

    >>> sql(''' select jsplice('[1,2,3,4,5]',-1) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',-1)
    -------------------------
    5

    >>> sql(''' select jsplice('[1,2,3,4,5]',10) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',10)
    -------------------------
    None

    >>> sql(''' select jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]', 0, 3, 0, 2)
    ----------------------------------
    [1,2,3,1,2]

    >>> sql(''' select jsplice('[1,2,3,4,5]', 2, -1) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]', 2, -1)
    -----------------------------
    [3,4]

    """

    largs = len(args)
    if largs == 1:
        return args[0]

    fj = jopts.fromj(args[0])

    if largs == 2:
        try:
            return jopts.toj(fj[args[1]])
        except KeyboardInterrupt:
            raise
        except:
            return None

    outj = []
    for i in xrange(1, largs, 2):
        try:
            outj += fj[args[i]:args[i + 1]]
        except KeyboardInterrupt:
            raise
        except:
            pass

    return jopts.toj(outj)
Esempio n. 43
0
                    x for x in json.loads(
                        i, object_pairs_hook=OrderedDict).iterkeys()
                ]
        except TypeError, e:
            pass
    else:
        keys = OrderedDict()
        for i in args:
            try:
                if i[0] == '{' and i[-1] == '}':
                    keys.update([(x, None) for x in json.loads(
                        i, object_pairs_hook=OrderedDict).iterkeys()])
            except TypeError, e:
                pass
        keys = list(keys)
    return jopts.toj(keys)


jdictkeys.registered = True


def jdictvals(*args):
    """
    .. function:: jdictvals(jdict, [key1, key2,..]) -> jpack

    If only the first argument (jdict) is provided, it returns a jpack of the values of input jdict (sorted by the jdict keys).

    If key values are also provided, it returns only the keys that have been provided.

    Examples:
Esempio n. 44
0
def jsplice(*args):

    """
    .. function:: jsplice(jpack, range1_start, range1_end, ...) -> jpack

    Splices input jpack. If only a single range argument is provided, it returns input jpack's element in provided position. If defined position
    index is positive, then it starts counting from the beginning of input jpack. If defined position is negative it starts counting from the
    end of input jpack.

    If more than one range arguments are provided, then the arguments are assumed to be provided in pairs (start, end) that define ranges inside
    the input jpack that should be joined together in output jpack.

    Examples:

    >>> sql(''' select jsplice('[1,2,3,4,5]',0) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',0)
    ------------------------
    1

    >>> sql(''' select jsplice('[1,2,3,4,5]',-1) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',-1)
    -------------------------
    5

    >>> sql(''' select jsplice('[1,2,3,4,5]',10) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]',10)
    -------------------------
    None

    >>> sql(''' select jsplice('[1,2,3,4,5]', 0, 3, 0, 2) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]', 0, 3, 0, 2)
    ----------------------------------
    [1,2,3,1,2]

    >>> sql(''' select jsplice('[1,2,3,4,5]', 2, -1) ''') # doctest: +NORMALIZE_WHITESPACE
    jsplice('[1,2,3,4,5]', 2, -1)
    -----------------------------
    [3,4]

    """

    largs=len(args)
    if largs==1:
        return args[0]

    fj=jopts.fromj(args[0])

    if largs==2:
        try:
            return jopts.toj(fj[args[1]])
        except KeyboardInterrupt:
            raise
        except:
            return None

    outj=[]
    for i in xrange(1,largs,2):
        try:
            outj+=fj[args[i]:args[i+1]]
        except KeyboardInterrupt:
            raise           
        except:
            pass

    return jopts.toj(outj)