Exemple #1
0
def farith(*args):
    """
    .. function:: farith(calc) -> float or Fraction

    Takes as input a mathematical expression in polish notation and computes the result using fractional computation

    Every argument is one token of the expression: an operator
    (``+``, ``-``, ``*``, ``/``), a plain number, or a JSON encoded
    ``[numerator, denominator]`` pair. The result is passed through
    ``simplify_fraction`` before it is returned.

    Raises ``functions.OperatorError`` when a token cannot be parsed, when an
    operator does not have two operands available, or when the expression is
    empty.

    Examples:

    >>> sql("select farith('+',5,7)" )
    farith('+',5,7)
    ---------------
    12

    >>> sql("select farith('-','*','/',15,'-',7,'+',1,1,3,'+',2,'+',1,1)" )
    farith('-','*','/',15,'-',7,'+',1,1,3,'+',2,'+',1,1)
    ----------------------------------------------------
    5
    """

    stack = []
    # Prefix notation is evaluated by scanning the tokens right to left: by
    # the time an operator is met both of its operands are already on the
    # stack, the left-hand one on top.
    for token in reversed(args):
        if token in ('*', '/', '-', '+'):
            if len(stack) < 2:
                # Previously surfaced as a bare IndexError from list.pop().
                raise functions.OperatorError(
                    'farith', "invalid expression found: '%s'" % (token, ))
            left = stack.pop()
            right = stack.pop()
            if token == '+':
                stack.append(left + right)
            elif token == '-':
                stack.append(left - right)
            elif token == '/':
                stack.append(left / right)
            else:
                stack.append(left * right)
        elif type(token) in (int, float, long):
            stack.append(Fraction(token))
        else:
            try:
                stack.append(Fraction(*json.loads(token)))
            except (ValueError, TypeError):
                # ValueError: not valid JSON. TypeError: valid JSON that is
                # not a numerator/denominator pair, or a non-string token.
                raise functions.OperatorError(
                    'farith', "invalid expression found: '%s'" % (token, ))

    if not stack:
        raise functions.OperatorError('farith',
                                      "invalid expression found: ''")
    return simplify_fraction(stack.pop())
def maincode(args, boolargs, nonstringargs, needsescape, notsplit, db, func,
             retalways, connectionhandler):
    """Parse the operator arguments and delegate the work to ``doall``.

    The ``query`` keyword argument is mandatory and is passed to ``doall`` as
    its first positional argument; the ``automatic_vtable`` keyword, when
    present, is discarded. Every remaining positional and keyword argument is
    forwarded unchanged.

    Raises ``functions.MadisError`` when argument parsing fails and
    ``functions.OperatorError`` when no ``query`` argument was supplied.
    """
    try:
        largs, kargs = argsparse.parse(args, boolargs, nonstringargs,
                                       needsescape, notsplit)
    except Exception as e:
        raise functions.MadisError(e)

    if 'query' not in kargs:
        # Report the error under the name of the module that defines func.
        opname = func.__globals__['__name__'].rsplit('.')[-1]
        raise functions.OperatorError(opname, "needs query argument ")

    query = kargs.pop('query')
    kargs.pop('automatic_vtable', None)
    return doall(query, db, func, retalways, connectionhandler, *largs,
                 **kargs)
    def step(self, *args):
        """Accumulate one input row into its group.

        ``args[0]`` is the number of grouping columns, ``args[1:grouplen + 1]``
        is the group key and the remaining arguments are the values, each of
        which is appended to its own per-column list in ``self.groupsdict``.
        The layout (``grouplen``, ``numofargs``) is recorded from the first
        row only.
        """
        if self.notchecked:
            if len(args) < 2:
                raise functions.OperatorError("groupsum",
                                              "Wrong number of arguments")
            self.grouplen, self.numofargs = args[0], len(args)
            self.notchecked = False

        first_value = self.grouplen + 1
        key = args[1:first_value]
        columns = self.groupsdict.get(key)
        if columns is None:
            # First row of this group: start one list per value column.
            self.groupsdict[key] = [[value] for value in args[first_value:]]
            return

        for slot, position in enumerate(xrange(first_value, self.numofargs)):
            columns[slot].append(args[position])
Exemple #4
0
def apachelogsplit(*args):
    """
    .. function:: apachelogsplit(apache_log_line) -> [ip, ident, authuser, date, method, uri, httpver, status, bytes, referrer, useragent]

    Breaks a single apache log row into multiple fields.

    Fields logged as ``-`` are returned as null, the date is rewritten to an
    ISO-like form when its month abbreviation is known, and *status* and
    *bytes* are converted to integers.

    Examples:

    >>> table1('''
    ... '1.1.1.1 - - [01/Feb/2001:01:02:03 +0001] "HEAD /test.com HTTP/1.1" 200 - "-" "reftest"'
    ... ''')
    >>> sql("select apachelogsplit(a) from table1")
    ip      | ident | authuser | date                     | method | uri       | httpver | status | bytes | referrer | useragent
    ----------------------------------------------------------------------------------------------------------------------------
    1.1.1.1 | None  | None     | 2001-02-01T01:02:03+0001 | HEAD   | /test.com | 1.1     | 200    | None  | None     | reftest

    """

    # The header row is produced before the input is even looked at.
    yield ('ip', 'ident', 'authuser', 'date', 'method', 'uri', 'httpver', 'status', 'bytes', 'referrer', 'useragent')

    matched = apache_log_split.match(''.join(args).strip())
    if matched is None:
        raise functions.OperatorError("APACHELOGSPLIT", "Row function didn't receive any input")

    fields = [None if part == '-' else part for part in matched.groups()]

    # The month is looked up at offset 4 because the raw value still carries
    # its opening bracket; the brackets are only dropped for known months.
    stamp = fields[3]
    if stamp is not None and stamp[4:7] in months:
        date = stamp[1:-1]
        fields[3] = ''.join((
            date[7:11], '-', months[date[3:6]], '-', date[0:2],
            'T', date[12:14], ':', date[15:17], ':', date[18:20],
            date[21:],
        ))

    # status and bytes become integers unless they were logged as '-'.
    for position in (7, 8):
        if fields[position] is not None:
            fields[position] = int(fields[position])

    yield fields
Exemple #5
0
def reencode(*args):
    """Try to repair text whose UTF-8 bytes were decoded with a one-byte codec.

    The value is encoded back to bytes with ``iso-8859-1`` and, if that does
    not give valid UTF-8, with ``windows-1252``; the first byte string that
    decodes as UTF-8 is returned. When neither attempt works the value is
    returned as text unchanged. A null input gives a null result.

    Raises ``functions.OperatorError`` unless exactly one argument is given.
    """
    if len(args) != 1:
        raise functions.OperatorError("reencode", "operator takes only one arguments")

    if args[0] is None:
        return None

    text = unicode(args[0])
    for codec in ('iso-8859-1', 'windows-1252'):
        try:
            return unicode(text.encode(codec), 'utf-8')
        except Exception:
            # Not representable in this codec, or the bytes are not UTF-8.
            continue
    return text
def var(*args):
    """
    .. function:: var(varname[, value]) -> value

    Sets (if both varname and value are given) or returns (if only varname is given) the contents of a variable.

    Called without arguments it returns the string form of all stored
    variables; called with more than two arguments it returns null.

    Examples:

    >>> sql("var 'v'")  # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    OperatorError: Madis SQLError:
    Operator VAR: Variable 'v' does not exist
    >>> sql("var 'v' 5")
    var('v','5')
    ------------
    5
    >>> sql("var 'v'")
    var('v')
    --------
    5
    >>> sql("select var('v')")
    var('v')
    --------
    5
    """

    store = functions.variables

    if not args:
        return str(store.__dict__)

    name = args[0]
    argcount = len(args)

    if argcount == 1:
        if not hasattr(store, name):
            raise functions.OperatorError(
                'var', "Variable '" + name + "' does not exist")
        return store.__dict__[name]

    if argcount == 2:
        store.__dict__[name] = args[1]
        return store.__dict__[name]

    return None
Exemple #7
0
    def VTiter(self, *parsedArgs, **envars):
        """Yield the authorizer callbacks issued while ``query`` is executed.

        The first yielded row is the schema (operation, paramone, paramtwo,
        databasename, triggerorview); every following row is one recorded
        authorizer call, with the operation code translated through
        ``apsw.mapping_authorizer_function``.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        def authorizer(operation, paramone, paramtwo, databasename,
                       triggerorview):
            """Called when each operation is prepared.  We can return SQLITE_OK, SQLITE_DENY or
            SQLITE_IGNORE"""
            # Record the call and always allow the operation.
            plan.append([
                apsw.mapping_authorizer_function[operation], paramone,
                paramtwo, databasename, triggerorview
            ])
            return apsw.SQLITE_OK

        def buststatementcache():
            # Runs 110 distinct trivial statements; presumably this pushes the
            # query out of the connection's statement cache so that it gets
            # prepared (and authorized) again -- TODO confirm the cache size.
            c = connection.cursor()
            for i in xrange(110):
                list(c.execute("select " + str(i)))

        _, dictargs = self.full_parse(parsedArgs)

        yield [('operation', 'text'), ('paramone', 'text'),
               ('paramtwo', 'text'), ('databasename', 'text'),
               ('triggerorview', 'text')]
        if 'query' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], " needs query argument ")
        query = dictargs['query']

        connection = envars['db']
        plan = []

        buststatementcache()

        cursor = connection.cursor()

        cursor.setexectrace(lambda x, y, z: apsw.SQLITE_DENY)
        connection.setauthorizer(authorizer)
        try:
            cursor.execute(query)
        finally:
            # The authorizer is installed on the shared connection, so it must
            # be removed even when the query fails.
            connection.setauthorizer(None)

        for r in plan:
            yield r
Exemple #8
0
    def step(self, *args):
        """Count one timestamp in the age bucket it falls into.

        ``args[0]`` is a date string; any ``Z`` in it is stripped before it is
        parsed with ``iso8601.parse_date``. The number of whole days between
        now and that date selects the counter to increment: under 30 days the
        month counter, under 90 the trimester, under 180 the semester, under
        360 the year and under 720 the two-years counter. Older dates are not
        counted.

        Raises ``functions.OperatorError`` when called without arguments.
        """
        if not args:
            raise functions.OperatorError("frecencyindex","No arguments")

        # The current time is round-tripped through iso8601 as well,
        # presumably so that both operands of the subtraction are the same
        # kind of datetime -- TODO confirm.
        now = datetime.datetime.now()
        now = iso8601.parse_date(now.strftime("%Y-%m-%d %H:%M:%S"))
        dt = iso8601.parse_date(args[0].replace('Z',''))
        age = (now - dt).days

        if age < 30:
            self.monthCounter += 1
        elif age < 3 * 30:
            self.trimesterCounter += 1
        elif age < 6 * 30:
            self.semesterCounter += 1
        elif age < 12 * 30:
            self.yearCounter += 1
        elif age < 24 * 30:
            self.twoyearsCounter += 1
Exemple #9
0
    def step(self, *args):
        """Add the contribution of one timestamp to ``self.frecency``.

        ``args[0]`` is the date to score. The remaining arguments are read on
        the first call only: one that matches ``re_now`` replaces the
        reference time, any other is converted to ``int`` and replaces the
        default of 100.0 points.

        Raises ``functions.OperatorError`` when called without arguments.
        """
        if not args:
            raise functions.OperatorError("frecency", "No arguments")

        # The trailing arguments are static, so they are parsed only once.
        if not self.initstatic:
            self.initstatic = True
            self.points = 100.0
            self.now = datetime.datetime.now()
            for extra in args[1:]:
                found = re_now.match(extra)
                if not found:
                    self.points = int(extra)
                    continue
                self.now = iso8601.parse_date(found.groupdict()['now'])

        stamp = iso8601.parse_date(args[0])
        self.frecency += self.__decrease(self.now - stamp) * self.points
Exemple #10
0
def jdict(*args):
    """
    .. function:: jdict(key, value, key1, value1) -> jdict

    Returns a jdict of the keys and value pairs.

    Arguments are consumed pairwise in order; every value is decoded with
    ``jopts.fromjsingle`` before it is stored.

    Raises ``functions.OperatorError`` when a key is left without a value.

    Examples:

    >>> sql(''' select jdict('key1', 'val1', 'key2', 'val2') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key1', 'val1', 'key2', 'val2')
    -------------------------------------
    {"key1":"val1","key2":"val2"}

    >>> sql(''' select jdict('key', '{"k1":1,"k2":2}') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key', '{"k1":1,"k2":2}')
    -------------------------------
    {"key":{"k1":1,"k2":2}}

    >>> sql(''' select jdict('key', '["val1", "val2"]') ''') # doctest: +NORMALIZE_WHITESPACE
    jdict('key', '["val1", "val2"]')
    --------------------------------
    {"key":["val1","val2"]}

    >>> sql(''' select jdict('1') ''') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    OperatorError: Madis SQLError:
    Operator JDICT: At least two arguments required

    """

    if len(args) == 1:
        raise functions.OperatorError('jdict',
                                      "At least two arguments required")
    if len(args) % 2:
        # A dangling key used to surface as a bare IndexError.
        raise functions.OperatorError(
            'jdict', "Arguments must be given as key, value pairs")

    result = OrderedDict()

    for key, value in zip(args[0::2], args[1::2]):
        result[key] = jopts.fromjsingle(value)

    return jopts.toj(result)
Exemple #11
0
    def VTiter(self, *parsedArgs, **envars):
        """Turn (id, column, value) rows into one wide row per id.

        The query result is read as triples; consecutive rows that share the
        same first field form one group. The column names are taken from the
        second field of the first group only, and each output row is the group
        id followed by the third field of every row in the group.

        Assumes the input is ordered so that the rows of a group are adjacent
        -- TODO confirm with the callers. An empty query result lets the
        ``StopIteration`` of the first fetch propagate.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument")
        query = dictargs['query']

        cur = envars['db'].cursor()
        c = cur.execute(query, parse=False)

        firstrow = next(c)
        schema = [('_rowid_',), (firstrow[1],)]
        record = [firstrow[0], firstrow[2]]
        pending = []

        # First group: collects the schema together with the first record.
        for row in c:
            if row[0] == firstrow[0]:
                schema.append((row[1],))
                record.append(row[2])
            else:
                # This row already belongs to the second group.
                firstrow = row
                pending = [row[0], row[2]]
                break
        yield schema
        yield record

        if not pending:
            # Only one group existed; the unconditional trailing yield used
            # to emit an empty row here.
            return

        record = pending
        for row in c:
            if row[0] == firstrow[0]:
                record.append(row[2])
            else:
                yield record
                firstrow = row
                record = [row[0], row[2]]
        yield record
Exemple #12
0
    def step(self, *args):
        """Add one value to the running fractional sum ``self.x``.

        ``args[0]`` is either a plain number or a JSON encoded
        ``[numerator, denominator]`` pair. Values that cannot be converted to
        a ``Fraction`` are skipped silently.

        Raises ``functions.OperatorError`` when the first call receives no
        arguments.
        """
        if self.init:
            self.init = False
            if not args:
                raise functions.OperatorError("fsum", "No arguments")

        try:
            value = args[0]
            if type(value) in (int, float, long):
                x = Fraction(value)
            else:
                pair = json.loads(value)
                x = Fraction(pair[0], pair[1])
        except Exception:
            # Best effort: unparsable input does not contribute to the sum.
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they still propagate.
            return

        self.x += x
Exemple #13
0
    def VTiter(self, *parsedArgs, **envars):
        """Assemble the per-level node lists of the query into one structure.

        Each input row holds a level number and the Python literal of a list
        of node dicts. Empty ``leafval`` / ``childnodes`` entries are dropped
        from every node. The nodes of the first row become the result (after
        their ``id`` is removed); the nodes of every later row are merged into
        it by ``recursive_checkchilds``. A single row is produced: the string
        form of the result with single quotes turned into double quotes.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No query argument ")

        cur = envars['db'].cursor()
        rows = cur.execute(dictargs['query'])
        cur.getdescriptionsafe()

        is_first = True

        # The rows are assumed to arrive ordered by node number.
        for entry in rows:
            level = int(entry[0])
            # Nodes of the level at hand, a list of dicts.
            nodes = ast.literal_eval(entry[1])
            for node in nodes:
                for field in ('leafval', 'childnodes'):
                    if str(node[field]) == "":
                        node.pop(field)

            if not is_first:
                recursive_checkchilds(resulttable, nodes, level)
                continue

            for node in nodes:
                node.pop('id')
            resulttable = nodes
            is_first = False

        yield [('result', )]
        yield [str(resulttable).replace("'", "\"")]
Exemple #14
0
def contains(*args):
    """
    .. function:: contains(str1,str2) -> bool

    Returns true if string *str1* contains *str2*.

    Raises ``functions.OperatorError`` unless exactly two arguments are given.

    Examples:

    >>> sql("select contains('test string', 'str') as test  ")
    test
    ----
    1
    >>> sql("select contains('test string', 'nostr') as test  ")
    test
    ----
    0
    """
    if len(args) != 2:
        # The error used to be reported under the operator name "included".
        raise functions.OperatorError("contains", "operator takes exactly two arguments")
    return args[1] in args[0]
Exemple #15
0
def pyfunerrtonul(*args):
    """
    .. function:: pyfunerrtonul(pyfunction, parameters)

    Calls a python function and returns the result. If an error occurs it returns
    *null*.

    *pyfunction* is a dotted name. It is resolved first as a regular module
    path and, if that fails, below the ``libexternal`` package.

    Raises ``functions.OperatorError`` when the function cannot be found.

    >>> sql("select pyfunerrtonul('math.sqrt', -1)")
    pyfunerrtonul('math.sqrt', -1)
    ------------------------------
    None
    >>> sql("select pyfunerrtonul('math.log10', -1)")
    pyfunerrtonul('math.log10', -1)
    -------------------------------
    None
    """

    if len(args) == 0:
        return

    fsplit = args[0].split('.')
    try:
        f = __import__(fsplit[0])
        for i in fsplit[1:]:
            f = f.__dict__[i]
    except Exception:
        try:
            # __import__ of a dotted name returns the top-level package, so
            # the walk starts again from the first component.
            f = __import__('libexternal' + '.' + fsplit[0])
            for i in fsplit:
                f = f.__dict__[i]
        except Exception:
            raise functions.OperatorError("pyfunerrtonul",
                                          "didn't find function: " + args[0])

    try:
        # The result used to be computed and then dropped, so every call
        # returned None.
        return f(*args[1:])
    except Exception:
        return None
Exemple #16
0
    def VTiter(self, *parsedArgs, **envars):
        """Execute ``query`` for its effects and expose only its columns.

        Yields the column description reported by the cursor and then consumes
        every row of the query without yielding any of them.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument ")
        query = dictargs['query']

        c = envars['db'].cursor()
        q = c.execute(query, parse=False)

        try:
            yield list(c.getdescriptionsafe())
        except StopIteration:
            # Reached when getdescriptionsafe() raises StopIteration or when
            # one is thrown into the generator while it is suspended at the
            # yield. The exception is re-raised; the cursor is closed first on
            # a best-effort basis.
            try:
                raise
            finally:
                try:
                    c.close()
                except:
                    pass

        # Drain the query so that it runs to completion; rows are discarded.
        for _ in q:
            pass
Exemple #17
0
def regexprfindall(*args):
    r"""
    .. function:: regexprfindall(pattern,text)

    This function returns *all* matches of *pattern* in text.

    The text is converted to unicode, matching is done with ``re.UNICODE``
    and the matches are returned encoded by ``jopts.tojstrict``.

    Examples:

    >>> sql("select regexprfindall('\w+', 'one')")
    regexprfindall('\w+', 'one')
    ----------------------------
    ["one"]

    >>> sql("select regexprfindall('\w+', 'one two three')")
    regexprfindall('\w+', 'one two three')
    --------------------------------------
    ["one","two","three"]
    """

    if len(args) != 2:
        raise functions.OperatorError('regexprfindall', 'Two parameters should be provided')

    pattern, text = args
    matches = re.findall(pattern, unicode(text), re.UNICODE)
    return jopts.tojstrict(matches)
Exemple #18
0
def sunitouni(*args):
    """
    .. function:: sunitouni(str)

    Returns *str* replacing literal unicode code points to their string representation.

    A null input gives null. When the text is not a valid unicode literal
    body, the argument is returned unchanged.

    Raises ``functions.OperatorError`` unless exactly one argument is given.

    Examples:

    >>> sql("select sunitouni('br\\u00fbl\\u00e9') as test  ")
    test
    -------
    brûlé
    >>> sql("select sunitouni('\\u that is not a str code point') as test  ")
    test
    -----------------------------------
    \\u that is not a str code point
    >>> sql("select sunitouni(null)")
    sunitouni(null)
    ---------------
    None
    >>> sql("select sunitouni(9)")
    sunitouni(9)
    ------------
    9
    """
    if len(args) != 1:
        raise functions.OperatorError("sunitouni",
                                      "operator takes only one arguments")
    if args[0] is None:
        return None

    # SECURITY NOTE(review): the value is spliced into a unicode literal and
    # handed to eval(). Only single quotes are escaped, so an input containing
    # a backslash directly before a quote can close the literal and run
    # arbitrary code. Do not expose this operator to untrusted data without
    # replacing the eval with a real escape decoder.
    literal = "u'%s'" % (str(args[0]).replace("'", "\\'"))
    try:
        return eval(literal)
    except Exception:
        # Not a valid literal body (e.g. a truncated \\u escape).
        return args[0]
Exemple #19
0
def normuni(*args):
    """
    .. function:: normuni(str)

    Returns *str* normalised in the composed unicode normal form (NFC) without replacing
    same look characters. For example this 'À' character can be encoded with one or two
    different characters, :func:`normuni` returns an one-character encoded version. This
    function is important to check true strings equality.

    A null input gives null. Raises ``functions.OperatorError`` unless exactly
    one argument is given.

    Functions :func:`sunitouni` and :func:`unitosuni` are used in the examples to make it more comprehensive.

    Examples:

    .. note::
        Returned results in the next two examples should look the same,
        if not that is a bug at the combined characters rendering of the shell
        that the documentation was created.

    >>> sql("select sunitouni('C\\u0327') as test  ")
    test
    ----
    Ç
    >>> sql("select normuni(sunitouni('C\\u0327')) as test  ")
    test
    ----
    Ç
    >>> sql("select unitosuni(normuni(sunitouni('C\\u0327'))) as test  ")
    test
    ------
    \\u00c7
    """
    # Imported locally so the block does not depend on a module-level import;
    # the previous code referenced a non-standard name, ``strdata``.
    import unicodedata

    if len(args) != 1:
        raise functions.OperatorError("normuni",
                                      "operator takes only one arguments")
    if args[0] is None:
        return None
    return unicodedata.normalize('NFC', args[0])
Exemple #20
0
    def VTiter(self, *parsedArgs, **envars):
        """Yield the result of ``query`` as CSV text, one line per output row.

        The output has a single column named ``c1``. All rows are written
        through ``csv.writer`` into an in-memory buffer first, and the buffer
        is then split into lines.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        self.nonames = True
        self.names = []
        self.types = []

        if 'query' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No query argument ")

        cur = envars['db'].cursor()
        rows = cur.execute(dictargs['query'])

        yield [('c1', )]

        buf = StringIO.StringIO()
        csv.writer(buf).writerows(rows)
        for line in buf.getvalue().splitlines():
            yield (line, )
def requirevars(*args):
    """
    .. function:: requirevars(varname1, [varname2,...])

    Checks if all variables (varname1,...) exist. If not it throws an exception.

    The arguments are joined with spaces and split on whitespace again, so
    several names may also be passed inside a single argument.

    Examples:

    >>> sql("var 'cv1' 5")
    var('cv1','5')
    --------------
    5
    >>> sql("var 'cv2' 10")
    var('cv2','10')
    ---------------
    10
    >>> sql("requirevars 'cv1' 'cv2'")
    requirevars('cv1','cv2')
    ------------------------
    1
    >>> sql("requirevars cv1 cv2")
    requirevars('cv1 cv2')
    ----------------------
    1
    >>> sql("requirevars 'cv1' 'testvar'") # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    OperatorError: Madis SQLError:
    Operator REQUIREVARS: Variable testvar isn't initialized
    """

    for name in ' '.join(args).split():
        if hasattr(functions.variables, name):
            continue
        raise functions.OperatorError("requirevars",
                                      "Variable %s isn't initialized" % name)
    return 1
Exemple #22
0
def regexprmatches(*args):
    """
    .. function:: regexprmatches(pattern, arg)

    This function returns true if the pattern matches arg or false otherwise.

    The pattern may match anywhere inside *arg*; *arg* is converted to
    unicode and matching is done with ``re.UNICODE``.

    Examples use `inversion`.

    Examples:

    >>> sql("regexprmatches '(a)' 'qwer a qwer'  ")
    regexprmatches('(a)','qwer a qwer')
    -----------------------------------
    1

    """
    if len(args) != 2:
        raise functions.OperatorError('regexprmatches', 'Two parameters should be provided')

    pattern, target = args
    return re.search(pattern, unicode(target), re.UNICODE) is not None
def flowname(*args):
    """
    .. function:: flowname([str])

    Sets and retrieves, 'flowname' variable

    With one argument the variable is set to its string form and returned.
    Without arguments the stored value is returned, or null when it has not
    been set yet.

    Examples:

    >>> sql("flowname test flow ")
    flowname('test flow')
    ---------------------
    test flow
    >>> sql("flowname")
    flowname()
    ----------
    test flow
    >>> sql("flowname 'arg1' arg2") # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    OperatorError: Madis SQLError:
    Operator FLOWNAME: Flowname accepts only 1 argument
    """

    if len(args) > 1:
        raise functions.OperatorError('flowname',
                                      'Flowname accepts only 1 argument')

    if args:
        functions.variables.__dict__['flowname'] = ' '.join(
            str(part) for part in args)
        return str(functions.variables.__dict__['flowname'])

    if hasattr(functions.variables, 'flowname'):
        return str(functions.variables.__dict__['flowname'])

    return None
Exemple #24
0
def jintersection(*args):
    """
    .. function:: jintersection(jpackA, jpackB) -> jpack

    Returns the items that appear in every one of the given jpacks, sorted.

    Raises ``functions.OperatorError`` when fewer than two jpacks are given.

    Examples:

    >>> sql("select jintersection('[1,2,3]', '[1,2,3]')") # doctest: +NORMALIZE_WHITESPACE
    jintersection('[1,2,3]', '[1,2,3]')
    -----------------------------------
    [1,2,3]

    >>> sql("select jintersection('[1,2,3]', '[1,3]', 1)") # doctest: +NORMALIZE_WHITESPACE
    jintersection('[1,2,3]', '[1,3]', 1)
    ------------------------------------
    1

    """

    if len(args) < 2:
        raise functions.OperatorError("jintersection","operator needs at least two inputs")

    packs = [set(jopts.fromj(pack)) for pack in args]
    common = packs[0].intersection(*packs[1:])
    return jopts.toj(sorted(common))
    def VTiter(self, *parsedArgs, **envars):
        """Yield the column description of ``query``, one row per column.

        The output schema is (column, type). The query is started and its
        first row is fetched (and ignored) before the description is read;
        an empty result is not an error.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No query argument ")

        query = dictargs['query']
        connection = envars['db']

        yield (('column', 'text'), ('type', 'text'))

        cur = connection.cursor()
        running = cur.execute(query, parse=False)
        try:
            # Step the query once before asking for its description.
            running.next()
        except StopIteration:
            pass

        description = cur.getdescriptionsafe()
        cur.close()

        for column in description:
            yield column
Exemple #26
0
    def VTiter(self, *parsedArgs, **envars):
        """Yield only the columns of ``query`` that are numeric in its first row.

        ``is_number`` is applied to every value of the first row; the schema
        entries of the columns that pass form the output schema. Later rows
        keep the values whose schema entry is part of that output schema.

        Raises ``functions.OperatorError`` when no ``query`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        self.nonames = True
        self.names = []
        self.types = []

        if 'query' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument ")

        cur = envars['db'].cursor()
        c = cur.execute(dictargs['query'])
        schema = cur.getdescriptionsafe()

        first_row = c.next()
        numeric_schema = []
        first_tuple = []
        for position, value in enumerate(first_row):
            if is_number(value):
                numeric_schema.append(schema[position])
                first_tuple.append(value)

        yield tuple(numeric_schema)
        yield tuple(first_tuple)

        for row in c:
            yield [
                col for position, col in enumerate(row)
                if schema[position] in numeric_schema
            ]
Exemple #27
0
    def VTiter(self, *parsedArgs, **envars):
        """Yield tweets matching ``key``, from the local cache or from Twitter.

        The schema row comes first. If ``checkTableMetadata`` returns rows for
        the key, those rows are yielded and nothing else happens. Otherwise a
        search (retweets filtered out, 100 results requested) is run, every
        result is yielded as it is read and the collected rows are stored with
        ``createTable`` under a table named after the key.

        Arguments: ``key`` (required) is the search term; ``rate`` (default 0)
        is handed to ``checkTableMetadata``. The ``sentiment`` column is
        always an empty string.

        Raises ``functions.OperatorError`` when no ``key`` argument is given.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if 'key' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No URL argument ")
        else:
            key = dictargs['key']

        if 'rate' in dictargs:
            rate = dictargs['rate']
        else:
            rate = 0

        schema = [('id', 'text'), ('tweet', 'text'), ('name', 'text'),
                  ('location', 'text'), ('favourites', 'text'),
                  ('screen_name', 'text'), ('friends', 'text'),
                  ('followers', 'text'), ('sentiment', 'text')]
        yield schema

        db = 'db5.db'
        tname = key
        rows = checkTableMetadata(key, rate, tname, db)
        if rows is not None:
            # Serve the table from the cache.
            for r in rows:
                yield r
            return

        # SECURITY NOTE(review): API credentials are hard-coded in source.
        # They should be revoked and loaded from configuration instead.
        api = twitter.Api(
            consumer_key='5vQVQ4B8bUcNGG3WOKr80gPdQ',
            consumer_secret=
            'jkQw1PPQrKcKddBjg6AqYNH3n7cAogXhNTwf4m13urR37zKUdG',
            access_token_key=
            '747542150561341440-RyK8r6AA0iCr3w5cbuNKmcxCDRfdJ42',
            access_token_secret='v5PfDnaLCIRu8KyLmfzXDOrykUtK96mmIwkTQNoUHG7mW'
        )
        results = api.GetSearch(raw_query="l=&q=" + key +
                                "%20-filter%3Aretweets&count=100")

        tuples = []
        # The sentiment lookup over a socket is disabled, so the column stays
        # empty. The leftover ``s.close()`` raised NameError after the last
        # tweet and prevented the table from being stored; it is removed
        # together with the unused host/port values.
        sentiment = ''
        for r in results:
            tweet = unicode(r.text)
            t = (r.id, tweet, r.user.name, r.user.location,
                 r.user.favourites_count, r.user.screen_name,
                 r.user.friends_count, r.user.followers_count, sentiment)
            yield t
            tuples.append(t)

        createTable(db, tname, schema, tuples)  # materialize the table

        # NOTE(review): no matching gc.disable() is visible in this method.
        gc.enable()
Exemple #28
0
    def VTiter(self, *parsedArgs, **envars):
        """Yield the rows of the first table that no other table contains.

        ``largs[0]`` is a comma separated list of at least two table names.
        The tables are read in full and merged on the key column ``cols``
        (a position or a column name of the first table, default 0). Within
        every run of equal keys, the rows of the first table that are absent
        from all the other tables are yielded; duplicates inside a run
        collapse because sets are used.

        The merge relies on ``heapq.merge``, so each table is assumed to be
        already ordered by the key column -- TODO confirm with the callers.

        Raises ``functions.OperatorError`` when fewer than two tables are
        named or when the key column does not exist.
        """
        largs, dictargs = self.full_parse(parsedArgs)

        if len(largs) < 1:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "Not defined union tables ")
        streams = str(largs[0]).split(",")
        if len(streams) < 2:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1],
                "Union tables must be more than one ")

        cursors = []
        execs = []
        for stream in streams:
            cursors.append(envars['db'].cursor())
            # NOTE(review): the table name is concatenated into the SQL text;
            # identifiers cannot be bound as parameters, so callers must pass
            # trusted names only.
            execs.append(cursors[-1].execute("select * from " + str(stream) +
                                             ";"))

        if 'cols' in dictargs:
            try:
                cols = int(dictargs['cols'])
            except ValueError:
                try:
                    cols = [y[0] for y in cursors[0].getdescriptionsafe()
                            ].index(dictargs['cols'])
                except ValueError:
                    raise functions.OperatorError(
                        __name__.rsplit('.')[-1],
                        "Column name does not exists ")
        else:
            cols = 0

        if cols >= len(cursors[0].getdescriptionsafe()):
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "Column position does not exists ")

        def tagged(position, rows):
            # Pair every row with its merge key and the index of its table.
            # A helper is needed because a generator expression written
            # directly in a loop would see only the final loop index; the
            # former hard-coded chain covered just the first five tables.
            return ((row[cols], (position, ) + row) for row in rows)

        execs = [tagged(position, rows) for position, rows in enumerate(execs)]

        try:
            yield list(cursors[0].getdescriptionsafe())
        except StopIteration:
            try:
                raise
            finally:
                try:
                    for cur in cursors:
                        cur.close()
                except:
                    pass

        currentgroup = None
        lists = [[] for _ in streams]
        for k, v in heapq.merge(*execs):
            if currentgroup is None or currentgroup != k:
                # Key changed: flush the finished run before starting anew.
                unionset = set().union(*lists[1:])
                for t in (set(lists[0]) - unionset):
                    yield t

                lists = [[] for _ in streams]

            lists[v[0]].append(tuple(v[1:]))
            currentgroup = k

        unionset = set().union(*lists[1:])
        for t in list(set(lists[0]) - unionset):
            yield t
Exemple #29
0
    def VTiter(self, *parsedArgs, **envars):
        import sklearn

        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No query argument ")
        query = dictargs['query']
        print 'MADIS/QUERY', query
        cur = envars['db'].cursor()
        c = cur.execute(query, parse=False)
        schema = []
        try:
            schema = [x[0] for x in cur.getdescriptionsafe()]
        except StopIteration:
            try:
                raise
            finally:
                try:
                    c.close()
                except:
                    pass

        if 'filename' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No filename provided")
        f = open(dictargs['filename'], 'w')

        if 'initstr' not in dictargs:
            raise functions.OperatorError(
                __name__.rsplit('.')[-1], "No initialization string")
        initstr = dictargs['initstr']
        #-- IMPORT MODULES ---
        import itertools

        ######################################################3
        #from sklearn.cluster import *
        #from sklearn.linear_model import *
        #from sklearn.neighbors import *
        #from sklearn.svm import *
        #from sklearn.naive_bayes import *
        #from sklearn.tree import *
        #from sklearn.ensemble import *
        #from sklearn.model_selection import *
        ########################################################
        ### to specify imports

        # from sklearn.cross_validation import *
        print 'MADIS/sklearn version', sklearn.__version__

        # from sklearn.cluster import AgglomerativeClustering
        import cPickle as cp
        import numpy as np
        # import unicodedata
        import zlib
        # --------------------
        model = eval(initstr)
        print 'MADIS/MODEL:', model
        if 'classname' not in dictargs:
            # raise functions.OperatorError(__name__.rsplit('.')[-1],"No classname argument ")
            trainList = []
            for row in c:
                trainList = [row for row in c]
            train = np.array(trainList).astype(np.float)

            model.fit(train)
            pstr = cp.dumps(model, 2)
            f.write(zlib.compress(pstr, 3))
            yield [('id', ), ('cluster_label', )]
            for i in xrange(0, len(train)):
                yield (i, int(model.labels_[i]))

        else:
            classname = dictargs['classname']
            idclassname = schema.index(classname)

            trainList = []
            targetList = []
            cv_func = ''
            cv = 0
            if 'cv' not in dictargs:
                cv = 5
            else:
                cv = int(dictargs['cv'])

            #Constructing group of samples:

            if 'groupname' in dictargs:
                groupname = ''
                groups = []
                groupname = dictargs['groupname']
                idgroupname = schema.index(groupname)
                groupList = []
                # print 'trainlist:', trainList

            for i, row in enumerate(c):
                trainList.append(
                    list(row[0:idclassname] + row[idclassname + 1:len(row)]))
                targetList.append(int(row[idclassname]))
                if 'groupname' in dictargs:
                    groupList.append(row[idgroupname])
                    groups = np.array(groupList)
                    del trainList[i][idgroupname]
                else:
                    groups = None

            X = np.array(trainList).astype(np.float)
            y = np.array(targetList).astype(np.int)

            preds = []
            pred_probs = []
            # print 'MADIS/GROUPS?: ',groups
            preds = cross_val_predict(model, X, y, cv=cv, groups=groups)
            # pred_probs = cross_val_predict(model, X, y, cv=cv_func,method='predict_proba')
            # if model.probability:
            if hasattr(model, 'probability') and model.probability:
                pred_probs = cross_val_predict(model,
                                               X,
                                               y,
                                               cv=cv,
                                               groups=groups,
                                               method='predict_proba')

            # print 'MADIS/preds',preds
            # print 'MADIS/probs',pred_probs

            #Fit again and Store model in disk:
            model.fit(X, y)
            # pred_probs = model.predict_proba(X)
            pstr = cp.dumps(model, 2)
            f.write(zlib.compress(pstr, 3))
            # print 'MADIS/CLASSNAMES',model.classes_
            # yield tuple(['id','predicted_label'] + ['center'+str(i) for i in xrange(1,len(self.sample[0])+1)])
            # yield [('id',), ('predicted_label',), ('prediction_probability',),([tuple('probability_'+str(i)+',') for i in range(len(model.classes_))])]
            if hasattr(model, 'probability') and model.probability:
                # if model.probability:
                yield [('id', ), ('predicted_label', ),
                       ('prediction_probability', ), ('probs_per_class', )]
                for i in range(len(X)):
                    pred = preds[i]
                    yield (i, int(pred), pred_probs[i][pred],
                           str([
                               pred_probs[i][j]
                               for j in range(len(model.classes_))
                           ]))
                    # yield (i, int(pred), pred_probs[i][pred], [pred_probs[i][j] for j in range(len(model.classes_))])
            else:
                yield [
                    ('id', ),
                    ('predicted_label', ),
                ]
                for i in range(len(X)):
                    pred = preds[i]
                    yield (i, int(pred))
    def VTiter(self, *args,**formatArgs):
        """
        Generator that reads back the file named by the ``file`` argument and
        yields a header/schema item followed by the decoded rows.

        The arguments are parsed with ``self.full_parse(args)``. The named
        arguments consulted here are:

        * ``file`` -- path of the file to read (required).
        * ``mode`` -- on-disk layout to decode; defaults to ``'row'``. The
          layouts handled are ``'spac'``, ``'sorteddictpercol'``,
          ``'dictperval'``, ``'rcstreampax'`` and ``'row'``. Any other value
          makes the generator finish without yielding anything.
        * ``cols`` -- optional column names separated by spaces and/or commas.
          Only the ``'dictperval'`` and ``'rcstreampax'`` layouts look at it;
          when it is absent all columns are returned.

        ``formatArgs`` is accepted but never read in this method.

        Raises ``functions.OperatorError`` when ``file`` is not supplied.

        NOTE(review): the file is opened here and never explicitly closed.
        NOTE(review): most read loops terminate through a bare ``except``, so a
        corrupt block is indistinguishable from a normal end of file.
        NOTE(review): the headers and blocks are loaded with cPickle/marshal
        straight from the file contents -- only safe for trusted files.
        """
        # largs (positional arguments) is not used below.
        largs, dictargs = self.full_parse(args)
        where = None
        mode = 'row'


        if 'file' in dictargs:
            where=dictargs['file']
        else:
            raise functions.OperatorError(__name__.rsplit('.')[-1],"No destination provided")
        if 'mode' in dictargs:
            mode = dictargs['mode']
        # col doubles as an "all columns requested" flag: it stays 0 when an
        # explicit column list was given and becomes 1 otherwise. It is later
        # re-bound as a plain loop variable inside several branches.
        col = 0

        if 'cols' in dictargs:
            a = re.split(' |,| , |, | ,' , dictargs['cols'])
            # Drop the empty strings produced by consecutive separators.
            column = [x for x in a if x != '']
        else:
            col = 1


        # filename, ext and fullpath are computed but not used further down.
        filename, ext=os.path.splitext(os.path.basename(where))
        fullpath=os.path.split(where)[0]
        fileIter=open(where, "rb")
        # Hard-coded query used only by the 'spac' layout: return columns 10 and
        # 11 of the rows whose column 10 equals `value`.
        selectcols = (10,11)
        filtercols = [10]
        value = "1993-01-26"
        indices = []
        if mode == 'spac':
            import msgpack
            # File header: a 4-byte big-endian int, one unsigned byte and the
            # pickled schema. The first two values are read only to advance
            # the file position (blocksize is re-read per block, b is unused).
            blocksize = struct.unpack('!i',fileIter.read(4))
            b = struct.unpack('!B',fileIter.read(1))
            schema = cPickle.load(fileIter)
            colnum = len(schema)
            # found / index_found carry the filter match state across blocks.
            found = 0
            index_found = 0
            blocknum = 0
            # Per-column value dictionaries accumulated across blocks.
            myvals = [[None] for _ in xrange(colnum)]
            myfiltervals = [[None] for _ in xrange(colnum)]
            # The header yielded for this layout is the list of selected column
            # positions as strings, not the names stored in the file schema.
            selectschema = [str(x) for x in selectcols]
            yield selectschema
            # NOTE: `input` shadows the builtin of the same name from here on.
            input = cStringIO.StringIO()
            while True:
                input.truncate(0)
                blocknum += 1
                d = 0
                ind = [0 for _ in xrange(colnum*4+1)]
                try:
                    blocksize = struct.unpack('!i',fileIter.read(4))
                except:
                    # A short read at end of file makes struct.unpack fail,
                    # which is what ends the block loop.
                    break
                # Copy the whole block into memory so it can be seeked freely.
                input.write(fileIter.read(blocksize[0]))
                input.seek(0)
                block_kind = struct.unpack('!B',input.read(1))

                compression_bit = struct.unpack('!B',input.read(1))
                # Block index: colnum*4+1 big-endian ints. As used below, for
                # column c:
                #   ind[c*4]   byte length of the compressed value list
                #   ind[c*4+1] byte length of the compressed pointer array
                #   ind[c*4+2] 0 for a full block, otherwise a differential one
                #   ind[c*4+3] offset of the column data inside the block
                # and the last entry is used as the row count of the block.
                # NOTE: `type` shadows the builtin of the same name.
                type = '!'+'i'*(colnum*4+1)
                ind = list(struct.unpack(type, input.read(4*(colnum*4+1))))
                d2 = [[] for _ in xrange(len(selectcols))]
                d3 = [[] for _ in xrange(len(filtercols))]
                # Iterative binary search over a sorted list. Returns the last
                # probed position together with a flag telling whether `item`
                # was actually found there. Re-defined on every block.
                def binarySearch(alist, item):
						first = 0
						last = len(alist)-1
						found = False
						midpoint = (first + last)//2
						while first<=last and not found:
							midpoint = (first + last)//2
							if alist[midpoint] == item:
								  found = True
							else:
								if item < alist[midpoint]:
									last = midpoint-1
								else:
									first = midpoint+1
						return midpoint, found

### filter evaluation
                # For each filter column, collect in `indices` the row positions
                # of this block whose value equals `value`.
                for index,col in enumerate(filtercols):
					indices = []
					input.seek(ind[col*4+3])

					if ind[col*4+2] == 0: #full data block
						# A full block replaces the column's value dictionary.
						column = msgpack.loads(zlib.decompress(input.read(ind[col*4])))
						myfiltervals[col] = column
						# Pointer width depends on the dictionary size:
						# unsigned byte, unsigned short or int.
						if len(myfiltervals[col])<256:
								listptr = array('B')
						elif len(myfiltervals[col])<65536:
							listptr = array('H')
						else:
							listptr = array('i')
						listptr.fromstring(zlib.decompress(input.read(ind[col*4+1])))
						# Only search when `value` lies inside the dictionary's
						# range; the dictionary is presumably sorted -- confirm.
						if (value >= myfiltervals[col][0] and value<=myfiltervals[col][len(myfiltervals[col])-1]):
							t = binarySearch(myfiltervals[col],value)
							if t[1]:
								found = 1
								index_found = t[0]
								# Rows whose pointer equals the matched
								# dictionary position are the matching rows.
								for j,i in enumerate(listptr):
									if i == index_found:
										indices.append(j)
							else:
								found = 0
						else:
							found = 0

					else: # differential block
						# A differential block carries only the values added
						# to the dictionary since the previous block.
						column = msgpack.loads(zlib.decompress(input.read(ind[col*4])))

						if found:
							# The value was already located in an earlier
							# block: only the dictionary length has to grow,
							# so it is padded with None instead of the values.
							myfiltervals[col] = myfiltervals[col] + [None] * len(column)
							if len(myfiltervals[col])<256:
								listptr = array('B')
							elif len(myfiltervals[col])<65536:
								listptr = array('H')
							else:
								listptr = array('i')
							listptr.fromstring(zlib.decompress(input.read(ind[col*4+1])))
							for j,i in enumerate(listptr):
								if i == index_found:
									indices.append(j)
						else:
							# Not found so far: look for the value among the
							# newly added entries only.
							if (value >= column[0] and value<=column[len(column)-1]):
								t = binarySearch(column,value)
								if t[1]:
									found = 1
									# Position in the cumulative dictionary.
									index_found = t[0] + len(myfiltervals[col])
									if len(myfiltervals[col])+len(column)<256:
										listptr = array('B')
									elif len(myfiltervals[col])+len(column)<65536:
										listptr = array('H')
									else:
										listptr = array('i')
									listptr.fromstring(zlib.decompress(input.read(ind[col*4+1])))
									for j,i in enumerate(listptr):
										if i == index_found:
											indices.append(j)

							# Grow the dictionary length by the number of
							# new entries (placeholders, values not kept).
							myfiltervals[col] = myfiltervals[col] + [None] * len(column)

#### end of filter evaluation

                # Materialise the selected columns for the rows in `indices`.
                for index,col in enumerate(selectcols):
					input.seek(ind[col*4+3])
					column = msgpack.loads(zlib.decompress(input.read(ind[col*4])))
					# Full block: replace the dictionary; differential block:
					# append the new entries to it.
					if ind[col*4+2] == 0:
						myvals[col] = column
					else:
						myvals[col] = myvals[col] + column

					if len(myvals[col])<256:
						listptr = array('B')
					elif len(myvals[col])<65536:
						listptr = array('H')
					else:
						listptr = array('i')

					# No pointer array on a full block: the column holds one
					# value, repeated once per row of the block.
					# NOTE(review): this path ignores `indices`; izip below
					# then truncates to the shortest column -- confirm intent.
					if (ind[col*4+1]==0 and ind[col*4+2] == 0):
						for i in xrange(ind[len(ind)-1]):
							d2[index].append(myvals[col][0])
					else:

						listptr.fromstring(zlib.decompress(input.read(ind[col*4+1])))
						if len(filtercols)>0:
								# Keep only the rows selected by the filter.
								for i in indices:
									d2[index].append(myvals[col][listptr[i]])
						else:
								for i in listptr:
									d2[index].append(myvals[col][i])


                # Transpose the per-column lists into row tuples.
                for row in izip(*d2):
					yield row


        if mode == 'sorteddictpercol':

            # The commented-out code below is an older marshal-based reader that
            # honoured `col`/`column`; only its final branch is still live,
            # which is why the live code sits one indentation level deeper.
#            if col:
#                print "lala"
#                gc.disable()
#                schema = marshal.load(fileIter)
#                colnum = len(schema)
#                cols = [[] for _ in xrange(colnum)]
#                yield schema
#                listptr = [array('H') for _ in xrange(colnum) ]
#                while True:
#                    try:
#                        row=0
#                        d = 0
#                        ind = struct.unpack('L'*(colnum+2), fileIter.read(8*(colnum+2)))
#                        for i in xrange(colnum):
#                            cols[i] = marshal.load(fileIter)
#                            listptr[i].fromfile(fileIter,ind[colnum+1])
#                        for row in xrange (ind[colnum+1]):
#                            tup = [0 for _ in xrange(colnum)]
#                            for col in xrange(colnum):
#                                tup[col] = cols[col][listptr[col][row]]
#                            yield tup
#                            tup = []
#
#                        listptr = [array('H') for _ in xrange(colnum) ]
#                    except:
#                        break
#                gc.enable()
#            elif len(column) == 1:
#                schema = marshal.load(fileIter)
#                colid = [x[0] for x in schema].index(column[0])
#                colnum = len(schema)
#                yield [schema[colid]]
#
#                while True:
#                    try:
#                        ind = struct.unpack('L'*(colnum+2), fileIter.read(8*(colnum+2)))
#                        listptr = array('H')
#                        next=ind[colnum]
#                        fileIter.seek(ind[colid])
#                        col = marshal.load(fileIter)
#                        listptr.fromfile(fileIter,ind[colnum+1])
#                        for c in listptr:
#                            yield(col[c],)
#                        fileIter.seek(next)
#                    except:
#                        break
#
#
#            else:
                import msgpack
                # Header: msgpack-encoded schema, yielded unchanged.
                schema = msgpack.load(fileIter)
                colnum = len(schema)
                yield schema
                # `output` is only truncated below; the writes that used it are
                # commented out.
                output = cStringIO.StringIO()
                # blocknum is 0 while decoding the first block and 1 afterwards.
                blocknum = 0
                # Columns that are stored as plain value lists (no pointer
                # array), decided on the first block.
                paxcols = {}
                while True:
                    try:
                        output.truncate(0)
                        blocksize = struct.unpack('i', fileIter.read(4))
#                        output.write(fileIter.read(blocksize[0]))
#                        output.seek(0)
                        # Block index of colnum*2+1 native unsigned longs. As
                        # used below: ind[c*2] is the compressed size of column
                        # c's values, ind[c*2+1] the compressed size of its
                        # pointer array, and the last entry the row count.
                        # NOTE(review): reading 8 bytes per entry assumes
                        # native 'L' is 8 bytes wide on this platform.
                        ind = list(struct.unpack('L'*(colnum*2+1), fileIter.read(8*(colnum*2+1))))
                        d2 = [[] for _ in xrange(colnum)]
                        for c in xrange(colnum):
                            s = cPickle.loads(zlib.decompress(fileIter.read(ind[c*2])))
                            # A column is taken as a plain value list when, on
                            # the first block, it has more entries than 50% of
                            # the row count; later blocks reuse that decision.
                            if (blocknum == 1 and c in paxcols) or (blocknum == 0 and len(s)>50*1.0*ind[colnum*2]/100):
                                d2[c] = s
                                if blocknum == 0:
                                    paxcols[c]=1
                            else:
                                # Dictionary-encoded column.
                                if len(s)==1:
                                    # Single distinct value: repeat it per row,
                                    # no pointer array is read.
                                    d2[c] = [s[0] for _ in xrange(ind[colnum*2])]
                                elif len(s)<256:
                                    # One-byte pointers into the dictionary.
                                    listptr = array('B')
                                    listptr.fromstring(zlib.decompress(fileIter.read(ind[c*2+1])))
                                    for lala in listptr:
                                        d2[c].append(s[lala])
                                else:
                                    # Two-byte pointers into the dictionary.
                                    listptr = array('H')
                                    listptr.fromstring(zlib.decompress(fileIter.read(ind[c*2+1])))
                                    for lala in listptr:
                                        d2[c].append(s[lala])
                        for row in izip(*d2):
                            yield row
                        blocknum = 1
                    except:
                        # Any failure (normally the short read at end of file)
                        # stops the reader.
                        break


        if mode == 'dictperval':
            if col:
                # All columns requested.
                gc.disable()
                schema = cPickle.load(fileIter)
                colnum = len(schema)
                cols = [[] for _ in xrange(colnum)]
                yield schema
                listptr = [array('H') for _ in xrange(colnum) ]
                while True:
                    try:
                        row=0
                        d = 0
                        # Block index of colnum+3 native unsigned longs; only
                        # ind[colnum+2] (the row count) is used in this branch.
                        # NOTE(review): assumes native 'L' is 8 bytes wide.
                        ind = struct.unpack('L'*(colnum+3), fileIter.read(8*(colnum+3)))
                        # The block stores every column's pickled dictionary
                        # first, followed by every column's pointer array.
                        for i in xrange(colnum):
                            cols[i] = cPickle.load(fileIter)
                        for i in xrange(colnum):
                            listptr[i].fromfile(fileIter,ind[colnum+2])

                        # Rebuild each row by dereferencing the pointers.
                        for row in xrange (ind[colnum+2]):
                            tup = [0 for _ in xrange(colnum)]
                            for col in xrange(colnum):
                                tup[col] = cols[col][listptr[col][row]]
                            yield tup
                            tup = []

                        # Fresh pointer arrays for the next block.
                        listptr = [array('H') for _ in xrange(colnum) ]

                    except:
                        break
                gc.enable()
            elif len(column) == 1:
                # Exactly one column requested.
                schema = cPickle.load(fileIter)
                # Schema entries are indexable; their first item is the name.
                colid = [x[0] for x in schema].index(column[0])
                colnum = len(schema)
                yield [schema[colid]]
                while True:
                    try:
                        # As used below: ind[colid] is the offset of the
                        # requested column's dictionary, ind[colnum] the offset
                        # of the pointer arrays, ind[colnum+1] the offset of
                        # the next block and ind[colnum+2] the row count.
                        ind = struct.unpack('L'*(colnum+3), fileIter.read(8*(colnum+3)))
                        # NOTE: `next` shadows the builtin of the same name.
                        next=ind[colnum+1]
                        fileIter.seek(ind[colid])
                        col = cPickle.load(fileIter)
                        fileIter.seek(ind[colnum])
                        # All pointer arrays are read sequentially although
                        # only the requested column's array is used.
                        listptr = [array('H') for _ in xrange(colnum) ]
                        for i in xrange(colnum):
                            listptr[i].fromfile(fileIter,ind[colnum+2])
                        for c in listptr[colid]:
                            yield(col[c],)
                        fileIter.seek(next)
                    except:
                        break


            else:
                # Several columns requested.
                # NOTE(review): this branch reads the schema and the column
                # data with marshal while the two branches above use cPickle
                # -- confirm which one matches the writer.
                schema = marshal.load(fileIter)
                # Positions of the requested columns inside the schema.
                lcols = []
                for c in column:
                    lcols.append([x[0] for x in schema].index(c))

                colnum = len(schema)
                yield [schema[lcols[i]] for i in xrange(len(lcols))]
                while True:
                    row = 0
                    try:
                        d=0
                        # (colnum+1)*2 little-endian 4-byte values; ind[c*2] is
                        # used as column c's offset and the second-to-last
                        # entry as the offset of the next block.
                        ind = list(struct.unpack("<%dL" % ((colnum+1) * 2), fileIter.read(8*(colnum+1))))
                        next=ind[len(ind)-2]
                        d2 = [[] for _ in xrange(len(lcols))]
                        j = 0
                        for c in lcols:
                            fileIter.seek(ind[c*2])
                            d2[j] = marshal.load(fileIter)
                            j+=1
                        # Emit rows until any column runs out of values: the
                        # failing index lookup sets d, which ends the loop.
                        while True:
                            tup = []
                            for col in xrange(len(lcols)):
                                try:
                                    tup.append(d2[col][row])
                                except :
                                    d = 1
                                    break
                            if d == 1:
                                break
                            yield tup
                            tup = []
                            row+=1
                        fileIter.seek(next)
                    except:
                        break



        if mode == 'rcstreampax':
            if col:
                # All columns requested.
                schema = marshal.load(fileIter)
                colnum = len(schema)
                # Set once a block flagged as the last one has been read.
                ENDFILE = 0
                yield schema

                while True:
                    row=0
                    d = 0
                    ind = [0 for _ in xrange(colnum+2)]

                    if ENDFILE==1:
                        # After a block flagged as last, try to read one more
                        # marshalled object: EOFError ends the reader,
                        # otherwise decoding continues with further blocks.
                        # Presumably this skips the schema of a concatenated
                        # stream -- TODO confirm against the writer.
                        try:
                            marshal.load(fileIter)
                            ENDFILE=0
                        except EOFError:
                            break


                    # colnum+2 native unsigned longs, each kept as the 1-tuple
                    # returned by struct.unpack. Consecutive entries delimit the
                    # compressed bytes of each column; the last entry is the
                    # end-of-stream flag.
                    # NOTE(review): assumes native 'L' is 8 bytes wide.
                    for i in xrange(colnum+2):
                        ind[i] = struct.unpack('L',fileIter.read(8))
                    if ind[colnum+1][0] == 1:
                        ENDFILE = 1

                    d2 = [[] for _ in xrange(colnum)]

                    for col in xrange(colnum):
                        obj = fileIter.read(ind[col+1][0]-ind[col][0])
                        d2[col] = marshal.loads(zlib.decompress(obj))

                    # Emit rows until any column runs out of values.
                    while True:
                        tup = []
                        for col in xrange(colnum):
                            try:
                                tup.append(d2[col][row])
                            except :
                                d = 1
                                break
                        if d == 1:
                            break
                        yield tup
                        tup = []
                        row+=1


            elif len(column) == 1:
                # Exactly one column requested.
                schema = cPickle.load(fileIter)
                colid = [x[0] for x in schema].index(column[0])
                colnum = len(schema)
                yield [schema[colid]]
                while True:
                    try:
                        # (colnum+1)*2 little-endian 4-byte values. The
                        # requested column occupies the bytes between
                        # ind[colid*2] and ind[colid*2+1]; the second-to-last
                        # entry is the offset of the next block.
                        ind = list(struct.unpack("<%dL" % ((colnum+1) * 2), fileIter.read(8*(colnum+1))))
                        next=ind[len(ind)-2]
                        fileIter.seek(ind[colid*2])
                        d2 = cPickle.loads(zlib.decompress(fileIter.read(ind[colid*2+1]-ind[colid*2])))
                        #d2 = cPickle.load(fileIter)
                        for c in d2:
                            yield(c,)
                        fileIter.seek(next)
                    except:
                        break
            else:
                # Several columns requested.
                schema = marshal.load(fileIter)
                lcols = []
                for c in column:
                    lcols.append([x[0] for x in schema].index(c))

                colnum = len(schema)
                yield [schema[lcols[i]] for i in xrange(len(lcols))]
                while True:
                    row = 0
                    try:
                        d=0
                        ind = list(struct.unpack("<%dL" % ((colnum+1) * 2), fileIter.read(8*(colnum+1))))
                        next=ind[len(ind)-2]
                        d2 = [[] for _ in xrange(len(lcols))]
                        j = 0
                        # NOTE(review): `c` here is whatever the
                        # `for c in column` loop above left behind (a column
                        # name), not a position from lcols, and only d2[0] is
                        # ever filled. The matching 'dictperval' branch wraps
                        # these three lines in `for c in lcols:`; this looks
                        # like a lost loop, and the data is read uncompressed
                        # unlike the other 'rcstreampax' branches -- confirm
                        # against the writer before relying on this path.
                        fileIter.seek(ind[c*2])
                        d2[j] = marshal.load(fileIter)
                        j+=1
                        # Emit rows until any column runs out of values.
                        while True:
                            tup = []
                            for col in xrange(len(lcols)):
                                try:
                                    tup.append(d2[col][row])
                                except :
                                    d = 1
                                    break
                            if d == 1:
                                break
                            yield tup
                            tup = []
                            row+=1
                        fileIter.seek(next)
                    except:
                        break


        if mode == 'row':
            try:
                # Header: one pickled object, yielded as the schema.
                d2 =  cPickle.Unpickler(fileIter).load()
                yield d2

                while True:
                    try:
                        # Each chunk is a native 4-byte length followed by that
                        # many bytes holding a zlib-compressed pickled sequence
                        # of rows.
                        s = struct.unpack("i",fileIter.read(4))
                        for row in cPickle.loads(zlib.decompress(fileIter.read(s[0]))):
                            yield row
                    except:
                        # Normally the short read at end of file.
                        break
            except EOFError,e:
                # An empty file yields nothing at all.
                pass