Example #1
0
def open(rootdir, mode='a'):
    # ----------------------------------------------------------------------
    # https://github.com/Blosc/bcolz/blob/master/bcolz/toplevel.py#L104-L132
    # ----------------------------------------------------------------------
    """
    open(rootdir, mode='a')

    Open a disk-based carray/ctable as a bquery object, so that queries
    can be performed on it.

    Parameters
    ----------
    rootdir : pathname (string)
        The directory hosting the carray/ctable object.
    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

    Returns
    -------
    out : a bquery ctable when `rootdir` contains a ROOTDIRS entry,
        otherwise a bquery carray.  Any error raised by the chosen
        constructor propagates to the caller (presumably an IOError when
        no object is found -- not verified here).

    """
    # The presence of the ROOTDIRS entry is what tells a ctable directory
    # apart; everything else is handed to the carray constructor.
    has_rootdirs = os.path.exists(os.path.join(rootdir, ROOTDIRS))
    opener = bquery.ctable if has_rootdirs else bquery.carray
    return opener(rootdir=rootdir, mode=mode)
Example #2
0
    def test_where_terms00(self):
        """
        test_where_terms00: get terms in one column bigger than a certain value
        """
        n_rows = 20000
        threshold = 10000

        # table to filter: two identical integer columns (numpy names the
        # fields of an 'i8,i8' dtype 'f0' and 'f1') holding 0 .. n_rows - 1
        data = np.fromiter(((v, v) for v in range(n_rows)), dtype='i8,i8')
        ct = bquery.ctable(data, rootdir=self.rootdir)

        # expected mask: True exactly where the value exceeds the threshold
        expected_mask = np.fromiter((v > threshold for v in range(n_rows)),
                                    dtype='bool')
        ref_result = bquery.carray(expected_mask)

        # apply the single "greater than" term and compare with the reference
        result = ct.where_terms([('f0', '>', threshold)])
        assert_array_equal(result, ref_result)
Example #3
0
    def test_where_terms00(self):
        """
        test_where_terms00: get terms in one column bigger than a certain value
        """
        row_count = 20000
        lower_bound = 10000

        # reference: boolean mask that is True for values above the bound
        mask_values = ((value > lower_bound) for value in range(row_count))
        ref_result = bquery.carray(np.fromiter(mask_values, dtype='bool'))

        # input table: both columns hold the row number
        pairs = ((value, value) for value in range(row_count))
        table = bquery.ctable(np.fromiter(pairs, dtype='i8,i8'),
                              rootdir=self.rootdir)

        # filter on the first column and check against the reference mask
        terms = [('f0', '>', lower_bound)]
        assert_array_equal(table.where_terms(terms), ref_result)
Example #4
0
    def where_terms(self, term_list):
        """
        TEMPORARY WORKAROUND TILL NUMEXPR WORKS WITH IN
        where_terms(term_list)

        Build a boolean mask that is True for the rows where every term of
        `term_list` holds (the terms are combined with a logical AND).

        A terms list has a [(col, operator, value), ..] construction.
        Eg. [('sales', '>', 2), ('state', 'in', ['IL', 'AR'])]

        Operators are lower-cased; the postgres style '=' and '<>' are
        translated to '==' and '!='.  Each term is handled in one of three
        ways:

          * 'in' / 'not in' with a single value becomes an '==' / '!='
            sub-expression; with several values it is evaluated through
            `ctable_ext.carray_is_in`
          * operators listed in `opMap` are evaluated row by row with the
            function returned by `getOperatorFunction`
          * any other operator is written into one expression string that
            is passed to `self.eval`

        :param term_list: list, set or tuple of (col, operator, value) terms
        :return: the boolean mask.  It is a `bquery.carray` when at least
            one term was evaluated outside of `self.eval`, the result of
            `self.eval` when only expression terms were given, and a numpy
            array of ones when `term_list` is empty.
        :raise ValueError: if `term_list` is not a list, set or tuple, or
            if the value of an 'in' / 'not in' term is not a non-empty
            list, set or tuple
        """

        if not isinstance(term_list, (list, set, tuple)):
            raise ValueError("Only term lists are supported")

        list_operators = ('in', 'not in')
        eval_parts = []  # sub-expressions that are AND-ed for self.eval
        eval_list = []   # terms that need to be evaluated outside self.eval

        for term in term_list:
            filter_col = term[0]
            filter_operator = term[1].lower()
            filter_value = term[2]

            # convert postgres eq and ne to python equivalents
            if filter_operator == '=':
                filter_operator = '=='
            elif filter_operator == '<>':
                filter_operator = '!='

            if filter_operator in list_operators:
                # Check input
                if not isinstance(filter_value, (list, set, tuple)):
                    raise ValueError("In selections need lists, sets or tuples")

                if len(filter_value) < 1:
                    raise ValueError("A value list needs to have values")

                if len(filter_value) == 1:
                    # a single value is a plain (in)equality: handle as eval
                    if filter_operator == 'not in':
                        comparison = '!='
                    else:
                        comparison = '=='

                    # sets cannot be indexed, so take the value by iteration
                    single_value = next(iter(filter_value))

                    if isinstance(single_value, str):
                        single_value = '"' + single_value + '"'
                    else:
                        single_value = str(single_value)

                    eval_parts.append(
                        '(' + filter_col + ' ' + comparison + ' '
                        + single_value + ')'
                    )
                else:
                    eval_list.append(
                        (filter_col, filter_operator, set(filter_value))
                    )

            elif filter_operator in opMap:
                eval_list.append(
                    (filter_col, filter_operator, filter_value)
                )

            else:
                # direct filters should be added to the eval string
                # NOTE(review): the value is inserted with str(), so string
                # values are not quoted here -- presumably callers pass them
                # already quoted; verify against the callers.
                eval_parts.append(
                    '(' + filter_col + ' ' + filter_operator + ' '
                    + str(filter_value) + ')'
                )

        eval_string = ' & '.join(eval_parts)

        # (1) Evaluate terms in eval
        if eval_string:
            boolarr = self.eval(eval_string)
            if eval_list:
                # convert to numpy array so it can be updated in place below
                boolarr = boolarr[:]
        else:
            boolarr = np.ones(self.size, dtype=bool)

        # (2) Evaluate other terms like 'in' or 'not in' ...
        for name, operator, value in eval_list:
            col = self.cols[name]

            if operator in list_operators:
                reverse = operator == 'not in'
                ctable_ext.carray_is_in(col, value, boolarr, reverse)
            else:
                # only opMap operators are left at this point
                op_func = getOperatorFunction(operator)
                for i, row in enumerate(col.iter()):
                    # compare the row value (not the column name)
                    if not op_func(row, value):
                        boolarr[i] = False

        if eval_list:
            # convert boolarr back to carray
            boolarr = bquery.carray(boolarr)

        return boolarr
Example #5
0
    def where_terms(self, term_list):
        """
        TEMPORARY WORKAROUND TILL NUMEXPR WORKS WITH IN
        where_terms(term_list)

        Build a boolean mask that is True for the rows where every term of
        `term_list` holds (the terms are combined with a logical AND).

        A terms list has a [(col, operator, value), ..] construction.
        Eg. [('sales', '>', 2), ('state', 'in', ['IL', 'AR'])]

        Operators are lower-cased; the postgres style '=' and '<>' are
        translated to '==' and '!='.  Each term is handled in one of three
        ways:

          * 'in' / 'not in' with a single value becomes an '==' / '!='
            sub-expression; with several values it is evaluated through
            `ctable_ext.carray_is_in`
          * operators listed in `opMap` are evaluated row by row with the
            function returned by `getOperatorFunction`
          * any other operator is written into one expression string that
            is passed to `self.eval`

        :param term_list: list, set or tuple of (col, operator, value) terms
        :return: the boolean mask.  It is a `bquery.carray` when at least
            one term was evaluated outside of `self.eval`, the result of
            `self.eval` when only expression terms were given, and a numpy
            array of ones when `term_list` is empty.
        :raise ValueError: if `term_list` is not a list, set or tuple, or
            if the value of an 'in' / 'not in' term is not a non-empty
            list, set or tuple
        """

        if not isinstance(term_list, (list, set, tuple)):
            raise ValueError("Only term lists are supported")

        list_operators = ('in', 'not in')
        eval_parts = []  # sub-expressions that are AND-ed for self.eval
        eval_list = []   # terms that need to be evaluated outside self.eval

        for term in term_list:
            filter_col = term[0]
            filter_operator = term[1].lower()
            filter_value = term[2]

            # convert postgres eq and ne to python equivalents
            if filter_operator == '=':
                filter_operator = '=='
            elif filter_operator == '<>':
                filter_operator = '!='

            if filter_operator in list_operators:
                # Check input
                if not isinstance(filter_value, (list, set, tuple)):
                    raise ValueError(
                        "In selections need lists, sets or tuples")

                if len(filter_value) < 1:
                    raise ValueError("A value list needs to have values")

                if len(filter_value) == 1:
                    # a single value is a plain (in)equality: handle as eval
                    if filter_operator == 'not in':
                        comparison = '!='
                    else:
                        comparison = '=='

                    # sets cannot be indexed, so take the value by iteration
                    single_value = next(iter(filter_value))

                    if isinstance(single_value, str):
                        single_value = '"' + single_value + '"'
                    else:
                        single_value = str(single_value)

                    eval_parts.append(
                        '(' + filter_col + ' ' + comparison + ' '
                        + single_value + ')')
                else:
                    eval_list.append(
                        (filter_col, filter_operator, set(filter_value)))

            elif filter_operator in opMap:
                eval_list.append((filter_col, filter_operator, filter_value))

            else:
                # direct filters should be added to the eval string
                # NOTE(review): the value is inserted with str(), so string
                # values are not quoted here -- presumably callers pass them
                # already quoted; verify against the callers.
                eval_parts.append(
                    '(' + filter_col + ' ' + filter_operator + ' '
                    + str(filter_value) + ')')

        eval_string = ' & '.join(eval_parts)

        # (1) Evaluate terms in eval
        if eval_string:
            boolarr = self.eval(eval_string)
            if eval_list:
                # convert to numpy array so it can be updated in place below
                boolarr = boolarr[:]
        else:
            boolarr = np.ones(self.size, dtype=bool)

        # (2) Evaluate other terms like 'in' or 'not in' ...
        for name, operator, value in eval_list:
            col = self.cols[name]

            if operator in list_operators:
                reverse = operator == 'not in'
                ctable_ext.carray_is_in(col, value, boolarr, reverse)
            else:
                # only opMap operators are left at this point
                op_func = getOperatorFunction(operator)
                for i, row in enumerate(col.iter()):
                    # compare the row value (not the column name)
                    if not op_func(row, value):
                        boolarr[i] = False

        if eval_list:
            # convert boolarr back to carray
            boolarr = bquery.carray(boolarr)

        return boolarr