Beispiel #1
0
def raw_query(query_str, tbs):
    '''
    Evaluate a complete raw query string.

    A compound query such as "(q1) UNETELO CN (q2);" is split on its set
    keywords, each parenthesised sub-query is evaluated on its own through
    atomic_query and the partial results are combined by join_query_res.
    Any other query is evaluated as one atomic query.
    :param query_str: the full query text, terminated by ';'
    :param tbs: the tables to query; passed through unchanged to atomic_query
    :return: the value returned by man.cols_to_rows for the result table
    :raises hpr.InvalidQueryError: if the query is empty or is not terminated
                                   by ';'
    '''
    # Trim surrounding whitespace first, so that input such as 'query; ' is
    # both accepted and actually loses its terminator below. An empty string
    # fails this test too, instead of raising IndexError.
    trimmed = query_str.strip()
    if not trimmed.endswith(';'):
        raise hpr.InvalidQueryError('Incomplete query missing ";"')
    # Check overall query (on the text exactly as it was received)
    check_query(query_str)
    # Drop the ';' terminator together with any whitespace left around it,
    # otherwise the q[1:-1] bracket removal below would cut the wrong chars
    body = trimmed.strip(';').strip()
    if not body:
        raise hpr.InvalidQueryError('Empty query')
    if body[0] != '(':
        # Only one expression to evaluate
        result_tb, col_order = atomic_query(body, tbs)
        return man.cols_to_rows(result_tb, col_order)
    # There are joins: the same pattern yields the sub-queries (split) and
    # the keywords between them (findall), in matching order
    set_keywords = (r'\sUNETELO CN\s'
                    r'|\sUNETELO TODO CN\s'
                    r'|\sCOMUN CN\s'
                    r'|\sSACALE\s')
    queries = re.split(set_keywords, body)
    joins = re.findall(set_keywords, body)
    # Sending in substring of atomic query to take away the outer brackets.
    # NOTE(review): sub-queries for which check_query returns a falsy value
    # are skipped here - confirm check_query returns a truthy value on
    # success.
    results = (atomic_query(q[1:-1], tbs)
               for q in queries if check_query(q))
    result_tb, _ = join_query_res(results, joins)
    return man.cols_to_rows(result_tb, ['TODO'])
Beispiel #2
0
def eval_onde(line, tables):
    '''
    Evaluate a ONDE query
    :param line: ONDE query without the 'ONDE ' keyword
    :param tables: a dict {tname: tb}
    :return: a dict {tname: filtered_tb}; when the combined condition is the
             boolean True, the tables argument itself is returned unfiltered
    :raises hpr.TerminateQueryError: if the combined condition is the boolean
                                     False
    :raises hpr.InvalidQueryError: if the line yields no condition at all
    '''
    # Get atomic conditional statement and connectives from line
    atomics, conn = man.split_exprn(line)
    # all the filtered indexes from each condition statement
    f_cols_lst = [eval_cond(cond, tables) for cond in atomics]
    if not f_cols_lst:
        # Nothing to combine; fail as a query error rather than letting an
        # empty reduction surface as a bare TypeError
        raise hpr.InvalidQueryError('Something is wrong with the query')
    # Fold the per-condition results into one, left to right; the i-th
    # connective joins the running result with condition i + 1
    f_cols = f_cols_lst[0]
    for position, next_cols in enumerate(f_cols_lst[1:]):
        f_cols = man.join_filt_indexes(tables.keys(), f_cols, next_cols,
                                       conn[position])
    if not isinstance(f_cols, bool):
        # Ordinary case: f_cols holds filtered indexes to apply to the tables
        return man.filter_tables(tables, f_cols)
    # f_cols is a boolean value, i.e. a single EXISTE(...) clause, so there is
    # no need to filter tables: return them if it is True, else terminate
    if f_cols:
        return tables
    raise hpr.TerminateQueryError('Existe condition is False')
Beispiel #3
0
def get_grouping(line, tb):
    '''
    Partition the row indexes of a table by the value held in one column.
    Rows with equal values in that column land in the same sublist, e.g. a
    column [10, 10, 20, 10] gives [[0, 1, 3], [2]]. Sublists appear in the
    order in which their value is first seen.
    :param line: the grouping column name, i.e. the AGRUPATELOS clause without
                 its keyword; surrounding whitespace is ignored for the lookup
    :param tb: a dict {col_name: [entries]}
    :return: a grouping list [[row indexes], [row indexes]]
    :raises hpr.InvalidQueryError: if the table has no such column
    '''
    column = tb.get(line.strip())
    if column is None:
        # table does not have the column, invalid query
        raise hpr.InvalidQueryError('Cannot find {0} in table for grouping'
                                    .format(line))
    # value -> row indexes holding that value
    classes = col.defaultdict(list)
    for row_index, value in enumerate(column):
        classes[value].append(row_index)
    return list(classes.values())
Beispiel #4
0
def cutoff_rows(line, table):
    '''
    Cut off all the columns of the table by the required number of entries
    :param line: line without 'SOLO'; must hold a non-negative integer
    :param table: a dict {col_name: [entries]}
    :return: a new dict {col_name: [entries]} in which every column keeps at
             most its first N entries
    :raises hpr.InvalidQueryError: if line is not an integer or is negative
    '''
    try:
        num_of_rows = int(line.strip())
    except (TypeError, ValueError):
        # Raise exception if cannot parse line as int, since something is wrong
        raise hpr.InvalidQueryError('Type error at SOLO expression')
    if num_of_rows < 0:
        # A negative bound would slice from the end (entries[:-n]) and drop
        # the last rows instead of limiting the result, so reject it
        raise hpr.InvalidQueryError('SOLO expression cannot be negative')
    return {col_name: entries[:num_of_rows]
            for col_name, entries in table.items()}
Beispiel #5
0
def filter_by_query(tname, col_name, col_entries, query, tables):
    '''
    Run an inner query and filter col_entries against the values that the
    inner result holds under col_name
    :param tname: tname
    :param col_name: column name; must be present in the inner query result
    :param col_entries: column entries
    :param query: inner query
    :param tables: dict{tname: tables}
    :return: the value returned by filter_by_val_list
    :raises hpr.InvalidQueryError: if the inner result has no col_name column;
                                   the error carries the inner query text
    '''
    inner_tb, _ = atomic_query(query, tables)
    inner_entries = inner_tb.get(col_name)
    if inner_entries is None:
        raise hpr.InvalidQueryError(query)
    # The entries are materialised into a list before being handed over
    return filter_by_val_list(tname, col_entries, list(inner_entries))
Beispiel #6
0
def filter_grps(line, tb, grouping):
    '''
    Filter out groups that do not meet filter condition before performing
    column functions on groups in do_col_func_two
    :param line: a TENIENDO condition without ' TENIENDO ' and '()'
    :param tb: a dict {col_name: [entries]}
    :param grouping: a list of [indexes], each sublist is an equivalence class
    :return: a list of [indexes], each equivalence class meets the condition
    :raises hpr.InvalidQueryError: if the condition names no known column
                                   function, or the table lacks the column
    '''
    # split the line into var1, operator, var2
    var1, oper, var2 = split_clauses(line)
    # The operand containing '(' is taken as the function call on a column,
    # the other one as the plain value; remember which side is which so the
    # operator receives its arguments in the order they were written
    func_on_left = '(' in var1
    if func_on_left:
        col_str, val_str = var1, var2
    else:
        col_str, val_str = var2, var1
    # First known function name that appears as 'NAME(' in the column operand
    func = next((name for name in eva.FUNC if name + '(' in col_str), None)
    if func is None:
        # Report a query error instead of failing with a bare IndexError
        raise hpr.InvalidQueryError(
            'No column function found in TENIENDO condition: {0}'
            .format(line))
    col_name = strip_func(col_str)
    if tb.get(col_name) is None:
        raise hpr.InvalidQueryError('Table do not have column: {0}'
                                    .format(col_name))
    # Get the list of entry values of each group after performing function
    values = [eva.FUNC[func](man.get_entries(tb[col_name], grp))
              for grp in grouping]
    # Keep the groups whose function value passes the comparison
    if func_on_left:
        return [grp for grp, val in zip(grouping, values)
                if eva.OPER[oper](var1=val,
                                  var2=hpr.parse_val(val_str))]
    return [grp for grp, val in zip(grouping, values)
            if eva.OPER[oper](var1=hpr.parse_val(val_str),
                              var2=val)]
def check_type_match(type_lst1, type_lst2):
    '''
    Verify that two lists of column types are equal
    :param type_lst1: first list of column types
    :param type_lst2: second list of column types
    :return: None when the lists compare equal
    :raises hpr.InvalidQueryError: if the lists differ
    '''
    mismatch = type_lst1 != type_lst2
    if mismatch:
        raise hpr.InvalidQueryError('Unmatching column types')
def check_num_of_col(tb1, tb2):
    '''
    Verify that two tables have the same number of columns
    :param tb1: first table, an object whose keys() are its column names
    :param tb2: second table, same kind of object as tb1
    :return: None when both tables have the same number of keys
    :raises hpr.InvalidQueryError: if the numbers of keys differ
    '''
    width1 = len(list(tb1.keys()))
    width2 = len(list(tb2.keys()))
    if width1 == width2:
        return
    raise hpr.InvalidQueryError('Unmatching column numbers')