def raw_query(query_str, tbs):
    '''
    Validate a raw query string, break it into atomic queries, evaluate each
    one and combine the results.

    A query whose text starts with '(' is treated as a compound query: it is
    split on the set connectives (UNETELO CN, UNETELO TODO CN, COMUN CN,
    SACALE), every bracketed sub-query accepted by check_query is evaluated
    with atomic_query, and the partial results are merged by join_query_res.
    Any other query is evaluated as one atomic query.

    :param query_str: raw query text; must be terminated by ';'. Whitespace
                      around the query and around the terminator is ignored
    :param tbs: tables, passed through unchanged to atomic_query
    :return: the result of man.cols_to_rows on the final table
    :raises hpr.InvalidQueryError: if the query is empty or is not terminated
                                   by ';'
    '''
    # One pattern shared by split and findall so the two can never drift apart
    connectives = (r'\sUNETELO CN\s'
                   r'|\sUNETELO TODO CN\s'
                   r'|\sCOMUN CN\s'
                   r'|\sSACALE\s')

    # endswith() is also False for an empty / whitespace-only query, which
    # would otherwise blow up with an IndexError
    stripped = query_str.strip()
    if not stripped.endswith(';'):
        raise hpr.InvalidQueryError('Incomplete query missing ";"')

    # Check overall query (the untouched text, exactly as the caller gave it)
    check_query(query_str)

    # Drop the terminator from the whitespace-normalised text, then drop any
    # whitespace that stood between the query and the ';'
    body = stripped.strip(';').strip()

    if body.startswith('('):
        # Compound query: sub-queries and the connectives between them
        queries = re.split(connectives, body)
        joins = re.findall(connectives, body)
        # q[1:-1] removes the outer brackets of each sub-query
        results = (atomic_query(q[1:-1], tbs)
                   for q in queries if check_query(q))
        result_tb, _ = join_query_res(results, joins)
        return man.cols_to_rows(result_tb, ['TODO'])

    # Only one expression to evaluate
    result_tb, col_order = atomic_query(body, tbs)
    return man.cols_to_rows(result_tb, col_order)
def eval_onde(line, tables):
    '''
    Evaluate a ONDE query
    :param line: ONDE query without the 'ONDE ' keyword
    :param tables: a dict {tname: tb}
    :return: the result of man.filter_tables(tables, ...) when the clause
             reduces to a non-boolean value; `tables` itself, untouched,
             when the clause reduces to the boolean True
    :raises hpr.InvalidQueryError: if the clause contains no condition
    :raises hpr.TerminateQueryError: if the clause reduces to the boolean
                                     False
    '''
    # Get atomic conditional statements and connectives from line
    atomics, conn = man.split_exprn(line)

    # The filtered indexes produced by each condition statement
    f_cols_lst = [eval_cond(cond, tables) for cond in atomics]
    if not f_cols_lst:
        # Nothing to fold; report it as a query error instead of letting a
        # bare TypeError escape from an empty reduction
        raise hpr.InvalidQueryError('Something is wrong with the query')

    # Fold the per-condition results left to right; the i-th connective joins
    # the running result with condition i + 1. Indexing (rather than popping)
    # leaves the list returned by split_exprn intact.
    f_cols = f_cols_lst[0]
    for pos, nxt in enumerate(f_cols_lst[1:]):
        f_cols = man.join_filt_indexes(tables.keys(), f_cols, nxt, conn[pos])

    if not isinstance(f_cols, bool):
        return man.filter_tables(tables, f_cols)

    # f_cols is a boolean value, i.e. a single EXISTE(...) clause: there is
    # nothing to filter, the clause either lets every table through or ends
    # the query
    if f_cols:
        return tables
    raise hpr.TerminateQueryError('Existe condition is False')
def get_grouping(line, tb):
    '''
    Partition the row indexes of a table by the value held in one column.

    Rows sharing the same value in the grouping column land in the same
    sub-list. Indexes are the 0-based positions of the entries in the column
    and groups are emitted in dict insertion order of their value.
    e.g. a column [10, 10, 20, 10] gives [[0, 1, 3], [2]]

    :param line: AGRUPATELOS X ..., but without the keyword, so just the
                 name of the grouping column (surrounding whitespace ignored)
    :param tb: a dict {col_name: [entries]}
    :return: a grouping list [[row indexes], [row indexes]]
    :raises hpr.InvalidQueryError: if the table has no such column
    '''
    column = tb.get(line.strip())
    if column is None:
        # table does not have the column, invalid query
        raise hpr.InvalidQueryError('Cannot find {0} in table for grouping'
                                    .format(line))

    # value -> row indexes holding that value
    buckets = col.defaultdict(list)
    for row_idx, value in enumerate(column):
        buckets[value].append(row_idx)

    return list(buckets.values())
def cutoff_rows(line, table):
    '''
    Truncate every column of the table to its leading entries.

    The count is applied as a plain slice `entries[:n]`, so a count larger
    than the column keeps the whole column and a negative count follows
    Python slice semantics (drops entries from the end).

    :param line: line without 'SOLO', i.e. the text of the row count
    :param table: a dict {col_name: [entries]}
    :return: a new dict {col_name: [entries]} holding the sliced columns
    :raises hpr.InvalidQueryError: if the count cannot be parsed as an int
    '''
    try:
        limit = int(line.strip())
    except (TypeError, ValueError):
        # The count is not an integer literal, so the expression is malformed
        raise hpr.InvalidQueryError('Type error at SOLO expression')

    truncated = {}
    for name, values in table.items():
        truncated[name] = values[:limit]
    return truncated
def filter_by_query(tname, col_name, col_entries, query, tables):
    '''
    Run an inner query and filter col_entries against the values that the
    inner query produced for the same column.

    :param tname: table name, forwarded to filter_by_val_list
    :param col_name: name of the column to read from the inner query result
    :param col_entries: column entries to be filtered
    :param query: inner query, evaluated with atomic_query
    :param tables: dict {tname: tables}
    :return: whatever filter_by_val_list returns for (tname, col_entries,
             inner query values)
    :raises hpr.InvalidQueryError: if the inner query result has no entry
                                   for col_name (the query text is the
                                   error message)
    '''
    inner_tb, _ = atomic_query(query, tables)

    inner_entries = inner_tb.get(col_name)
    if inner_entries is None:
        raise hpr.InvalidQueryError(query)

    # The entries may be a generator; materialise them before filtering
    inner_values = list(inner_entries)
    return filter_by_val_list(tname, col_entries, inner_values)
def filter_grps(line, tb, grouping):
    '''
    Filter out groups that do not meet the filter condition, before column
    functions are performed on the groups (see do_col_func_two).

    The condition has the shape `FUNC(column) <oper> value` or
    `value <oper> FUNC(column)`. FUNC is applied to the entries of every
    group and the group is kept when the comparison holds, with the operands
    passed to the operator in the order they were written.

    :param line: a TENIENDO condition without ' TENIENDO ' and '()'
    :param tb: a dict {col_name: [entries]}
    :param grouping: a list of [indexes], each sublist is an equivalence class
    :return: a list of [indexes], each equivalence class meets the condition
    :raises hpr.InvalidQueryError: if neither side of the condition uses a
                                   function known to eva.FUNC, or if the
                                   table does not have the column
    '''
    # split the line into var1, operator, var2
    var1, oper, var2 = split_clauses(line)

    # The side carrying a '(' is the function call, the other is the value
    func_on_left = '(' in var1
    if func_on_left:
        col_str, val_str = var1, var2
    else:
        col_str, val_str = var2, var1

    # First known function name that appears as `name(` in the call
    func = next((name for name in eva.FUNC.keys() if name + '(' in col_str),
                None)
    if func is None:
        # Previously surfaced as a bare IndexError
        raise hpr.InvalidQueryError(
            'Cannot find a function in TENIENDO condition: {0}'.format(line))

    col_name = strip_func(col_str)
    entries = tb.get(col_name)
    if entries is None:
        raise hpr.InvalidQueryError('Table do not have column: {0}'
                                    .format(col_name))

    # Value of the function on each group, in the order of `grouping`
    values = [eva.FUNC[func](man.get_entries(entries, grp))
              for grp in grouping]

    # The comparison value and operator are the same for every group
    target = hpr.parse_val(val_str)
    compare = eva.OPER[oper]

    if func_on_left:
        def passes(val):
            return compare(var1=val, var2=target)
    else:
        def passes(val):
            return compare(var1=target, var2=val)

    return [grp for grp, val in zip(grouping, values) if passes(val)]
def check_type_match(type_lst1, type_lst2):
    '''
    Ensure two column-type lists are equal.

    :param type_lst1: first list of column types
    :param type_lst2: second list of column types
    :return: None when the two lists compare equal
    :raises hpr.InvalidQueryError: if the two lists differ
    '''
    mismatch = type_lst1 != type_lst2
    if mismatch:
        raise hpr.InvalidQueryError('Unmatching column types')
def check_num_of_col(tb1, tb2):
    '''
    Ensure two tables have the same number of columns.

    :param tb1: first table; its keys() are counted as columns
    :param tb2: second table; its keys() are counted as columns
    :return: None when both tables have the same number of keys
    :raises hpr.InvalidQueryError: if the number of keys differs
    '''
    # A key view already knows its size; no need to copy it into a list.
    # keys() is still what gets measured, as before.
    if len(tb1.keys()) != len(tb2.keys()):
        raise hpr.InvalidQueryError('Unmatching column numbers')