def test_remove_duplicate_rows(self):
    # Checks the table returned by man.rmv_dups(self.tb41) against the
    # expected column fixtures e_col41 / e_col42 / e_col43.
    deduped = man.rmv_dups(self.tb41)

    # Pair each produced column with the fixture it is compared against.
    actual_cols = [deduped[name] for name in ('col1', 'col2', 'col3')]
    expected_cols = [self.e_col41, self.e_col42, self.e_col43]
    pairs = list(zip(actual_cols, expected_cols))

    # All length checks run before any content check, so a size mismatch is
    # reported ahead of a value mismatch.
    for actual, expected in pairs:
        self.assertEqual(len(actual), len(expected))

    for actual, expected in pairs:
        self.assertEqual(actual, expected)
def atomic_query(query_str, tables):
    '''
    Does an atomic query using the provided tables

    The query is split into labelled sub-expressions (EMPRESTA, DE and the
    optional ONDE, ORDENATELOS X, AGRUPATELOS X, TENIENDO and SOLO), which are
    then applied in that order to build the result table.

    :param query_str: an atomic query is a valid query with one EMPRESTA and DE
    :param tables: a dict of tb_name (keys): tables (values). Each table is a
        dict of col_names (keys): col_entries (values)
    :return: table (a dict), col_order (a list of col_names for printing)
    :raises IndexError: if the TENIENDO operand contains none of the function
        names in eva.FUNC
    '''
    # Get list of intervals (start, end) marking each sub-expression, then
    # slice out and label the sub-expressions: "keyword + [variables]"
    intervals = get_exprn_intervals(query_str)
    sub_exprns = [query_str[itv[0]:itv[1]] for itv in intervals]
    l_sub_exprns = label_exprns(sub_exprns)

    # Get the queried table names and columns
    tnames = de(l_sub_exprns[' DE '])
    cols, col_funcs = emp(l_sub_exprns['EMPRESTA '])

    # Get the queried tables
    tb_list = [tables.get(tname) for tname in tnames]

    # Get the conditional subexpression if it exists
    onde = l_sub_exprns.get(' ONDE ', None)
    if onde:
        # eval_onde returns a dict {tname: table}; only the first 'ONDE' is
        # removed so the rest of the condition text is left untouched
        filtered_tbs = eval_onde(onde.replace('ONDE', '', 1).strip(), tables)
        # Replace the queried tables with their filtered versions
        tb_list = [filtered_tbs[tname] for tname in tnames]

    # Order the tables if there is ORDENATELOS X
    order = l_sub_exprns.get(' ORDENATELOS X ')
    if order is not None:
        tb_list = order_cols(order.replace(' ORDENATELOS X ', ''), tb_list)

    # Parse the AGRUPATELOS / TENIENDO clause text once; both the column
    # collection and the grouping step below need it
    grp_line = l_sub_exprns.get(' AGRUPATELOS X ')
    grp_fil_line = l_sub_exprns.get(' TENIENDO ')
    grp_col = None
    grp_cond = None
    if grp_line:
        grp_col = grp_line.replace(' AGRUPATELOS X ', '').strip()
    if grp_fil_line:
        grp_cond = grp_fil_line.replace(' TENIENDO ', '').strip('() ')

    # Add on the columns needed for TENIENDO and AGRUPATELOS if they are not
    # in the column list. Work on a copy so `cols` keeps the queried order.
    new_cols = cols.copy()
    if grp_line and grp_col not in new_cols:
        new_cols.append(grp_col)
    if grp_fil_line:
        # Split the condition into var1, operator, var2
        var1, _oper, var2 = split_clauses(grp_cond)
        # The operand containing '(' is taken to be the function call
        col_str = var1 if '(' in var1 else var2
        if not any(func + '(' in col_str for func in eva.FUNC):
            # Same exception type the previous `[...][0]` lookup produced
            raise IndexError(
                'TENIENDO operand has no known function: {0}'.format(col_str))
        col_str = strip_func(col_str)
        if col_str not in new_cols:
            new_cols.append(col_str)

    # Table list is reduced to a single table with all the columns stated in
    # new_cols
    result_tb = emp_de_two(tb_list, new_cols, col_funcs)

    # Build the grouping (stays None when there is no AGRUPATELOS X)
    grouping = None
    if grp_line:
        grouping = get_grouping(grp_col, result_tb)
    # NOTE(review): the source's indentation was lost; the TENIENDO filter is
    # assumed to sit at the same level as the AGRUPATELOS check - confirm.
    if grp_fil_line:
        grouping = filter_grps(grp_cond, result_tb, grouping)

    # Remove columns added only for AGRUPATELOS and TENIENDO
    result_tb = man.reduce_tb(result_tb, cols)

    # Do the column functions on the result table
    result_tb = do_col_funcs_two(result_tb, col_funcs, grouping)

    # If DIVERGENTE is true, remove duplicates
    if col_funcs['DIVERGENTE']:
        result_tb = man.rmv_dups(result_tb)

    # Check for SOLO expression; if there is one, cut off the rows
    solo = l_sub_exprns.get(' SOLO ')
    if solo is not None:
        # str.strip(' SOLO ') would strip a *set of characters*, not the
        # keyword, so remove the keyword explicitly and trim whitespace
        result_tb = cutoff_rows(solo.replace('SOLO', '', 1).strip(), result_tb)

    # Return result table and the column order of the query
    return result_tb, cols