def test_remove_duplicate_rows(self):
    # man.rmv_dups applied to the tb41 fixture must produce columns that
    # match the expected fixtures e_col41, e_col42 and e_col43.
    deduped = man.rmv_dups(self.tb41)
    actual_cols = [deduped[name] for name in ('col1', 'col2', 'col3')]
    expected_cols = [self.e_col41, self.e_col42, self.e_col43]
    # Compare the sizes of every column first, so a size mismatch is
    # reported before any content mismatch.
    for actual, expected in zip(actual_cols, expected_cols):
        self.assertEqual(len(actual), len(expected))
    # Then compare the column contents themselves.
    for actual, expected in zip(actual_cols, expected_cols):
        self.assertEqual(actual, expected)
Exemple #2
0
 def test_remove_duplicate_rows(self):
     # Run man.rmv_dups on the tb41 fixture and compare each resulting
     # column against its expected fixture (e_col41, e_col42, e_col43).
     result = man.rmv_dups(self.tb41)
     got_1, got_2, got_3 = result['col1'], result['col2'], result['col3']
     checks = (
         (got_1, self.e_col41),
         (got_2, self.e_col42),
         (got_3, self.e_col43),
     )
     # All length checks run before any content check.
     for got, want in checks:
         self.assertEqual(len(got), len(want))
     for got, want in checks:
         self.assertEqual(got, want)
Exemple #3
0
def atomic_query(query_str, tables):
    '''
    Does an atomic query using the provided tables.

    The query is split into labelled sub-expressions which are then applied
    in this order: DE / EMPRESTA, ONDE, ORDENATELOS X, AGRUPATELOS X
    (optionally narrowed by TENIENDO), the column functions, duplicate
    removal when DIVERGENTE is set, and finally SOLO.

    :param query_str: an atomic query is a valid query with one EMPRESTA and DE
    :param tables: a dict of tb_name (keys): tables (values). Each table is a
                dict of col_names (keys): col_entries (values)
    :return: table (a dict), col_order (a list of col_names for printing)
    :raises KeyError: if the DE or EMPRESTA sub-expression is missing from the
                labelled sub-expressions
    :raises IndexError: if the TENIENDO clause uses none of the functions
                listed in eva.FUNC
    '''
    # Get list of intervals (start, end) to mark the start and end of sub-exprns
    intervals = get_exprn_intervals(query_str)
    # Get and label the sub-expressions: "keyword + [variables]"
    sub_exprns = [query_str[itv[0]: itv[1]] for itv in intervals]
    l_sub_exprns = label_exprns(sub_exprns)

    # Get the queried table names and columns
    tnames = de(l_sub_exprns[' DE '])
    cols, col_funcs = emp(l_sub_exprns['EMPRESTA '])
    # Get the queried tables; a name that is absent from `tables` yields None
    tb_list = [tables.get(tname) for tname in tnames]

    # Get the conditional subexpression if exists
    onde = l_sub_exprns.get(' ONDE ')
    if onde:
        # Only the first 'ONDE' is removed, so the condition text itself is
        # left untouched. eval_onde returns a dict {tname: table}
        filtered_tbs = eval_onde(onde.replace('ONDE', '', 1).strip(), tables)
        # Filter the tables by the conditions
        tb_list = [filtered_tbs[tname] for tname in tnames]

    # Order the tables if there is ORDENATELOS X
    order = l_sub_exprns.get(' ORDENATELOS X ')
    if order is not None:
        tb_list = order_cols(order.replace(' ORDENATELOS X ', ''), tb_list)

    # AGRUPATELOS and TENIENDO clauses (None when absent)
    grp_line = l_sub_exprns.get(' AGRUPATELOS X ')
    grp_fil_line = l_sub_exprns.get(' TENIENDO ')

    # Add on the columns needed for TENIENDO and AGRUPATELOS if they are not
    # in the column list. A copy is extended so that `cols` keeps exactly the
    # columns that were asked for.
    new_cols = cols.copy()
    grp_col = None
    grp_cond = None
    if grp_line:
        # Parsed once here and reused below when the grouping is built
        grp_col = grp_line.replace(' AGRUPATELOS X ', '').strip()
        if grp_col not in new_cols:
            new_cols.append(grp_col)
    if grp_fil_line:
        # NOTE(review): strip('() ') also removes a ')' at the very end of the
        # clause - confirm split_clauses / strip_func cope when the function
        # call is the right-hand operand.
        grp_cond = grp_fil_line.replace(' TENIENDO ', '').strip('() ')
        # split the condition into var1, operator, var2
        var1, __, var2 = split_clauses(grp_cond)
        # The operand containing '(' is taken to be the function call
        col_str = var1 if '(' in var1 else var2
        if not any(func + '(' in col_str for func in eva.FUNC.keys()):
            # IndexError is kept as the exception type raised for this case
            raise IndexError(
                'TENIENDO clause uses no known function: {0}'.format(grp_cond))
        col_str = strip_func(col_str)
        if col_str not in new_cols:
            # Add the col if it is not in the list of columns
            new_cols.append(col_str)

    # Table list is reduced to a single table with all the columns stated in
    # new_cols (list)
    result_tb = emp_de_two(tb_list, new_cols, col_funcs)

    # Get groupings if there is an AGRUPATELOS clause
    grouping = None
    if grp_line:
        grouping = get_grouping(grp_col, result_tb)
        if grp_fil_line:
            # If there is a filter group condition, filter grouping
            grouping = filter_grps(grp_cond, result_tb, grouping)

    # Remove columns added for AGRUPATELOS and TENIENDO
    result_tb = man.reduce_tb(result_tb, cols)
    # Do the column functions on the result table
    result_tb = do_col_funcs_two(result_tb, col_funcs, grouping)
    # If DIVERGENTE is true, remove duplicates
    if col_funcs['DIVERGENTE']:
        result_tb = man.rmv_dups(result_tb)

    # Check for SOLO expression, if there is one cut off the rows
    solo = l_sub_exprns.get(' SOLO ')
    if solo is not None:
        # Drop the keyword itself rather than str.strip(' SOLO '), which
        # treats its argument as a set of characters to trim from both ends.
        result_tb = cutoff_rows(solo.replace('SOLO', '', 1).strip(), result_tb)
    # Return the result table and the column order of the query
    return result_tb, cols