コード例 #1
0
def countgroup(table: Arrable, groupOn: list):
    """
    groups 'table' by the columns in 'groupOn' and counts the rows in each group

    table: the Arrable to aggregate
    groupOn: column names to group on; a single column name given as a plain
        string is accepted as well. The caller's list is not modified.

    returns a new Arrable with the 'groupOn' columns plus a 'countgroup'
    column holding each group's row count as a string
    """
    group_col_name = "countgroup"
    # Accept a bare column name as well as a list of names, and work on a
    # private copy so the caller's list is never drained.
    group_columns = [groupOn] if isinstance(groupOn, str) else list(groupOn)

    # Partition one column at a time, last column first (the same order as
    # popping from the end of the list), refining every existing group.
    all_groups = [table]
    for next_group_col in reversed(group_columns):
        all_groups = [
            sub_group
            for group in all_groups
            for sub_group in _groupby(group, next_group_col)
        ]

    # One output row per group: a copy of the group's first row (so the rows
    # of the source table are left untouched) tagged with the group size.
    arrable_rows = []
    for group in all_groups:
        group_rows = group.get_rows()
        if not group_rows:
            # Only reachable for an empty table with no grouping columns.
            continue
        summary_row = dict(group_rows[0])
        summary_row[group_col_name] = str(len(group_rows))
        arrable_rows.append(summary_row)

    # list.append() returns None, so the widened column list is built
    # explicitly; this also avoids appending to the list that
    # get_col_names() handed back.
    final_columns = list(table.get_col_names()) + [group_col_name]
    final_arrable = Arrable().init_from_arrable(final_columns, arrable_rows)
    return project(final_arrable, group_columns + [group_col_name])
コード例 #2
0
def sumgroup(table: Arrable, to_add: str, groupOn: list):
    """
    groups 'table' by the columns in 'groupOn' and sums column 'to_add' per group

    table: the Arrable to aggregate
    to_add: name of the column to sum; every value is converted with int()
    groupOn: column names to group on; a single column name given as a plain
        string is accepted as well. The caller's list is not modified.

    returns a new Arrable with the 'groupOn' columns plus a 'sumgroup'
    column holding each group's total as a string
    """
    group_col_name = "sumgroup"
    # Accept a bare column name as well as a list of names, and work on a
    # private copy so the caller's list is never drained.
    group_columns = [groupOn] if isinstance(groupOn, str) else list(groupOn)

    # Partition one column at a time, last column first (the same order as
    # popping from the end of the list), refining every existing group.
    all_groups = [table]
    for next_group_col in reversed(group_columns):
        all_groups = [
            sub_group
            for group in all_groups
            for sub_group in _groupby(group, next_group_col)
        ]

    # One output row per group: a copy of the group's first row (so the rows
    # of the source table are left untouched) tagged with the group total.
    arrable_rows = []
    for group in all_groups:
        group_rows = group.get_rows()
        if not group_rows:
            # Only reachable for an empty table with no grouping columns.
            continue
        group_total = sum(int(row[to_add]) for row in group_rows)
        summary_row = dict(group_rows[0])
        summary_row[group_col_name] = str(group_total)
        arrable_rows.append(summary_row)

    # list.append() returns None, so the widened column list is built
    # explicitly; this also avoids appending to the list that
    # get_col_names() handed back.
    final_columns = list(table.get_col_names()) + [group_col_name]
    final_arrable = Arrable().init_from_arrable(final_columns, arrable_rows)
    return project(final_arrable, group_columns + [group_col_name])
コード例 #3
0
def count(table: Arrable):
    """
    counts the rows of 'table'
    returns a one-row Arrable whose only column, 'count', holds the row total
    """
    row_total = len(table.get_rows())
    return Arrable().init_from_arrable(["count"], [{"count": row_total}])
コード例 #4
0
 def test_concat(self):
     # Concatenating the two fixture tables is expected to give 9 rows.
     first = Arrable().import_from_file("test_input1")
     second = Arrable().import_from_file("test_input1_to_concat")
     combined = q.concat(first, second)
     row_count = len(combined.get_rows())
     self.assertEqual(row_count, 9)
コード例 #5
0
def _rename_fields_in_table(table: Arrable, table_name):
    """
    Builds new row dicts in which every column key is prefixed with
    'table_name' and an underscore (column "col" of table "A" becomes "A_col").
    Only the columns listed by get_col_names() are carried over.
    returns a list of dicts, one per row of 'table'
    """
    return [
        {
            "_".join((table_name, column)): original_row[column]
            for column in table.get_col_names()
        }
        for original_row in table.get_rows()
    ]
コード例 #6
0
def _groupby(fromTable: Arrable, groupOn: str):
    """
    Splits 'fromTable' into one Arrable per distinct value of column 'groupOn'
    (an intermediate step; the pieces are meant to be concatted afterwards).
    Whitespace around 'groupOn' is stripped before the lookup, and the groups
    come back in the order in which their value first appears.
    returns a list of Arrables
    """
    column = groupOn.strip()
    buckets = defaultdict(list)
    for record in fromTable.get_rows():
        buckets[record[column]].append(record)

    pieces = []
    for members in buckets.values():
        pieces.append(
            Arrable().init_from_arrable(fromTable.get_col_names(), members))
    return pieces
コード例 #7
0
def project(fromTable: Arrable, columns: list):
    """
    keeps only the listed 'columns' of every row in the given arrable
    (fromTable); the column list is echoed to stdout first
    returns: Arrable
    """
    print("COLUMNS = ", columns)
    projected_rows = [
        {name: source_row[name] for name in columns}
        for source_row in fromTable.get_rows()
    ]
    return Arrable().init_from_arrable(columns, projected_rows)
コード例 #8
0
def _get_converted_col_tables_for_join(tableA: Arrable, A_name: str,
                                       tableB: Arrable, B_name: str):
    """
    Prepares both join inputs by prefixing every column with its table name
    and an underscore ("col" of table "A" becomes "A_col").
    returns the pair (prefixed copy of tableA, prefixed copy of tableB)
    """
    rows_a = _rename_fields_in_table(tableA, A_name)
    rows_b = _rename_fields_in_table(tableB, B_name)

    cols_a = ["_".join((A_name, col)) for col in tableA.get_col_names()]
    cols_b = ["_".join((B_name, col)) for col in tableB.get_col_names()]

    # NOTE(review): the pk is passed through unprefixed while the columns are
    # renamed - confirm init_from_arrable expects it that way.
    prefixed_a = Arrable().init_from_arrable(cols_a, rows_a, tableA.pk)
    prefixed_b = Arrable().init_from_arrable(cols_b, rows_b, tableB.pk)
    return prefixed_a, prefixed_b
コード例 #9
0
 def test_import_from_file_no_head(self):
     # With header=False the columns are expected to be numbered 0..7.
     table = Arrable().import_from_file("test_input1", header=False)
     self.assertEqual(list(range(8)), table.get_col_names())

     expected_second_row = {
         '  ': 1,
         0: '36',
         1: '14',
         2: '2',
         3: '38',
         4: '49',
         5: '15',
         6: 'moderate',
         7: 'hey',
     }
     self.assertEqual(expected_second_row, table.get_rows()[1])
コード例 #10
0
 def test_mov_avg(self):
     # Moving average of "qty" with a window of 2 over the sales1 fixture;
     # spot-checks the first two entries and the last one.
     table = Arrable().import_from_file("sales1")
     averaged_rows = q.moving_avg(table, "qty", 2).get_rows()
     for position, expected in ((0, 15), (1, 23), (-1, 12)):
         self.assertEqual(averaged_rows[position], expected)
コード例 #11
0
def avg(table: Arrable, col_name: str):
    """
    computes the average of all values in column 'col_name'
    every value is converted with float(); no other type checking is done
    an empty table yields an average of 0.0
    returns a one-row Arrable whose only column is 'avg'
    """
    total = 0
    seen = 0
    for seen, entry in enumerate(table.get_rows(), start=1):
        total += float(entry[col_name])

    # Divide by 1 when there were no rows so the result is 0.0, not an error.
    divisor = seen or 1
    return Arrable().init_from_arrable(["avg"], [{"avg": total / divisor}])
コード例 #12
0
def moving_op(table: Arrable, col_name: str, sliding_window: int, op,
              op_name: str):
    """
    Applies 'op' over a sliding window of rows and collects the results.

    table: the Arrable to scan
    col_name: column handed to 'op'
    sliding_window: window size in rows, must be at least 1
    op: callable taking (slice_of_table, col_name) and returning an Arrable
        whose columns are [op_name], so it can be concatted to the result
    op_name: name of the single result column

    The first sliding_window - 1 results come from the growing prefixes of
    the table; after that one result is produced per full window, so a table
    of n rows gives n results.

    returns an Arrable with one 'op_name' row per window
    raises ValueError if sliding_window is smaller than 1
    """
    if sliding_window < 1:
        raise ValueError("sliding_window must be at least 1")

    newArr = Arrable().init_from_arrable([op_name], [])
    num_rows = len(table.get_rows())

    # Warm-up: prefixes of 1 .. sliding_window - 1 rows. The prefix of exactly
    # sliding_window rows is the first full window below, so it is not
    # emitted here as well, and prefixes never extend past the table.
    # NOTE(review): assumes get_slice(start, end) excludes 'end' - confirm.
    for end in range(1, min(sliding_window, num_rows + 1)):
        newArr = concat(newArr, op(table.get_slice(0, end), col_name))

    # Full windows: every start index that leaves room for a whole window.
    for start in range(num_rows - sliding_window + 1):
        window = table.get_slice(start, start + sliding_window)
        newArr = concat(newArr, op(window, col_name))

    return newArr
コード例 #13
0
 def test_import_from_file(self):
     # Default import: checks the column names and the first row.
     table = Arrable().import_from_file("test_input1")

     expected_columns = [
         'saleid', 'itemid', 'customerid', 'storeid', 'time', 'qty',
         'pricerange', 'join',
     ]
     self.assertEqual(expected_columns, table.get_col_names())

     expected_first_row = {
         '  ': 0,
         'saleid': '36',
         'itemid': '14',
         'customerid': '2',
         'storeid': '38',
         'time': '49',
         'qty': '15',
         'pricerange': 'moderate',
         'join': 'hey',
     }
     self.assertEqual(table.get_rows()[0], expected_first_row)
コード例 #14
0
 def test_project(self):
     # project() takes the wanted columns as a single list, not as separate
     # positional arguments.
     arr = Arrable().import_from_file("test_input1")
     result = q.project(arr, ["saleid", "itemid", "time"])
     self.assertEqual({
         'saleid': '36',
         'itemid': '14',
         'time': '49'
     },
                      result.get_rows()[0])
コード例 #15
0
 def test_select_math_asymm(self):
     # The where clause applies arithmetic to one side of a comparison only.
     table = Arrable().import_from_file("test_input1")
     condition = "(itemid=14) and (customerid*2<5)"
     selected = q.select(table, "itemid,customerid", where=condition)
     expected_rows = [{'  ': 0, 'itemid': '14', 'customerid': '2'}]
     self.assertEqual(expected_rows, selected.get_rows())
コード例 #16
0
def _column_sort_key(column: str):
    """Builds the key function that orders rows by the value in 'column'."""

    def key(row):
        value = row[column]
        # Decimal strings compare as integers and always sort ahead of other
        # text. The leading tag keeps ints and strs from ever being compared
        # with each other, which would raise TypeError.
        if value.isdecimal():
            return (0, int(value), "")
        return (1, 0, value)

    return key


def sort(
    fromTable: Arrable, *args: str
):  # with no columns given the table itself is returned
    """
    Sorts the rows of 'fromTable' by the given columns.

    The columns are applied in reverse order with a stable sort, so that the
    earlier columns have higher priority. Values made up only of decimal
    digits are compared as integers; any other value is compared as text and
    sorts after the numbers.

    returns 'fromTable' itself when no column is given, otherwise a new
    Arrable with the same columns and the rows in sorted order
    """
    result = fromTable
    for column in reversed(args):
        ordered_rows = sorted(result.get_rows(), key=_column_sort_key(column))
        result = Arrable().init_from_arrable(result.get_col_names(),
                                             ordered_rows)
    return result
コード例 #17
0
def _some(table: Arrable, col_name: str):
    """
    The name follows an unclear wording in the hw specification; other
    namings are meant to be offered as well.
    computes the sum of all values in column 'col_name'
    every value is converted with int(); no other type checking is done
    returns float
    """
    total = sum(int(entry[col_name]) for entry in table.get_rows())
    return float(total)
コード例 #18
0
 def test_join(self):
     # Joining on a_join = b_j_oin is expected to give exactly one row that
     # carries the prefixed columns of both tables.
     left = Arrable().import_from_file("test_input1")
     right = Arrable().import_from_file("test_input2")
     joined = q.join(left, "a", right, "b", "a_join = b_j_oin")

     left_part = {
         'a_saleid': '36',
         'a_itemid': '14',
         'a_customerid': '2',
         'a_storeid': '38',
         'a_time': '49',
         'a_qty': '15',
         'a_pricerange': 'moderate',
         'a_join': 'hey',
     }
     right_part = {
         'b_saleid': '3506',
         'b_I': '13517',
         'b_C': '16566',
         'b_S': '45',
         'b_T': '73',
         'b_Q': '19',
         'b_P': 'expensive',
         'b_j_oin': 'hey',
     }
     self.assertEqual(joined.get_rows(), [{**left_part, **right_part}])
コード例 #19
0
def join(tableA: Arrable, A_name: str, tableB: Arrable, B_name: str,
         where: str):
    """
    Joins two tables: every pairing of a row of tableA with a row of tableB
    that satisfies 'where' becomes one row of the result.

    The columns of both tables are prefixed with their table name first, so
    the where string refers to them as "Table_Column". Every "." in 'where'
    is replaced by "_", so "Table.Column" works too.
    e.g.
        join(R, "R", S, "S", "R_price = S_cost and R_cost = S_price")

    NOTE(review): the "." replacement also rewrites a "." inside a numeric
    literal such as 1.5.

    returns a new Arrable holding the prefixed columns of both tables
    """
    renamed_cols_tableA, renamed_cols_tableB = _get_converted_col_tables_for_join(
        tableA, A_name, tableB, B_name)
    joined_cols = renamed_cols_tableA.get_col_names(
    ) + renamed_cols_tableB.get_col_names()  # Concat the lists

    predicate = WherePredicates(where.replace(".", "_"))

    rows_a = renamed_cols_tableA.get_rows()
    rows_b = renamed_cols_tableB.get_rows()
    total_a = len(rows_a)

    res = []
    progress_bar = progressbar.ProgressBar(max_value=100).start()
    try:
        for done_a, Arow in enumerate(rows_a):
            # Progress is the share of A rows already processed. The loop
            # body only runs when total_a > 0, so an empty tableB can no
            # longer cause a division by zero.
            progress_bar.update(done_a / total_a * 100)
            for Brow in rows_b:
                joined_row = {**Arow, **Brow}
                if predicate.isMatch(joined_row, joined_cols):
                    res.append(joined_row)
    finally:
        # Close the bar even when the predicate raises.
        progress_bar.finish()

    return Arrable().init_from_arrable(joined_cols, res)
コード例 #20
0
def select(fromTable: Arrable, cols: str, where: str):
    """
    Returns the rows of 'fromTable' that satisfy 'where', reduced to 'cols'.

    fromTable: the Arrable to filter
    cols: comma separated column names (whitespace around each name is
        ignored); a list of column names is accepted as well
    where: predicate string handed to WherePredicates

    Every returned row also carries its position in 'fromTable' under the
    "  " key.

    e.g.
        select(R, "time,qty", "(time > 50) or (qty < 30)")
        select(R, "qty", "qty = 5")
        select(R, R.get_col_names(), "itemid = 7")

    returns a new Arrable
    """
    orig_cols = fromTable.get_col_names()
    if isinstance(cols, str):
        # Strip each name so "a, b" selects column "b" rather than " b".
        wanted = [name.strip() for name in cols.split(",")]
    else:
        wanted = list(cols)
    predicate = WherePredicates(where)

    res = []
    for i, row in enumerate(fromTable.get_rows()):
        if predicate.isMatch(row, orig_cols):
            val = {entry: row[entry] for entry in wanted}
            val["  "] = i
            res.append(val)

    return Arrable().init_from_arrable(wanted, res)
コード例 #21
0
def concat(table1: Arrable, table2: Arrable):
    """
    concats two arrables: the rows of table1 followed by the rows of table2
    when the column names of the two tables differ, a message is printed and
    None is returned
    returns arrable
    """
    if table1.get_col_names() == table2.get_col_names():
        merged_rows = table1.get_rows() + table2.get_rows()
        return Arrable().init_from_arrable(table1.get_col_names(),
                                           merged_rows)

    print("Table schemas don't match.")
    return None
コード例 #22
0
 def test_avggroup(self):
     # Average "qty" per "time" value of the fixture.
     table = Arrable().import_from_file("test_input1")
     grouped = q.avggroup(table, "qty", "time")

     expected_by_time = (
         ('49', '15.0'),
         ('46', '31.0'),
         ('81', '14.0'),
         ('67', '43.0'),
         ('35', '1.0'),
     )
     expected_rows = [{
         'avggroup': average,
         'time': time
     } for time, average in expected_by_time]
     self.assertEqual(grouped.get_rows(), expected_rows)
コード例 #23
0
 def test_sumgroup(self):
     # sumgroup() takes the grouping columns as a list: it calls .copy() on
     # the argument, which a plain string does not provide.
     arr = Arrable().import_from_file("test_input1")
     result = q.sumgroup(arr, "qty", ["time"])
     self.assertEqual(result.get_rows(), [{
         'sumgroup': '15',
         'time': '49'
     }, {
         'sumgroup': '31',
         'time': '46'
     }, {
         'sumgroup': '14',
         'time': '81'
     }, {
         'sumgroup': '86',
         'time': '67'
     }, {
         'sumgroup': '1',
         'time': '35'
     }])
コード例 #24
0
 def test_groupby(self):
     # Grouping on customerid is expected to give groups of 3, 1, 1 and 1 rows.
     table = Arrable().import_from_file("test_input1")
     group_sizes = [len(group) for group in q._groupby(table, "customerid")]
     self.assertEqual(group_sizes, [3, 1, 1, 1])
コード例 #25
0
 def test_select_multiple_word(self):
     # Two conditions combined with the word "and".
     table = Arrable().import_from_file("test_input1")
     condition = "(itemid=14) and (saleid=36)"
     selected = q.select(table, "itemid", where=condition)
     expected_rows = [{'  ': 0, 'itemid': '14'}]
     self.assertEqual(expected_rows, selected.get_rows())
コード例 #26
0
 def test_select_multiple_comma(self):
     # Two conditions separated by a comma.
     table = Arrable().import_from_file("sales1")
     condition = "itemid=14,saleid=36"
     selected = q.select(table, "itemid", where=condition)
     expected_rows = [{'  ': 0, 'itemid': '14'}]
     self.assertEqual(expected_rows, selected.get_rows())
コード例 #27
0
 def test_output(self):
     # Smoke test: import the fixture and write it back out; nothing is
     # asserted about the written file.
     table = Arrable().import_from_file("test_input1")
     table.output_to_file("test_out_file")
コード例 #28
0
 def test_sort_by_columns_single(self):
     # Smoke test only: the sorted result is not inspected.
     table = Arrable().import_from_file("test_input1")
     q.sort(table, "qty")
コード例 #29
0
 def test_import_from_file_pk(self):
     # Smoke test only: import with an explicit pk column; nothing is asserted.
     Arrable().import_from_file("sales1", pk="itemid")
コード例 #30
0
 def test_sort_by_columns_multiple(self):
     table = Arrable().import_from_file("test_input1")
     sorted_table = q.sort(table, "saleid", "qty", "pricerange")

     # NOTE(review): the expected rows below are ordered by pricerange first
     # and by qty within equal priceranges, not by saleid - confirm this is
     # the column priority q.sort is meant to implement.
     field_names = ('  ', 'saleid', 'itemid', 'customerid', 'storeid', 'time',
                    'qty', 'pricerange', 'join')
     expected_values = [
         (0, '36', '14', '2', '38', '49', '15', 'moderate', 'hey'),
         (1, '784', '90', '182', '97', '46', '31', 'moderate', '1'),
         (5, '951', '102', '116', '45', '35', '1', 'outrageous', 'wop'),
         (2, '801', '117', '2', '43', '81', '14', 'outrageous', 'g'),
         (3, '905', '79', '119', '81', '67', '44', 'outrageous', 'n'),
         (4, '227', '68', '2', '66', '67', '42', 'supercheap', 'lol'),
     ]
     expected_rows = [
         dict(zip(field_names, values)) for values in expected_values
     ]
     self.assertEqual(sorted_table.get_rows(), expected_rows)