예제 #1
0
class TestMutate(TestCase):
    """Unit tests for the Mutate transformation."""

    def setUp(self):
        """Build the ten-row, three-category fixture used by every test."""
        columns = {
            'Cat': ["A", "A", "A", "B", "B", "B", "B", "C", "C", "C"],
            'Date': [
                "2021-03-01", "2021-03-02", "2021-03-03",
                "2021-03-01", "2021-03-02", "2021-03-03", "2021-03-04",
                "2021-03-01", "2021-03-02", "2021-03-03",
            ],
            'Var1': [10, 14, 13, 22, 28, 23, 30, 6, 8, 9],
            'Var2': [250, 245, 209, 360, 328, 359, 372, 74, 78, 80],
            'VarNone': [87, 99, None, 120, 128, None, 99, None, None, None],
            'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
            'VarTextNum': ["5", "8", "-1", "0", "7.4", "11.9", "-8.44", "5", -4.8, 9.2],
        }
        self.df = DataFrame(columns)

    def test_compute_var(self):
        """Var3 must hold the row-wise sum of Var1 and Var2."""
        expected = [260, 259, 222, 382, 356, 382, 402, 80, 86, 89]
        mutated = Mutate(Var3="Var1+Var2").apply(self.df)
        self.assertEqual(expected, mutated['Var3'])

    def test_vars_with_lag(self):
        """Var4 is Var1 minus the previous row's Var1; the first row is None."""
        expected = [None, 4, -1, 9, 6, -5, 7, -24, 2, 1]
        mutated = Mutate(Var4="Var1-lag_Var1").apply(self.df)
        self.assertEqual(expected, mutated['Var4'])

    def test_leads_with_groups(self):
        """With 'Cat' as group, the last row of each category yields None."""
        self.df.add_group('Cat')
        expected = [495, 454, None, 688, 687, 731, None, 152, 158, None]
        mutated = Mutate(Var5='Var2+lead_Var2').apply(self.df)
        self.assertEqual(expected, mutated['Var5'])
예제 #2
0
 def apply(self, df):
     """
     Build a new DataFrame holding only the columns listed in ``self.vars``.

     The grouping variables of ``df`` that are among the kept columns are
     re-applied to the result through ``GroupBy``.
     """
     # Only grouping variables that survive the projection can be kept.
     surviving_groups = [name for name in df.groups_vars if name in self.vars]
     projected = DataFrame()
     for name in self.vars:
         projected.add_column(name, df[name])
     return GroupBy(*surviving_groups).apply(projected)
예제 #3
0
 def setUp(self):
     """Create the fixture: two 'Dep' values with five dated rows each."""
     columns = {
         'Dep': ["1", "1", "1", "1", "1", "2", "2", "2", "2", "2"],
         'Jour': [
             "2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04",
             "2021-01-05", "2020-11-16", "2020-11-17", "2020-11-18",
             "2020-11-19", "2020-11-20",
         ],
         'Var': [248, 245, 209, 359, 326, 86, 92, 74, 80, 77],
         # Columns with missing, mixed-type and text-encoded values.
         'VarNone': [87, 99, None, 120, 128, None, 99, None, None, None],
         'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
         'VarTextNum': ["5", "8", "-1", "0", "7.4", "11.9", "-8.44", "5", -4.8, 9.2],
     }
     self.df = DataFrame(columns)
예제 #4
0
 def setUp(self):
     """Create the fixture: ten dated rows spread over categories A, B and C."""
     columns = {
         'Cat': ["A", "A", "A", "B", "B", "B", "B", "C", "C", "C"],
         'Date': [
             "2021-03-01", "2021-03-02", "2021-03-03",
             "2021-03-01", "2021-03-02", "2021-03-03", "2021-03-04",
             "2021-03-01", "2021-03-02", "2021-03-03",
         ],
         'Var1': [10, 14, 13, 22, 28, 23, 30, 6, 8, 9],
         'Var2': [250, 245, 209, 360, 328, 359, 372, 74, 78, 80],
         # Columns with missing, mixed-type and text-encoded values.
         'VarNone': [87, 99, None, 120, 128, None, 99, None, None, None],
         'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
         'VarTextNum': ["5", "8", "-1", "0", "7.4", "11.9", "-8.44", "5", -4.8, 9.2],
     }
     self.df = DataFrame(columns)
예제 #5
0
 def setUp(self):
     """
     Create two fixtures, ``df1`` keyed by 'Reg' and ``df2`` keyed by 'Region'.

     Both key columns contain a duplicated value ("9" in df1, "5" in df2)
     and values absent from the other table ("6" in df1, "10" in df2).
     """
     left_columns = {
         'Reg': ["1", "2", "3", "4", "5", "6", "7", "8", "9", "9"],
         'Var1': [10, 14, 13, 22, 28, 23, 30, 6, 8, 9],
         'Var2': [250, 245, 209, 360, 328, 359, 372, 74, 78, 80],
         'VarNone': [87, 99, None, 120, 128, None, 99, None, None, None],
         'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
         'VarTextNum': ["5", "8", "-1", "0", "7.4", "11.9", "-8.44", "5", -4.8, 9.2],
     }
     right_columns = {
         'Region': ["1", "2", "3", "4", "5", "5", "7", "8", "9", "10"],
         'Nom': ["A", "B", "C", "D", "E", "Ebis", "G", "H", "I", "J"],
         'Var1': [22, 44, 84, 16, 7, 99, 11, 14, 29, 22],
     }
     self.df1 = DataFrame(left_columns)
     self.df2 = DataFrame(right_columns)
예제 #6
0
 def setUp(self):
     """Create the ten-row fixture and declare 'Cat' as grouping variable."""
     columns = {
         'Cat': ["A", "A", "A", "B", "B", "B", "B", "C", "C", "C"],
         'Date': [
             "2021-03-01", "2021-03-02", "2021-03-03",
             "2021-03-01", "2021-03-02", "2021-03-03", "2021-03-04",
             "2021-03-01", "2021-03-02", "2021-03-03",
         ],
         'Var1': [10, 14, 13, 22, 28, 23, 30, 6, 8, 9],
         'Var2': [250, 245, 209, 360, 328, 359, 372, 74, 78, 80],
     }
     grouped = DataFrame(columns)
     self.df = grouped
     grouped.add_group('Cat')
예제 #7
0
 def setUp(self):
     """Create an eight-row fixture; Var3 mixes numbers, None and "NaN"."""
     columns = {
         "Cat": ["A", "A", "A", "B", "B", "B", "C", "C"],
         "Var1": [348, 402, 397, 380, 589, 520, 620, 289],
         "Var2": [74, 81, 85, 71, 102, 99, 101, 76],
         "Var3": [74, None, 85, 71, "NaN", 99, 101, 76],
     }
     self.df = DataFrame(columns)
예제 #8
0
class TestUngroup(TestCase):
    """Unit tests for the Ungroup transformation."""

    def setUp(self):
        """Create the ten-row fixture and declare 'Cat' as grouping variable."""
        columns = {
            'Cat': ["A", "A", "A", "B", "B", "B", "B", "C", "C", "C"],
            'Date': [
                "2021-03-01", "2021-03-02", "2021-03-03",
                "2021-03-01", "2021-03-02", "2021-03-03", "2021-03-04",
                "2021-03-01", "2021-03-02", "2021-03-03",
            ],
            'Var1': [10, 14, 13, 22, 28, 23, 30, 6, 8, 9],
            'Var2': [250, 245, 209, 360, 328, 359, 372, 74, 78, 80],
        }
        grouped = DataFrame(columns)
        self.df = grouped
        grouped.add_group('Cat')

    def test_ungroup(self):
        """After Ungroup, ``groups`` must be 0 for each of the ten rows."""
        ungrouped = Ungroup().apply(self.df)
        expected_groups = [0] * 10
        self.assertEqual(expected_groups, ungrouped.groups)
예제 #9
0
 def setUp(self):
     """Create a ten-row fixture whose numeric columns hold many decimals."""
     columns = {
         'Cat': ["A", "A", "A", "B", "B", "B", "B", "C", "C", "C"],
         'Date': [
             "2021-03-01", "2021-03-02", "2021-03-03",
             "2021-03-01", "2021-03-02", "2021-03-03", "2021-03-04",
             "2021-03-01", "2021-03-02", "2021-03-03",
         ],
         'Var1': [
             10.1273, 14.26834, 13.9122, 22.74, 28.09,
             23.1, 30, 6.06, 8.57, 9.532,
         ],
         'Var2': [
             250.1823, 245.682109, 209.0111111111, 360.873421,
             328.09534163, 359.07426, 372.07227173, 74, 78, 80,
         ],
         'VarNone': [
             87.09912, 99.102, None, 120, 128.88,
             None, 99.64, None, None, None,
         ],
         'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
         'VarTextNum': [
             "5", "8", "-1.54", "0.1", "7.4",
             "11.9", "-8.644", "5", -4.8, 9.2,
         ],
     }
     self.df = DataFrame(columns)
예제 #10
0
 def setUp(self):
     """Create a fixture of twenty (X, Y) numeric pairs."""
     x_values = [
         2, 4, 3.5, -3, -1.5, 1, 0.75, -2, -0.5, -3.5,
         -5, 2.5, 5, -1, 3.5, -1.75, 3, 4, -3.5, -4,
     ]
     y_values = [
         1, 3, -2.5, 2.5, -3, 5, -0.5, 4, 1, -2.5,
         -4, -4.5, 0.75, 1.5, -5, -2.5, 2.5, -2, 2, -1.25,
     ]
     self.df = DataFrame({'X': x_values, 'Y': y_values})
예제 #11
0
class TestMovingAverage(TestCase):
    """Unit tests for the MovingAverage transformation."""

    def setUp(self):
        """Create the fixture: two 'Dep' values with five dated rows each."""
        columns = {
            'Dep': ["1", "1", "1", "1", "1", "2", "2", "2", "2", "2"],
            'Jour': [
                "2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04",
                "2021-01-05", "2020-11-16", "2020-11-17", "2020-11-18",
                "2020-11-19", "2020-11-20",
            ],
            'Var': [248, 245, 209, 359, 326, 86, 92, 74, 80, 77],
            'VarNone': [87, 99, None, 120, 128, None, 99, None, None, None],
            'VarMixed': [-3, 2, "Null", 0, None, "Null", 5, None, "Null", "Null"],
            'VarTextNum': ["5", "8", "-1", "0", "7.4", "11.9", "-8.44", "5", -4.8, 9.2],
        }
        self.df = DataFrame(columns)

    def test_ungroup_ma(self):
        """Without groups, only the first two values of 'Var_MA3' are None."""
        expected = [None, None, 84, 82, 77, 135, 190, 234, 271, 298]
        averaged = MovingAverage(3, 'Jour', 'Var').apply(self.df)
        self.assertEqual(expected, averaged['Var_MA3'])

    def test_group_ma(self):
        """Grouped by 'Dep', each group starts with two None values."""
        self.df.add_group('Dep')
        expected = [None, None, 234, 271, 298, None, None, 84, 82, 77]
        averaged = MovingAverage(3, 'Jour', 'Var').apply(self.df)
        self.assertEqual(expected, averaged['Var_MA3'])
예제 #12
0
 def _operation(self, df):
     """
     Keep the rows of ``df`` that satisfy every stored criterion.

     Each criterion is a string appended to its variable name (for example
     ``Var1`` and ``">= 10"`` give ``Var1 >= 10``) and evaluated with the
     row's values as local names.

     Parameters
     ----------
     df : DataFrame
         The table to filter

     Returns
     -------
     DataFrame
         A new DataFrame with the same columns and only the matching rows

     Raises
     ------
     KeyError
         If a criterion targets a variable that is not in ``df``
     TypeError
         If a criterion evaluates to something other than a bool
     """
     criteria_vars = list(self.__criteria.keys())
     missing = [var for var in criteria_vars if var not in df.vars]
     if missing:
         raise KeyError("Unknown variable(s) in filter criteria: "
                        + ", ".join(str(var) for var in missing))
     result = DataFrame()
     for var in df.vars:
         result.add_column(var)
     for i in range(len(df)):
         row_dict = df.row_as_dict(i)
         keep_row = True
         # Every criterion is evaluated, even once the row is rejected, so
         # that a non-boolean criterion is always reported.
         for var in criteria_vars:
             try:
                 # NOTE(security): eval runs the criterion text as Python;
                 # emptying __builtins__ is not a sandbox, so criteria must
                 # never come from untrusted input.
                 outcome = eval(
                     str(var) + " " + str(self.__criteria[var]),
                     {"__builtins__": {}}, row_dict)
             except TypeError:
                 # Values that cannot be compared (e.g. None < 3) simply
                 # fail the criterion; any other error propagates.
                 outcome = False
             if not isinstance(outcome, bool):
                 raise TypeError("Criterion on " + str(var)
                                 + " does not evaluate to a boolean")
             keep_row = keep_row and outcome
         if keep_row:
             result.add_row(df[None, i])
     return result
예제 #13
0
 def _operation(self, group_df):
     """
     Sort the rows of one group according to ``self.vars``.

     A variable name prefixed with ``desc_`` requests descending order.
     Each criterion is encoded as the 1-based position of the column in
     ``group_df.vars``, negated for descending order, before being handed
     to the merge sort.

     Parameters
     ----------
     group_df : DataFrame
         The rows of a single group

     Returns
     -------
     DataFrame
         A new DataFrame with the same columns and the rows sorted
     """
     rows = [row for row in group_df]
     index_criteria = []
     for var in self.vars:
         is_desc = var.startswith("desc_")
         if is_desc:
             var = var[5:]  # strip the "desc_" prefix
         # 1-based so that column 0 can still carry a sign.
         index_criterion = group_df.vars.index(var) + 1
         index_criteria.append(-index_criterion if is_desc
                               else index_criterion)
     previous_limit = getrecursionlimit()
     # Only ever raise the limit: lowering it for a small group could make
     # the interpreter reject the call stack that is already in place.
     setrecursionlimit(max(previous_limit, len(group_df) + 10))
     try:
         sorted_rows = Sort.__merge_sort(rows, index_criteria)
     finally:
         # Restore the caller's limit even when the sort fails.
         setrecursionlimit(previous_limit)
     result = DataFrame()
     for var in group_df.vars:
         result.add_column(var)
     for row in sorted_rows:
         result.add_row(row)
     return result
예제 #14
0
    def import_json(path, root=None):
        """
        Imports a JSON file as DataFrame.

        The file must hold an object whose values are lists of records
        (one object per row). Columns are taken from the keys of the first
        record; each following record is aligned on those keys, and a key
        missing from a record yields None.

        Parameters
        ----------
        path : str
            Absolute or relative path to the JSON file to import
        root : str = None
            Name of the root's node to import ; if None, or if the file has a
            single root node, imports the first root node of the file

        Returns
        -------
        DataFrame
            A DataFrame with the contents of the JSON file ; it has no
            column when the selected node is empty

        Raises
        ------
        KeyError
            If the file has several root nodes and ``root`` is not one of them
        """
        with open(path) as jsonfile:
            data = json.load(jsonfile)
        roots = list(data.keys())
        if len(roots) == 1 or root is None:
            root = roots[0]
        elif root not in roots:
            raise KeyError("Root node {!r} not found in {!r}".format(root, path))
        table = data[root]
        df = DataFrame()
        if not table:
            # No record: nothing to infer columns from.
            return df
        table_vars = list(table[0].keys())
        for var in table_vars:
            df.add_column(var)
        for row in table:
            # Align on the header keys rather than trusting that every
            # record lists its keys in the same order.
            df.add_row([row.get(var) for var in table_vars])
        return df
 def apply(self, df):
     """
     Run ``self._operation`` on every group of ``df`` and stack the results.

     Groups whose transformed table is empty are skipped. The columns of
     the output are those of the first non-empty transformed group, and the
     grouping variables of ``df`` are re-applied at the end.
     """
     stacked = DataFrame()
     for group_df in df.groups_df:
         transformed = self._operation(group_df)
         if len(transformed) == 0:
             continue
         if len(stacked.vars) == 0:
             # First non-empty group: it defines the output columns.
             for name in transformed.vars:
                 stacked.add_column(name)
         for row in transformed:
             stacked.add_row(row)
     return GroupBy(*df.groups_vars).apply(stacked)
예제 #16
0
    def import_csv(path, headers=True, delimiter=";", encoding='ISO-8859-1'):
        """
        Imports a CSV file as DataFrame

        Parameters
        ----------
        path : str
            Absolute or relative path to the CSV file to import
        headers : bool = True
            Specify if the file have headers ; when False, columns are named
            "Var0", "Var1", ... and the first line is imported as data
        delimiter : str = ";"
            Specify the file's delimiter
        encoding : str = 'ISO-8859-1'
            Specify the file's encoding

        Returns
        -------
        DataFrame
            A DataFrame with the contents of the CSV file ; it has no column
            when the file is empty
        """
        df = DataFrame()
        with open(path, newline='', encoding=encoding) as csv_file:
            reader = csv.reader(csv_file, delimiter=delimiter)
            first_row = next(reader, None)
            if first_row is None:
                # Empty file: no column can be inferred.
                return df
            if headers:
                for var in first_row:
                    df.add_column(var)
            else:
                for i in range(len(first_row)):
                    df.add_column("Var" + str(i))
                # Without headers the first line is data and must be kept.
                df.add_row(first_row)
            for row in reader:
                df.add_row(row)
        return df
예제 #17
0
 def apply(self, df):
     """
     Join ``df`` with ``self.__other`` on the pairs stored in ``self.__matches``.

     ``self.__matches`` maps a variable of the other table (key) to the
     variable of ``df`` it must equal (value). Values are compared through
     their string form. Every row of ``df`` is kept: it is repeated once
     per matching row of the other table, or completed with None when
     nothing matches. Non-key columns of the other table whose name already
     exists in ``df`` are prefixed with "Y_".

     Parameters
     ----------
     df : DataFrame
         The table whose rows are all kept

     Returns
     -------
     DataFrame
         The joined table
     """
     match_keys = list(self.__matches.keys())
     other_vars = [
         var for var in self.__other.vars if var not in match_keys
     ]
     result = DataFrame()
     for var in df.vars:
         result.add_column(var)
     for var in other_vars:
         if var in df.vars:
             result.add_column("Y_" + str(var))
         else:
             result.add_column(var)
     # Lookups already done, keyed by the tuple of target values, so the
     # other table is filtered only once per distinct combination.
     known_matches = {}
     for i in range(len(df)):
         base_row = df[None, i]
         filter_kw = {}
         target_values = []
         for key in match_keys:
             target_value = str(df[self.__matches[key], i])
             # repr() yields a correctly quoted and escaped literal even
             # when the value contains quotes or backslashes.
             filter_kw[key] = "==" + repr(target_value)
             target_values.append(target_value)
         cache_key = tuple(target_values)
         if cache_key not in known_matches:
             matches = Filter(**filter_kw).apply(self.__other)
             if len(matches) == 0:
                 other_content = [None] * len(other_vars)
             else:
                 other_content = Select(*other_vars).apply(matches)
             known_matches[cache_key] = other_content
         else:
             other_content = known_matches[cache_key]
         if isinstance(other_content, DataFrame):
             # One output row per matching row of the other table.
             for row in other_content:
                 new_row = deepcopy(base_row)
                 new_row.extend(row)
                 result.add_row(new_row)
         else:
             # No match: pad the row with None values.
             new_row = deepcopy(base_row)
             new_row.extend(other_content)
             result.add_row(new_row)
     return result
예제 #18
0
 def apply(self, df):
     """
     Reduce each group of ``df`` to one row of summary values.

     For every variable in ``self.vars`` the group's column is optionally
     stripped of None values (``self.__del_na``) and of non-numeric values
     (``self.__del_nan``) and passed to ``self._operation``. A scalar result
     fills the variable's column; a dict result replaces that column with
     one ``<var>_<key>`` column per key. The output is regrouped by all
     grouping variables of ``df`` except the last one.
     """
     kept_vars = [*df.groups_vars, *self.vars]
     df = Select(*kept_vars).apply(df)
     summary = DataFrame()
     for name in kept_vars:
         summary.add_column(name)
     for group_df in df.groups_df:
         # The row starts with the group's key values, read from its first row.
         summary_row = [group_df[group_var, 0] for group_var in df.groups_vars]
         for var in self.vars:
             values = group_df[var]
             if self.__del_na:
                 values = [item for item in values if item is not None]
             if self.__del_nan:
                 values = [item for item in values if isinstance(item, Number)]
             stat = self._operation(values)
             if not isinstance(stat, dict):
                 summary_row.append(stat)
                 continue
             stat_names = list(stat.keys())
             if (var + "_" + stat_names[0]) not in summary.vars:
                 # First dict result for this variable: expand its column
                 # in place, keeping the order of the dict's keys.
                 anchor = var
                 for stat_name in stat_names:
                     expanded = var + "_" + stat_name
                     summary.add_column(expanded, after=anchor)
                     anchor = expanded
                 summary.del_column(var)
             summary_row.extend(list(stat.values()))
         summary.add_row(summary_row)
     return GroupBy(*df.groups_vars[:-1]).apply(summary)
예제 #19
0
 def setUp(self):
     """Create an eight-row fixture: Var1 is constant, Var2 has repeats."""
     columns = {
         "Cat": ["A", "A", "A", "A", "B", "B", "B", "B"],
         "Var1": [10, 10, 10, 10, 10, 10, 10, 10],
         "Var2": [10, 10, 11, 11, 13, 13, 13, 13],
     }
     self.df = DataFrame(columns)
예제 #20
0
 def test_init_emptyDataFrame(self):
     """A DataFrame built without arguments has no row and no variable."""
     empty = DataFrame()
     self.assertIsInstance(empty, DataFrame)
     row_count = len(empty)
     self.assertEqual(row_count, 0)
     var_count = len(empty.vars)
     self.assertEqual(var_count, 0)
예제 #21
0
 def setUp(self):
     """Create an eight-row fixture with two numeric variables."""
     columns = {
         "Cat": ["A", "A", "A", "B", "B", "B", "C", "C"],
         "Var1": [98, 100, 102, 100, 200, 150, 620, 40],
         "Var2": [74, 81, 85, 71, 103, 99, 101, 76],
     }
     self.df = DataFrame(columns)