def _operation(self, group_df):
    """
    Sorts the rows of one group according to the sort variables.

    Each name in ``self.vars`` is turned into a 1-based column position
    of `group_df`; a name prefixed with "desc_" has the prefix stripped
    and its position negated (presumably meaning descending order; the
    interpretation is done by ``Sort.__merge_sort``).

    Parameters
    ----------
    group_df : DataFrame
        The group whose rows are sorted

    Returns
    -------
    DataFrame
        A new DataFrame with the columns of `group_df` and the sorted rows

    Raises
    ------
    ValueError
        If a sort variable is not a column of `group_df`
    """
    rows = list(group_df)
    columns = group_df.vars

    index_criteria = []
    for var in self.vars:
        is_desc = var.startswith("desc_")
        if is_desc:
            var = var[5:]
        # Positions are 1-based so that the sign can carry the direction
        # (a 0-based first column could not be negated).
        position = columns.index(var) + 1
        index_criteria.append(-position if is_desc else position)

    default_recursion_limit = getrecursionlimit()
    # The limit is raised in proportion to the number of rows, but never
    # lowered: setting it below the current limit can raise RecursionError
    # right away (small groups, deep call stack) or make the sort fail.
    setrecursionlimit(max(default_recursion_limit, len(group_df) + 10))
    try:
        sorted_rows = Sort.__merge_sort(rows, index_criteria)
    finally:
        # Restore the interpreter-wide limit even if the sort raises.
        setrecursionlimit(default_recursion_limit)

    result = DataFrame()
    for var in columns:
        result.add_column(var)
    for row in sorted_rows:
        result.add_row(row)
    return result
def _operation(self, df):
    """
    Keeps the rows of `df` for which every criterion evaluates to True.

    Each criterion is evaluated as the expression
    ``"<var> <criterion>"`` with the row's values as local names.

    Parameters
    ----------
    df : DataFrame
        The DataFrame to filter

    Returns
    -------
    DataFrame
        A new DataFrame with the columns of `df` and the retained rows

    Raises
    ------
    KeyError
        If a variable with a criterion is not a column of `df`
    TypeError
        If a criterion evaluates to something that is not a bool
    """
    criteria_vars = list(self.__criteria.keys())
    for var in criteria_vars:
        if var not in df.vars:
            raise KeyError

    result = DataFrame()
    for column in df.vars:
        result.add_column(column)

    for index in range(len(df)):
        namespace = df.row_as_dict(index)
        keep = True
        # Every criterion is evaluated, even once the row is rejected,
        # so that errors in later criteria are still reported.
        for var in criteria_vars:
            try:
                # NOTE(review): eval with emptied __builtins__ is not a
                # sandbox; criteria must come from trusted code.
                verdict = eval(
                    str(var) + " " + str(self.__criteria[var]),
                    {"__builtins__": {}},
                    namespace)
            except TypeError:
                # A comparison that cannot be made counts as "no match".
                verdict = False
            if not isinstance(verdict, bool):
                raise TypeError
            keep = keep and verdict
        if keep:
            result.add_row(df[None, index])
    return result
def import_json(path, root=None):
    """
    Imports a JSON file as DataFrame.

    The columns are the keys of the first record of the imported node;
    a key missing from a later record is imported as None.

    Parameters
    ----------
    path : str
        Absolute or relative path to the JSON file to import
    root : str = None
        Name of the root's node to import ; if None, or if the file has
        a single root node, imports the first root node of the file

    Returns
    -------
    DataFrame
        A DataFrame with the contents of the JSON file (without any
        column if the imported node is empty)

    Raises
    ------
    KeyError
        If the file has several root nodes and `root` is not one of them
    """
    # Binary mode: json then detects the encoding itself (UTF-8 with or
    # without BOM, UTF-16, UTF-32) instead of relying on the platform's
    # default text encoding.
    with open(path, "rb") as jsonfile:
        data = json.load(jsonfile)

    roots = list(data.keys())
    if len(roots) == 1 or root is None:
        root = roots[0]
    elif root not in roots:
        raise KeyError(root)

    table = data[root]
    df = DataFrame()
    if not table:
        return df

    table_vars = list(table[0].keys())
    for var in table_vars:
        df.add_column(var)
    for row in table:
        # Values are looked up by key so that each one lands in its own
        # column even when records differ in key order or in keys.
        df.add_row([row.get(var) for var in table_vars])
    return df
def apply(self, df):
    """
    Computes one summary row per group of `df`.

    For every group, the row holds the value of each grouping variable
    (taken from the group's first row) followed by the result of
    ``self._operation`` on each variable of ``self.vars``. When the
    operation returns a dict, the variable's column is replaced by one
    column per key, named ``<var>_<key>``.

    Parameters
    ----------
    df : DataFrame
        The (possibly grouped) DataFrame to summarise

    Returns
    -------
    DataFrame
        The summary, grouped by all grouping variables of `df` but the last
    """
    list_vars = [*df.groups_vars, *self.vars]
    df = Select(*list_vars).apply(df)

    result = DataFrame()
    for name in list_vars:
        result.add_column(name)

    for group_df in df.groups_df:
        row = [group_df[group_var, 0] for group_var in df.groups_vars]
        for var in self.vars:
            values = group_df[var]
            if self.__del_na:
                values = [v for v in values if v is not None]
            if self.__del_nan:
                values = [v for v in values if isinstance(v, Number)]
            summary = self._operation(values)

            if not isinstance(summary, dict):
                row.append(summary)
                continue

            suffixes = list(summary.keys())
            # The expanded columns are created only once, the first time
            # a dict result is met for this variable.
            if (var + "_" + suffixes[0]) not in result.vars:
                previous = var
                for suffix in suffixes:
                    expanded = var + "_" + suffix
                    result.add_column(expanded, after=previous)
                    previous = expanded
                result.del_column(var)
            row.extend(summary.values())
        result.add_row(row)

    return GroupBy(*df.groups_vars[:-1]).apply(result)
def apply(self, df):
    """
    Builds a DataFrame restricted to the columns named in ``self.vars``.

    Parameters
    ----------
    df : DataFrame
        The DataFrame to take the columns from

    Returns
    -------
    DataFrame
        The selected columns, in the order of ``self.vars``, grouped by
        the grouping variables of `df` that are among the selected ones
    """
    selected = DataFrame()
    for name in self.vars:
        selected.add_column(name, df[name])

    # Only grouping variables that survive the selection can be kept.
    remaining_groups = []
    for name in df.groups_vars:
        if name in self.vars:
            remaining_groups.append(name)

    return GroupBy(*remaining_groups).apply(selected)
def apply(self, df):
    """
    Applies ``self._operation`` to each group of `df` and stacks the results.

    Parameters
    ----------
    df : DataFrame
        The (possibly grouped) DataFrame to transform

    Returns
    -------
    DataFrame
        The rows of every transformed group, in group order, grouped by
        the same grouping variables as `df`
    """
    result = DataFrame()
    for group_df in df.groups_df:
        transformed = self._operation(group_df)
        if len(transformed) == 0:
            continue
        # The columns come from the first non-empty transformed group.
        # NOTE(review): when every transformed group is empty, the result
        # has no column at all - confirm that GroupBy and callers cope.
        if len(result.vars) == 0:
            for var in transformed.vars:
                result.add_column(var)
        for row in transformed:
            result.add_row(row)
    return GroupBy(*df.groups_vars).apply(result)
def import_csv(path, headers=True, delimiter=";", encoding='ISO-8859-1'):
    """
    Imports a CSV file as DataFrame

    Parameters
    ----------
    path : str
        Absolute or relative path to the CSV file to import
    headers : bool = True
        Specify if the file has headers ; if False, the columns are
        named "Var0", "Var1", ... and the first line is imported as data
    delimiter : str = ";"
        Specify the file's delimiter
    encoding : str = 'ISO-8859-1'
        Specify the file's encoding

    Returns
    -------
    DataFrame
        A DataFrame with the contents of the CSV file (without any
        column if the file is empty)
    """
    df = DataFrame()
    with open(path, newline='', encoding=encoding) as csv_file:
        reader = csv.reader(csv_file, delimiter=delimiter)
        first_row = next(reader, None)
        if first_row is None:
            return df

        if headers:
            for var in first_row:
                df.add_column(var)
        else:
            for i in range(len(first_row)):
                df.add_column("Var" + str(i))
            # Without headers the first line is data as well: it must not
            # be consumed only to count the columns.
            df.add_row(first_row)

        for row in reader:
            df.add_row(row)
    return df
def apply(self, df):
    """
    Joins `df` with the other DataFrame held by this operation.

    Every row of `df` is kept. For each one, the rows of the other
    DataFrame whose matching columns equal the string form of the
    corresponding values of `df` are looked up, and one output row is
    produced per match: the row of `df` followed by the non-matching
    columns of the other DataFrame. A row without any match is
    completed with None.

    Columns of the other DataFrame whose name already exists in `df`
    are prefixed with "Y_".

    Parameters
    ----------
    df : DataFrame
        The DataFrame to enrich

    Returns
    -------
    DataFrame
        The joined DataFrame
    """
    match_keys = list(self.__matches.keys())
    other_vars = [
        var for var in self.__other.vars if var not in match_keys
    ]

    result = DataFrame()
    for var in df.vars:
        result.add_column(var)
    for var in other_vars:
        if var in df.vars:
            result.add_column("Y_" + str(var))
        else:
            result.add_column(var)

    # Lookups already done, keyed by the tuple of matched values. A tuple
    # (rather than a concatenated string) cannot mix up two different
    # combinations of values.
    known_matches = {}
    for i in range(len(df)):
        base_row = df[None, i]
        filter_kw = {}
        key_parts = []
        for key in match_keys:
            target_text = str(df[self.__matches[key], i])
            # repr() yields a correctly escaped string literal, so values
            # containing quotes or backslashes cannot break or alter the
            # expression evaluated by Filter.
            filter_kw[key] = "==" + repr(target_text)
            key_parts.append(target_text)
        cache_key = tuple(key_parts)

        other_content = known_matches.get(cache_key)
        if other_content is None:
            matches = Filter(**filter_kw).apply(self.__other)
            if len(matches) == 0:
                other_content = [None] * len(other_vars)
            else:
                other_content = Select(*other_vars).apply(matches)
            known_matches[cache_key] = other_content

        if isinstance(other_content, DataFrame):
            for row in other_content:
                new_row = deepcopy(base_row)
                new_row.extend(row)
                result.add_row(new_row)
        else:
            new_row = deepcopy(base_row)
            new_row.extend(other_content)
            result.add_row(new_row)
    return result