def execute(self, df: data.Frame) -> data.Frame:
    """Drop the columns that ``find_duplicates`` reports among the selected ones.

    Only the columns at the positions stored in ``self.__selected`` are
    inspected. The surviving columns keep their original relative order.
    When nothing is reported, the raw frame is re-wrapped as is, without
    being copied.

    :param df: the input frame
    :return: a ``data.Frame`` without the reported columns
    """
    rawFrame = df.getRawFrame()
    originalOrder: List[str] = rawFrame.columns.to_list()
    # NOTE(review): find_duplicates is defined elsewhere; its result is used
    # below as a collection of column labels -- confirm against its definition
    toRemove = find_duplicates(rawFrame.iloc[:, self.__selected])
    if not toRemove:
        return data.Frame(rawFrame)
    # Deep-copy first, then drop the reported columns from the copy
    pruned = rawFrame.copy(True).drop(toRemove, axis=1)
    # Keep original order
    remaining = [label for label in originalOrder if label not in toRemove]
    return data.Frame(pruned[remaining])
def execute(self, df: data.Frame) -> data.Frame:
    """Discretize the configured datetime columns into labelled intervals.

    ``self.__attributes`` maps a column position to a 4-tuple
    ``(edges, labels, byDate, byTime)``. Consecutive pairs of ``edges`` form
    right-closed intervals, and each interval is renamed to the label at the
    same position in ``labels``. When ``byTime`` is set and ``byDate`` is not,
    every timestamp first gets its date replaced by
    ``_IntervalWidget.DEFAULT_DATE`` so that only the time of day is binned.

    If ``self.__attributesSuffix`` is empty, each discretized column replaces
    its source column at the same position. Otherwise the result is appended
    as a new column named ``<original name><suffix>``, replacing any existing
    column that already has that name.

    :param df: the input frame; it is deep-copied and never modified
    :return: a new ``data.Frame`` holding the discretized columns
    """
    columns = df.colnames
    df = df.getRawFrame().copy(True)
    if not self.__attributes:
        # Nothing to discretize. Without this guard the empty frame built
        # below cannot take the index of a non-empty input, and set_index
        # raises a length-mismatch ValueError.
        return data.Frame(df)
    suffix = self.__attributesSuffix

    # Notice that these timestamps are already set to a proper format (with
    # default time/date) by the editor
    intervals: Dict[int, pd.IntervalIndex] = {
        i: pd.IntervalIndex([pd.Interval(a, b, closed='right')
                             for a, b in zip(opts[0], opts[0][1:])])
        for i, opts in self.__attributes.items()
    }

    def withDefaultDate(ts):
        # Swap the date part for the default date and keep the original time
        return pd.Timestamp(
            QDateTime(_IntervalWidget.DEFAULT_DATE,
                      toQtDateTime(ts.to_pydatetime()).time()).toPython())

    processedDict: Dict[str, pd.Series] = {}
    for i, (_, labels, byDate, byTime) in self.__attributes.items():
        applyCol = df.iloc[:, i]
        if byTime and not byDate:
            # Give every timestamp the same date so that binning depends on
            # the time only. NaN values are propagated (na_action='ignore')
            applyCol = applyCol.map(withDefaultDate, na_action='ignore')
        name = columns[i] + suffix if suffix else columns[i]
        categoriesMap = dict(zip(intervals[i], labels))
        processedDict[name] = pd.cut(
            applyCol, bins=intervals[i]).cat.rename_categories(categoriesMap)

    # Existing columns that the new ones are going to replace
    duplicateColumns: List[str]
    if suffix:
        existing = set(columns)
        duplicateColumns = [name for name in processedDict if name in existing]
    else:
        duplicateColumns = list(processedDict)
    if duplicateColumns:
        df = df.drop(columns=duplicateColumns)

    processed = pd.DataFrame(processedDict).set_index(df.index)
    df = pd.concat([df, processed], ignore_index=False, axis=1)
    if not suffix:
        # Reorder columns so replaced columns return to their old position
        df = df[columns]
    return data.Frame(df)