예제 #1
0
    def execute(self, df: data.Frame) -> data.Frame:
        """Remove the columns reported as duplicates among the selected ones.

        The columns at the positions stored in ``self.__selected`` are passed to
        ``find_duplicates``; every label it returns is dropped from the frame,
        while the remaining columns keep their original relative order.

        :param df: the input frame
        :return: a frame without the reported columns; when nothing is
            reported the raw data is wrapped unchanged
        """
        rawDf = df.getRawFrame()
        colOrder: List[str] = rawDf.columns.to_list()

        duplicates = find_duplicates(rawDf.iloc[:, self.__selected])
        if not duplicates:
            # Nothing to remove
            return data.Frame(rawDf)

        # 'drop' is not in-place: it returns a new object (and validates the
        # labels), so deep-copying the whole input beforehand is wasted work
        reduced = rawDf.drop(duplicates, axis=1)
        # Build the lookup once instead of scanning 'duplicates' for every column
        dropped = set(duplicates)
        # Keep original order; selecting with a list also materialises the result
        order = [c for c in colOrder if c not in dropped]
        return data.Frame(reduced[order])
예제 #2
0
    def execute(self, df: data.Frame) -> data.Frame:
        """Discretise the configured date/time columns into labelled bins.

        ``self.__attributes`` maps a column position to a 4-tuple
        ``(edges, labels, byDate, byTime)``. Consecutive edges become
        right-closed intervals, the column is cut over them and the resulting
        categories are renamed with ``labels``.

        When ``self.__attributesSuffix`` is set, the result is stored in a column
        named after the original one plus the suffix and appended after the
        existing columns (an existing column with that name is replaced).
        Otherwise the original column is replaced and the input column order is
        restored.

        :param df: the input frame
        :return: a new frame holding the binned columns
        """
        columns = df.colnames
        df = df.getRawFrame().copy(True)
        suffix = self.__attributesSuffix

        def _withDefaultDate(ts):
            # Swap the date part for the default date and keep the original time
            qtTime = toQtDateTime(ts.to_pydatetime()).time()
            return pd.Timestamp(QDateTime(_IntervalWidget.DEFAULT_DATE, qtTime).toPython())

        # The bin edges are expected to be already normalised (default time/date)
        # by the editor, so they are used as they are
        binsByColumn: Dict[int, pd.IntervalIndex] = dict()
        for pos, opts in self.__attributes.items():
            edges = opts[0]
            binsByColumn[pos] = pd.IntervalIndex(
                [pd.Interval(lo, hi, closed='right') for lo, hi in zip(edges, edges[1:])])

        processedDict = dict()
        for pos, opts in self.__attributes.items():
            _, labels, byDate, byTime = opts
            values = df.iloc[:, pos]
            # NOTE(review): nothing is normalised for the date-only case
            # (byDate and not byTime) - confirm the time part needs no reset
            if byTime and not byDate:
                # Every timestamp gets the same date; missing values are skipped
                # by the mapping and therefore propagated
                values = values.map(_withDefaultDate, na_action='ignore')
            bins = binsByColumn[pos]
            binned = pd.cut(values, bins=bins)
            target = columns[pos] + suffix if suffix else columns[pos]
            processedDict[target] = binned.cat.rename_categories(dict(zip(bins, labels)))

        # With a suffix only the clashing names must go, without it every
        # processed column replaces its original
        toReplace = (set(processedDict.keys()) & set(columns)) if suffix \
            else list(processedDict.keys())
        if toReplace:
            df = df.drop(columns=toReplace)
        processed = pd.DataFrame(processedDict).set_index(df.index)

        df = pd.concat([df, processed], ignore_index=False, axis=1)
        if suffix:
            return data.Frame(df)
        # Columns were replaced: put them back in the input order
        return data.Frame(df[columns])