Example #1
0
    def test_indexSearch(self, dataFrame):
        datasearch = DataSearch('Test', dataFrame=dataFrame)

        filterString = 'indexSearch([0])'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 1

        filterString = 'indexSearch([0, 2])'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 2

        filterString = 'indexSearch([0, 1, 2])'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 3

        filterString = 'indexSearch([99])'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 0
Example #2
0
    def test_extentSearch(self, geoDataFrame, dataFrame):
        datasearch = DataSearch('Test', dataFrame=geoDataFrame)

        filterString = 'extentSearch(51, 9, 55, 14)'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 1

        datasearch = DataSearch('Test', dataFrame=dataFrame)

        filterString = 'extentSearch(51, 9, 55, 14)'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 0
Example #3
0
    def test_freeSearch(self, dataFrame):
        datasearch = DataSearch('Test', dataFrame=dataFrame)

        filterString = 'freeSearch("0")'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 2

        filterString = 'freeSearch(1)'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert not valid

        filterString = 'freeSearch("12")'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 1
Example #4
0
    def test_search(self, dataFrame):
        datasearch = DataSearch('Test', dataFrame=dataFrame)

        filterString = 'Foo < 10'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 2

        filterString = 'Foo < 10 and Bar'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert not valid

        filterString = '(Foo < 10) & (Bar > 1)'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert valid
        assert sum(ret) == 1

        filterString = '(Monty < 10) & (Bar > 1)'
        datasearch.setFilterString(filterString)
        ret, valid = datasearch.search()
        assert not valid
class DataFrameModel(QtCore.QAbstractTableModel):
    """data model for use in QTableView, QListView, QComboBox, etc.

    Attributes:
        timestampFormat (unicode): formatting string for conversion of timestamps to QtCore.QDateTime.
            Used in data method.
        sortingAboutToStart (QtCore.pyqtSignal): emitted directly before sorting starts.
        sortingFinished (QtCore.pyqtSignal): emitted, when sorting finished.
        dtypeChanged (Signal(columnName)): passed from related ColumnDtypeModel
            if a columns dtype has changed.
        changingDtypeFailed (Signal(columnName, index, dtype)):
            passed from related ColumnDtypeModel.
            emitted after a column has changed it's data type.
        dataChanged (Signal):
            Emitted, if data has changed, e.x. finished loading, new columns added or removed.
            It's not the same as layoutChanged.
            Usefull to reset delegates in the view.
    """

    _float_precisions = {
        "float16": numpy.finfo(numpy.float16).precision - 2,
        "float32": numpy.finfo(numpy.float32).precision - 1,
        "float64": numpy.finfo(numpy.float64).precision - 1
    }
    """list of int datatypes for easy checking in data() and setData()"""
    _intDtypes = SupportedDtypes.intTypes() + SupportedDtypes.uintTypes()
    """list of float datatypes for easy checking in data() and setData()"""
    _floatDtypes = SupportedDtypes.floatTypes()
    """list of bool datatypes for easy checking in data() and setData()"""
    _boolDtypes = SupportedDtypes.boolTypes()
    """list of datetime datatypes for easy checking in data() and setData()"""
    _dateDtypes = SupportedDtypes.datetimeTypes()

    _timestampFormat = Qt.ISODate

    sortingAboutToStart = Signal()
    sortingFinished = Signal()
    dtypeChanged = Signal(int, object)
    changingDtypeFailed = Signal(object, QtCore.QModelIndex, object)
    dataChanged = Signal()
    dataFrameChanged = Signal()

    def __init__(self, dataFrame=None, copyDataFrame=False, filePath=None):
        """

        Args:
            dataFrame (pandas.core.frame.DataFrame, optional): initializes the model with given DataFrame.
                If none is given an empty DataFrame will be set. defaults to None.
            copyDataFrame (bool, optional): create a copy of dataFrame or use it as is. defaults to False.
                If you use it as is, you can change it from outside otherwise you have to reset the dataFrame
                after external changes.
            filePath (str, optional): stores the original path for tracking.

        """
        super(DataFrameModel, self).__init__()

        self._dataFrame = pandas.DataFrame()

        if dataFrame is not None:
            self.setDataFrame(dataFrame, copyDataFrame=copyDataFrame)

        self.dataChanged.emit()

        self._dataFrameOriginal = None
        self._search = DataSearch("nothing", "")
        self.editable = False
        self._filePath = filePath

    @property
    def filePath(self):
        """
        Access to the internal _filepath property (could be None)
        :return: qtpandas.models.DataFrameModel._filepath
        """
        return self._filePath

    def dataFrame(self):
        """
        getter function to _dataFrame. Holds all data.

        Note:
            It's not implemented with python properties to keep Qt conventions.
            Not sure why??
        """
        return self._dataFrame

    def setDataFrameFromFile(self, filepath, **kwargs):
        """
        Sets the model's dataFrame by reading a file.
        Accepted file formats:
            - .xlsx (sheet1 is read unless specified in kwargs)
            - .csv (comma separated unless specified in kwargs)
            - .txt (any separator)

        :param filepath: (str)
            The path to the file to be read.
        :param kwargs:
            pandas.read_csv(**kwargs) or pandas.read_excel(**kwargs)
        :return: None
        """
        df = superReadFile(filepath, **kwargs)
        self.setDataFrame(df, filePath=filepath)

    def setDataFrame(self, dataFrame, copyDataFrame=False, filePath=None):
        """
        Setter function to _dataFrame. Holds all data.

        Note:
            It's not implemented with python properties to keep Qt conventions.

        Raises:
            TypeError: if dataFrame is not of type pandas.core.frame.DataFrame.

        Args:
            dataFrame (pandas.core.frame.DataFrame): assign dataFrame to _dataFrame. Holds all the data displayed.
            copyDataFrame (bool, optional): create a copy of dataFrame or use it as is. defaults to False.
                If you use it as is, you can change it from outside otherwise you have to reset the dataFrame
                after external changes.

        """
        if not isinstance(dataFrame, pandas.core.frame.DataFrame):
            raise TypeError("not of type pandas.core.frame.DataFrame")

        self.layoutAboutToBeChanged.emit()
        if copyDataFrame:
            self._dataFrame = dataFrame.copy()
        else:
            self._dataFrame = dataFrame

        self._columnDtypeModel = ColumnDtypeModel(dataFrame)
        self._columnDtypeModel.dtypeChanged.connect(self.propagateDtypeChanges)
        self._columnDtypeModel.changeFailed.connect(
            lambda columnName, index, dtype: self.changingDtypeFailed.emit(
                columnName, index, dtype))
        if filePath is not None:
            self._filePath = filePath
        self.layoutChanged.emit()
        self.dataChanged.emit()
        self.dataFrameChanged.emit()

    @Slot(int, object)
    def propagateDtypeChanges(self, column, dtype):
        """
        Emits a dtypeChanged signal with the column and dtype.

        :param column: (str)
        :param dtype: ??
        :return: None
        """
        self.dtypeChanged.emit(column, dtype)

    @property
    def timestampFormat(self):
        """getter to _timestampFormat"""
        return self._timestampFormat

    @timestampFormat.setter
    def timestampFormat(self, timestampFormat):
        """
        Setter to _timestampFormat. Formatting string for conversion of timestamps to QtCore.QDateTime

        Raises:
            AssertionError: if timestampFormat is not of type unicode.

        Args:
            timestampFormat (unicode): assign timestampFormat to _timestampFormat.
                Formatting string for conversion of timestamps to QtCore.QDateTime. Used in data method.

        """
        if not isinstance(timestampFormat, str):
            raise TypeError('not of type unicode')
        #assert isinstance(timestampFormat, unicode) or timestampFormat.__class__.__name__ == "DateFormat", "not of type unicode"
        self._timestampFormat = timestampFormat

    def rename(self, index=None, columns=None, **kwargs):
        """
        Renames the dataframe inplace calling appropriate signals.
        Wraps pandas.DataFrame.rename(*args, **kwargs) - overrides
        the inplace kwarg setting it to True.

        Example use:
        renames = {'colname1':'COLNAME_1', 'colname2':'COL2'}
        DataFrameModel.rename(columns=renames)

        :param args:
            see pandas.DataFrame.rename
        :param kwargs:
            see pandas.DataFrame.rename
        :return:
            None
        """
        kwargs['inplace'] = True
        self.layoutAboutToBeChanged.emit()
        self._dataFrame.rename(index, columns, **kwargs)
        self.layoutChanged.emit()
        self.dataChanged.emit()
        self.dataFrameChanged.emit()

    def applyFunction(self, func):
        """
        Applies a function to the dataFrame with appropriate signals.
        The function must return a dataframe.
        :param func: A function (or partial function) that accepts a dataframe as the first argument.
        :return: None
        :raise:
            AssertionError if the func is not callable.
            AssertionError if the func does not return a DataFrame.
        """
        assert callable(func), "function {} is not callable".format(func)
        self.layoutAboutToBeChanged.emit()
        df = func(self._dataFrame)
        assert isinstance(df, pandas.DataFrame
                          ), "function {} did not return a DataFrame.".format(
                              func.__name__)
        self._dataFrame = df
        self.layoutChanged.emit()
        self.dataChanged.emit()
        self.dataFrameChanged.emit()

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """
        Return the header depending on section, orientation and Qt::ItemDataRole

        Args:
            section (int): For horizontal headers, the section number corresponds to the column number.
                Similarly, for vertical headers, the section number corresponds to the row number.
            orientation (Qt::Orientations):
            role (Qt::ItemDataRole):

        Returns:
            None if not Qt.DisplayRole
            _dataFrame.columns.tolist()[section] if orientation == Qt.Horizontal
            section if orientation == Qt.Vertical
            None if horizontal orientation and section raises IndexError
        """
        if role != Qt.DisplayRole:
            return None

        if orientation == Qt.Horizontal:
            try:
                label = self._dataFrame.columns.tolist()[section]
                if label == section:
                    label = section
                return label
            except (IndexError, ):
                return None
        elif orientation == Qt.Vertical:
            return section

    def data(self, index, role=Qt.DisplayRole):
        """return data depending on index, Qt::ItemDataRole and data type of the column.

        Args:
            index (QtCore.QModelIndex): Index to define column and row you want to return
            role (Qt::ItemDataRole): Define which data you want to return.

        Returns:
            None if index is invalid
            None if role is none of: DisplayRole, EditRole, CheckStateRole, DATAFRAME_ROLE

            if role DisplayRole:
                unmodified _dataFrame value if column dtype is object (string or unicode).
                _dataFrame value as int or long if column dtype is in _intDtypes.
                _dataFrame value as float if column dtype is in _floatDtypes. Rounds to defined precision (look at: _float16_precision, _float32_precision).
                None if column dtype is in _boolDtypes.
                QDateTime if column dtype is numpy.timestamp64[ns]. Uses timestampFormat as conversion template.

            if role EditRole:
                unmodified _dataFrame value if column dtype is object (string or unicode).
                _dataFrame value as int or long if column dtype is in _intDtypes.
                _dataFrame value as float if column dtype is in _floatDtypes. Rounds to defined precision (look at: _float16_precision, _float32_precision).
                _dataFrame value as bool if column dtype is in _boolDtypes.
                QDateTime if column dtype is numpy.timestamp64[ns]. Uses timestampFormat as conversion template.

            if role CheckStateRole:
                Qt.Checked or Qt.Unchecked if dtype is numpy.bool_ otherwise None for all other dtypes.

            if role DATAFRAME_ROLE:
                unmodified _dataFrame value.

            raises TypeError if an unhandled dtype is found in column.
        """

        if not index.isValid():
            return None

        def convertValue(row, col, columnDtype):
            value = None
            if columnDtype == object:
                value = self._dataFrame.ix[row, col]
            elif columnDtype in self._floatDtypes:
                value = round(float(self._dataFrame.ix[row, col]),
                              self._float_precisions[str(columnDtype)])
            elif columnDtype in self._intDtypes:
                value = int(self._dataFrame.ix[row, col])
            elif columnDtype in self._boolDtypes:
                # TODO this will most likely always be true
                # See: http://stackoverflow.com/a/715455
                # well no: I am mistaken here, the data is already in the dataframe
                # so its already converted to a bool
                value = bool(self._dataFrame.ix[row, col])

            elif columnDtype in self._dateDtypes:
                #print numpy.datetime64(self._dataFrame.ix[row, col])
                value = pandas.Timestamp(self._dataFrame.ix[row, col])
                value = QtCore.QDateTime.fromString(str(value),
                                                    self.timestampFormat)
                #print value
            # else:
            #     raise TypeError, "returning unhandled data type"
            return value

        row = self._dataFrame.index[index.row()]
        col = self._dataFrame.columns[index.column()]
        columnDtype = self._dataFrame[col].dtype

        if role == Qt.DisplayRole:
            # return the value if you wanne show True/False as text
            if columnDtype == numpy.bool:
                result = self._dataFrame.ix[row, col]
            else:
                result = convertValue(row, col, columnDtype)
        elif role == Qt.EditRole:
            result = convertValue(row, col, columnDtype)
        elif role == Qt.CheckStateRole:
            if columnDtype == numpy.bool_:
                if convertValue(row, col, columnDtype):
                    result = Qt.Checked
                else:
                    result = Qt.Unchecked
            else:
                result = None
        elif role == DATAFRAME_ROLE:
            result = self._dataFrame.ix[row, col]
        else:
            result = None
        return result

    def flags(self, index):
        """Returns the item flags for the given index as ored value, e.x.: Qt.ItemIsUserCheckable | Qt.ItemIsEditable

        If a combobox for bool values should pop up ItemIsEditable have to set for bool columns too.

        Args:
            index (QtCore.QModelIndex): Index to define column and row

        Returns:
            if column dtype is not boolean Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsEditable
            if column dtype is boolean Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsUserCheckable
        """
        flags = super(DataFrameModel, self).flags(index)

        if not self.editable:
            return flags

        col = self._dataFrame.columns[index.column()]
        if self._dataFrame[col].dtype == numpy.bool:
            flags |= Qt.ItemIsUserCheckable
        else:
            # if you want to have a combobox for bool columns set this
            flags |= Qt.ItemIsEditable

        return flags

    def setData(self, index, value, role=Qt.DisplayRole):
        """Set the value to the index position depending on Qt::ItemDataRole and data type of the column

        Args:
            index (QtCore.QModelIndex): Index to define column and row.
            value (object): new value.
            role (Qt::ItemDataRole): Use this role to specify what you want to do.

        Raises:
            TypeError: If the value could not be converted to a known datatype.

        Returns:
            True if value is changed. Calls layoutChanged after update.
            False if value is not different from original value.

        """
        if not index.isValid() or not self.editable:
            return False

        if value != index.data(role):

            self.layoutAboutToBeChanged.emit()

            row = self._dataFrame.index[index.row()]
            col = self._dataFrame.columns[index.column()]
            #print 'before change: ', index.data().toUTC(), self._dataFrame.iloc[row][col]
            columnDtype = self._dataFrame[col].dtype

            if columnDtype == object:
                pass

            elif columnDtype in self._intDtypes:
                dtypeInfo = numpy.iinfo(columnDtype)
                if value < dtypeInfo.min:
                    value = dtypeInfo.min
                elif value > dtypeInfo.max:
                    value = dtypeInfo.max

            elif columnDtype in self._floatDtypes:
                value = numpy.float64(value).astype(columnDtype)

            elif columnDtype in self._boolDtypes:
                value = numpy.bool_(value)

            elif columnDtype in self._dateDtypes:
                # convert the given value to a compatible datetime object.
                # if the conversation could not be done, keep the original
                # value.
                if isinstance(value, QtCore.QDateTime):
                    value = value.toString(self.timestampFormat)
                try:
                    value = pandas.Timestamp(value)
                except Exception:
                    raise Exception(
                        "Can't convert '{0}' into a datetime".format(value))
                    # return False
            else:
                raise TypeError("try to set unhandled data type")

            self._dataFrame.set_value(row, col, value)

            #print 'after change: ', value, self._dataFrame.iloc[row][col]
            self.layoutChanged.emit()
            return True
        else:
            return False

    def rowCount(self, index=QtCore.QModelIndex()):
        """returns number of rows

        Args:
            index (QtCore.QModelIndex, optional): Index to define column and row. defaults to empty QModelIndex

        Returns:
            number of rows
        """
        # len(df.index) is faster, so use it:
        # In [12]: %timeit df.shape[0]
        # 1000000 loops, best of 3: 437 ns per loop
        # In [13]: %timeit len(df.index)
        # 10000000 loops, best of 3: 110 ns per loop
        # %timeit df.__len__()
        # 1000000 loops, best of 3: 215 ns per loop
        return len(self._dataFrame.index)

    def columnCount(self, index=QtCore.QModelIndex()):
        """returns number of columns

        Args:
            index (QtCore.QModelIndex, optional): Index to define column and row. defaults to empty QModelIndex

        Returns:
            number of columns
        """
        # speed comparison:
        # In [23]: %timeit len(df.columns)
        # 10000000 loops, best of 3: 108 ns per loop

        # In [24]: %timeit df.shape[1]
        # 1000000 loops, best of 3: 440 ns per loop
        return len(self._dataFrame.columns)

    def sort(self, columnId, order=Qt.AscendingOrder):
        """
        Sorts the model column

        After sorting the data in ascending or descending order, a signal
        `layoutChanged` is emitted.

        :param: columnId (int)
            the index of the column to sort on.
        :param: order (Qt::SortOrder, optional)
            descending(1) or ascending(0). defaults to Qt.AscendingOrder

        """
        self.layoutAboutToBeChanged.emit()
        self.sortingAboutToStart.emit()
        column = self._dataFrame.columns[columnId]
        self._dataFrame.sort_values(column,
                                    ascending=not bool(order),
                                    inplace=True)
        self.layoutChanged.emit()
        self.sortingFinished.emit()

    def setFilter(self, search):
        """
        Apply a filter and hide rows.

        The filter must be a `DataSearch` object, which evaluates a python
        expression.
        If there was an error while parsing the expression, the data will remain
        unfiltered.

        Args:
            search(qtpandas.DataSearch): data search object to use.

        Raises:
            TypeError: An error is raised, if the given parameter is not a
                `DataSearch` object.

        """
        if not isinstance(search, DataSearch):
            raise TypeError(
                'The given parameter must an `qtpandas.DataSearch` object')

        self._search = search

        self.layoutAboutToBeChanged.emit()

        if self._dataFrameOriginal is not None:
            self._dataFrame = self._dataFrameOriginal
        self._dataFrameOriginal = self._dataFrame.copy()

        self._search.setDataFrame(self._dataFrame)
        searchIndex, valid = self._search.search()

        if valid:
            self._dataFrame = self._dataFrame[searchIndex]
            self.layoutChanged.emit()
        else:
            self.clearFilter()
            self.layoutChanged.emit()

        self.dataFrameChanged.emit()

    def clearFilter(self):
        """
        Clear all filters.
        """
        if self._dataFrameOriginal is not None:
            self.layoutAboutToBeChanged.emit()
            self._dataFrame = self._dataFrameOriginal
            self._dataFrameOriginal = None
            self.layoutChanged.emit()

    def columnDtypeModel(self):
        """
        Getter for a ColumnDtypeModel.

        :return:
            qtpandas.models.ColumnDtypeModel
        """
        return self._columnDtypeModel

    def enableEditing(self, editable=True):
        """
        Sets the DataFrameModel and columnDtypeModel's
        editable properties.
        :param editable: bool
            defaults to True,
            False disables most editing methods.
        :return:
            None
        """
        self.editable = editable
        self._columnDtypeModel.setEditable(self.editable)

    def dataFrameColumns(self):
        """
        :return: list containing dataframe columns
        """
        return self._dataFrame.columns.tolist()

    def addDataFrameColumn(self, columnName, dtype=str, defaultValue=None):
        """
        Adds a column to the dataframe as long as
        the model's editable property is set to True and the
        dtype is supported.

        :param columnName: str
            name of the column.
        :param dtype: qtpandas.models.SupportedDtypes option
        :param defaultValue: (object)
            to default the column's value to, should be the same as the dtype or None
        :return: (bool)
            True on success, False otherwise.
        """
        if not self.editable or dtype not in SupportedDtypes.allTypes():
            return False

        elements = self.rowCount()
        columnPosition = self.columnCount()

        newColumn = pandas.Series([defaultValue] * elements,
                                  index=self._dataFrame.index,
                                  dtype=dtype)

        self.beginInsertColumns(QtCore.QModelIndex(), columnPosition - 1,
                                columnPosition - 1)
        try:
            self._dataFrame.insert(columnPosition,
                                   columnName,
                                   newColumn,
                                   allow_duplicates=False)
        except ValueError as e:
            # columnName does already exist
            return False

        self.endInsertColumns()

        self.propagateDtypeChanges(columnPosition, newColumn.dtype)

        return True

    def addDataFrameRows(self, count=1):
        """

        Adds rows to the dataframe.

        :param count: (int)
            The number of rows to add to the dataframe.
        :return: (bool)
            True on success, False on failure.

        """
        # don't allow any gaps in the data rows.
        # and always append at the end

        if not self.editable:
            return False

        position = self.rowCount()

        if count < 1:
            return False

        if len(self.dataFrame().columns) == 0:
            # log an error message or warning
            return False

        # Note: This function emits the rowsAboutToBeInserted() signal which
        # connected views (or proxies) must handle before the data is
        # inserted. Otherwise, the views may end up in an invalid state.
        self.beginInsertRows(QtCore.QModelIndex(), position,
                             position + count - 1)

        defaultValues = []
        for dtype in self._dataFrame.dtypes:
            if dtype.type == numpy.dtype('<M8[ns]'):
                val = pandas.Timestamp('')
            elif dtype.type == numpy.dtype(object):
                val = ''
            else:
                val = dtype.type()
            defaultValues.append(val)

        for i in range(count):
            self._dataFrame.loc[position + i] = defaultValues
        self._dataFrame.reset_index()
        self.endInsertRows()
        return True

    def removeDataFrameColumns(self, columns):
        """
        Removes columns from the dataframe.
        :param columns: [(int, str)]
        :return: (bool)
            True on success, False on failure.
        """
        if not self.editable:
            return False

        if columns:
            deleted = 0
            errored = False
            for (position, name) in columns:
                position = position - deleted
                if position < 0:
                    position = 0
                self.beginRemoveColumns(QtCore.QModelIndex(), position,
                                        position)
                try:
                    self._dataFrame.drop(name, axis=1, inplace=True)
                except ValueError as e:
                    errored = True
                    continue
                self.endRemoveColumns()
                deleted += 1
            self.dataChanged.emit()

            if errored:
                return False
            else:
                return True
        return False

    def removeDataFrameRows(self, rows):
        """
        Removes rows from the dataframe.

        :param rows: (list)
            of row indexes to removes.
        :return: (bool)
            True on success, False on failure.
        """
        if not self.editable:
            return False

        if rows:
            position = min(rows)
            count = len(rows)
            self.beginRemoveRows(QtCore.QModelIndex(), position,
                                 position + count - 1)

            removedAny = False
            for idx, line in self._dataFrame.iterrows():
                if idx in rows:
                    removedAny = True
                    self._dataFrame.drop(idx, inplace=True)

            if not removedAny:
                return False

            self._dataFrame.reset_index(inplace=True, drop=True)

            self.endRemoveRows()
            return True
        return False