Esempio n. 1
0
def test_SetInput():
    """SetInput returns the frame registered in the workbench under a name.

    The operation has no output shape and a ``None`` ``inputF`` option until
    it is configured; afterwards its output shape and executed result must
    match the stored frame.
    """
    columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': [
            '05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
            '12-12-2012'
        ],
    }
    source = Frame(columns)

    # Register the frame in the mocked workbench under the name 'var'
    workbench = WorkbenchModelMock()
    workbench.setDataframeByName('var', source)

    operation = SetInput(workbench)
    # Nothing is configured yet: no shape and no selected input
    assert operation.getOutputShape() is None
    assert operation.getOptions() == {'inputF': None}

    operation.setOptions(inputF='var')
    operation.addInputShape(Shape(), pos=0)  # this does nothing
    assert operation.getOptions() == {'inputF': 'var'}
    assert operation.getOutputShape() == source.shape

    result = operation.execute()
    assert result == source

    # The result should be a copy: it must differ from the renamed frame
    source = source.rename({'col1': 'ewew'})
    assert result != source
Esempio n. 2
0
def test_fillnan_ffill():
    """FillNan in 'ffill' mode fills missing values in the selected columns.

    The frame is indexed by 'col1'; columns 0, 1 and 2 of the resulting
    frame are selected, and the shape must be unchanged by the operation.
    """
    raw = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw).setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(frame.shape, 0)
    fill_op.setOptions(selected={0: None, 1: None, 2: None}, fillMode='ffill')
    assert fill_op.getOptions() == {
        'selected': {0: None, 1: None, 2: None},
        'fillMode': 'ffill',
    }

    # Shape expected both before and after execution
    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date']
    expected_shape.colTypes = [Types.String, Types.Ordinal, Types.Datetime]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled = fill_op.execute(frame)
    assert filled.shape == expected_shape

    actual = mapDate(roundValues(nan_to_None(filled.to_dict()), decimals=3))
    # The two missing dates are expected to take the first date's value
    expected = {
        'col3': ['q', '2', 'c', 'c', 'c'],
        'col2': ['3', '4', '4', '4', '0'],
        'date': [
            '1988-05-09' if pd.isna(t) else t.strftime(format='%Y-%m-%d')
            for t in raw['date']
        ],
    }
    assert actual == expected
Esempio n. 3
0
def test_fromShape_categories():
    """Frame.fromShape rebuilds a frame with the same shape as a multi-indexed one.

    The source frame has a numeric, a nominal, an ordered categorical and a
    datetime column; the first three are moved into the index.
    """
    columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': pd.Categorical(['q', '2', 'c', '4', 'x'], ordered=True),
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    original = Frame(columns).setIndex(['col2', 'col3', 'col1'])

    rebuilt = Frame.fromShape(original.shape)

    # fromShape does preserve index
    # NOTE(review): the index order below differs from the order given to
    # setIndex; presumably Shape equality ignores ordering -- confirm.
    expected = Shape()
    expected.colNames = ['cold']
    expected.colTypes = [Types.Datetime]
    expected.index = ['col3', 'col1', 'col2']
    expected.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal),
    ]
    assert rebuilt.shape == expected
    assert expected == original.shape
Esempio n. 4
0
 def __init__(self,
              parent: QWidget = None,
              frame: Union[Frame, Shape, None] = None):
     """Initialise the object from a frame, a shape, or nothing.

     :param parent: forwarded unchanged to the superclass constructor.
     :param frame: one of

         * a ``Frame``: stored as-is, with the shape read from it;
         * a ``Shape``: stored as the shape, paired with a new empty ``Frame``;
         * ``None`` (default): a new empty ``Frame`` is created and its
           shape is used;
         * anything else: falls back to an empty ``Frame`` and ``Shape``.
     """
     super().__init__(parent)
     # The default used to be ``Frame()`` written in the signature. Default
     # values are evaluated once, when the function is defined, so every
     # instance built without an explicit frame shared one Frame object.
     # A new one is now created per call instead.
     # NOTE: an explicit ``None`` is therefore treated like an omitted
     # argument (previously it reached the fallback branch below).
     if frame is None:
         frame = Frame()
     if isinstance(frame, Frame):
         self.__frame: Frame = frame
         self.__shape: Shape = self.__frame.shape
     elif isinstance(frame, Shape):  # it's a Shape
         self.__frame: Frame = Frame()
         self.__shape: Shape = frame
     else:
         # Unsupported argument type: start from an empty state
         self.__frame: Frame = Frame()
         self.__shape: Shape = Shape()
     # Dictionary { attributeIndex: value }
     self._statistics: Dict[int, Dict[str, object]] = dict()
     self._histogram: Dict[int, Dict[Any, int]] = dict()
     # Dataframe name
     self.name: str = ''
     # Set of alive workers by identifier (attribute number, type, operation)
     self._runningWorkers: Set[Tuple] = set()
     # Mutex created here; presumably guards the cached data above -- confirm
     self._dataAccessMutex = QMutex()
Esempio n. 5
0
def test_shape():
    """A frame built from a plain dict reports the expected shape and row count."""
    columns = {
        'col1': [1, 2, 3, 4, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    frame = Frame(columns)

    # No index was set, so the expected index is the numeric 'Unnamed' one
    expected = Shape()
    expected.index = ['Unnamed']
    expected.indexTypes = [IndexType(Types.Numeric)]
    expected.colNames = ['col1', 'col2', 'col3']
    expected.colTypes = [Types.Numeric, Types.Numeric, Types.String]

    assert frame.shape == expected
    assert frame.nRows == 5
Esempio n. 6
0
def test_fillnan_byVal_date_num():
    """FillNan in 'value' mode validates fill values and fills the selected columns.

    Options containing unparsable fill values must raise
    ``OptionValidationError``; valid options fill the datetime column and
    the numeric column, leaving the unselected columns untouched.
    """
    raw = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
        'col4': [np.nan, 2, np.nan, 4, 10],
    }
    frame = data.Frame(raw).setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(frame.shape, 0)

    invalid_selection = {
        0: {'fill': 'pol'},
        1: {'fill': '23'},  # wrong
        2: {'fill': '1966-04-02 00:00:30'},
        3: {'fill': 'march'},  # wrong
    }
    with pytest.raises(OptionValidationError):
        fill_op.setOptions(selected=invalid_selection, fillMode='value')

    fill_op.setOptions(
        selected={2: {'fill': '1966-04-02 00:00:30'}, 3: {'fill': '0.9'}},
        fillMode='value')

    assert fill_op.getOptions() == {
        'selected': {
            2: {'fill': '1966-04-02 00:00:30'},
            3: {'fill': '0.9'},
        },
        'fillMode': 'value',
    }

    # Shape expected both before and after execution
    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date', 'col4']
    expected_shape.colTypes = [
        Types.String, Types.Ordinal, Types.Datetime, Types.Numeric
    ]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled = fill_op.execute(frame)
    assert filled.shape == expected_shape

    actual = mapDate(roundValues(nan_to_None(filled.to_dict()), decimals=3))
    # Only 'date' and 'col4' were selected: the other columns keep their gaps
    expected = {
        'col3': ['q', '2', 'c', None, None],
        'col2': ['3', '4', None, None, '0'],
        'date': [
            '1966-04-02' if pd.isna(t) else t.strftime(format='%Y-%m-%d')
            for t in raw['date']
        ],
        'col4': [0.9, 2.0, 0.9, 4.0, 10.0],
    }
    assert actual == expected
Esempio n. 7
0
def test_shape_index():
    """A frame wrapping a raw frame indexed by 'col3' reports that index in its shape."""
    columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    plain = Frame(columns)
    # Set the index on the underlying raw frame, then wrap it again
    indexed = Frame(plain.getRawFrame().set_index('col3'))

    # Desired shape obj
    expected = Shape()
    expected.index = ['col3']
    expected.indexTypes = [IndexType(Types.String)]
    expected.colNames = ['col1', 'col2']
    expected.colTypes = [Types.Numeric, Types.Numeric]

    assert indexed.shape == expected
    assert indexed.nRows == 5
Esempio n. 8
0
def test_cloneShape():
    """Mutating a cloned Shape must leave the original Shape untouched."""
    original = Shape()
    original.colNames = ['cold']
    original.colTypes = [Types.Datetime]
    original.index = ['col3', 'col1', 'col2']
    original.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal),
    ]
    # Snapshots taken before cloning, compared again at the end
    columns_before = original.columnsDict
    index_before = original.indexDict

    clone = original.clone()
    # Change every list held by the clone
    clone.index.append('col4')
    clone.indexTypes.append(IndexType(Types.Numeric))
    clone.colTypes[0] = Types.Ordinal
    clone.colNames[0] = 'col_new'

    assert clone != original
    assert original.columnsDict == {'cold': Types.Datetime}
    assert clone.columnsDict == {'col_new': Types.Ordinal}
    assert original.columnsDict == columns_before
    assert original.indexDict == index_before
Esempio n. 9
0
def test_fromShape_single_index():
    """Frame.fromShape rebuilds a frame with the same shape as a single-indexed one."""
    columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    original = Frame(columns).setIndex('col1')

    rebuilt = Frame.fromShape(original.shape)

    # fromShape does preserve index
    expected = Shape()
    expected.colNames = ['cold', 'col2', 'col3']
    expected.colTypes = [Types.Datetime, Types.Numeric, Types.String]
    expected.index = ['col1']
    expected.indexTypes = [IndexType(Types.Numeric)]
    assert rebuilt.shape == expected
    assert expected == original.shape
Esempio n. 10
0
def test_set_index_num():
    """SetIndex on a numeric column, followed by ResetIndex, yields the expected shapes."""
    raw = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw)

    set_op = SetIndex()
    assert set_op.getOptions() == {'selected': {}}
    options = {'selected': {0: None}}
    set_op.setOptions(**options)

    # Options must round-trip by value and be a deep copy of what was passed
    assert set_op.getOptions() == options
    assert isDictDeepCopy(set_op.getOptions(), options)

    # Without an input shape there is no output shape
    assert set_op.getOutputShape() is None

    set_op.addInputShape(frame.shape, 0)
    indexed_shape = Shape()
    indexed_shape.colNames = ['col3', 'col2', 'date']
    indexed_shape.colTypes = [Types.String, Types.Nominal, Types.Datetime]
    indexed_shape.index = ['cowq']
    indexed_shape.indexTypes = [IndexType(Types.Numeric)]
    assert set_op.getOutputShape() == indexed_shape

    indexed = set_op.execute(frame)
    assert indexed.shape == indexed_shape

    # Reset index

    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed.shape, 0)
    reset_shape = Shape()
    reset_shape.colNames = ['cowq', 'col2', 'date', 'col3']
    reset_shape.colTypes = [
        Types.Numeric, Types.Nominal, Types.Datetime, Types.String
    ]
    reset_shape.index = ['Unnamed']
    reset_shape.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == reset_shape
    restored = reset_op.execute(indexed)
    assert restored.shape == reset_shape
Esempio n. 11
0
def test_set_index_string():
    """SetIndex on a string column, followed by ResetIndex, yields the expected shapes."""
    raw = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical(['3', 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw)

    set_op = SetIndex()
    set_op.setOptions(selected={2: None})

    # Without an input shape there is no output shape
    assert set_op.getOutputShape() is None

    set_op.addInputShape(frame.shape, 0)
    indexed_shape = Shape()
    indexed_shape.colNames = ['cowq', 'col2', 'date']
    indexed_shape.colTypes = [Types.Numeric, Types.Nominal, Types.Datetime]
    indexed_shape.index = ['col3']
    indexed_shape.indexTypes = [IndexType(Types.String)]
    predicted_shape = set_op.getOutputShape()
    assert predicted_shape == indexed_shape

    indexed = set_op.execute(frame)
    actual_shape = indexed.shape
    assert actual_shape == indexed_shape

    # Reset index

    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed.shape, 0)
    reset_shape = Shape()
    reset_shape.colNames = ['cowq', 'col2', 'date', 'col3']
    reset_shape.colTypes = [
        Types.Numeric, Types.Nominal, Types.Datetime, Types.String
    ]
    reset_shape.index = ['Unnamed']
    reset_shape.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == reset_shape
    restored = reset_op.execute(indexed)
    assert restored.shape == reset_shape