def test_set_index_num():
    """Check SetIndex on the first (numeric) column, then ResetIndex.

    Verifies option handling on a fresh SetIndex, the predicted and actual
    output shapes once 'cowq' becomes the index, and the shapes produced
    when ResetIndex is applied to that result.
    """
    raw = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw)

    set_op = SetIndex()
    # Right after construction the selection is empty.
    assert set_op.getOptions() == {'selected': {}}

    options = {'selected': {0: None}}
    set_op.setOptions(**options)
    assert set_op.getOptions() == options
    assert isDictDeepCopy(set_op.getOptions(), options)

    # No output shape is reported before an input shape is attached.
    assert set_op.getOutputShape() is None
    set_op.addInputShape(frame.shape, 0)

    indexed_expected = Shape()
    indexed_expected.colNames = ['col3', 'col2', 'date']
    indexed_expected.colTypes = [Types.String, Types.Nominal, Types.Datetime]
    indexed_expected.index = ['cowq']
    indexed_expected.indexTypes = [IndexType(Types.Numeric)]
    assert set_op.getOutputShape() == indexed_expected

    indexed = set_op.execute(frame)
    assert indexed.shape == indexed_expected

    # Second half: undo the indexing with ResetIndex.
    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed.shape, 0)

    reset_expected = Shape()
    reset_expected.colNames = ['cowq', 'col2', 'date', 'col3']
    reset_expected.colTypes = [Types.Numeric, Types.Nominal, Types.Datetime,
                               Types.String]
    reset_expected.index = ['Unnamed']
    reset_expected.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == reset_expected

    restored = reset_op.execute(indexed)
    assert restored.shape == reset_expected
def test_fillnan_ffill():
    """Check FillNan in 'ffill' mode on a frame indexed by 'col1'.

    Columns 0, 1 and 2 are selected; the test verifies the stored options,
    the output shape, and the filled values of 'col3', 'col2' and 'date'.
    """
    raw = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw)
    frame = frame.setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(frame.shape, 0)
    fill_op.setOptions(selected={0: None, 1: None, 2: None}, fillMode='ffill')
    assert fill_op.getOptions() == {
        'selected': {0: None, 1: None, 2: None},
        'fillMode': 'ffill'
    }

    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date']
    expected_shape.colTypes = [Types.String, Types.Ordinal, Types.Datetime]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled = fill_op.execute(frame)
    assert filled.shape == expected_shape

    actual = mapDate(roundValues(nan_to_None(filled.to_dict()), decimals=3))
    # Missing dates are expected to take the value '1988-05-09'; present
    # dates are simply reformatted.
    expected_dates = [
        '1988-05-09' if pd.isna(stamp) else stamp.strftime(format='%Y-%m-%d')
        for stamp in raw['date']
    ]
    expected = {
        'col3': ['q', '2', 'c', 'c', 'c'],
        'col2': ['3', '4', '4', '4', '0'],
        'date': expected_dates,
    }
    assert actual == expected
def test_fromShape_categories():
    """Check Frame.fromShape with a three-level index including categoricals.

    The frame is indexed by 'col2', 'col3' and 'col1'; the frame rebuilt
    from its shape must report the same shape as the source frame.
    """
    raw = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': pd.Categorical(['q', '2', 'c', '4', 'x'], ordered=True),
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source = Frame(raw)
    source = source.setIndex(['col2', 'col3', 'col1'])

    rebuilt = Frame.fromShape(source.shape)

    # The index is expected to survive the round trip through fromShape.
    expected = Shape()
    expected.colNames = ['cold']
    expected.colTypes = [Types.Datetime]
    expected.index = ['col3', 'col1', 'col2']
    expected.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal),
    ]
    assert rebuilt.shape == expected
    assert expected == source.shape
def test_set_index_string():
    """Check SetIndex on the string column (position 2), then ResetIndex.

    Verifies the predicted and actual shapes once 'col3' becomes the index,
    and the shapes produced when ResetIndex is applied to that result.
    """
    raw = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical(['3', 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    frame = data.Frame(raw)

    set_op = SetIndex()
    set_op.setOptions(selected={2: None})
    # Options alone are not enough: an input shape is still missing.
    assert set_op.getOutputShape() is None
    set_op.addInputShape(frame.shape, 0)

    indexed_expected = Shape()
    indexed_expected.colNames = ['cowq', 'col2', 'date']
    indexed_expected.colTypes = [Types.Numeric, Types.Nominal, Types.Datetime]
    indexed_expected.index = ['col3']
    indexed_expected.indexTypes = [IndexType(Types.String)]

    predicted_shape = set_op.getOutputShape()
    assert predicted_shape == indexed_expected

    indexed = set_op.execute(frame)
    actual_shape = indexed.shape
    assert actual_shape == indexed_expected

    # Second half: undo the indexing with ResetIndex.
    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed.shape, 0)

    reset_expected = Shape()
    reset_expected.colNames = ['cowq', 'col2', 'date', 'col3']
    reset_expected.colTypes = [Types.Numeric, Types.Nominal, Types.Datetime,
                               Types.String]
    reset_expected.index = ['Unnamed']
    reset_expected.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == reset_expected

    restored = reset_op.execute(indexed)
    assert restored.shape == reset_expected
def test_shape():
    """Check the shape and row count of a Frame built from a plain dict."""
    raw = {
        'col1': [1, 2, 3, 4, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    frame = Frame(raw)

    # With no explicit index, a numeric index named 'Unnamed' is expected.
    expected = Shape()
    expected.index = ['Unnamed']
    expected.indexTypes = [IndexType(Types.Numeric)]
    expected.colNames = ['col1', 'col2', 'col3']
    expected.colTypes = [Types.Numeric, Types.Numeric, Types.String]

    assert frame.shape == expected
    assert frame.nRows == 5
def test_fillnan_byVal_date_num():
    """Check FillNan in 'value' mode on a datetime and a numeric column.

    A first option set must be rejected with OptionValidationError. A
    second one, filling column 2 with a timestamp string and column 3 with
    '0.9', must be accepted; the test then verifies the stored options,
    the output shape and the resulting values.
    """
    raw = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
        'col4': [np.nan, 2, np.nan, 4, 10],
    }
    frame = data.Frame(raw)
    frame = frame.setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(frame.shape, 0)

    with pytest.raises(OptionValidationError):
        fill_op.setOptions(
            selected={
                0: {'fill': 'pol'}, 1: {'fill': '23'},  # wrong
                2: {'fill': '1966-04-02 00:00:30'},
                3: {'fill': 'march'},  # wrong
            },
            fillMode='value')

    fill_op.setOptions(
        selected={
            2: {'fill': '1966-04-02 00:00:30'},
            3: {'fill': '0.9'},
        },
        fillMode='value')
    assert fill_op.getOptions() == {
        'selected': {2: {'fill': '1966-04-02 00:00:30'}, 3: {'fill': '0.9'}},
        'fillMode': 'value'
    }

    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date', 'col4']
    expected_shape.colTypes = [Types.String, Types.Ordinal, Types.Datetime,
                               Types.Numeric]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled = fill_op.execute(frame)
    assert filled.shape == expected_shape

    actual = mapDate(roundValues(nan_to_None(filled.to_dict()), decimals=3))
    # Missing dates are expected to take the fill value's date; present
    # dates are simply reformatted.
    expected_dates = [
        '1966-04-02' if pd.isna(stamp) else stamp.strftime(format='%Y-%m-%d')
        for stamp in raw['date']
    ]
    expected = {
        # Unselected columns keep their missing values.
        'col3': ['q', '2', 'c', None, None],
        'col2': ['3', '4', None, None, '0'],
        'date': expected_dates,
        'col4': [0.9, 2.0, 0.9, 4.0, 10.0],
    }
    assert actual == expected
def test_shape_index():
    """Check the shape of a Frame whose raw frame is indexed by 'col3'."""
    raw = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    frame = Frame(raw)
    # Set the index on the underlying raw frame, then wrap it again.
    reindexed_raw = frame.getRawFrame().set_index('col3')
    frame = Frame(reindexed_raw)

    # Shape the wrapped frame is expected to report.
    expected = Shape()
    expected.index = ['col3']
    expected.indexTypes = [IndexType(Types.String)]
    expected.colNames = ['col1', 'col2']
    expected.colTypes = [Types.Numeric, Types.Numeric]

    assert frame.shape == expected
    assert frame.nRows == 5
def test_cloneShape():
    """Check that mutating a cloned Shape leaves the source Shape unchanged."""
    source = Shape()
    source.colNames = ['cold']
    source.colTypes = [Types.Datetime]
    source.index = ['col3', 'col1', 'col2']
    source.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal),
    ]
    # Snapshot the dict views before cloning, to compare against afterwards.
    columns_before = source.columnsDict
    index_before = source.indexDict

    duplicate = source.clone()
    # Change every list held by the clone.
    duplicate.index.append('col4')
    duplicate.indexTypes.append(IndexType(Types.Numeric))
    duplicate.colTypes[0] = Types.Ordinal
    duplicate.colNames[0] = 'col_new'

    assert duplicate != source
    assert source.columnsDict == {'cold': Types.Datetime}
    assert duplicate.columnsDict == {'col_new': Types.Ordinal}
    assert source.columnsDict == columns_before
    assert source.indexDict == index_before
def test_fromShape_single_index():
    """Check Frame.fromShape on a frame with a single numeric index.

    The frame is indexed by 'col1'; the frame rebuilt from its shape must
    report the same shape as the source frame.
    """
    raw = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source = Frame(raw)
    source = source.setIndex('col1')

    rebuilt = Frame.fromShape(source.shape)

    # The index is expected to survive the round trip through fromShape.
    expected = Shape()
    expected.colNames = ['cold', 'col2', 'col3']
    expected.colTypes = [Types.Datetime, Types.Numeric, Types.String]
    expected.index = ['col1']
    expected.indexTypes = [IndexType(Types.Numeric)]

    assert rebuilt.shape == expected
    assert expected == source.shape