Example #1
1
 def test_replace_str_to_str_chain(self):
     """A str->str mapping whose keys overlap its values must raise."""
     # '1'..'4' are each mapped onto '2'..'5', so keys and values overlap
     sources = np.arange(1, 5).astype(str)
     targets = np.arange(2, 6).astype(str)
     frame = DataFrame({'a': sources})
     chained = dict(zip(sources, targets))
     with tm.assertRaisesRegexp(ValueError, "Replacement not allowed .+"):
         frame.replace({'a': chained})
Example #2
0
    def test_nunique(self):
        """Check grouped ``nunique`` with ``as_index`` and ``dropna`` variants."""
        frame = DataFrame({
            'A': list('abbacc'),
            'B': list('abxacc'),
            'C': list('abbacx'),
        })
        counts = {'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]}

        # as_index=False: result keeps the default index
        flat = frame.groupby('A', as_index=False).nunique()
        tm.assert_frame_equal(flat, DataFrame(counts))

        # as_index
        keyed = DataFrame(counts, index=list('abc'))
        keyed.index.name = 'A'
        tm.assert_frame_equal(frame.groupby('A').nunique(), keyed)

        # with na
        with_missing = frame.replace({'x': None})
        kept = with_missing.groupby('A').nunique(dropna=False)
        tm.assert_frame_equal(kept, keyed)

        # dropna
        all_ones = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3},
                             index=list('abc'))
        all_ones.index.name = 'A'
        dropped = frame.replace({'x': None}).groupby('A').nunique()
        tm.assert_frame_equal(dropped, all_ones)
def _int_if_numeric(value):
    """Return ``int(value)`` when that conversion works, else ``value`` as-is."""
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # Non-numeric labels (e.g. "Not found") are kept unchanged.
        return value


def getphysiologicalinfo(pos, atlas="Talairach", r=5, nearest="on", rm_unfound=False):
    """Build a table of anatomical labels for a set of channel positions.

    Args:
        pos: 2-D array indexed as ``pos[k, :]``; one row per channel.
        atlas: atlas name forwarded to ``loadatlas``.
        r: forwarded unchanged to ``physiochannel``.
        nearest: forwarded unchanged to ``physiochannel``.
        rm_unfound: accepted for interface compatibility; not used by this
            function.

    Returns:
        DataFrame with the columns "Hemisphere", "Lobe", "Gyrus", "Matter"
        and "Brodmann". "*" entries are replaced by "Not found", the
        redundant " Gyrus" / " Lobe" / " Matter" / "Brodmann area " fragments
        are stripped, and Brodmann entries that parse as integers are stored
        as ``int``.
    """
    # load atlas :
    hdr, mask, gray, _brodtxt, _brodidx, label = loadatlas(atlas=atlas)

    # Get info of each electrode :
    hemi, lobe, gyrus, matter, brod = [], [], [], [], []
    for k in range(pos.shape[0]):
        hemi_k, lobe_k, gyrus_k, matter_k, brod_k = physiochannel(
            list(pos[k, :]), mask, hdr, gray, label, r=r, nearest=nearest
        )
        hemi.extend(hemi_k)
        lobe.extend(lobe_k)
        gyrus.extend(gyrus_k)
        matter.extend(matter_k)
        brod.extend(brod_k)

    # Put everything in a panda structure :
    phyinf = DataFrame({"Hemisphere": hemi, "Lobe": lobe, "Gyrus": gyrus, "Matter": matter, "Brodmann": brod})

    # Replace corrupted values :
    phyinf = phyinf.replace(to_replace="*", value="Not found")
    # Assign the cleaned column back: an in-place replace on ``phyinf[col]``
    # acts on a temporary and is not guaranteed to reach the frame.
    redundant = (
        ("Brodmann", "Brodmann area "),
        ("Gyrus", " Gyrus"),
        ("Lobe", " Lobe"),
        ("Matter", " Matter"),
    )
    for column, fragment in redundant:
        phyinf[column] = phyinf[column].replace(to_replace=fragment, value="", regex=True)

    # Convert Brodmann to number :
    phyinf["Brodmann"] = [_int_if_numeric(entry) for entry in phyinf["Brodmann"]]

    return phyinf
Example #4
0
    def test_replace_with_empty_dictlike(self, mix_abc):
        """Replacing with an empty dict-like must return an equal frame."""
        # GH 15289
        frame = DataFrame(mix_abc)
        empty_specs = (
            {},
            Series([]),
            {'b': {}},
            Series({'b': {}}),
        )
        for to_replace in empty_specs:
            assert_frame_equal(frame, frame.replace(to_replace))
Example #5
0
    def test_replace_simple_nested_dict_with_nonexistent_value(self):
        """A mapping key absent from the data (-1) must not affect the result."""
        frame = DataFrame({'col': range(1, 5)})
        wanted = DataFrame({'col': ['a', 2, 3, 'b']})
        mapping = {-1: '-', 1: 'a', 4: 'b'}

        # flat {old: new} form, applied to the whole frame
        assert_frame_equal(wanted, frame.replace(dict(mapping)))

        # nested {column: {old: new}} form
        assert_frame_equal(wanted, frame.replace({'col': dict(mapping)}))
Example #6
0
    def test_replace_with_empty_dictlike(self):
        """Replacing with an empty dict-like must return an equal frame."""
        # GH 15289
        frame = DataFrame({'a': lrange(4),
                           'b': list('ab..'),
                           'c': ['a', 'b', nan, 'd']})

        # frame-wide empties
        for to_replace in ({}, Series([])):
            assert_frame_equal(frame, frame.replace(to_replace))

        # empties scoped to column 'b'
        for to_replace in ({'b': {}}, Series({'b': {}})):
            assert_frame_equal(frame, frame.replace(to_replace))
Example #7
0
    def test_regex_replace_dict_mixed(self):
        """Regex ``replace`` with per-column dicts on a mixed-dtype frame.

        Each case is run out-of-place (``res``) and in-place on a copy
        (``res2``); both are compared with the same expected frame.
        """
        mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
        dfmix = DataFrame(mix)

        # dicts
        # single dict {re1: v1}, search the whole frame
        # need test for this...

        # {column: regex} -> {column: value}, positional arguments
        res = dfmix.replace({'b': r'\s*\.\s*'}, {'b': np.nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*\.\s*'}, {'b': np.nan},
                     inplace=True, regex=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', np.nan, np.nan], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # {column: regex} -> {column: replacement with back-reference}
        res = dfmix.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True)
        res2 = dfmix.copy()
        res2.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, inplace=True,
                     regex=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # same case, patterns passed through the ``regex=`` keyword
        res = dfmix.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'})
        res2 = dfmix.copy()
        res2.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'},
                     inplace=True)
        expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # scalar -> dict
        # to_replace regex, {value: value}
        expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
                           mix['c']})
        res = dfmix.replace('a', {'b': np.nan}, regex=True)
        res2 = dfmix.copy()
        res2.replace('a', {'b': np.nan}, regex=True, inplace=True)
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)

        # scalar -> dict again, spelled with the ``regex=`` keyword.  The
        # out-of-place call now uses the keyword form as well, so it no longer
        # just repeats the positional call already checked above.
        res = dfmix.replace(regex='a', value={'b': np.nan})
        res2 = dfmix.copy()
        res2.replace(regex='a', value={'b': np.nan}, inplace=True)
        expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
                           mix['c']})
        assert_frame_equal(res, expec)
        assert_frame_equal(res2, expec)
Example #8
0
    def test_replace_dict_tuple_list_ordering_remains_the_same(self):
        """dict, tuple and list spellings of ``to_replace`` must agree."""
        frame = DataFrame({'A': [nan, 1]})
        via_dict = frame.replace(to_replace={nan: 0, 1: -1e8})
        via_tuple = frame.replace(to_replace=(1, nan), value=[-1e8, 0])
        via_list = frame.replace(to_replace=[1, nan], value=[-1e8, 0])

        wanted = DataFrame({'A': [0, -1e8]})
        # pairwise chain: dict == tuple == list == wanted
        assert_frame_equal(via_dict, via_tuple)
        assert_frame_equal(via_tuple, via_list)
        assert_frame_equal(via_list, wanted)
Example #9
0
    def test_replace_simple_nested_dict(self):
        """Nested and flat mappings give the same result on a one-column frame."""
        frame = DataFrame({'col': range(1, 5)})
        wanted = DataFrame({'col': ['a', 2, 3, 'b']})

        nested = frame.replace({'col': {1: 'a', 4: 'b'}})
        assert_frame_equal(wanted, nested)

        # in this case, should be the same as the not nested version
        flat = frame.replace({1: 'a', 4: 'b'})
        assert_frame_equal(wanted, flat)
Example #10
0
    def test_replace_datetimetz(self):
        """Check ``replace`` on a frame holding a tz-aware datetime column.

        Column 'A' is a US/Eastern ``date_range``; column 'B' is numeric with
        one NaN.  Each section replaces one kind of value and compares the
        result with an explicitly built expected frame.
        """

        # GH 11326
        # behaving poorly when presented with a datetime64[ns, tz]
        df = DataFrame({'A': date_range('20130101', periods=3,
                                        tz='US/Eastern'),
                        'B': [0, np.nan, 2]})
        # NaN -> 1: only 'B' changes, 'A' is expected unchanged
        result = df.replace(np.nan, 1)
        expected = DataFrame({'A': date_range('20130101', periods=3,
                                              tz='US/Eastern'),
                              'B': Series([0, 1, 2], dtype='float64')})
        assert_frame_equal(result, expected)

        # fillna(1) is expected to match the replace(np.nan, 1) result
        result = df.fillna(1)
        assert_frame_equal(result, expected)

        # 0 -> NaN in the numeric column
        result = df.replace(0, np.nan)
        expected = DataFrame({'A': date_range('20130101', periods=3,
                                              tz='US/Eastern'),
                              'B': [np.nan, np.nan, 2]})
        assert_frame_equal(result, expected)

        # replace one tz-aware timestamp with another in the same timezone
        result = df.replace(Timestamp('20130102', tz='US/Eastern'),
                            Timestamp('20130104', tz='US/Eastern'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104', tz='US/Eastern'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)

        # blank out the middle timestamp, then fill the resulting NaT with a
        # same-timezone value; the expected frame is the one built just above
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace(
            {'A': pd.NaT}, Timestamp('20130104', tz='US/Eastern'))
        assert_frame_equal(result, expected)

        # coerce to object
        # (the replacement value carries a different timezone, US/Pacific)
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace(
            {'A': pd.NaT}, Timestamp('20130104', tz='US/Pacific'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104', tz='US/Pacific'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)

        # same again with a tz-naive replacement, keyed on np.nan instead of
        # pd.NaT
        result = df.copy()
        result.iloc[1, 0] = np.nan
        result = result.replace({'A': np.nan}, Timestamp('20130104'))
        expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                                    Timestamp('20130104'),
                                    Timestamp('20130103', tz='US/Eastern')],
                              'B': [0, np.nan, 2]})
        assert_frame_equal(result, expected)
Example #11
0
    def test_replace_input_formats(self):
        """Exercise dict, scalar and list combinations of ``to_replace``/``value``."""
        # both dicts
        to_rep = {'A': np.nan, 'B': 0, 'C': ''}
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(to_rep, values)
        # reference result: apply the per-column replacement column by column
        by_column = {name: column.replace(to_rep[name], values[name])
                     for name, column in compat.iteritems(df)}
        assert_frame_equal(filled, DataFrame(by_column))

        swapped = df.replace([0, 2, 5], [5, 2, 0])
        wanted = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0],
                            'C': ['', 'asdf', 'fd']})
        assert_frame_equal(swapped, wanted)

        # dict to scalar
        filled = df.replace(to_rep, 0)
        by_column = {name: column.replace(to_rep[name], 0)
                     for name, column in compat.iteritems(df)}
        assert_frame_equal(filled, DataFrame(by_column))

        # a list ``value`` together with a dict ``to_replace`` is rejected
        self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, ''])

        # scalar to dict
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(np.nan, values)
        by_column = {name: column.replace(np.nan, values[name])
                     for name, column in compat.iteritems(df)}
        assert_frame_equal(filled, DataFrame(by_column))

        # list to list
        to_rep = [np.nan, 0, '']
        values = [-2, -1, 'missing']
        result = df.replace(to_rep, values)
        stepwise = df.copy()
        for old, new in zip(to_rep, values):
            stepwise.replace(old, new, inplace=True)
        assert_frame_equal(result, stepwise)

        # mismatched list lengths are rejected
        self.assertRaises(ValueError, df.replace, to_rep, values[1:])

        # list to scalar
        to_rep = [np.nan, 0, '']
        result = df.replace(to_rep, -1)
        stepwise = df.copy()
        for old in to_rep:
            stepwise.replace(old, -1, inplace=True)
        assert_frame_equal(result, stepwise)
 def get_dataframe(self, data):
     """Wrap ``data`` in a DataFrame indexed by the columns from ``get_index``.

     When ``self.get_index`` returns nothing truthy, the automatic index is
     kept and named "row".  Otherwise, if ``self.index_none_value`` is set,
     ``None`` entries in each index column are first replaced by it, and the
     frame is then re-indexed on those columns.
     """
     frame = DataFrame(data)
     index_columns = self.get_index(frame)

     if not index_columns:
         # Name auto-index column to ensure valid CSV output
         frame.index.name = "row"
         return frame

     placeholder = self.index_none_value
     if placeholder is not None:
         for column in index_columns:
             frame.replace({column: None}, {column: placeholder}, inplace=True)
     frame.set_index(index_columns, inplace=True)
     return frame
Example #13
0
    def test_replace_input_formats_listlike(self):
        """Exercise dict and list-like ``to_replace``/``value`` combinations."""
        # both dicts
        to_rep = {'A': np.nan, 'B': 0, 'C': ''}
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(to_rep, values)
        # reference result: per-column replacement applied column by column
        by_column = {}
        for name, column in df.items():
            by_column[name] = column.replace(to_rep[name], values[name])
        assert_frame_equal(filled, DataFrame(by_column))

        swapped = df.replace([0, 2, 5], [5, 2, 0])
        wanted = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0],
                            'C': ['', 'asdf', 'fd']})
        assert_frame_equal(swapped, wanted)

        # scalar to dict
        values = {'A': 0, 'B': -1, 'C': 'missing'}
        df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5],
                        'C': ['', 'asdf', 'fd']})
        filled = df.replace(np.nan, values)
        by_column = {}
        for name, column in df.items():
            by_column[name] = column.replace(np.nan, values[name])
        assert_frame_equal(filled, DataFrame(by_column))

        # list to list
        to_rep = [np.nan, 0, '']
        values = [-2, -1, 'missing']
        result = df.replace(to_rep, values)
        stepwise = df.copy()
        for old, new in zip(to_rep, values):
            stepwise.replace(old, new, inplace=True)
        assert_frame_equal(result, stepwise)

        # lists of different lengths are rejected with an explicit message
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            df.replace(to_rep, values[1:])
Example #14
0
    def historicalRequest(self, securities, fields, startDate, endDate, **kwargs):
        """ Equivalent to the Excel BDH Function.

            If securities are provided as a list, the returned DataFrame will
            have a MultiIndex.

            Extra keyword arguments override the default request options
            built below before they are passed to ``self.sendRequest``.
            Cells equal to '#N/A History' are converted to NaN.  An empty
            DataFrame is returned when the response contains no messages.
        """
        defaults = {'startDate'       : startDate,
            'endDate'                 : endDate,
            'periodicityAdjustment'   : 'ACTUAL',
            'periodicitySelection'    : 'DAILY',
            'nonTradingDayFillOption' : 'ACTIVE_DAYS_ONLY',
            'adjustmentNormal'        : False,
            'adjustmentAbnormal'      : False,
            'adjustmentSplit'         : True,
            'adjustmentFollowDPDF'    : False}
        defaults.update(kwargs)

        response = self.sendRequest('HistoricalData', securities, fields, defaults)

        data = []
        keys = []

        for msg in response:
            securityData = msg.getElement('securityData')
            fieldData = securityData.getElement('fieldData')

            df = DataFrame()

            for i in range(fieldData.numValues()):
                fld = fieldData.getValueAsElement(i)
                # One row per date; the 'date' element is the row label and
                # every other element becomes a column of that row.
                date = fld.getElementAsDatetime('date')
                for j in range(fld.numElements()):
                    element = fld.getElement(j)
                    if element.name() == 'date':
                        continue
                    # ``.loc`` replaces the removed ``.ix`` indexer; assigning
                    # to a missing label enlarges the frame.
                    df.loc[date, str(element.name())] = element.getValue()

            # ``Index.to_datetime()`` no longer exists; use the top-level
            # converter instead.
            df.index = pd.to_datetime(df.index)
            df.replace('#N/A History', np.nan, inplace=True)

            keys.append(securityData.getElementAsString('security'))
            data.append(df)

        if not data:
            return DataFrame()
        if isinstance(securities, str):
            # Single security: plain columns, one per field.
            data = pd.concat(data, axis=1)
            data.columns.name = 'Field'
        else:
            # Several securities: (Security, Field) column MultiIndex.
            data = pd.concat(data, keys=keys, axis=1, names=['Security','Field'])

        data.index.name = 'Date'
        return data
Example #15
0
 def test_regex_replace_numeric_to_object_conversion(self):
     """Replacing a number with a string turns column 'a' into object dtype."""
     data = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
     replaced = DataFrame(data).replace(0, 'a')
     wanted = DataFrame({'a': ['a', 1, 2, 3],
                         'b': data['b'],
                         'c': data['c']})
     assert_frame_equal(replaced, wanted)
     self.assertEqual(replaced.a.dtype, np.object_)
Example #16
0
 def test_regex_replace_dict_nested(self):
     """Nested {column: {regex: value}} replace, in all four call spellings."""
     # nested dicts will not work until this is implemented for Series
     mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}
     dfmix = DataFrame(mix)

     def spec():
         # fresh mapping per call, as in separate literals
         return {'b': {r'\s*\.\s*': nan}}

     positional = dfmix.replace(spec(), regex=True)
     positional_inplace = dfmix.copy()
     keyword_inplace = dfmix.copy()
     positional_inplace.replace(spec(), inplace=True, regex=True)
     keyword = dfmix.replace(regex=spec())
     keyword_inplace.replace(regex=spec(), inplace=True)

     wanted = DataFrame({'a': mix['a'],
                         'b': ['a', 'b', nan, nan],
                         'c': mix['c']})
     outcomes = (positional, positional_inplace, keyword, keyword_inplace)
     for outcome in outcomes:
         assert_frame_equal(outcome, wanted)
Example #17
0
    def test_fillna_dtype_conversion(self):
        """Check the dtypes produced by ``fillna`` on all-missing frames.

        Also checks that ``fillna(v)`` matches ``replace(np.nan, v)`` for a
        few fill values.
        """
        # make sure that fillna on an empty frame works
        df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        result = df.get_dtype_counts().sort_values()
        expected = Series({'object': 5})
        assert_series_equal(result, expected)

        # Only the dtype counts of the filled frame are checked here.  An
        # unused expected frame built at this point was removed: it was
        # overwritten before anything compared against it.
        result = df.fillna(1)
        result = result.get_dtype_counts().sort_values()
        expected = Series({'int64': 5})
        assert_series_equal(result, expected)

        # empty block
        df = DataFrame(index=lrange(3), columns=['A', 'B'], dtype='float64')
        result = df.fillna('nan')
        expected = DataFrame('nan', index=lrange(3), columns=['A', 'B'])
        assert_frame_equal(result, expected)

        # equiv of replace
        df = DataFrame(dict(A=[1, np.nan], B=[1., 2.]))
        for v in ['', 1, np.nan, 1.0]:
            expected = df.replace(np.nan, v)
            result = df.fillna(v)
            assert_frame_equal(result, expected)
Example #18
0
    def test_replace_regex_metachar(self):
        """Strings made of regex metacharacters are replaced as plain values."""
        wanted = DataFrame({'a': ['paren', 'else']})

        for token in ('[]', '()', r'\d', r'\w', r'\s'):
            frame = DataFrame({'a': [token, 'else']})
            replaced = frame.replace({'a': {token: 'paren'}})
            assert_frame_equal(replaced, wanted)
Example #19
0
 def test_replace_convert(self):
     """Mapping every string to an int leaves all columns as int64."""
     # gh 3907
     frame = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
     codes = {'foo': 1, 'bar': 2, 'bah': 3}
     dtypes_after = frame.replace(codes).dtypes
     wanted = Series([np.int64] * 3)
     assert_series_equal(wanted, dtypes_after)
Example #20
0
 def test_regex_replace_dict_nested(self, mix_abc):
     """Nested {column: {regex: value}} replace, in all four call spellings."""
     # nested dicts will not work until this is implemented for Series
     dfmix = DataFrame(mix_abc)

     def spec():
         # fresh mapping per call, as in separate literals
         return {'b': {r'\s*\.\s*': np.nan}}

     positional = dfmix.replace(spec(), regex=True)
     positional_inplace = dfmix.copy()
     keyword_inplace = dfmix.copy()
     positional_inplace.replace(spec(), inplace=True, regex=True)
     keyword = dfmix.replace(regex=spec())
     keyword_inplace.replace(regex=spec(), inplace=True)

     wanted = DataFrame({
         'a': mix_abc['a'],
         'b': ['a', 'b', np.nan, np.nan],
         'c': mix_abc['c'],
     })
     outcomes = (positional, positional_inplace, keyword, keyword_inplace)
     for outcome in outcomes:
         assert_frame_equal(outcome, wanted)
Example #21
0
 def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
     """Replacing a number with a string turns column 'a' into object dtype."""
     replaced = DataFrame(mix_abc).replace(0, 'a')
     wanted = DataFrame({
         'a': ['a', 1, 2, 3],
         'b': mix_abc['b'],
         'c': mix_abc['c'],
     })
     assert_frame_equal(replaced, wanted)
     assert replaced.a.dtype == np.object_
Example #22
0
    def fillna_dict(cls, prop):
        """
        Use trade history then fill empty with value row above

        ``prop`` is anything ``DataFrame`` accepts.  Cells equal to '',
        'DEBIT' or 'CREDIT' are treated as missing and forward-filled from
        the nearest earlier row that has a value in the same column.

        Returns a list with one ``{column: value}`` dict per row.
        """
        df = DataFrame(prop)
        df = df.replace(['', 'DEBIT', 'CREDIT'], numpy.nan)
        # ``ffill()`` is the supported spelling of ``fillna(method='ffill')``,
        # which newer pandas deprecates and then rejects.
        df = df.ffill()

        return [row.to_dict() for _, row in df.iterrows()]
Example #23
0
    def test_replace_method(self, to_replace, method, expected):
        """``replace`` with ``value=None`` and a fill ``method`` (parametrized)."""
        # GH 19632
        frame = DataFrame({
            'A': [0, 1, 2],
            'B': [5, np.nan, 7],
            'C': ['a', 'b', 'c'],
        })

        actual = frame.replace(to_replace=to_replace, value=None,
                               method=method)
        assert_frame_equal(actual, DataFrame(expected))
Example #24
0
    def test_regex_replace_list_obj(self):
        """List-of-regex ``replace`` on a frame whose columns are all strings."""
        dfobj = DataFrame({'a': list('ab..'),
                           'b': list('efgh'),
                           'c': list('helo')})

        # lists of regexes and values
        # each case: (patterns, replacements, expected columns)
        cases = [
            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
            ([r'\s*\.\s*', r'e|f|g'],
             [np.nan, 'crap'],
             {'a': ['a', 'b', np.nan, np.nan],
              'b': ['crap'] * 3 + ['h'],
              'c': ['h', 'crap', 'l', 'o']}),
            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
            ([r'\s*(\.)\s*', r'(e|f|g)'],
             [r'\1\1', r'\1_crap'],
             {'a': ['a', 'b', '..', '..'],
              'b': ['e_crap', 'f_crap', 'g_crap', 'h'],
              'c': ['h', 'e_crap', 'l', 'o']}),
            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ...,
            # (reN or vN)]
            ([r'\s*(\.)\s*', r'e'],
             [r'\1\1', r'crap'],
             {'a': ['a', 'b', '..', '..'],
              'b': ['crap', 'f', 'g', 'h'],
              'c': ['h', 'crap', 'l', 'o']}),
        ]
        for patterns, replacements, columns in cases:
            replaced = dfobj.replace(patterns, replacements, regex=True)
            assert_frame_equal(replaced, DataFrame(columns))

        # last case again, with the patterns given through ``regex=``
        replaced = dfobj.replace(value=[r'\1\1', r'crap'],
                                 regex=[r'\s*(\.)\s*', r'e'])
        wanted = DataFrame({'a': ['a', 'b', '..', '..'],
                            'b': ['crap', 'f', 'g', 'h'],
                            'c': ['h', 'crap', 'l', 'o']})
        assert_frame_equal(replaced, wanted)
Example #25
0
    def test_regex_replace_list_mixed(self, mix_ab):
        """List-of-regex ``replace`` on frames mixing numeric and string columns."""
        # mixed frame to make sure this doesn't break things
        dfmix = DataFrame(mix_ab)

        # lists of regexes and values
        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
        # (this case runs on its own three-column frame)
        mix2 = {'a': list(range(4)), 'b': list('ab..'), 'c': list('halo')}
        replaced = DataFrame(mix2).replace([r'\s*\.\s*', r'a'],
                                           [np.nan, 'crap'], regex=True)
        wanted = DataFrame({
            'a': mix2['a'],
            'b': ['crap', 'b', np.nan, np.nan],
            'c': ['h', 'crap', 'l', 'o'],
        })
        assert_frame_equal(replaced, wanted)

        # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
        replaced = dfmix.replace([r'\s*(\.)\s*', r'(a|b)'],
                                 [r'\1\1', r'\1_crap'], regex=True)
        wanted = DataFrame({
            'a': mix_ab['a'],
            'b': ['a_crap', 'b_crap', '..', '..'],
        })
        assert_frame_equal(replaced, wanted)

        # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
        # or vN)]
        replaced = dfmix.replace([r'\s*(\.)\s*', r'a', r'(b)'],
                                 [r'\1\1', r'crap', r'\1_crap'], regex=True)
        wanted = DataFrame({
            'a': mix_ab['a'],
            'b': ['crap', 'b_crap', '..', '..'],
        })
        assert_frame_equal(replaced, wanted)

        # same patterns and values, passed by keyword
        replaced = dfmix.replace(regex=[r'\s*(\.)\s*', r'a', r'(b)'],
                                 value=[r'\1\1', r'crap', r'\1_crap'])
        wanted = DataFrame({
            'a': mix_ab['a'],
            'b': ['crap', 'b_crap', '..', '..'],
        })
        assert_frame_equal(replaced, wanted)
Example #26
0
    def test_replace_mixed(self):
        """Check ``replace`` on mixed-dtype frames, including dtype upcasting.

        The first part works on the ``self.mixed_frame`` fixture; the rest
        builds small frames to check how integer and float columns are upcast
        when the replacement value does not fit the original dtype.
        """
        # ``mf`` is the fixture object itself, so these assignments put NaN
        # into ``self.mixed_frame`` before it is used below.
        mf = self.mixed_frame
        mf.iloc[5:20, mf.columns.get_loc('foo')] = nan
        mf.iloc[-10:, mf.columns.get_loc('A')] = nan

        # replace(NaN, x) is expected to match fillna(x), and replacing x back
        # with NaN is expected to restore the frame
        result = self.mixed_frame.replace(np.nan, -18)
        expected = self.mixed_frame.fillna(value=-18)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-18, nan), self.mixed_frame)

        # same round trip with a float fill value
        result = self.mixed_frame.replace(np.nan, -1e8)
        expected = self.mixed_frame.fillna(value=-1e8)
        assert_frame_equal(result, expected)
        assert_frame_equal(result.replace(-1e8, nan), self.mixed_frame)

        # int block upcasting
        df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'),
                        'B': Series([0, 1], dtype='int64')})
        expected = DataFrame({'A': Series([1.0, 2.0], dtype='float64'),
                              'B': Series([0.5, 1], dtype='float64')})
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # the in-place form is expected to give the same frame
        df.replace(0, 0.5, inplace=True)
        assert_frame_equal(df, expected)

        # int block splitting
        # ('B' becomes float64 while the other int64 column 'C' stays int64)
        df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'),
                        'B': Series([0, 1], dtype='int64'),
                        'C': Series([1, 2], dtype='int64')})
        expected = DataFrame({'A': Series([1.0, 2.0], dtype='float64'),
                              'B': Series([0.5, 1], dtype='float64'),
                              'C': Series([1, 2], dtype='int64')})
        result = df.replace(0, 0.5)
        assert_frame_equal(result, expected)

        # to object block upcasting
        df = DataFrame({'A': Series([1.0, 2.0], dtype='float64'),
                        'B': Series([0, 1], dtype='int64')})
        expected = DataFrame({'A': Series([1, 'foo'], dtype='object'),
                              'B': Series([0, 1], dtype='int64')})
        result = df.replace(2, 'foo')
        assert_frame_equal(result, expected)

        # list -> list with string values: both columns become object
        expected = DataFrame({'A': Series(['foo', 'bar'], dtype='object'),
                              'B': Series([0, 'foo'], dtype='object')})
        result = df.replace([1, 2], ['foo', 'bar'])
        assert_frame_equal(result, expected)

        # test case from
        # NOTE(review): the source reference for this case is missing from the
        # comment above.
        # Replace 3 with each column's own mean, given as {column: mean}.
        df = DataFrame({'A': Series([3, 0], dtype='int64'),
                        'B': Series([0, 3], dtype='int64')})
        result = df.replace(3, df.mean().to_dict())
        expected = df.copy().astype('float64')
        m = df.mean()
        expected.iloc[0, 0] = m[0]
        expected.iloc[1, 1] = m[1]
        assert_frame_equal(result, expected)
Example #27
0
    def test_replace_input_formats_scalar(self):
        """dict -> scalar and list -> scalar forms of ``replace``."""
        frame = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                           'C': ['', 'asdf', 'fd']})

        # dict to scalar
        per_column = {'A': np.nan, 'B': 0, 'C': ''}
        filled = frame.replace(per_column, 0)
        by_column = {}
        for name, column in compat.iteritems(frame):
            by_column[name] = column.replace(per_column[name], 0)
        assert_frame_equal(filled, DataFrame(by_column))

        # a list ``value`` together with a dict ``to_replace`` is rejected
        with pytest.raises(TypeError):
            frame.replace(per_column, [np.nan, 0, ''])

        # list to scalar
        targets = [np.nan, 0, '']
        result = frame.replace(targets, -1)
        stepwise = frame.copy()
        for target in targets:
            stepwise.replace(target, -1, inplace=True)
        assert_frame_equal(result, stepwise)
Example #28
0
    def test_replace_series_dict(self):
        """``replace`` accepts a dict or a Series for ``value`` and ``to_replace``."""
        # from GH 3064
        frame = DataFrame({'zero': {'a': 0.0, 'b': 1},
                           'one': {'a': 2.0, 'b': 0}})
        wanted = DataFrame({'zero': {'a': 0.5, 'b': 1},
                            'one': {'a': 2.0, 'b': 1.0}})

        # scalar -> dict, then scalar -> Series (the column means)
        from_dict = frame.replace(0, {'zero': 0.5, 'one': 1.0})
        assert_frame_equal(from_dict, wanted)
        from_series = frame.replace(0, frame.mean())
        assert_frame_equal(from_series, wanted)

        # series to series/dict
        frame = DataFrame({'zero': {'a': 0.0, 'b': 1},
                           'one': {'a': 2.0, 'b': 0}})
        targets = Series({'zero': 0.0, 'one': 2.0})
        wanted = DataFrame({'zero': {'a': 0.5, 'b': 1},
                            'one': {'a': 1.0, 'b': 0.0}})

        from_dict = frame.replace(targets, {'zero': 0.5, 'one': 1.0})
        assert_frame_equal(from_dict, wanted)
        from_series = frame.replace(targets, frame.mean())
        assert_frame_equal(from_series, wanted)
Example #29
0
    def test_replace_input_formats_scalar(self):
        """dict -> scalar and list -> scalar forms of ``replace``."""
        frame = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                           'C': ['', 'asdf', 'fd']})

        # dict to scalar
        per_column = {'A': np.nan, 'B': 0, 'C': ''}
        filled = frame.replace(per_column, 0)
        by_column = {}
        for name, column in frame.items():
            by_column[name] = column.replace(per_column[name], 0)
        assert_frame_equal(filled, DataFrame(by_column))

        # a list ``value`` together with a dict ``to_replace`` is rejected
        msg = "value argument must be scalar, dict, or Series"
        with pytest.raises(TypeError, match=msg):
            frame.replace(per_column, [np.nan, 0, ''])

        # list to scalar
        targets = [np.nan, 0, '']
        result = frame.replace(targets, -1)
        stepwise = frame.copy()
        for target in targets:
            stepwise.replace(target, -1, inplace=True)
        assert_frame_equal(result, stepwise)
Example #30
0
    def test_replace_dtypes(self):
        """``replace`` on int, bool, complex and datetime columns."""
        # int (default dtype first, then explicit narrower ones)
        for int_dtype in (None, np.int32, np.int16):
            frame = DataFrame({'ints': [1, 2, 3]}, dtype=int_dtype)
            wanted = DataFrame({'ints': [0, 2, 3]}, dtype=int_dtype)
            assert_frame_equal(frame.replace(1, 0), wanted)

        # bools
        flags = DataFrame({'bools': [True, False, True]})
        all_true = flags.replace(False, True)
        self.assertTrue(all_true.values.all())

        # complex blocks
        frame = DataFrame({'complex': [1j, 2j, 3j]})
        wanted = DataFrame({'complex': [0j, 2j, 3j]})
        assert_frame_equal(frame.replace(1j, 0j), wanted)

        # datetime blocks
        earlier = datetime.today()
        later = datetime.today()
        frame = DataFrame({'datetime64': Index([earlier, later, earlier])})
        wanted = DataFrame({'datetime64': Index([later] * 3)})
        assert_frame_equal(frame.replace(earlier, later), wanted)