def setup_method(self, method):
    """Build the frames shared by the tests that use this setup.

    Sets ``self.df`` (two key columns plus two random data columns, kept
    only where ``key2 > 1``), ``self.df2`` (built with ``n=N // 5`` and,
    for ``key2``, ``ngroups=NGROUPS // 2``), and the ``self.target`` /
    ``self.source`` pair derived from ``tm.getMixedTypeDict()``.
    """
    # aggregate multiple columns
    unfiltered = DataFrame(
        {
            "key1": get_test_data(),
            "key2": get_test_data(),
            "data1": np.random.randn(N),
            "data2": np.random.randn(N),
        }
    )

    # exclude a couple keys for fun
    self.df = unfiltered[unfiltered["key2"] > 1]

    small_n = N // 5
    self.df2 = DataFrame(
        {
            "key1": get_test_data(n=small_n),
            "key2": get_test_data(ngroups=NGROUPS // 2, n=small_n),
            "value": np.random.randn(small_n),
        }
    )

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    merged_columns = {"MergedA": data["A"], "MergedD": data["D"]}
    self.source = DataFrame(merged_columns, index=data["C"])
def test_join(self):
    """Join ``target`` with ``source`` on column 'C' and check the error cases.

    The successful join must reproduce ``target['A']`` / ``target['D']``
    in the ``MergedA`` / ``MergedD`` columns. Joining on a missing
    column, joining with an overlapping column name, and passing ``how``
    together with ``on`` are each expected to raise.
    """
    index, data = common.getMixedTypeDict()
    target = self.klass(data, index=index)

    # Join on string value
    source = self.klass(
        {'MergedA': data['A'], 'MergedD': data['D']}, index=data['C']
    )

    merged = target.join(source, on='C')
    # assertTrue instead of the deprecated ``assert_`` alias
    # (removed from unittest in Python 3.12).
    self.assertTrue(np.array_equal(merged['MergedA'], target['A']))
    self.assertTrue(np.array_equal(merged['MergedD'], target['D']))

    # Test when some are missing

    # merge column not present
    self.assertRaises(Exception, target.join, source, on='E')

    # corner cases

    # nothing to merge -- the result is not inspected, the call only has
    # to complete without raising
    target.join(source.reindex([]), on='C')

    # overlap
    source_copy = source.copy()
    source_copy['A'] = 0
    self.assertRaises(Exception, target.join, source_copy, on='A')

    # can't specify how
    self.assertRaises(Exception, target.join, source, on='C', how='left')
def setUp(self):
    """Create the ``df``, ``df2``, ``target`` and ``source`` fixtures.

    ``self.df`` keeps only the rows whose ``key2`` is greater than 1;
    ``self.df2`` is built with ``n=N // 5``; ``self.target`` and
    ``self.source`` come from ``tm.getMixedTypeDict()``.
    """
    # aggregate multiple columns
    all_rows = DataFrame(dict(
        key1=get_test_data(),
        key2=get_test_data(),
        data1=np.random.randn(N),
        data2=np.random.randn(N),
    ))

    # exclude a couple keys for fun
    keep = all_rows['key2'] > 1
    self.df = all_rows[keep]

    self.df2 = DataFrame(dict(
        key1=get_test_data(n=N // 5),
        key2=get_test_data(ngroups=NGROUPS // 2, n=N // 5),
        value=np.random.randn(N // 5),
    ))

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    self.source = DataFrame(
        {'MergedA': data['A'], 'MergedD': data['D']},
        index=data['C'],
    )
def setUp(self):
    """Create the frames used by the tests that share this setup.

    Besides ``df`` / ``df2`` / ``target`` / ``source`` this also builds
    ``self.left`` (a ``key`` column with repeated labels plus random
    ``v1``) and ``self.right`` (random ``v2`` indexed by 'd', 'b', 'c',
    'a').
    """
    # aggregate multiple columns
    unfiltered = DataFrame({
        'key1': get_test_data(),
        'key2': get_test_data(),
        'data1': np.random.randn(N),
        'data2': np.random.randn(N),
    })

    # exclude a couple keys for fun
    self.df = unfiltered[unfiltered['key2'] > 1]

    small_n = N // 5
    self.df2 = DataFrame({
        'key1': get_test_data(n=small_n),
        'key2': get_test_data(ngroups=NGROUPS // 2, n=small_n),
        'value': np.random.randn(small_n),
    })

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    merged_columns = {'MergedA': data['A'], 'MergedD': data['D']}
    self.source = DataFrame(merged_columns, index=data['C'])

    left_keys = ['a', 'b', 'c', 'd', 'e', 'e', 'a']
    self.left = DataFrame({'key': left_keys, 'v1': np.random.randn(7)})

    right_labels = ['d', 'b', 'c', 'a']
    self.right = DataFrame({'v2': np.random.randn(4)}, index=right_labels)
def test_join(self):
    """Check ``join(..., on=...)`` on a mixed-type frame, including failures.

    A join on column 'C' must carry ``target['A']`` / ``target['D']``
    over as ``MergedA`` / ``MergedD``. A missing ``on`` column, an
    overlapping column name, and ``how`` combined with ``on`` must each
    raise.
    """
    index, data = common.getMixedTypeDict()
    target = self.klass(data, index=index)

    # Join on string value
    source = self.klass(
        {'MergedA': data['A'], 'MergedD': data['D']},
        index=data['C'],
    )

    merged = target.join(source, on='C')
    # ``assert_`` is a deprecated unittest alias (removed in Python 3.12);
    # ``assertTrue`` is the supported spelling.
    self.assertTrue(np.array_equal(merged['MergedA'], target['A']))
    self.assertTrue(np.array_equal(merged['MergedD'], target['D']))

    # Test when some are missing

    # merge column not present
    self.assertRaises(Exception, target.join, source, on='E')

    # corner cases

    # nothing to merge -- only verifies that the call does not raise
    target.join(source.reindex([]), on='C')

    # overlap
    source_copy = source.copy()
    source_copy['A'] = 0
    self.assertRaises(Exception, target.join, source_copy, on='A')

    # can't specify how
    self.assertRaises(Exception, target.join, source, on='C', how='left')
def setUp(self):
    """Populate ``self`` with the frames the tests sharing this setup read.

    Attributes set: ``df`` (rows with ``key2 > 1`` only), ``df2``,
    ``target``, ``source``, ``left`` and ``right``.
    """
    # aggregate multiple columns
    key1 = get_test_data()
    key2 = get_test_data()
    data1 = np.random.randn(N)
    data2 = np.random.randn(N)
    everything = DataFrame(
        {"key1": key1, "key2": key2, "data1": data1, "data2": data2}
    )

    # exclude a couple keys for fun
    wanted = everything["key2"] > 1
    self.df = everything[wanted]

    self.df2 = DataFrame(
        {
            "key1": get_test_data(n=N // 5),
            "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5),
            "value": np.random.randn(N // 5),
        }
    )

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    self.source = DataFrame(
        {"MergedA": data["A"], "MergedD": data["D"]},
        index=data["C"],
    )

    self.left = DataFrame(
        {
            "key": ["a", "b", "c", "d", "e", "e", "a"],
            "v1": np.random.randn(7),
        }
    )
    self.right = DataFrame(
        {"v2": np.random.randn(4)},
        index=["d", "b", "c", "a"],
    )
def test_creation_mixed(self):
    """Build frames from mixed-type data and check the 'foo' column dtype.

    The two frames are constructed (with and without ``set_index``) but
    not inspected; the only assertion is on ``self.mixed_frame['foo']``.
    """
    index, data = tm.getMixedTypeDict()

    with_index = DataFrame.from_dict(data, orient=DataFrame.COLUMNS)
    with_index = with_index.set_index(index)
    indexed_frame = with_index.build()  # noqa

    without_index = DataFrame.from_dict(data, orient=DataFrame.COLUMNS)
    unindexed_frame = without_index.build()  # noqa

    foo_dtype = self.mixed_frame['foo'].dtype
    assert foo_dtype == np.object_
def test_map(self, datetime_series):
    """Exercise ``Series.map`` with Series, dict and function mappers.

    Also covers GH 10324: mapping through Series that have categorical
    values and/or a ``CategoricalIndex``, compared against the same
    mapping through plain Series.
    """
    _, data = tm.getMixedTypeDict()

    source = Series(data["B"], index=data["C"])
    target = Series(data["C"][:4], index=data["D"][:4])

    def assert_lookup_matches(mapped):
        # Every mapped value must equal the source entry that the
        # corresponding target value points at.
        for label, value in mapped.items():
            assert value == source[target[label]]

    assert_lookup_matches(target.map(source))

    # input could be a dict
    assert_lookup_matches(target.map(source.to_dict()))

    # function
    doubled = datetime_series.map(lambda x: x * 2)
    tm.assert_series_equal(doubled, datetime_series * 2)

    # GH 10324
    ints = Series([1, 2, 3, 4])
    parity_cat = Series(["even", "odd", "even", "odd"], dtype="category")
    parity_obj = Series(["even", "odd", "even", "odd"])

    expected = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(ints.map(parity_cat), expected)
    expected = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(ints.map(parity_obj), expected)

    letters = Series(["a", "b", "c", "d"])
    by_cat_index = Series(
        [1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])
    )
    by_plain_index = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

    expected = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(letters.map(by_cat_index), expected)
    expected = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(letters.map(by_plain_index), expected)

    letters = Series(["a", "b", "c", "d"])
    cat_values = Series(
        ["B", "C", "D", "E"],
        dtype="category",
        index=pd.CategoricalIndex(["b", "c", "d", "e"]),
    )
    obj_values = Series(
        ["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])
    )

    expected = Series(
        pd.Categorical(
            [np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]
        )
    )
    tm.assert_series_equal(letters.map(cat_values), expected)
    expected = Series([np.nan, "B", "C", "D"])
    tm.assert_series_equal(letters.map(obj_values), expected)
def test_map(self):
    """Exercise ``Series.map`` with Series, dict and function mappers.

    The GH 10324 section maps through Series with categorical values
    and/or a ``CategoricalIndex`` and through their plain counterparts.
    """
    _, data = tm.getMixedTypeDict()

    source = Series(data['B'], index=data['C'])
    target = Series(data['C'][:4], index=data['D'][:4])

    def assert_lookup_matches(mapped):
        # Each mapped value must equal the source entry selected by the
        # corresponding target value.
        for label, value in compat.iteritems(mapped):
            assert value == source[target[label]]

    assert_lookup_matches(target.map(source))

    # input could be a dict
    assert_lookup_matches(target.map(source.to_dict()))

    # function
    doubled = self.ts.map(lambda x: x * 2)
    tm.assert_series_equal(doubled, self.ts * 2)

    # GH 10324
    ints = Series([1, 2, 3, 4])
    parity_cat = Series(["even", "odd", "even", "odd"], dtype="category")
    parity_obj = Series(["even", "odd", "even", "odd"])

    expected = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(ints.map(parity_cat), expected)
    expected = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(ints.map(parity_obj), expected)

    letters = Series(['a', 'b', 'c', 'd'])
    by_cat_index = Series(
        [1, 2, 3, 4], index=pd.CategoricalIndex(['b', 'c', 'd', 'e'])
    )
    by_plain_index = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

    expected = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(letters.map(by_cat_index), expected)
    expected = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(letters.map(by_plain_index), expected)

    letters = Series(['a', 'b', 'c', 'd'])
    cat_values = Series(
        ['B', 'C', 'D', 'E'],
        dtype='category',
        index=pd.CategoricalIndex(['b', 'c', 'd', 'e']),
    )
    obj_values = Series(
        ['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e'])
    )

    expected = Series(
        pd.Categorical(
            [np.nan, 'B', 'C', 'D'], categories=['B', 'C', 'D', 'E']
        )
    )
    tm.assert_series_equal(letters.map(cat_values), expected)
    expected = Series([np.nan, 'B', 'C', 'D'])
    tm.assert_series_equal(letters.map(obj_values), expected)
def test_map(self):
    """Check ``Series.map`` for Series, dict and callable arguments.

    Ends with the GH 10324 cases, where the mapper has categorical
    values and/or a ``CategoricalIndex``.
    """
    _, data = tm.getMixedTypeDict()

    lookup = Series(data['B'], index=data['C'])
    keys = Series(data['C'][:4], index=data['D'][:4])

    def check_against_lookup(mapped):
        # Compare each mapped value with a direct lookup through ``keys``.
        for label, value in compat.iteritems(mapped):
            assert value == lookup[keys[label]]

    check_against_lookup(keys.map(lookup))

    # input could be a dict
    check_against_lookup(keys.map(lookup.to_dict()))

    # function
    twice = self.ts.map(lambda x: x * 2)
    tm.assert_series_equal(twice, self.ts * 2)

    # GH 10324
    numbers = Series([1, 2, 3, 4])
    cat_mapper = Series(["even", "odd", "even", "odd"], dtype="category")
    obj_mapper = Series(["even", "odd", "even", "odd"])

    want = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(numbers.map(cat_mapper), want)
    want = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(numbers.map(obj_mapper), want)

    labels = Series(['a', 'b', 'c', 'd'])
    cat_mapper = Series(
        [1, 2, 3, 4],
        index=pd.CategoricalIndex(['b', 'c', 'd', 'e']),
    )
    obj_mapper = Series(
        [1, 2, 3, 4],
        index=Index(['b', 'c', 'd', 'e']),
    )

    want = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(labels.map(cat_mapper), want)
    want = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(labels.map(obj_mapper), want)

    labels = Series(['a', 'b', 'c', 'd'])
    cat_mapper = Series(
        ['B', 'C', 'D', 'E'],
        dtype='category',
        index=pd.CategoricalIndex(['b', 'c', 'd', 'e']),
    )
    obj_mapper = Series(
        ['B', 'C', 'D', 'E'],
        index=Index(['b', 'c', 'd', 'e']),
    )

    want = Series(
        pd.Categorical(
            [np.nan, 'B', 'C', 'D'],
            categories=['B', 'C', 'D', 'E'],
        )
    )
    tm.assert_series_equal(labels.map(cat_mapper), want)
    want = Series([np.nan, 'B', 'C', 'D'])
    tm.assert_series_equal(labels.map(obj_mapper), want)
def test_merge(self):
    """Check ``Series.merge`` with a Series argument and with a dict."""
    _, data = common.getMixedTypeDict()

    source = Series(data['B'], index=data['C'])
    target = Series(data['C'][:4], index=data['D'][:4])

    def check(merged):
        # Each merged value must equal the source entry selected by the
        # corresponding target value.
        for label, value in merged.iteritems():
            self.assertEqual(value, source[target[label]])

    check(target.merge(source))

    # input could be a dict
    check(target.merge(source.toDict()))
def test_transpose(self):
    """Check ``frame.T`` cell by cell, then the dtype of a mixed transpose.

    Every value in the transposed frame must match the value at the
    swapped position in ``self.frame`` (NaN matching NaN). Each column
    of a transposed mixed-type frame must have object dtype.
    """
    frame = self.frame
    dft = frame.T
    for idx, series in dft.iteritems():
        for col, value in series.iteritems():
            # assertTrue/assertEqual replace the deprecated ``assert_``
            # alias (removed from unittest in Python 3.12).
            if np.isnan(value):
                self.assertTrue(np.isnan(frame[col][idx]))
            else:
                self.assertEqual(value, frame[col][idx])

    # mixed type
    index, data = common.getMixedTypeDict()
    mixed = self.klass(data, index=index)

    mixed_T = mixed.T
    for col, s in mixed_T.iteritems():
        self.assertEqual(s.dtype, np.object_)
def test_transpose(self, float_frame):
    """Check ``float_frame.T`` cell by cell, then a mixed-type transpose.

    Each transposed value must equal the value at the swapped position
    in ``float_frame`` (NaN matching NaN); every column of the
    transposed mixed-type frame must have object dtype.
    """
    transposed = float_frame.T
    for row_label, row in transposed.items():
        for col_label, cell in row.items():
            if np.isnan(cell):
                assert np.isnan(float_frame[col_label][row_label])
                continue
            assert cell == float_frame[col_label][row_label]

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed_T = self.klass(data, index=index).T
    for _, column in mixed_T.items():
        assert column.dtype == np.object_
def test_transpose(self, float_frame):
    """Compare ``float_frame.T`` with ``float_frame`` value by value.

    NaN cells must be NaN at the swapped position; other cells must be
    equal. A transposed mixed-type frame must have object dtype in every
    column.
    """
    flipped = float_frame.T
    for row_label, row in compat.iteritems(flipped):
        for col_label, cell in compat.iteritems(row):
            if np.isnan(cell):
                assert np.isnan(float_frame[col_label][row_label])
                continue
            assert cell == float_frame[col_label][row_label]

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed_T = self.klass(data, index=index).T
    for _, column in compat.iteritems(mixed_T):
        assert column.dtype == np.object_
def test_transpose(self):
    """Compare ``self.frame.T`` with ``self.frame`` value by value.

    NaN cells must be NaN at the swapped position; other cells must be
    equal. A transposed mixed-type ``DataFrame`` must have object dtype
    in every column.
    """
    original = self.frame
    flipped = original.T
    for row_label, row in compat.iteritems(flipped):
        for col_label, cell in compat.iteritems(row):
            if np.isnan(cell):
                assert np.isnan(original[col_label][row_label])
                continue
            assert cell == original[col_label][row_label]

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed_T = DataFrame(data, index=index).T
    for _, column in compat.iteritems(mixed_T):
        assert column.dtype == np.object_
def test_transpose(self):
    """Verify ``self.frame.T`` against ``self.frame`` one cell at a time.

    NaN cells must be NaN at the swapped position; other cells must be
    equal. Every column of a transposed mixed-type ``DataFrame`` must
    have object dtype.
    """
    original = self.frame
    transposed = original.T
    for row_label, row in compat.iteritems(transposed):
        for col_label, cell in compat.iteritems(row):
            if np.isnan(cell):
                self.assertTrue(np.isnan(original[col_label][row_label]))
                continue
            self.assertEqual(cell, original[col_label][row_label])

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed_T = DataFrame(data, index=index).T
    for _, column in compat.iteritems(mixed_T):
        self.assertEqual(column.dtype, np.object_)
def test_map(self):
    """Check ``Series.map`` with a Series, a dict and a function.

    For the Series and dict mappers, every mapped value must equal the
    source entry selected by the corresponding target value. For the
    function mapper, doubling via ``map`` must match ``self.ts * 2``.
    """
    index, data = tm.getMixedTypeDict()

    source = Series(data['B'], index=data['C'])
    target = Series(data['C'][:4], index=data['D'][:4])

    merged = target.map(source)

    for k, v in merged.iteritems():
        self.assertEqual(v, source[target[k]])

    # input could be a dict
    merged = target.map(source.to_dict())

    for k, v in merged.iteritems():
        self.assertEqual(v, source[target[k]])

    # function
    result = self.ts.map(lambda x: x * 2)
    # assertTrue instead of the deprecated ``assert_`` alias
    # (removed from unittest in Python 3.12).
    self.assertTrue(np.array_equal(result, self.ts * 2))
def setup_method(self, method):
    """Create the ``df``, ``df2``, ``target`` and ``source`` frames.

    ``self.df`` is restricted to rows whose ``key2`` exceeds 1;
    ``self.df2`` is built with ``n=N // 5``; ``self.target`` and
    ``self.source`` are derived from ``tm.getMixedTypeDict()``.
    """
    # aggregate multiple columns
    key1 = get_test_data()
    key2 = get_test_data()
    data1 = np.random.randn(N)
    data2 = np.random.randn(N)
    everything = DataFrame(
        {'key1': key1, 'key2': key2, 'data1': data1, 'data2': data2}
    )

    # exclude a couple keys for fun
    self.df = everything[everything['key2'] > 1]

    fifth = N // 5
    self.df2 = DataFrame({
        'key1': get_test_data(n=fifth),
        'key2': get_test_data(ngroups=NGROUPS // 2, n=fifth),
        'value': np.random.randn(fifth),
    })

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    self.source = DataFrame(
        {'MergedA': data['A'], 'MergedD': data['D']},
        index=data['C'],
    )
def test_constructor_mixed(self):
    """Construct ``self.klass`` from mixed-type data, with and without an index.

    The resulting frames are not inspected; the test only requires both
    constructor calls to complete.
    """
    index, data = common.getMixedTypeDict()
    frame_class = self.klass

    indexed_frame = frame_class(data, index=index)
    unindexed_frame = frame_class(data)