コード例 #1
0
 def test_aggregate(self):
     """Grouped sum via MDataFrame equals the pandas groupby sum.

     The pandas result column is renamed to ``x_sum`` before comparing.
     """
     source = self.coll
     frame = self.df
     actual = MDataFrame(source).groupby(['x']).agg({'x': 'sum'})
     # build the pandas reference and rename its column to match
     expected = (frame.groupby('x')
                 .agg({'x': 'sum'})
                 .rename(columns={'x': 'x_sum'}))
     self.assertTrue(actual.equals(expected))
コード例 #2
0
 def test_mdataframe(self):
     """MDataFrame exposes the same columns, values and shape as the
     reference pandas frame."""
     source = self.coll
     expected = self.df
     lazy = MDataFrame(source)
     materialized = lazy.value
     # column names are compared as sets, i.e. regardless of order
     actual_columns = set(MDataFrame(source).columns)
     expected_columns = set(expected.columns)
     self.assertEqual(actual_columns, expected_columns)
     self.assertTrue(materialized.equals(expected))
     self.assertEqual(lazy.shape, expected.shape)
コード例 #3
0
 def test_count_multi_columns(self):
     """Group counts match pandas after adding a constant column ``z``."""
     source = self.coll
     frame = self.df
     lazy = MDataFrame(source)
     # add the same constant column on both sides
     for target in (lazy, frame):
         target['z'] = 5
     # group by x and count on both sides
     actual = lazy.groupby(['x']).count()
     expected = frame.groupby('x').count()
     self.assertTrue(expected.equals(actual))
コード例 #4
0
 def test_mdataframe_xlarge(self):
     """Store a 10001-row frame and read it back through MDataFrame."""
     n_rows = int(1e4 + 1)
     df = pd.DataFrame({name: list(range(n_rows)) for name in ('a', 'b')})
     datasets = self.om.datasets
     datasets.put(df, 'mydata-xlarge', append=False)
     coll = datasets.collection('mydata-xlarge')
     materialized = MDataFrame(coll).value
     # column names are compared as sets, i.e. regardless of order
     self.assertEqual(set(MDataFrame(coll).columns), set(df.columns))
     self.assertTrue(materialized.equals(df))
コード例 #5
0
 def test_verylarge_dataframe(self):
     if not os.environ.get('TEST_LARGE'):
         return
     other = pd.DataFrame({
         'x': list(range(0, int(10e6))),
         'y': list(range(0, int(10e6))),
         'z': list(range(0, int(10e6)))
     })
     coll = self.coll
     df = self.df
     result = MDataFrame(coll).value
     self.assertEqual(set(MDataFrame(coll).columns), set(list(df.columns)))
     self.assertTrue(result.equals(df))
コード例 #6
0
 def test_mdataframe_merge(self):
     """Left merge of two collections on ``x`` equals the pandas merge."""
     left_coll = self.coll
     left_df = self.df
     om = self.om
     # right-hand side: 20 rows with identical x, y, z values
     right_df = pd.DataFrame(
         {name: list(range(20)) for name in ('x', 'y', 'z')})
     om.datasets.put(right_df, 'samplez', append=False)
     right_coll = om.datasets.collection('samplez')
     merged = MDataFrame(left_coll).merge(right_coll, on='x', how='left')
     actual = merged.value
     expected = left_df.merge(right_df, on='x', how='left')
     self.assertTrue(actual.equals(expected))
コード例 #7
0
 def test_unique_series(self):
     """Unique values of column ``x`` match pandas ``Series.unique``.

     The reference frame is stored as dataset 'uniques' and read back
     through its collection.
     """
     df = self.df
     om = self.om
     om.datasets.put(df, 'uniques', append=False)
     # the collection is taken from the freshly stored dataset; the
     # previous assignment from self.coll was overwritten before use
     coll = om.datasets.collection('uniques')
     result = MDataFrame(coll).x.unique().value
     self.assertListEqual(list(result), list(df.x.unique()))
コード例 #8
0
 def test_aggregate_multi_stats(self):
     """Several statistics on one grouped column match pandas.

     The pandas result has its columns mapped through ``flatten_columns``
     and reordered to the MDataFrame column order; dtypes are not checked.
     """
     source = self.coll
     frame = self.df
     stats = {'x': ['sum', 'mean', 'max', 'min', 'std']}
     actual = MDataFrame(source).groupby(['x']).agg(stats)
     expected = frame.groupby('x').agg(stats)
     # align the pandas column labels and order with the MDataFrame result
     expected.columns = expected.columns.map(flatten_columns)
     expected = expected[actual.columns]
     assert_frame_equal(expected, actual, check_dtype=False)
コード例 #9
0
ファイル: test_mdataframe.py プロジェクト: omegaml/omegaml
 def test_mdataframe_merge_right_cartesian(self):
     """Sorted left merge against a dataset that was written twice.

     The right-hand dataset is stored once and then appended to itself,
     and the pandas reference is built from the stored result.
     """
     left_coll = self.coll
     left_df = self.df
     om = self.om
     right_df = pd.DataFrame(
         {name: list(range(5)) for name in ('x', 'y', 'z')})
     # write, then append the same rows again
     for append in (False, True):
         om.datasets.put(right_df, 'samplez', append=append)
     right_df = om.datasets.get('samplez')
     right_coll = om.datasets.collection('samplez')
     merged = MDataFrame(left_coll).merge(right_coll, on='x', how='left',
                                          sort=True)
     actual = merged.value
     expected = left_df.merge(right_df, on='x', how='left', sort=True)
     # compare using the column order of the MDataFrame result
     expected = expected[actual.columns]
     self.assertTrue(actual.equals(expected))
コード例 #10
0
 def test_groupby(self):
     """Iterating an MDataFrame groupby yields one sub-frame per ``x``.

     Each group is compared to the matching pandas selection, and the set
     of visited keys must equal the set of ``x`` values in the frame.
     """
     source = self.coll
     frame = self.df
     seen = set()
     for group_key, group in MDataFrame(source).groupby(['x']):
         value = group_key.get('x')
         seen.add(value)
         assert_frame_equal(frame[frame.x == value], group.value)
     self.assertEqual(seen, set(frame.x))
コード例 #11
0
ファイル: apply.py プロジェクト: databill86/omegaml
 def apply(self, fn, inplace=False, preparefn=None):
     """Attach ``fn`` as ``apply_fn`` and return the object carrying it.

     With ``inplace`` set, ``self`` is modified and returned. Otherwise a
     new MSeries (if ``self`` is an MSeries) or MDataFrame is created on
     the same collection from ``self._getcopy_kwargs()`` plus
     ``preparefn``. Note that ``preparefn`` is not used on the inplace
     path.
     """
     if not inplace:
         copy_kwargs = self._getcopy_kwargs()
         copy_kwargs.update(preparefn=preparefn)
         # keep the container type of the original object
         klass = MSeries if isinstance(self, MSeries) else MDataFrame
         target = klass(self.collection, **copy_kwargs)
     else:
         target = self
     target.apply_fn = fn
     return target
コード例 #12
0
ファイル: test_mdataframe.py プロジェクト: omegaml/omegaml
 def test_mdataframe_merge_filtered(self):
     """Sorted left merge with a filter on ``x`` in [1, 2] matches pandas.

     The pandas reference applies the same restriction with ``isin``
     before merging.
     """
     left_coll = self.coll
     left_df = self.df
     om = self.om
     right_df = pd.DataFrame(
         {name: list(range(5)) for name in ('x', 'y', 'z')})
     # write, then append the same rows again
     for append in (False, True):
         om.datasets.put(right_df, 'samplez', append=append)
     right_df = om.datasets.get('samplez')
     right_coll = om.datasets.collection('samplez')
     merge_kwargs = dict(on='x', how='left', sort=True)
     merged = MDataFrame(left_coll).merge(right_coll,
                                          filter=dict(x__in=[1, 2]),
                                          **merge_kwargs)
     actual = merged.value
     selected = left_df['x'].isin([1, 2])
     expected = left_df[selected].merge(right_df, **merge_kwargs)
     # compare using the column order of the MDataFrame result
     expected = expected[actual.columns]
     self.assertTrue(actual.equals(expected))
コード例 #13
0
 def get(self,
         name,
         version=-1,
         lazy=False,
         raw=False,
         parser=None,
         **kwargs):
     """Return the named collection as an MDataFrame or its value.

     ``kwargs`` are handed to MDataFrame both as ``query`` and as extra
     keyword arguments. If no ``parser`` is given, rows are parsed with
     ``json_normalize``. ``version`` is not used in this method.

     Returns the MDataFrame itself when ``lazy`` is truthy, otherwise its
     ``.value``.
     """
     def _normalize(records):
         # json_normalize needs a list of dicts to work, not a generator
         return json_normalize(list(records))

     collection = self.data_store.collection(name)
     if not parser:
         parser = _normalize
     mdf = MDataFrame(collection,
                      query=kwargs,
                      parser=parser,
                      raw=raw,
                      **kwargs)
     if lazy:
         return mdf
     return mdf.value
コード例 #14
0
 def test_mdataframe_merge_append(self):
     """Merge with empty suffixes after appending a dataset to itself.

     The pandas reference is the fixture frame concatenated with the
     right-hand frame, reordered to the MDataFrame column order.
     """
     ## FIXME this does not work
     coll = self.coll
     df = self.df
     om = self.om
     other = pd.DataFrame({
         'x': list(range(0, 5)),
         'y': list(range(0, 5)),
         'z': list(range(0, 5))
     })
     om.datasets.put(other, 'samplez', append=False)
     mdf = om.datasets.getl('samplez')
     mdf.append(mdf)
     coll2 = om.datasets.collection('samplez')
     result = MDataFrame(coll).merge(coll2,
                                     on='x',
                                     how='left',
                                     suffixes=('', '')).value
     # DataFrame.append was removed in pandas 2.0; pd.concat is the
     # equivalent row-wise concatenation
     testdf = pd.concat([df, other], ignore_index=True)
     testdf = testdf[result.columns]
     assert_frame_equal(result, testdf)
コード例 #15
0
 def test_mdataframe_count(self):
     """Per-column counts and row count of MDataFrame match pandas."""
     coll = self.coll
     df = self.df
     mdf = MDataFrame(coll)
     assert_series_equal(df.count(), mdf.count())
     # compare against the pandas frame; comparing mdf with itself would
     # always pass
     self.assertEqual(len(mdf), len(df))
コード例 #16
0
 def test_mdataframe_column_attribute(self):
     """Attribute access to column ``y`` yields the same values as pandas."""
     source = self.coll
     frame = self.df
     actual = MDataFrame(source).y.value
     expected = frame.y
     self.assertTrue(expected.equals(actual))
コード例 #17
0
 def test_mdataframe_columns_slice(self):
     """Selecting columns ``x`` and ``y`` by list matches pandas."""
     source = self.coll
     frame = self.df
     subset = MDataFrame(source)[['x', 'y']]
     actual = subset.value
     expected = frame[['x', 'y']]
     self.assertTrue(expected.equals(actual))
コード例 #18
0
 def test_mdataframe_sort(self):
     """Sorting by ``-x``, ``-y`` matches a descending pandas sort."""
     source = self.coll
     frame = self.df
     actual = MDataFrame(source).sort(['-x', '-y']).value
     # pandas reference: both keys descending
     expected = frame.sort_values(['x', 'y'], ascending=[False, False])
     assert_frame_equal(expected, actual)
コード例 #19
0
 def test_count_column(self):
     """Count of column ``x`` per group matches the pandas result."""
     source = self.coll
     frame = self.df
     grouped = MDataFrame(source).groupby(['x'])
     actual = grouped.x.count()
     expected = frame.groupby('x').x.count()
     self.assertTrue(actual.equals(expected))
コード例 #20
0
 def test_count(self):
     """Group counts over all columns match the pandas result."""
     source = self.coll
     frame = self.df
     grouped = MDataFrame(source).groupby(['x'])
     actual = grouped.count()
     expected = frame.groupby('x').count()
     self.assertTrue(expected.equals(actual))