Example #1
0
 def test_aggregate(self):
     """Compare a grouped ``agg`` sum from MDataFrame with pandas.

     The pandas reference groups ``self.df`` by ``x``, sums ``x`` and then
     renames the resulting column to ``x_sum`` before the comparison.
     """
     actual = MDataFrame(self.coll).groupby(['x']).agg({'x': 'sum'})
     # build the pandas reference and align its column name
     expected = self.df.groupby('x').agg({'x': 'sum'})
     expected = expected.rename(columns={'x': 'x_sum'})
     self.assertTrue(actual.equals(expected))
Example #2
0
 def test_mdataframe_xlarge(self):
     """Round-trip a 10,001-row frame through the dataset store.

     The frame is stored as ``mydata-xlarge`` (replacing any previous
     content), read back via MDataFrame, and compared to the original both
     by column set and by value.
     """
     n_rows = int(1e4 + 1)
     frame = pd.DataFrame({
         'a': list(range(0, n_rows)),
         'b': list(range(0, n_rows)),
     })
     datasets = self.om.datasets
     datasets.put(frame, 'mydata-xlarge', append=False)
     collection = datasets.collection('mydata-xlarge')
     loaded = MDataFrame(collection).value
     stored_columns = set(MDataFrame(collection).columns)
     self.assertEqual(stored_columns, set(frame.columns))
     self.assertTrue(loaded.equals(frame))
Example #3
0
 def test_verylarge_dataframe(self):
     if not os.environ.get('TEST_LARGE'):
         return
     other = pd.DataFrame({
         'x': list(range(0, int(10e6))),
         'y': list(range(0, int(10e6))),
         'z': list(range(0, int(10e6)))
     })
     coll = self.coll
     df = self.df
     result = MDataFrame(coll).value
     self.assertEqual(set(MDataFrame(coll).columns), set(list(df.columns)))
     self.assertTrue(result.equals(df))
Example #4
0
 def test_mdataframe_merge(self):
     """Check a left merge on ``x`` between two collections against pandas.

     A 20-row frame with columns ``x``, ``y`` and ``z`` is stored as
     ``samplez`` and merged onto the fixture collection; the result must
     equal the same merge done in pandas on ``self.df``.
     """
     right = pd.DataFrame({
         'x': list(range(0, 20)),
         'y': list(range(0, 20)),
         'z': list(range(0, 20)),
     })
     datasets = self.om.datasets
     datasets.put(right, 'samplez', append=False)
     right_coll = datasets.collection('samplez')
     merged = MDataFrame(self.coll).merge(right_coll, on='x', how='left')
     actual = merged.value
     expected = self.df.merge(right, on='x', how='left')
     self.assertTrue(actual.equals(expected))
Example #5
0
 def test_mdataframe_merge_right_cartesian(self):
     """Check a sorted left merge when the right dataset was put twice.

     ``samplez`` is written once with ``append=False`` and once more with
     ``append=True``, then read back so the pandas reference merge uses the
     stored content. The reference columns are reordered to match the
     MDataFrame result before comparing.
     """
     datasets = self.om.datasets
     seed = pd.DataFrame({
         'x': list(range(0, 5)),
         'y': list(range(0, 5)),
         'z': list(range(0, 5)),
     })
     # write the same frame twice: replace first, then append
     datasets.put(seed, 'samplez', append=False)
     datasets.put(seed, 'samplez', append=True)
     stored = datasets.get('samplez')
     right_coll = datasets.collection('samplez')
     merged = MDataFrame(self.coll).merge(
         right_coll, on='x', how='left', sort=True)
     actual = merged.value
     expected = self.df.merge(stored, on='x', how='left', sort=True)
     expected = expected[actual.columns]
     self.assertTrue(actual.equals(expected))
Example #6
0
 def test_mdataframe_merge_filtered(self):
     """Check a sorted left merge that passes ``filter=dict(x__in=[1, 2])``.

     ``samplez`` is written once with ``append=False`` and once more with
     ``append=True``, then read back for the pandas reference. The reference
     restricts ``self.df`` to rows where ``x`` is 1 or 2 before merging, and
     its columns are reordered to match the MDataFrame result.
     """
     datasets = self.om.datasets
     seed = pd.DataFrame({
         'x': list(range(0, 5)),
         'y': list(range(0, 5)),
         'z': list(range(0, 5)),
     })
     # write the same frame twice: replace first, then append
     datasets.put(seed, 'samplez', append=False)
     datasets.put(seed, 'samplez', append=True)
     stored = datasets.get('samplez')
     right_coll = datasets.collection('samplez')
     merge_options = {
         'on': 'x',
         'how': 'left',
         'sort': True,
         'filter': {'x__in': [1, 2]},
     }
     actual = MDataFrame(self.coll).merge(right_coll, **merge_options).value
     # pandas reference: filter the left side first, then merge
     selected = self.df[self.df['x'].isin([1, 2])]
     expected = selected.merge(stored, on='x', how='left', sort=True)
     expected = expected[actual.columns]
     self.assertTrue(actual.equals(expected))
Example #7
0
 def test_count_column(self):
     """Compare a per-group count of column ``x`` with the pandas result."""
     grouped = MDataFrame(self.coll).groupby(['x'])
     actual = grouped.x.count()
     expected = self.df.groupby('x').x.count()
     self.assertTrue(actual.equals(expected))