def testSparseWithoutShape(self):
        # A sparse tensor is requested here without any ``shape`` argument;
        # the call is expected to raise ValueError.
        import pandas as pd

        local_frame = pd.DataFrame([[1, 2, 3.1]], columns=['i', 'j', 'v'])
        df = DataFrame(local_frame)

        sparse_options = dict(sparse=True, oss_path='fake')
        self.assertRaises(ValueError,
                          df.to_mars_tensor_via_oss,
                          ['i', 'j'], 'v', 15,
                          **sparse_options)
    def testSparseVectorToMars(self):
        # Round trip for a 1-D sparse tensor: persist (i, v) rows as a table,
        # load them as a sparse tensor via OSS, store that tensor into a
        # second table and compare the rows read back with the source pairs.
        import pandas as pd
        import numpy as np

        shape = (50, )
        values = np.random.rand(*shape)
        kv = [(idx, values[idx]) for idx in range(shape[0])]

        source_frame = pd.DataFrame(kv, columns=['i', 'v'])
        df = DataFrame(source_frame).persist(
            tn('test_vector_to_mars'), lifecycle=1, odps=self.odps)

        # The same OSS settings are passed to both the load and the store.
        access_id, access_key, bucket, endpoint = self.config.oss
        oss_kwargs = dict(oss_access_id=access_id,
                          oss_access_key=access_key,
                          oss_bucket_name=bucket,
                          oss_endpoint=endpoint)

        t = df.to_mars_tensor_via_oss(
            ['i'], 'v', 15,
            oss_path=tn('test_vector_to_mars'),
            shape=shape,
            sparse=True,
            **oss_kwargs)

        store_name = tn('test_vector_to_mars_store')
        self.odps.delete_table(store_name, if_exists=True)
        self.odps.persist_tensor_via_oss(
            t, store_name, ['x'], 'y',
            oss_path=store_name,
            **oss_kwargs)

        with self.odps.get_table(store_name).open_reader() as reader:
            rows = [(rec['x'], rec['y']) for rec in reader]
            # Order by index so the rows line up with ``kv``.
            rows.sort(key=lambda pair: pair[0])
            self.assertEqual(kv, rows)
    def testNoPartitionToMars(self):
        # Persists a 100x50 random matrix as (i, j, v) rows, then loads it as
        # a tensor via OSS in several configurations (dense, reusing existing
        # OSS files, dense without a chunk argument, sparse).  Each loaded
        # tensor that is stored back is compared with the source rows.
        import numpy as np
        import pandas as pd

        shape = (100, 50)
        values = np.random.rand(*shape)
        kv = [(row, col, values[row, col])
              for row, col in product(*map(range, shape))]

        source_frame = pd.DataFrame(kv, columns=['i', 'j', 'v'])
        df = DataFrame(source_frame).persist(
            tn('test_no_partition_dense_to_mars'),
            lifecycle=1,
            odps=self.odps)

        access_id, access_key, bucket, endpoint = self.config.oss
        oss_kwargs = dict(oss_access_id=access_id,
                          oss_access_key=access_key,
                          oss_bucket_name=bucket,
                          oss_endpoint=endpoint)

        def load_tensor(*chunks, **options):
            # Load columns i/j/v as a tensor; every variant shares the OSS
            # settings and the same OSS path.
            options.update(oss_kwargs)
            options['oss_path'] = tn('test_no_partition_dense_to_mars_oss')
            return df.to_mars_tensor_via_oss(['i', 'j'], 'v',
                                             *chunks, **options)

        def store_and_check(tensor):
            # Write the tensor into a freshly dropped table, read it back and
            # compare the sorted rows with the source data.
            store_name = tn('test_no_partition_dense_to_mars_store')
            self.odps.delete_table(store_name, if_exists=True)
            self.odps.persist_tensor_via_oss(
                tensor, store_name, ['x', 'y'], 'z',
                oss_path=store_name,
                **oss_kwargs)

            with self.odps.get_table(store_name).open_reader() as reader:
                rows = [(rec['x'], rec['y'], rec['z']) for rec in reader]
                rows.sort(key=lambda rec: (rec[0], rec[1]))
                self.assertEqual(kv, rows)

        # test dense
        # The result is replaced right below; the call is kept because the
        # next load targets the same OSS path with oss_file_exist=True.
        t = load_tensor(15, shape=shape)

        # test if oss file exist
        t = load_tensor(15, oss_file_exist=True)
        store_and_check(t)

        # test dense without setting chunks
        t = load_tensor(shape=shape)
        store_and_check(t)

        # test sparse
        t = load_tensor(15, shape=shape, sparse=True)
        self.assertTrue(t.issparse())
        store_and_check(t)