Code Example #1
File: test_cluster.py  Project: kevintsok/mars
    def testEagerMode(self):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=True) as cluster:

            self.assertIsInstance(Session.default_or_local()._sess,
                                  LocalClusterSession)

            with option_context({'eager_mode': True}):
                a_data = np.random.rand(10, 10)

                a = mt.tensor(a_data, chunk_size=3)
                np.testing.assert_array_equal(a, a_data)

                r1 = a + 1
                expected1 = a_data + 1
                np.testing.assert_array_equal(r1, expected1)

                r2 = r1.dot(r1)
                expected2 = expected1.dot(expected1)
                np.testing.assert_array_almost_equal(r2, expected2)

            a = mt.ones((10, 10), chunk_size=3)
            with self.assertRaises(ValueError):
                a.fetch()

            r = a.dot(a)
            np.testing.assert_array_equal(r.execute(), np.ones((10, 10)) * 10)

            with new_session('http://' + cluster._web_endpoint).as_default():
                self.assertIsInstance(Session.default_or_local()._sess,
                                      WebSession)

                with option_context({'eager_mode': True}):
                    a_data = np.random.rand(10, 10)

                    a = mt.tensor(a_data, chunk_size=3)
                    np.testing.assert_array_equal(a, a_data)

                    r1 = a + 1
                    expected1 = a_data + 1
                    np.testing.assert_array_equal(r1, expected1)

                    r2 = r1.dot(r1)
                    expected2 = expected1.dot(expected1)
                    np.testing.assert_array_almost_equal(r2, expected2)

                a = mt.ones((10, 10), chunk_size=3)
                with self.assertRaises(ValueError):
                    a.fetch()

                r = a.dot(a)
                np.testing.assert_array_equal(r.execute(),
                                              np.ones((10, 10)) * 10)
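
The same eager-mode pattern can be exercised without a cluster. Below is a minimal sketch, not part of the test above, assuming a plain local session and that option_context is importable from mars.config as in the Mars version these tests target:

import numpy as np
import mars.tensor as mt
from mars.config import option_context

a_data = np.random.rand(10, 10)

with option_context({'eager_mode': True}):
    # In eager mode tensors are executed as soon as they are created,
    # so they can be compared against NumPy data immediately.
    a = mt.tensor(a_data, chunk_size=3)
    np.testing.assert_array_equal(a, a_data)

# Outside eager mode execution is deferred: fetch() on an unexecuted
# tensor raises ValueError, while execute() triggers the computation.
b = mt.ones((10, 10), chunk_size=3)
np.testing.assert_array_equal(b.dot(b).execute(), np.ones((10, 10)) * 10)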
Code Example #2
    def testFetch(self):
        from mars.session import Session

        with option_context({'eager_mode': True}):
            arr1 = mt.ones((10, 5), chunk_size=4)
            np.testing.assert_array_equal(arr1, np.ones((10, 5)))

            sess = Session.default_or_local()
            executor = sess._sess._executor
            executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
                (4, 4)) * 2

            arr2 = mt.ones((10, 5), chunk_size=4) - 1
            result = arr2.fetch()
            np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
            np.testing.assert_array_equal(result[8:, :4], np.zeros((2, 4)))

        arr3 = mt.ones((10, 5), chunk_size=4) - 1
        # arr1's data is referenced by arr2, so it will not be garbage
        # collected until arr2 is deleted
        del arr2

        with self.assertRaises(ValueError):
            arr3.fetch()

        result = arr3.execute()
        np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
        np.testing.assert_array_equal(result[8:, :4], np.zeros((2, 4)))
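
Outside a test class, the fetch/execute contrast above reduces to a few lines. A minimal sketch under the same assumptions (local default session, option_context from mars.config):

import numpy as np
import mars.tensor as mt
from mars.config import option_context

with option_context({'eager_mode': True}):
    arr1 = mt.ones((10, 5), chunk_size=4)
    # already executed eagerly, so fetch() returns data right away
    np.testing.assert_array_equal(arr1.fetch(), np.ones((10, 5)))

arr2 = mt.ones((8, 6), chunk_size=4)
try:
    arr2.fetch()            # not executed yet, so this raises
except ValueError:
    pass
np.testing.assert_array_equal(arr2.execute(), np.ones((8, 6)))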
Code Example #3
    def testDataFrameExecuteNotFetch(self):
        data1 = pd.DataFrame(np.random.random((5, 4)), columns=list('abcd'))
        sess = Session.default_or_local()

        df1 = md.DataFrame(data1, chunk_size=2)

        with self.assertRaises(ValueError):
            sess.fetch(df1)

        self.assertIs(df1.execute(), df1)
        self.assertEqual(
            len(df1[df1['a'] > 1].to_pandas(fetch_kwargs={'batch_size': 2})),
            0)
        self.assertEqual(
            len(df1[df1['a'] > 1]['a'].to_pandas(
                fetch_kwargs={'batch_size': 2})), 0)

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(
            df1).chunks[0].key] = data1.iloc[:2, :2] * 3

        expected = data1
        expected.iloc[:2, :2] = data1.iloc[:2, :2] * 3

        pd.testing.assert_frame_equal(df1.to_pandas(), expected)
        pd.testing.assert_frame_equal(
            df1.to_pandas(fetch_kwargs={'batch_size': 2}), expected)
Code Example #4
    def testReExecuteSame(self):
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.to_numpy()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.to_numpy()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2

        result3 = arr4.to_numpy()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.to_numpy()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test copy; make sure it does not cause the execution cache to be missed
        df = md.DataFrame(mt.ones((10, 3), chunk_size=5))
        executed = [False]

        def add_one(x):
            if executed[0]:  # pragma: no cover
                raise ValueError('executed before')
            return x + 1

        df2 = df.apply(add_one)
        pd.testing.assert_frame_equal(df2.to_pandas(), pd.DataFrame(np.ones((10, 3)) + 1))

        executed[0] = True

        df3 = df2.copy()
        df4 = df3 * 2
        pd.testing.assert_frame_equal(df4.to_pandas(), pd.DataFrame(np.ones((10, 3)) * 4))
Code Example #5
File: test_session.py  Project: queenwu/mars
    def testExecuteBothExecutedAndNot(self):
        data = np.random.random((5, 9))

        arr1 = mt.tensor(data, chunk_size=4) * 2
        arr2 = mt.tensor(data) + 1

        np.testing.assert_array_equal(arr2.execute(), data + 1)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[arr2.chunks[0].key] = data + 2

        results = sess.run(arr1, arr2)
        np.testing.assert_array_equal(results[0], data * 2)
        np.testing.assert_array_equal(results[1], data + 2)
Code Example #6
    def testSingleOutputTensorExecute(self):
        with new_cluster(scheduler_n_process=2, worker_n_process=2) as cluster:
            self.assertIs(cluster.session, Session.default_or_local())

            t = mt.random.rand(10)
            r = t.sum()

            res = r.execute()
            self.assertTrue(np.isscalar(res))
            self.assertLess(res, 10)

            t = mt.random.rand(10)
            r = t.sum() * 4 - 1

            res = r.execute()
            self.assertLess(res, 39)
Code Example #7
File: test_eager_mode.py  Project: tangyiyong/mars
    def testFetch(self):
        from mars.session import Session

        with option_context({'eager_mode': True}):
            arr1 = mt.ones((10, 5), chunk_size=4)
            np.testing.assert_array_equal(arr1, np.ones((10, 5)))

            sess = Session.default_or_local()
            executor = sess._sess._executor
            executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
                (4, 4)) * 2

            arr2 = mt.ones((10, 5), chunk_size=4) - 1
            result = arr2.fetch()
            np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
            np.testing.assert_array_equal(result[8:, :4], np.zeros((2, 4)))
Code Example #8
    def testNamed(self):
        rs = np.random.RandomState(0)
        raw = rs.rand(10, 10)

        sess = Session.default_or_local()

        # test named tensor
        t = mt.tensor(raw, chunk_size=3)
        name = 't_name'
        r1 = t.execute(name=name, session=sess)
        np.testing.assert_array_equal(r1, raw)

        t2 = mt.named_tensor(name=name, session=sess)
        self.assertEqual(t2.order, TensorOrder.C_ORDER)
        r2 = (t2 + 1).execute(session=sess).fetch()
        np.testing.assert_array_equal(r2, raw + 1)

        # test named series
        name = 's_name'
        raw = pd.Series([1, 2, 3])
        s = md.Series(raw)
        r1 = s.execute(name=name, session=sess).fetch()
        pd.testing.assert_series_equal(r1, raw)

        s2 = md.named_series(name=name, session=sess)
        self.assertEqual(s2.dtype, s.dtype)
        pd.testing.assert_index_equal(s2.index_value.to_pandas(),
                                      s.index_value.to_pandas())
        r2 = s2.execute(session=sess).fetch()
        pd.testing.assert_series_equal(r2, raw)

        # test dataframe
        name = 'd_name'
        raw = pd.DataFrame(np.random.rand(10, 3))
        d = md.DataFrame(raw, chunk_size=4)
        r1 = d.execute(name=name, session=sess).fetch()
        pd.testing.assert_frame_equal(r1, raw)

        d2 = md.named_dataframe(name=name, session=sess)
        pd.testing.assert_series_equal(d2.dtypes, d.dtypes)
        pd.testing.assert_index_equal(d2.index_value.to_pandas(),
                                      d.index_value.to_pandas())
        pd.testing.assert_index_equal(d2.columns_value.to_pandas(),
                                      d.columns_value.to_pandas())
        r2 = d2.execute(session=sess).fetch()
        pd.testing.assert_frame_equal(r2, raw)
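
Condensed, the named-object round trip above looks like the sketch below, assuming a default local session and the mt.named_tensor / md.named_dataframe APIs shown in the test:

import numpy as np
import pandas as pd
import mars.tensor as mt
import mars.dataframe as md
from mars.session import Session

sess = Session.default_or_local()

# Execute under a name, then rebuild a reference from that name alone.
raw = np.random.RandomState(0).rand(10, 10)
t = mt.tensor(raw, chunk_size=3)
t.execute(name='t_name', session=sess)
t2 = mt.named_tensor(name='t_name', session=sess)
np.testing.assert_array_equal((t2 + 1).execute(session=sess).fetch(), raw + 1)

# The same pattern works for DataFrames.
raw_df = pd.DataFrame(np.random.rand(10, 3))
d = md.DataFrame(raw_df, chunk_size=4)
d.execute(name='d_name', session=sess)
d2 = md.named_dataframe(name='d_name', session=sess)
pd.testing.assert_frame_equal(d2.execute(session=sess).fetch(), raw_df)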
Code Example #9
File: test_session.py  Project: queenwu/mars
    def testExecuteNotFetch(self):
        data = np.random.random((5, 9))
        sess = Session.default_or_local()

        arr1 = mt.tensor(data, chunk_size=2) * 2

        with self.assertRaises(ValueError):
            sess.fetch(arr1)

        self.assertIsNone(arr1.execute(fetch=False))

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[arr1.chunks[0].key] = data[:2, :2] * 3

        expected = data * 2
        expected[:2, :2] = data[:2, :2] * 3

        np.testing.assert_array_equal(arr1.execute(), expected)
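
The deferred-fetch behaviour in this test can be summarised in a short sketch, assuming a local default session and the same Mars version, where execute(fetch=False) returns None and execute() returns the fetched data:

import numpy as np
import mars.tensor as mt
from mars.session import Session

data = np.random.random((5, 9))
arr = mt.tensor(data, chunk_size=2) * 2

sess = Session.default_or_local()
try:
    sess.fetch(arr)                            # nothing has run yet
except ValueError:
    pass

assert arr.execute(fetch=False) is None        # run the graph, keep data remote
np.testing.assert_array_equal(arr.execute(), data * 2)   # now returns results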
Code Example #10
File: test_session.py  Project: zvrr/mars
    def testDataFrameExecuteNotFetch(self):
        data1 = pd.DataFrame(np.random.random((5, 4)), columns=list('abcd'))
        sess = Session.default_or_local()

        df1 = md.DataFrame(data1, chunk_size=2)

        with self.assertRaises(ValueError):
            sess.fetch(df1)

        self.assertIsNone(df1.execute(fetch=False))

        # modify result
        executor = sess._sess._executor
        executor.chunk_result[df1.chunks[0].key] = data1.iloc[:2, :2] * 3

        expected = data1
        expected.iloc[:2, :2] = data1.iloc[:2, :2] * 3

        pd.testing.assert_frame_equal(df1.execute(), expected)
Code Example #11
    def testSingleOutputTensorExecute(self, *_):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M') as cluster:
            self.assertIs(cluster.session, Session.default_or_local())

            t = mt.random.rand(10)
            r = t.sum()

            res = r.to_numpy()
            self.assertTrue(np.isscalar(res))
            self.assertLess(res, 10)

            raw = np.random.rand(10)
            t = mt.tensor(raw)
            r = (mt.linalg.norm(t) * 4 - 1).sum()

            res = r.to_numpy()
            expected = (np.linalg.norm(raw) * 4 - 1).sum()
            np.testing.assert_array_almost_equal(res, expected)
Code Example #12
File: test_session.py  Project: queenwu/mars
    def testReExecuteSame(self):
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.execute()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.execute()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.execute()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.execute()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[arr4.chunks[0].key] = data + 2

        result3 = arr4.execute()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.execute()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.execute()

        np.testing.assert_array_equal(result1, result2)
Code Example #13
File: test_session.py  Project: tomzhang/mars-1
    def testNamed(self):
        rs = np.random.RandomState(0)
        raw = rs.rand(10, 10)

        sess = Session.default_or_local()

        # test named tensor
        t = mt.tensor(raw, chunk_size=3)
        name = 't_name'
        r1 = t.execute(name=name, session=sess)
        np.testing.assert_array_equal(r1, raw)

        t2 = mt.named_tensor(name=name, session=sess)
        r2 = (t2 + 1).execute(session=sess).fetch()
        np.testing.assert_array_equal(r2, raw + 1)

        # test named series
        name = 's_name'
        raw = pd.Series([1, 2, 3])
        s = md.Series(raw)
        r1 = s.execute(name=name, session=sess).fetch()
        pd.testing.assert_series_equal(r1, raw)

        s2 = md.named_series(name=name, session=sess)
        r2 = s2.execute(session=sess).fetch()
        pd.testing.assert_series_equal(r2, raw)

        # test dataframe
        name = 'd_name'
        raw = pd.DataFrame(np.random.rand(10, 3))
        d = md.DataFrame(raw, chunk_size=4)
        r1 = d.execute(name=name, session=sess).fetch()
        pd.testing.assert_frame_equal(r1, raw)

        d2 = md.named_dataframe(name=name, session=sess)
        r2 = d2.execute(session=sess).fetch()
        pd.testing.assert_frame_equal(r2, raw)
Code Example #14
File: test_eager_mode.py  Project: wzxJayce/mars
    def testFetch(self):
        from mars.session import Session

        with option_context({'eager_mode': True}):
            arr1 = mt.ones((10, 5), chunk_size=4)
            np.testing.assert_array_equal(arr1, np.ones((10, 5)))

            sess = Session.default_or_local()
            executor = sess._sess._executor
            executor.chunk_result[arr1.chunks[0].key] = np.ones((4, 4)) * 2

            arr2 = mt.ones((10, 5), chunk_size=4) - 1
            result = arr2.fetch()
            np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
            np.testing.assert_array_equal(result[4:8, :4], np.zeros((4, 4)))

        arr3 = mt.ones((10, 5), chunk_size=4) - 1

        with self.assertRaises(ValueError):
            arr3.fetch()

        result = arr3.execute()
        np.testing.assert_array_equal(result[:4, :4], np.ones((4, 4)))
        np.testing.assert_array_equal(result[4:8, :4], np.zeros((4, 4)))
Code Example #15
File: test_cluster.py  Project: ueshin/mars
    def testEagerMode(self, *_):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=True) as cluster:

            self.assertIsInstance(Session.default_or_local()._sess,
                                  LocalClusterSession)

            with option_context({'eager_mode': True}):
                a_data = np.random.rand(10, 10)

                a = mt.tensor(a_data, chunk_size=3)
                np.testing.assert_array_equal(a, a_data)

                r1 = a + 1
                expected1 = a_data + 1
                np.testing.assert_array_equal(r1, expected1)

                r2 = r1.dot(r1)
                expected2 = expected1.dot(expected1)
                np.testing.assert_array_almost_equal(r2, expected2)

            a = mt.ones((10, 10), chunk_size=3)
            with self.assertRaises(ValueError):
                a.fetch()

            r = a.dot(a)
            np.testing.assert_array_equal(r.execute(), np.ones((10, 10)) * 10)

            with new_session('http://' + cluster._web_endpoint).as_default():
                self.assertIsInstance(Session.default_or_local()._sess,
                                      WebSession)

                with option_context({'eager_mode': True}):
                    a_data = np.random.rand(10, 10)

                    a = mt.tensor(a_data, chunk_size=3)
                    np.testing.assert_array_equal(a, a_data)

                    r1 = a + 1
                    expected1 = a_data + 1
                    np.testing.assert_array_equal(r1, expected1)

                    r2 = r1.dot(r1)
                    expected2 = expected1.dot(expected1)
                    np.testing.assert_array_almost_equal(r2, expected2)

                    web_session = Session.default_or_local()._sess
                    self.assertEqual(web_session.get_task_count(), 3)

                a = mt.ones((10, 10), chunk_size=3)
                with self.assertRaises(ValueError):
                    a.fetch()

                r = a.dot(a)
                np.testing.assert_array_equal(r.execute(),
                                              np.ones((10, 10)) * 10)

            with new_session('http://' + cluster._web_endpoint).as_default():
                from mars.dataframe.datasource.dataframe import from_pandas as from_pandas_df
                from mars.dataframe.datasource.series import from_pandas as from_pandas_series
                from mars.dataframe.arithmetic import add

                self.assertIsInstance(Session.default_or_local()._sess,
                                      WebSession)

                with option_context({'eager_mode': True}):
                    data1 = pd.DataFrame(
                        np.random.rand(10, 10),
                        index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9],
                        columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7])
                    df1 = from_pandas_df(data1, chunk_size=5)
                    pd.testing.assert_frame_equal(df1.fetch(), data1)

                    data2 = pd.DataFrame(
                        np.random.rand(10, 10),
                        index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3],
                        columns=[5, 9, 12, 3, 11, 10, 6, 4, 1, 2])
                    df2 = from_pandas_df(data2, chunk_size=6)
                    pd.testing.assert_frame_equal(df2.fetch(), data2)

                    df3 = add(df1, df2)
                    pd.testing.assert_frame_equal(df3.fetch(), data1 + data2)

                    s1 = pd.Series(np.random.rand(10),
                                   index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
                    series1 = from_pandas_series(s1)
                    pd.testing.assert_series_equal(series1.fetch(), s1)

                web_session = Session.default_or_local()._sess
                self.assertEqual(web_session.get_task_count(), 4)
Code Example #16
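# Note: this excerpt relies on module-level imports and helpers from its
# source file that are not shown here (uuid, itertools.chain, six,
# PartitionSpec, Schema, np_to_odps_types, _clean_oss_object and
# _write_shape_to_oss).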
def persist_tensor_via_oss(odps, *args, **kwargs):
    from mars.session import Session
    from .tensor.datastore import write_coo

    session = kwargs.pop('session', Session.default_or_local())
    oss_endpoint = kwargs.pop('oss_endpoint')
    oss_access_id = kwargs.pop('oss_access_id')
    oss_access_key = kwargs.pop('oss_access_key')
    oss_bucket_name = kwargs.pop('oss_bucket_name')
    oss_path = kwargs.pop('oss_path')

    oss_prefix = 'oss://%s/' % oss_bucket_name
    if oss_path.startswith(oss_prefix):
        oss_path = oss_path[len(oss_prefix):]

    oss_opts = dict(endpoint=oss_endpoint,
                    bucket_name=oss_bucket_name,
                    access_id=oss_access_id,
                    secret_access_key=oss_access_key)

    tensor, table_name, dim_columns, value_column = args
    oss_dir = 'oss://%s' % oss_path
    _clean_oss_object(oss_path, **oss_opts)

    t_type = None
    partitions = None

    # submit tensor to mars cluster
    tensors = []
    if isinstance(tensor, dict):
        for p, t in tensor.items():
            if t_type is None:
                t_type = t.dtype
            p_spec = PartitionSpec(p)
            if partitions is None:
                partitions = p_spec.keys
            else:
                if set(partitions) != set(p_spec.keys):
                    raise TypeError(
                        'partition names must be the same for all tensors.')

            if t.ndim > len(dim_columns):
                raise TypeError(
                    'tensor dimensions cannot exceed the length of dim_columns')

            # write shape to oss
            shape_path = '%s/meta/%s/shape' % (oss_dir, p.replace(',', '/'))
            _write_shape_to_oss(t.shape, shape_path, **oss_opts)

            # write data to oss
            data_path = '%s/data/%s' % (oss_dir, p.replace(',', '/'))
            writer_tensor = write_coo(t,
                                      data_path,
                                      dim_columns,
                                      value_column,
                                      global_index=True,
                                      **oss_opts)
            tensors.append(writer_tensor)

        session.run(tensors)
    else:
        shape_path = oss_dir + '/meta/shape'
        _write_shape_to_oss(tensor.shape, shape_path, **oss_opts)

        t_type = tensor.dtype
        data_path = oss_dir + '/data'
        writer_tensor = write_coo(tensor,
                                  data_path,
                                  dim_columns,
                                  value_column,
                                  global_index=True,
                                  **oss_opts)
        session.run(writer_tensor)

    # persist to odps table
    ext_table_name = 'mars_persist_ext_%s' % str(uuid.uuid4()).replace(
        '-', '_')
    column_types = ['bigint'] * len(dim_columns) + [np_to_odps_types[t_type]]
    ext_column_types = ['bigint'] * (2 * len(dim_columns)) + [
        np_to_odps_types[t_type]
    ]
    column_names = dim_columns + [value_column]
    ext_column_names = list(chain(*([c, 'global_' + c]
                                    for c in dim_columns))) + [value_column]
    if partitions:
        if isinstance(partitions, six.string_types):
            partitions = [partitions]
        target_schema = Schema.from_lists(column_names, column_types,
                                          partitions,
                                          ['string'] * len(partitions))
        ext_schema = Schema.from_lists(ext_column_names, ext_column_types,
                                       partitions,
                                       ['string'] * len(partitions))
    else:
        target_schema = Schema.from_lists(column_names, column_types)
        ext_schema = Schema.from_lists(ext_column_names, ext_column_types)

    ext_table = odps.create_table(
        ext_table_name,
        ext_schema,
        external_stored_as='PARQUET',
        location='oss://%s:%s@%s/%s/%s/data' %
        (oss_opts['access_id'], oss_opts['secret_access_key'],
         oss_opts['endpoint'].split('://')[1], oss_opts['bucket_name'],
         oss_path),
    )
    if partitions:
        for partition in tensor.keys():
            ext_table.create_partition(partition)
    odps.create_table(table_name, target_schema, if_not_exists=True)
    ext_df = ext_table.to_df()
    fields = [
        ext_df['global_' + f].rename(f) for f in target_schema.names[:-1]
    ] + target_schema.names[-1:]
    if partitions:
        fields = fields + partitions
        ext_df[fields].persist(table_name, partitions=partitions)
    else:
        ext_df[fields].persist(table_name)
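
A hypothetical call sketch for the function above, assuming persist_tensor_via_oss has been imported from the module it lives in; the ODPS connection details, bucket, table and column names below are placeholders rather than values from the original code:

import mars.tensor as mt
from odps import ODPS

# Placeholder ODPS account; substitute real credentials and endpoint.
o = ODPS('<access-id>', '<secret-access-key>', '<project>',
         endpoint='<odps-endpoint>')

tensor = mt.random.rand(1000, 10, chunk_size=100)   # tensor to persist

persist_tensor_via_oss(
    o, tensor,
    'mars_persisted_table',                          # target ODPS table (assumed name)
    ['dim_0', 'dim_1'],                              # dim_columns
    'value',                                         # value_column
    oss_endpoint='<oss-endpoint>',
    oss_access_id='<access-id>',
    oss_access_key='<access-key>',
    oss_bucket_name='<bucket>',
    oss_path='oss://<bucket>/mars/tensor_data')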