Code example #1
File: tests.py  Project: nteract/coffee_boat
    def test_existing_spark(self):
        import subprocess

        import pyspark.context

        sc = pyspark.context.SparkContext(master="spark://localhost:7077")
        from coffee_boat import Captain
        try:
            subprocess.call(["pip", "uninstall", "-y", "pyarrow"])
            with self.assertRaises(ImportError):
                import pyarrow
            captain = Captain(accept_conda_license=True)
            captain.add_pip_packages("pyarrow")
            captain.launch_ship()
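            # find_info (below) runs on each executor and raises ImportError
            # there unless pyarrow was shipped with the launched environment.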
            rdd = sc.parallelize(range(2))

            def find_info(x):
                import os
                import pyarrow
                import sys
                return (x, sys.executable, os.environ['PYTHONPATH'])

            result = rdd.map(find_info).collect()

            # Add a live package!
            captain.add_pip_packages("pybmp")

            def check_pybmp(x):
                import pybmp
                return 1

            rdd.map(check_pybmp).count()
        finally:
            sc.stop()
        self.assertTrue("auto" in result[0][1])
        self.assertTrue("python" in result[0][1])
Code example #2
File: tests.py  Project: willingc/coffee_boat
    def test_non_local_env(self):
        import os
        import pyspark.context
        print(os.environ)
        from coffee_boat import Captain
        # Create a captain
        captain = Captain(install_local=False, accept_conda_license=True)
        # Validate we don't have kubepy installed in local context
        import subprocess
        subprocess.call(["pip", "uninstall", "-y", "kubepy"])
        with self.assertRaises(ImportError):
            import kubepy
        captain.add_pip_packages("pandas==0.22.0", "kubepy")
        # The packages are staged for the cluster but, with
        # install_local=False, still not importable locally
        with self.assertRaises(ImportError):
            import kubepy
        captain.launch_ship()
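        # Connect only after the ship is launched so the executors pick up
        # the shipped environment.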
        sc = pyspark.context.SparkContext(master="spark://localhost:7077")
        try:
            rdd = sc.parallelize(range(2))

            def find_info(x):
                import os
                import kubepy
                import sys
                return (x, sys.executable, os.environ['PYTHONPATH'])
            result = rdd.map(find_info).collect()
        finally:
            sc.stop()
        self.assertTrue("auto" in result[0][1])
        self.assertTrue("python" in result[0][1])
Code example #3
File: tests.py  Project: nteract/coffee_boat
    def test_simple_env(self):
        import logging
        import os
        import subprocess

        import pyspark.context

        logger = logging.getLogger(__name__)
        logger.info(os.environ)
        from coffee_boat import Captain
        # Create a captain
        captain = Captain(accept_conda_license=True)

        # Validate we don't have nbconvert installed in local context
        subprocess.call(["pip", "uninstall", "-y", "nbconvert"])
        with self.assertRaises(ImportError):
            import nbconvert
        captain.add_pip_packages("pandas==0.22.0", "nbconvert")

        # With the default install_local, both packages are now importable
        # locally as well
        import nbconvert
        import pandas
        captain.launch_ship()
        logger.info("Ship launched , set PYSPARK_PYTHON to {0}".format(
            os.environ['PYSPARK_PYTHON']))
        logger.info("Connecting to local Spark master.")
        sc = pyspark.context.SparkContext(master="spark://localhost:7077")
        try:
            rdd = sc.parallelize(range(2))

            def find_info(x):
                import sys
                import os
                return (x, sys.executable, os.environ['PYTHONPATH'],
                        os.listdir('.'))

            result = rdd.map(find_info).collect()
            logger.info("Result {0}".format(result))

            def test_imports(x):
                import nbconvert
                return 1

            rdd.map(test_imports).collect()
        finally:
            sc.stop()
        self.assertTrue("auto" in result[0][1])
        self.assertTrue("python" in result[0][1])