예제 #1
0
        To run this Python script you need to install livy-python-api-*version*.tar.gz with
        easy_install first.

        python /pathTo/pi_app.py http://<livy-server>:8998 2
    """

    # Validate CLI arguments: exactly two are required, <livy url> and <slices>.
    if len(sys.argv) != 3:
        print("Usage: pi_app <livy url> <slices>", file=sys.stderr)
        exit(-1)

    # Number of Spark partitions; the total Monte Carlo sample count scales with it.
    slices = int(sys.argv[2])
    samples = 100000 * slices

    # Livy client pointed at the server URL (e.g. http://<livy-server>:8998).
    client = HttpClient(sys.argv[1])

    def f(_):
        """Draw one uniform point in the square [-1, 1] x [-1, 1].

        Returns 1 when the point lies inside the unit circle, else 0.
        The argument is ignored (it is the RDD element index).
        """
        px = random() * 2 - 1
        py = random() * 2 - 1
        inside = px * px + py * py <= 1
        return int(inside)

    def pi_job(context):
        """Estimate pi on the cluster: count circle hits over all samples and scale by 4."""
        rdd = context.sc.parallelize(range(1, samples + 1), slices)
        hits = rdd.map(f).reduce(add)
        return 4.0 * hits / samples

    # Submit the job to the Livy session and block until the result is back.
    pi = client.submit(pi_job).result()

    print("Pi is roughly %f" % pi)
    # Stop the client; True presumably also deletes the server-side session — TODO confirm.
    client.stop(True)

예제 #2
0
class LivyHandler:
    """Thin wrapper around the Livy REST API for running PySpark code.

    Manages a single interactive PySpark session: creates it on
    construction (unless host is None), lets callers execute code
    strings or uploaded Python files, and deletes the session on
    close().
    """

    # Session/statement states that can never progress to the desired
    # state; polling past them would hang forever.
    _TERMINAL_STATES = frozenset({'error', 'dead', 'killed', 'cancelled'})

    def __init__(self, host='http://localhost:8998'):
        self._headers = {'Content-Type': 'application/json'}
        # host=None defers setup; the caller must then invoke
        # create_pyspark_session() before using any other method.
        if host is not None:
            self.create_pyspark_session(host)

    def _wait_for_state(self, url, desired_state):
        """Poll *url* once per second until its 'state' equals *desired_state*.

        Returns the final requests.Response so callers can inspect the
        body. Raises RuntimeError if the resource enters a terminal
        state (error/dead/killed/cancelled) instead of spinning forever.
        """
        while True:
            r = requests.get(url, headers=self._headers)
            state = r.json()['state']
            if state == desired_state:
                return r
            if state in self._TERMINAL_STATES:
                raise RuntimeError(
                    'Livy resource at %s reached terminal state %r while '
                    'waiting for %r' % (url, state, desired_state))
            time.sleep(1)

    def create_pyspark_session(self, host):
        """Create a new PySpark session on *host* and block until it is idle."""
        self._host = host
        data = {'kind': 'pyspark'}

        # Create a Spark session
        print('Creating Spark session...')
        r = requests.post(host + '/sessions',
                          data=json.dumps(data),
                          headers=self._headers)

        # The Location response header points at the new session resource.
        self._session_url = host + r.headers['location']

        # Wait until the new Spark session is ready to use
        self._wait_for_state(self._session_url, 'idle')

        # Client for Livy batch jobs (used by exec_file)
        self._lc = HttpClient(self._session_url)

    def exec_str(self, code):
        """Run the Python *code* string in the session.

        Returns a dict of the form {'text/plain': [lines]} holding
        either the statement's plain-text output or, on error, the
        remote traceback lines.
        """
        print('Submitting code...')
        statements_url = self._session_url + '/statements'
        data = {'code': code}
        r = requests.post(statements_url,
                          data=json.dumps(data),
                          headers=self._headers)

        # Wait until the code completes
        print('Running code...')
        status_url = self._host + r.headers['location']

        r = self._wait_for_state(status_url, 'available')
        output = r.json()['output']
        print('output=', output)
        if output['status'] == 'error':
            # 'traceback' is already a list of lines, so no extra wrapping.
            ans = {'text/plain': output['traceback']}
        else:
            ans = {'text/plain': [output['data']['text/plain']]}
        return ans

    def exec_file(self, py_uri):
        """Upload the Python file at *py_uri* and run its main(sc) on the cluster.

        The module must also be importable locally (it is imported here
        so the submitted closure can reference its main function); a
        failure propagates as ImportError. Returns whatever main(sc)
        returns.
        """
        py_uri_abs = abspath(py_uri)
        self._lc.upload_pyfile(py_uri_abs)
        module_name = splitext(basename(py_uri_abs))[0]
        # NOTE: the previous try/except ImportError: raise was a no-op
        # and has been removed; the exception propagates unchanged.
        module = import_module(module_name)

        def upload_pyfile_job(jc):
            return module.main(jc.sc)

        return self._lc.submit(upload_pyfile_job).result()

    def close(self):
        """Delete the Livy session on the server."""
        print('Closing Spark session...')
        requests.delete(self._session_url, headers=self._headers)
예제 #3
0
        To run this Python script you need to install livy-python-api-*version*.tar.gz with
        easy_install first.

        python /pathTo/pi_app.py http://<livy-server>:8998 2
    """

    # Require exactly two CLI arguments: the Livy server URL and the slice count.
    if len(sys.argv) != 3:
        print("Usage: pi_app <livy url> <slices>", file=sys.stderr)
        exit(-1)

    # Partition count for the Spark job; sample size grows linearly with it.
    slices = int(sys.argv[2])
    samples = 100000 * slices

    # HTTP client for the Livy server given on the command line.
    client = HttpClient(sys.argv[1])

    def f(_):
        """Monte Carlo sample: pick a random point in [-1, 1]^2.

        Returns 1 if the point falls within the unit circle, 0
        otherwise. The parameter (the RDD element) is unused.
        """
        u, v = random() * 2 - 1, random() * 2 - 1
        return 1 if u * u + v * v <= 1 else 0

    def pi_job(context):
        """Run the distributed sampling and return the resulting pi estimate."""
        points = context.sc.parallelize(range(1, samples + 1), slices)
        inside = points.map(f).reduce(add)
        return 4.0 * inside / samples

    # Block until the distributed job finishes and retrieve the estimate.
    pi = client.submit(pi_job).result()

    print("Pi is roughly %f" % pi)
    # Shut the client down; True presumably tears down the remote session too — TODO confirm.
    client.stop(True)