Exemple #1
0
def mock_and_validate_create_new_session(defaults):
    """Mock the session-creation endpoint, build the shared client, check it.

    Registers a mocked ``POST <base_uri>/sessions/`` that answers 201 with a
    ``starting`` pyspark session payload, then constructs an ``HttpClient``
    against ``base_uri`` and stores it in the module-level ``client_test``.

    Args:
        defaults: Forwarded as ``load_defaults`` to ``HttpClient``. When
            truthy, the client's config is additionally expected to contain
            ``spark.config`` with the value ``'override'``.

    Raises:
        AssertionError: If the client's configuration does not hold the
            expected values.
    """
    global client_test
    expected_name = 'Test App'
    session_payload = {
        u'kind': u'pyspark',
        u'log': [],
        u'proxyUser': None,
        u'state': u'starting',
        u'owner': None,
        u'id': session_id
    }
    responses.add(
        responses.POST,
        base_uri + "/sessions/",
        json=session_payload,
        status=201,
        content_type='application/json',
    )

    client_test = HttpClient(base_uri,
                             conf_dict={'spark.app.name': expected_name},
                             load_defaults=defaults)

    config = client_test._config
    section = client_test._CONFIG_SECTION

    # The explicitly supplied conf_dict entry must always be present.
    assert config.get(section, 'spark.app.name') == expected_name

    if not defaults:
        return

    # With defaults loaded, the 'spark.config' option must exist and carry
    # the value 'override'.
    assert config.has_option(section, 'spark.config')
    assert config.get(section, 'spark.config') == 'override'
Exemple #2
0
def test_connect_to_existing_session():
    """Reconnecting to an existing session yields a client with no app name.

    Mocks ``POST <base_uri>/sessions/<session_id>/connect`` and builds an
    ``HttpClient`` from the session URI with ``load_defaults=False``. Since
    no ``conf_dict`` is supplied, looking up ``spark.app.name`` in the
    client's config must raise ``NoOptionError``.
    """
    reconnect_session_uri = base_uri + "/sessions/" + str(session_id)
    reconnect_mock_request_uri = reconnect_session_uri + "/connect"
    json_data = {
        u'kind': u'pyspark',
        u'log': [],
        u'proxyUser': None,
        u'state': u'starting',
        u'owner': None,
        u'id': session_id
    }
    with responses.RequestsMock() as rsps:
        rsps.add(responses.POST,
                 reconnect_mock_request_uri,
                 json=json_data,
                 status=201,
                 content_type='application/json')

        # Must be constructed inside the mock context so the connect request
        # is served by the registered response.
        client_reconnect = HttpClient(reconnect_session_uri,
                                      load_defaults=False)

    assert isinstance(client_reconnect, HttpClient)
    # The lookup itself is expected to raise; comparing its result (as the
    # previous version did) was dead code that read like an assertion.
    with pytest.raises(NoOptionError):
        client_reconnect._config.get(client_reconnect._CONFIG_SECTION,
                                     'spark.app.name')
Exemple #3
0
    def create_pyspark_session(self, host):
        """Create a PySpark session on ``host`` and wait until it is idle.

        Posts ``{'kind': 'pyspark'}`` to ``<host>/sessions``, derives the
        session URL from the response's ``location`` header, polls that URL
        until it reports the ``idle`` state, and finally creates an
        ``HttpClient`` bound to the session.

        Args:
            host: Base URL of the server; the session endpoint and the
                returned ``location`` header are appended to it.

        Side effects:
            Sets ``self._host``, ``self._session_url`` and ``self._lc``.

        Raises:
            KeyError: If the creation response has no ``location`` header.
        """
        self._host = host
        data = {'kind': 'pyspark'}

        # Create a Spark session
        print('Creating Spark session...')
        r = requests.post(host + '/sessions',
                          data=json.dumps(data),
                          headers=self._headers)

        # Wait until the new Spark session is ready to use
        self._session_url = host + r.headers['location']

        # Only the blocking behaviour matters here; the final poll response
        # is not needed, so it is not bound to a name.
        self._wait_for_state(self._session_url, 'idle')

        # Create client for Livy batch jobs
        self._lc = HttpClient(self._session_url)
Exemple #4
0
class LivyHandler:
    """Small helper that drives one PySpark session over a REST API.

    The handler creates a session, submits code strings as statements,
    uploads and runs Python files through an ``HttpClient``, and deletes the
    session on ``close()``.

    A handler constructed with ``host=None`` has no session; call
    ``create_pyspark_session`` before using ``exec_str`` or ``exec_file``.
    """

    # States after which polling can never reach the desired state.
    # NOTE(review): names taken from the Livy REST API session/statement
    # state lists -- confirm against the server version in use.
    _FAILED_STATES = frozenset(['error', 'dead', 'killed', 'cancelled'])

    def __init__(self, host='http://localhost:8998'):
        """Prepare request headers and, unless ``host`` is None, open a session.

        Args:
            host: Base URL of the server, or ``None`` to defer session
                creation.
        """
        self._headers = {'Content-Type': 'application/json'}
        # Define the session attributes up front so a handler created with
        # host=None is in a well-defined "no session" state.
        self._host = None
        self._session_url = None
        self._lc = None
        if host is not None:
            self.create_pyspark_session(host)

    def _wait_for_state(self, url, desired_state):
        """Poll ``url`` once per second until its ``state`` is ``desired_state``.

        Args:
            url: Resource whose JSON body carries a ``state`` field.
            desired_state: State value to wait for.

        Returns:
            The response object of the poll that reported ``desired_state``.

        Raises:
            RuntimeError: If the resource reports a failure state (see
                ``_FAILED_STATES``) other than ``desired_state``; without
                this check the loop would never terminate.
        """
        while True:
            r = requests.get(url, headers=self._headers)
            state = r.json()['state']
            if state == desired_state:
                return r
            if state in self._FAILED_STATES:
                raise RuntimeError(
                    'Resource %s entered state %r while waiting for %r'
                    % (url, state, desired_state))
            time.sleep(1)

    def create_pyspark_session(self, host):
        """Create a PySpark session on ``host`` and wait until it is idle.

        Sets ``self._host``, ``self._session_url`` (``host`` plus the
        ``location`` header of the creation response) and ``self._lc`` (an
        ``HttpClient`` bound to the session URL).

        Args:
            host: Base URL of the server.
        """
        self._host = host
        data = {'kind': 'pyspark'}

        # Create a Spark session
        print('Creating Spark session...')
        r = requests.post(host + '/sessions',
                          data=json.dumps(data),
                          headers=self._headers)

        # Wait until the new Spark session is ready to use
        self._session_url = host + r.headers['location']

        # Only the blocking behaviour is needed; the response is discarded.
        self._wait_for_state(self._session_url, 'idle')

        # Create client for Livy batch jobs
        self._lc = HttpClient(self._session_url)

    def exec_str(self, code):
        """Run ``code`` as a statement in the session and return its output.

        Args:
            code: Source code string to submit.

        Returns:
            A dict with a single ``'text/plain'`` key. On error it maps to
            the statement's ``traceback`` value; otherwise to a one-element
            list holding the statement's ``text/plain`` output.
        """
        print('Submitting code...')
        statements_url = self._session_url + '/statements'
        data = {'code': code}
        r = requests.post(statements_url,
                          data=json.dumps(data),
                          headers=self._headers)

        # Wait until the code completes
        print('Running code...')
        status_url = self._host + r.headers['location']

        r = self._wait_for_state(status_url, 'available')
        output = r.json()['output']
        print('output=', output)
        if output['status'] == 'error':
            ans = {'text/plain': output['traceback']}
        else:
            ans = {'text/plain': [output['data']['text/plain']]}
        return ans

    def exec_file(self, py_uri):
        """Upload a Python file and run its ``main`` function as a job.

        The file is uploaded through the session client, then imported
        locally by its base name (so it must be importable from the current
        process), and a job calling ``main(jc.sc)`` is submitted.

        Args:
            py_uri: Path to the Python file; resolved to an absolute path.

        Returns:
            The result of the submitted job.

        Raises:
            ImportError: If the module cannot be imported locally.
        """
        py_uri_abs = abspath(py_uri)
        self._lc.upload_pyfile(py_uri_abs)
        m = splitext(basename(py_uri_abs))[0]
        m_imp = import_module(m)

        def upload_pyfile_job(jc):
            return m_imp.main(jc.sc)

        return self._lc.submit(upload_pyfile_job).result()

    def close(self):
        """Delete the session on the server; no-op when none was created."""
        if self._session_url is None:
            return
        print('Closing Spark session...')
        requests.delete(self._session_url, headers=self._headers)
Exemple #5
0
        Usage: pi_app [livy url] [slices]

        To run this Python script you need to install livy-python-api-*version*.tar.gz with
        easy_install first.

        python /pathTo/pi_app.py http://<livy-server>:8998 2
    """

    if len(sys.argv) != 3:
        print("Usage: pi_app <livy url> <slices>", file=sys.stderr)
        exit(-1)

    slices = int(sys.argv[2])
    samples = 100000 * slices

    client = HttpClient(sys.argv[1])

    def f(_):
        """Draw one random point and report whether it hits the unit circle.

        The argument is ignored. Two values are drawn with ``random()`` and
        mapped via ``v * 2 - 1`` (assumes ``random`` is ``random.random``,
        giving coordinates in [-1, 1) -- TODO confirm against the imports).

        Returns:
            1 if ``x**2 + y**2 <= 1``, otherwise 0.
        """
        px = random() * 2 - 1
        py = random() * 2 - 1
        inside_circle = px ** 2 + py ** 2 <= 1
        if inside_circle:
            return 1
        return 0

    def pi_job(context):
        """Estimate pi from the share of sampled points that ``f`` counts.

        Distributes ``range(1, samples + 1)`` over ``slices`` partitions via
        ``context.sc.parallelize``, maps ``f`` over it, and sums the results
        with ``add``.

        Returns:
            ``4.0 * count / samples``.
        """
        sample_ids = range(1, samples + 1)
        rdd = context.sc.parallelize(sample_ids, slices)
        hits = rdd.map(f).reduce(add)
        return 4.0 * hits / samples

    pi = client.submit(pi_job).result()

    print("Pi is roughly %f" % pi)
    client.stop(True)
Exemple #6
0
        Usage: pi_app [livy url] [slices]

        To run this Python script you need to install livy-python-api-*version*.tar.gz with
        easy_install first.

        python /pathTo/pi_app.py http://<livy-server>:8998 2
    """

    if len(sys.argv) != 3:
        print("Usage: pi_app <livy url> <slices>", file=sys.stderr)
        exit(-1)

    slices = int(sys.argv[2])
    samples = 100000 * slices

    client = HttpClient(sys.argv[1])

    def f(_):
        """Draw one random point and report whether it hits the unit circle.

        The argument is ignored. Two values are drawn with ``random()`` and
        mapped via ``v * 2 - 1`` (assumes ``random`` is ``random.random``,
        giving coordinates in [-1, 1) -- TODO confirm against the imports).

        Returns:
            1 if ``x**2 + y**2 <= 1``, otherwise 0.
        """
        px = random() * 2 - 1
        py = random() * 2 - 1
        inside_circle = px**2 + py**2 <= 1
        if inside_circle:
            return 1
        return 0

    def pi_job(context):
        """Estimate pi from the share of sampled points that ``f`` counts.

        Distributes ``range(1, samples + 1)`` over ``slices`` partitions via
        ``context.sc.parallelize``, maps ``f`` over it, and sums the results
        with ``add``.

        Returns:
            ``4.0 * count / samples``.
        """
        sample_ids = range(1, samples + 1)
        rdd = context.sc.parallelize(sample_ids, slices)
        hits = rdd.map(f).reduce(add)
        return 4.0 * hits / samples

    pi = client.submit(pi_job).result()

    print("Pi is roughly %f" % pi)
    client.stop(True)
Exemple #7
0
from flask import Flask, send_file, request, jsonify
from flask_restful import Resource, Api
from flask_restful import reqparse
from livy.client import HttpClient
from urlparse import urlparse
import service

# Module-level slot for the kernel-density response; KernelDensityInput.post
# declares it ``global``. (A ``global`` statement at module scope has no
# effect, so none is needed here.)
kdensity_response = None

app = Flask(__name__, static_folder="static")
api = Api(app)
# NOTE(review): the server address is hard-coded, and the URL is passed
# through urlparse() before being handed to HttpClient -- confirm that
# HttpClient expects a parsed result rather than a plain string.
client = HttpClient(urlparse("http://172.16.2.72:8998"), load_defaults=False)
# These run at import time: the shared client is handed to the service
# module, which then uploads its jar files.
service.set_client(client)
service.upload_jar_files()


class KernelDensityInput(Resource):
    def post(self):
        global kdensity_response
        print "Inside post of Kerneldenisty"
        parser = reqparse.RequestParser()
        parser.add_argument('column', type=str, help='Column name')
        parser.add_argument('bandwidth', type=str, help='Value for bandwidth')
        parser.add_argument('points', type=str, help='Value for points list')
        args = parser.parse_args()

        _column = args['column']
        _bandwidth = args['bandwidth']
        _points = args['points']
        print "column::", _column