Example #1
0
    def check_status(self, notebook, snippet):
        """Report the state of the batch job referenced by ``snippet``.

        The batch id is taken from ``snippet['result']['handle']['id']`` and
        handed to the Spark API's ``get_batch_status``. ``notebook`` is not
        used by this method.

        Returns:
            A dict with a single ``status`` key holding whatever
            ``get_batch_status`` returned.
        """
        spark = get_spark_api(self.user)
        batch_id = snippet['result']['handle']['id']
        return {'status': spark.get_batch_status(batch_id)}
Example #2
0
    def create_session(self, lang='scala', properties=None):
        """Create a Spark session and wait until it leaves the 'starting' state.

        When no ``properties`` are supplied and ``USE_DEFAULT_CONFIGURATION``
        is enabled, the user's saved default configuration for the ``spark``
        app is used instead (if one exists). The properties are converted by
        ``self.get_livy_props`` and passed as keyword arguments to the API's
        ``create_session``.

        Args:
            lang: session kind, echoed back as ``type`` in the result.
            properties: optional list of property entries.

        Returns:
            A dict with ``type``, ``id`` and ``properties`` keys.

        Raises:
            QueryError: if the session is not ``'idle'`` once polling ends
                (either it failed or it was still starting after 120 polls).
        """
        if not properties and USE_DEFAULT_CONFIGURATION.get():
            user_config = DefaultConfiguration.objects.get_configuration_for_user(
                app='spark', user=self.user)
            if user_config is not None:
                properties = user_config.properties_list

        props = self.get_livy_props(lang, properties)

        api = get_spark_api(self.user)
        response = api.create_session(**props)
        session_id = response['id']

        status = api.get_session(session_id)
        count = 0

        # Sleep *before* each re-poll: the state was fetched a moment ago, so
        # an immediate second request is wasted, and sleeping after the final
        # poll would delay the caller for a second once the session is ready.
        while status['state'] == 'starting' and count < 120:
            time.sleep(1)
            status = api.get_session(session_id)
            count += 1

        if status['state'] != 'idle':
            # Tolerate a status payload without a 'log' entry.
            log_lines = status.get('log')
            info = '\n'.join(log_lines) if log_lines else 'timeout'
            raise QueryError(
                _('The Spark session is %s and could not be created in the cluster: %s'
                  ) % (status['state'], info))

        return {
            'type': lang,
            'id': session_id,
            'properties': self.to_properties(props)
        }
Example #3
0
    def execute(self, notebook, snippet):
        """Submit the snippet as a batch job and return its handle.

        The batch request is built from ``snippet['properties']`` and depends
        on ``snippet['type']``: ``'jar'`` and ``'py'`` each send a reduced set
        of fields, any other type sends the full set. ``notebook`` is unused.

        Returns:
            A dict with the batch ``id`` from the API response,
            ``has_result_set`` set to True and an empty ``properties`` list.
        """
        api = get_spark_api(self.user)
        kind = snippet['type']
        prop = snippet['properties'].get

        if kind == 'jar':
            properties = {
                'file': prop('app_jar'),
                'className': prop('class'),
                'args': prop('arguments'),
            }
        elif kind == 'py':
            # NOTE(review): this branch reads 'argument' (singular) while the
            # other branches read 'arguments' -- confirm which key the
            # frontend actually sends.
            properties = {
                'file': prop('py_file'),
                'args': prop('argument', []),
            }
        else:
            # Not forwarded yet: driverMemory, driverCores, executorMemory,
            # executorCores, archives.
            properties = {
                'file': prop('app_jar'),
                'className': prop('class'),
                'args': prop('arguments'),
                'pyFiles': prop('py_file'),
                'files': prop('files'),
            }

        batch = api.submit_batch(properties)
        return {'id': batch['id'], 'has_result_set': True, 'properties': []}
Example #4
0
    def close_statement(self, notebook, snippet):
        """Close the batch referenced by the snippet's result handle.

        The id is read from ``snippet['result']['handle']['id']``.
        ``notebook`` is unused.

        Returns:
            ``{'session': <id>, 'status': 0}`` after calling ``close_batch``,
            or ``{'status': -1}`` when the handle id is None and nothing was
            closed.
        """
        api = get_spark_api(self.user)
        batch_id = snippet['result']['handle']['id']

        if batch_id is None:
            return {'status': -1}  # skipped

        api.close_batch(batch_id)
        return {'session': batch_id, 'status': 0}
Example #5
0
    def close_session(self, session):
        """Close the given Spark session through the API.

        Args:
            session: dict whose ``id`` entry identifies the session.

        Returns:
            ``{'session': <id>, 'status': 0}`` on success, ``{'status': -1}``
            when ``session['id']`` is None, and None when the API raised a
            ``RestException`` with a code other than 404 or 500.

        Raises:
            SessionExpired: when the API answers with code 404 or 500.
        """
        api = get_spark_api(self.user)
        session_id = session['id']

        if session_id is None:
            return {'status': -1}

        try:
            api.close(session_id)
        except RestException as e:
            if e.code in (404, 500):  # TODO remove the 500
                raise SessionExpired(e)
            # NOTE(review): any other REST error is swallowed and the caller
            # receives None -- confirm this best-effort behaviour is intended.
            return None

        return {'session': session_id, 'status': 0}
Example #6
0
    def check_status(self, notebook, snippet):
        """Return the state of a statement previously submitted to a session.

        The session is resolved with ``_get_snippet_session`` and the
        statement id is read from ``snippet['result']['handle']['id']``.

        Returns:
            A dict with a single ``status`` key holding the ``state`` field of
            the API response.

        Raises:
            SessionExpired: when the API error message says the session was
                not found.
            Exception: any other error from the API is re-raised unchanged.
        """
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        try:
            response = api.fetch_data(session['id'], cell)
        except Exception as e:
            message = force_unicode(str(e)).lower()
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence and triggers a warning on modern Python.
            if re.search(r"session ('\d+' )?not found", message):
                raise SessionExpired(e)
            # Bare raise keeps the original traceback intact.
            raise

        return {'status': response['state']}
Example #7
0
    def get_sample_data(self,
                        snippet,
                        database=None,
                        table=None,
                        column=None,
                        is_async=False,
                        operation=None):
        """Handle the ``'hello'`` sample operation; reject everything else.

        For ``operation == 'hello'`` the Spark API's ``get_status`` is called
        and its result discarded (presumably a liveness check -- any exception
        it raises propagates to the caller). The other arguments are unused.

        Returns:
            ``{'status': 0, 'rows': []}``.

        Raises:
            NotImplementedError: for any operation other than ``'hello'``.
        """
        if operation != 'hello':
            raise NotImplementedError()

        get_spark_api(self.user).get_status()

        return {'status': 0, 'rows': []}
Example #8
0
    def execute(self, notebook, snippet):
        """Submit the snippet's statement to its Spark session.

        The session is resolved with ``_get_snippet_session`` and
        ``snippet['statement']`` is sent through ``submit_statement``.

        Returns:
            A dict with the statement ``id`` from the API response,
            ``has_result_set`` set to True and ``sync`` set to False.

        Raises:
            SessionExpired: when the API error message reports a missing
                session, a refused connection or a busy session.
            Exception: any other error from the API is re-raised unchanged.
        """
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)

        try:
            response = api.submit_statement(session['id'],
                                            snippet['statement'])
        except Exception as e:
            message = force_unicode(str(e)).lower()
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence and triggers a warning on modern Python.
            session_unusable = (
                re.search(r"session ('\d+' )?not found", message)
                or 'connection refused' in message
                or 'session is in state busy' in message
            )
            if session_unusable:
                raise SessionExpired(e)
            # Bare raise keeps the original traceback intact.
            raise

        return {
            'id': response['id'],
            'has_result_set': True,
            'sync': False
        }
Example #9
0
 def get_api(self):
   """Return the result of ``get_spark_api`` for ``self.user`` and ``self.interpreter``."""
   spark_api = get_spark_api(self.user, self.interpreter)
   return spark_api
Example #10
0
    def get_log(self, notebook, snippet, startFrom=0, size=None):
        """Fetch log output for the session the snippet belongs to.

        The session is resolved with ``_get_snippet_session``; ``startFrom``
        and ``size`` are forwarded unchanged to the API's ``get_log``.

        Returns:
            Whatever the API's ``get_log`` returns.
        """
        spark = get_spark_api(self.user)
        session_id = _get_snippet_session(notebook, snippet)['id']
        return spark.get_log(session_id, startFrom=startFrom, size=size)
Example #11
0
    def cancel(self, notebook, snippet):
        """Ask the API to cancel work on the snippet's session.

        The session is resolved with ``_get_snippet_session`` and its id is
        passed to the API's ``cancel``. The API response is not inspected.

        Returns:
            ``{'status': 0}``.
        """
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        # The response is intentionally discarded; it was previously bound to
        # an unused local.
        api.cancel(session['id'])

        return {'status': 0}
Example #12
0
    def fetch_result(self, notebook, snippet, rows, start_over):
        """Fetch the output of a submitted statement and normalise it.

        Args:
            notebook: passed to ``_get_snippet_session`` to find the session.
            snippet: snippet dict; the statement id is read from
                ``snippet['result']['handle']['id']``.
            rows: not used by this method.
            start_over: when falsy the returned ``data`` is emptied, because
                fetching from anywhere but the start is not supported.

        Returns:
            When the output status is ``'ok'``: a dict with ``data``,
            ``images``, ``meta`` and ``type`` keys. ``type`` is ``'table'``
            for Livy-table or JSON payloads and ``'text'`` for plain text.
            When the status is neither ``'ok'`` nor ``'error'`` the method
            falls through and returns None.

        Raises:
            SessionExpired: when the API error message says the session was
                not found.
            QueryError: when the output status is ``'error'``.
            Exception: any other error from the API is re-raised unchanged.
        """
        api = get_spark_api(self.user)
        session = _get_snippet_session(notebook, snippet)
        cell = snippet['result']['handle']['id']

        try:
            response = api.fetch_data(session['id'], cell)
        except Exception as e:
            message = force_unicode(str(e)).lower()
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence and triggers a warning on modern Python.
            if re.search(r"session ('\d+' )?not found", message):
                raise SessionExpired(e)
            # Bare raise keeps the original traceback intact.
            raise

        content = response['output']

        if content['status'] == 'ok':
            data = content['data']
            images = []

            if 'application/vnd.livy.table.v1+json' in data:
                table = data['application/vnd.livy.table.v1+json']
                data = table['data']
                meta = [{
                    'name': h['name'],
                    'type': h['type'],
                    'comment': ''
                } for h in table['headers']]
                # Named result_type so the builtin ``type`` is not shadowed.
                result_type = 'table'
            else:
                # An image may accompany either a JSON or a plain-text payload.
                if 'image/png' in data:
                    images = [data['image/png']]

                if 'application/json' in data:
                    result = data['application/json']
                    data = result['data']
                    meta = [{
                        'name': field['name'],
                        'type': field['type'],
                        'comment': ''
                    } for field in result['schema']['fields']]
                    result_type = 'table'
                else:
                    # Wrap the text as a one-row, one-column result.
                    data = [[data['text/plain']]]
                    meta = [{
                        'name': 'Header',
                        'type': 'STRING_TYPE',
                        'comment': ''
                    }]
                    result_type = 'text'

            # Non start_over not supported
            if not start_over:
                data = []

            return {
                'data': data,
                'images': images,
                'meta': meta,
                'type': result_type
            }
        elif content['status'] == 'error':
            tb = content.get('traceback', None)

            if not tb:
                # No traceback: build the message from the error name and,
                # when present, the error value.
                msg = content.get('ename', 'unknown error')

                evalue = content.get('evalue')
                if evalue is not None:
                    msg = '%s: %s' % (msg, evalue)
            else:
                msg = ''.join(tb)

            raise QueryError(msg)
Example #13
0
    def create_session(self, lang='scala', properties=None):
        """Create a Spark session and wait until it leaves the 'starting' state.

        When no ``properties`` are supplied, the user's saved default
        configuration for the ``spark`` app is used if
        ``USE_DEFAULT_CONFIGURATION`` is enabled and one exists; otherwise
        ``self.get_properties()`` provides them. The name/value entries are
        flattened into keyword arguments for the API's ``create_session``.

        Args:
            lang: session kind, sent as ``kind`` and echoed back as ``type``.
            properties: optional list of ``{'name': ..., 'value': ...}``
                entries.

        Returns:
            A dict with ``type``, ``id`` and ``properties`` keys, where
            ``properties`` is the list of entries that was used.

        Raises:
            QueryError: if the session is not ``'idle'`` once polling ends
                (either it failed or it was still starting after 120 polls).
        """
        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app='spark', user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties()

        if properties is not None:
            props = {p['name']: p['value'] for p in properties}
        else:
            props = {}

        # HUE-4761: Hue's session request is causing Livy to fail with "JsonMappingException: Can not deserialize
        # instance of scala.collection.immutable.List out of VALUE_STRING token" due to List type values
        # not being formed properly, they are quoted csv strings (without brackets) instead of proper List
        # types, this is for keys; archives, jars, files and pyFiles. The Mako frontend probably should be
        # modified to pass the values as Livy expects but for now we coerce these types to be Lists.
        # Issue only occurs when non-default values are used because the default path properly sets the
        # empty list '[]' for these four values.
        # Note also that Livy has a 90 second timeout for the session request to complete, this needs to
        # be increased for requests that take longer, for example when loading large archives.
        for key in ('archives', 'jars', 'files', 'pyFiles'):
            value = props.get(key)
            # A property that is absent (or None) is left alone instead of
            # raising; only comma-separated strings need coercing.
            if value is None:
                continue
            if not isinstance(value, list):
                props[key] = value.split(",")
                LOG.debug("Check List type: %s was not a list", key)

        # Convert the conf list to a dict for Livy; a missing conf becomes an
        # empty dict rather than a KeyError.
        conf_pairs = props.get('conf') or []
        LOG.debug("Property Spark Conf kvp list from UI is: %s", conf_pairs)
        props['conf'] = {
            conf.get('key'): conf.get('value')
            for conf in conf_pairs
        }
        LOG.debug("Property Spark Conf dictionary is: %s", props['conf'])

        props['kind'] = lang

        api = get_spark_api(self.user)

        response = api.create_session(**props)
        session_id = response['id']

        status = api.get_session(session_id)
        count = 0

        # Sleep *before* each re-poll: the state was fetched a moment ago, so
        # an immediate second request is wasted, and sleeping after the final
        # poll would delay the caller for a second once the session is ready.
        while status['state'] == 'starting' and count < 120:
            time.sleep(1)
            status = api.get_session(session_id)
            count += 1

        if status['state'] != 'idle':
            # Tolerate a status payload without a 'log' entry.
            log_lines = status.get('log')
            info = '\n'.join(log_lines) if log_lines else 'timeout'
            raise QueryError(
                _('The Spark session could not be created in the cluster: %s')
                % info)

        return {'type': lang, 'id': session_id, 'properties': properties}
Example #14
0
    def get_log(self, notebook, snippet, startFrom=0, size=None):
        """Fetch log output for the batch referenced by the snippet.

        The batch id is read from ``snippet['result']['handle']['id']``;
        ``startFrom`` and ``size`` are forwarded unchanged to the API's
        ``get_batch_log``. ``notebook`` is unused.

        Returns:
            Whatever the API's ``get_batch_log`` returns.
        """
        spark = get_spark_api(self.user)
        batch_id = snippet['result']['handle']['id']
        return spark.get_batch_log(batch_id, startFrom=startFrom, size=size)