예제 #1
0
    def find_entity(self, source_type, type, name, **filters):
        """
        Look up exactly one entity matching the given source type, type and name.

        GET /api/v3/entities?query=((sourceType:<source_type>)AND(type:<type>)AND(originalName:<name>))
        http://cloudera.github.io/navigator/apidocs/v3/path__v3_entities.html

        :param source_type: value matched against the ``sourceType`` field.
        :param type: value matched against the ``type`` field (the name shadows
          the builtin but is kept so keyword callers keep working).
        :param name: value matched against the ``originalName`` field.
        :param filters: extra ``field=value`` pairs ANDed into the query; on a
          key clash they replace the default filters built here.
        :return: the first (and only) element of the response.
        :raises NavigatorApiException: if the response is empty, if it holds
          more than one entity, or if the REST call raises RestException.
        """
        try:
            params = self.__params

            query_filters = {
                'sourceType': source_type,
                'type': type,
                'originalName': name,
                'deleted': 'false'
            }

            # Resolve the cluster name once instead of calling the helper twice.
            cluster_name = get_navigator_hue_server_name()
            if cluster_name:
                query_filters['clusterName'] = cluster_name

            # Caller-supplied filters win over the defaults above.
            query_filters.update(filters)

            filter_query = 'AND'.join(
                '(%s:%s)' % (key, value)
                for key, value in query_filters.items())

            params += (
                ('query', filter_query),
                ('offset', 0),
                # We are looking for a single entity, so limit to 2 to be able
                # to detect multiple results.
                ('limit', 2),
            )

            response = self._root.get('entities',
                                      headers=self.__headers,
                                      params=params)

            if not response:
                raise NavigatorApiException(
                    'Could not find entity with query filters: %s' %
                    str(query_filters))
            elif len(response) > 1:
                raise NavigatorApiException(
                    'Found more than 1 entity with query filters: %s' %
                    str(query_filters))

            return response[0]
        except RestException as e:  # "as" form is valid on Python 2.6+ and 3
            msg = 'Failed to find entity: %s' % str(e)
            LOG.exception(msg)
            raise NavigatorApiException(msg)
예제 #2
0
def get_cluster_source_ids(api):
    '''
    Return the query prefix that restricts a search to the cluster's sourceIds.

    ClusterName is handled by getting the list of sourceIds of a Cluster. We
    can't filter directly on a clusterName.

    The prefix is read from the cache under CLUSTER_SOURCE_IDS_CACHE_KEY when
    present; otherwise it is computed through ``api.get_cluster_source_ids()``
    and stored with a timeout of 60 * 60 * 12. It is an empty string when
    ``get_navigator_hue_server_name()`` returns a falsy value.
    '''
    cached_prefix = cache.get(CLUSTER_SOURCE_IDS_CACHE_KEY)
    if cached_prefix is not None:
        return cached_prefix

    prefix = ''
    if get_navigator_hue_server_name():
        sources = api.get_cluster_source_ids()
        LOG.info('Navigator cluster source ids: %s' % (sources, ))
        if not sources:
            # 0 means always false
            prefix = 'sourceId:0 AND'
        else:
            clauses = []
            for entity in sources:
                # Sometimes sourceId seems to be missing
                source_id = entity.get('sourceId') or entity.get('identity')
                clauses.append('sourceId:%s' % (source_id, ))
            prefix = '(%s) AND ' % ' OR '.join(clauses)

    half_day = 60 * 60 * 12  # 1/2 Day
    cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, prefix, half_day)
    return prefix
예제 #3
0
  def get_cluster_source_ids(self):
    """
    Query the 'entities' endpoint for entities whose clusterName equals the
    value returned by get_navigator_hue_server_name(), limited to 200 results.

    The request parameters are logged at INFO level before the call and the
    raw response of ``self._root.get`` is returned unchanged.
    """
    cluster_query = 'clusterName:"%s"' % get_navigator_hue_server_name()
    request_params = (('query', cluster_query), ('limit', 200))

    LOG.info(request_params)
    return self._root.get('entities', headers=self.__headers, params=request_params)
예제 #4
0
def get_cluster_source_ids(api):
  '''
  Return the query prefix that restricts a search to the cluster's sourceIds.

  ClusterName is handled by getting the list of sourceIds of a Cluster. We can't filter directly on a clusterName.

  The prefix is computed on the first call and memoized in the module-level
  CLUSTER_SOURCE_IDS. It is '' when get_navigator_hue_server_name() returns a
  falsy value, and 'sourceId:0 AND' when the API returns no sources.

  :param api: object exposing get_cluster_source_ids(), returning an iterable
    of dict-like entities.
  :return: the prefix string.
  '''
  global CLUSTER_SOURCE_IDS

  if CLUSTER_SOURCE_IDS is None:
    # Build into a local and publish only on success, so that an exception
    # raised by the API call does not leave an empty (unfiltered) prefix
    # memoized for every later call.
    prefix = ''
    if get_navigator_hue_server_name():
      # Single remote call; its result is reused for both the emptiness
      # check and the prefix construction.
      sources = api.get_cluster_source_ids()
      if sources:
        # sourceId can be missing on some entities: fall back to identity
        # instead of raising KeyError.
        source_ids = ['sourceId:%s' % (_id.get('sourceId') or _id.get('identity')) for _id in sources]
        prefix = '(' + ' OR '.join(source_ids) + ') AND '
      else:
        # 0 means always false
        prefix = 'sourceId:0 AND'
    CLUSTER_SOURCE_IDS = prefix

  return CLUSTER_SOURCE_IDS
예제 #5
0
    def test_search_entities(self):
        """
        Compare element ``[0][1]`` of what ``self.api.search_entities``
        returns against the expected query string for several search inputs.

        NOTE(review): ``[0][1]`` is presumably the query parameter captured by
        a stubbed REST client; confirm against the test setup.
        """
        # With a cluster name configured, the expected query is wrapped in a
        # sourceId prefix; otherwise it is used as is.
        cluster_filter = '%s'
        if get_navigator_hue_server_name():
            cluster_filter = '(sourceId:1 OR sourceId:2) AND (%s)'

        # (search input, expected query body) pairs, checked in order.
        cases = (
            (
                'cases',
                '(((originalName:cases*^3)OR(originalDescription:cases*^1)OR(name:cases*^10)OR(description:cases*^3)OR(tags:cases*^5))AND((originalName:[* TO *])OR(originalDescription:[* TO *])OR(name:[* TO *])OR(description:[* TO *])OR(tags:[* TO *]))) AND (*) AND ((type:TABLE)OR(type:VIEW)) AND (sourceType:HIVE OR sourceType:IMPALA)',
            ),
            (
                'type:FIELD',
                '* AND ((type:FIELD*)) AND ((type:TABLE)OR(type:VIEW)OR(type:DATABASE)OR(type:PARTITION)OR(type:FIELD)) AND (sourceType:HIVE OR sourceType:IMPALA)',
            ),
            (
                'type:{}()[]*',
                '* AND ((type:\\{\\}\\(\\)\\[\\]*)) AND ((type:TABLE)OR(type:VIEW)OR(type:DATABASE)OR(type:PARTITION)OR(type:FIELD)) AND (sourceType:HIVE OR sourceType:IMPALA)',
            ),
        )

        for search_input, expected_body in cases:
            result = self.api.search_entities(query_s=search_input,
                                              sources=['hive'])
            assert_equal(cluster_filter % expected_body, result[0][1])
예제 #6
0
def get_cluster_source_ids(api):
    '''
    Return the query prefix that restricts a search to the cluster's sourceIds.

    ClusterName is handled by getting the list of sourceIds of a Cluster. We
    can't filter directly on a clusterName.

    The prefix is computed on the first call and memoized in the module-level
    CLUSTER_SOURCE_IDS. It is '' when get_navigator_hue_server_name() returns
    a falsy value, and 'sourceId:0 AND' when the API returns no sources.

    :param api: object exposing get_cluster_source_ids(), returning an
      iterable of dict-like entities.
    :return: the prefix string.
    '''
    global CLUSTER_SOURCE_IDS

    if CLUSTER_SOURCE_IDS is None:
        # Build into a local and publish only on success, so that an exception
        # raised by the API call does not leave an empty (unfiltered) prefix
        # memoized for every later call.
        prefix = ''
        if get_navigator_hue_server_name():
            # Single remote call; its result is reused below.
            sources = api.get_cluster_source_ids()
            if sources:
                # Sometimes sourceId seems to be missing. The parentheses are
                # required: '%' binds tighter than 'or', so without them the
                # identity fallback is never evaluated and 'sourceId:None' is
                # produced.
                source_ids = [
                    'sourceId:%s' %
                    (_id.get('sourceId') or _id.get('identity'))
                    for _id in sources
                ]
                prefix = '(' + ' OR '.join(source_ids) + ') AND '
            else:
                # 0 means always false
                prefix = 'sourceId:0 AND'
        CLUSTER_SOURCE_IDS = prefix

    return CLUSTER_SOURCE_IDS
예제 #7
0
    def search_entities(self, query_s, limit=100, offset=0, **filters):
        """
        Search entities with a query built from free-text terms and
        ``field:value`` filters (Solr edismax query parser syntax).

        :param query_s: a query string of search terms (e.g. - sales quarterly);
          Currently the search will perform an OR boolean search for all terms
          (split on whitespace), against a whitelist of search_fields. Terms
          containing ':' are treated as user filters (``field:value``) and get
          a trailing '*' appended.
        :param limit: maximum number of results kept after ``_secure_results``.
        :param offset: offset forwarded to the API.
        :param filters: only the ``sources`` key is read here.
        :return: list of at most ``limit`` entities yielded by
          ``self._secure_results``.
        :raises NavigatorApiException: if the REST call raises RestException.
        """
        search_fields = ('originalName', 'originalDescription', 'name',
                         'description', 'tags')

        sources = filters.get('sources', [])
        default_entity_types, entity_types = self._get_types_from_sources(
            sources)

        try:
            params = self.__params

            query_clauses = []
            user_filters = []
            source_type_filter = []

            for term in query_s.strip().split():
                if ':' not in term:
                    query_clauses.append('OR'.join([
                        '(%s:*%s*)' % (field, term) for field in search_fields
                    ]))
                else:
                    # Split on the first colon only: a value that itself
                    # contains ':' must not raise ValueError on unpacking.
                    name, val = term.split(':', 1)
                    if val:
                        if name == 'type':
                            term = '%s:%s' % (name, val.upper().strip('*'))
                            # Make sure type value still makes sense for the source
                            default_entity_types = entity_types
                        # Manual filter allowed e.g. type:VIE* ca
                        user_filters.append(term + '*')

            filter_query = '*'

            if query_clauses:
                filter_query = 'OR'.join(
                    ['(%s)' % clause for clause in query_clauses])

            user_filter_clause = 'OR '.join(['(%s)' % f
                                             for f in user_filters]) or '*'
            source_filter_clause = 'OR'.join([
                '(%s:%s)' % ('type', entity_type)
                for entity_type in default_entity_types
            ])
            if 's3' in sources:
                source_type_filter.append('sourceType:s3')

            filter_query = '%s AND (%s) AND (%s)' % (
                filter_query, user_filter_clause, source_filter_clause)
            if source_type_filter:
                filter_query += ' AND (%s)' % 'OR '.join(source_type_filter)
            if get_navigator_hue_server_name():
                filter_query += 'AND clusterName:%s' % get_navigator_hue_server_name(
                )

            # The API page size comes from configuration; the ``limit``
            # argument is applied afterwards, on the permission-filtered results.
            params += (
                ('query', filter_query),
                ('offset', offset),
                ('limit', NAVIGATOR.FETCH_SIZE_SEARCH.get()),
            )

            LOG.info(params)
            response = self._root.get('entities',
                                      headers=self.__headers,
                                      params=params)

            response = list(islice(self._secure_results(response),
                                   limit))  # Apply Sentry perms

            return response
        except RestException:  # valid on both Python 2 and 3
            msg = 'Failed to search for entities with search query: %s' % query_s
            LOG.exception(msg)
            raise NavigatorApiException(msg)
예제 #8
0
    def search_entities_interactive(self,
                                    query_s=None,
                                    limit=100,
                                    offset=0,
                                    facetFields=None,
                                    facetPrefix=None,
                                    facetRanges=None,
                                    filterQueries=None,
                                    firstClassEntitiesOnly=None,
                                    sources=None):
        """
        POST a search to the 'interactive/entities' endpoint and return the
        response with its 'results' filtered and truncated.

        :param query_s: whitespace-separated search terms; terms containing
          ':' become filter queries instead of free-text query terms. May be
          None or empty, in which case the query is '*'.
        :param limit: maximum number of results kept after ``_secure_results``.
        :param offset: offset sent in the request URL.
        :param facetFields: sent as ``facetFields`` (an empty list if falsy).
        :param facetPrefix: sent as ``facetPrefix`` when truthy.
        :param facetRanges: sent as ``facetRanges`` when truthy.
        :param filterQueries: initial filter queries; the list passed in is
          copied and never modified.
        :param firstClassEntitiesOnly: sent when truthy.
        :param sources: source names used to pick the entity types to filter on.
        :return: the response, with ``response['results']`` replaced by a list
          of at most ``limit`` items yielded by ``self._secure_results``.
        :raises NavigatorApiException: if a RestException is raised.
        """
        try:
            pagination = {
                'offset': offset,
                'limit': NAVIGATOR.FETCH_SIZE_SEARCH_INTERACTIVE.get(),
            }

            entity_types = []
            fq_type = []
            # Work on a copy so the appends below never mutate the caller's list.
            filterQueries = list(
                filterQueries) if filterQueries is not None else []

            if sources:
                default_entity_types, entity_types = self._get_types_from_sources(
                    sources)

                if 'hive' in sources or 'impala' in sources:
                    fq_type = default_entity_types
                elif 'hdfs' in sources:
                    fq_type = entity_types
                elif 's3' in sources:
                    fq_type = default_entity_types
                    filterQueries.append('sourceType:s3')

                # query_s defaults to None: guard before calling strip() so a
                # call with sources but no query does not raise AttributeError.
                if query_s and query_s.strip().endswith(
                        'type:*'):  # To list all available types
                    fq_type = entity_types

            search_terms = query_s.strip().split() if query_s else []
            query = []
            for term in search_terms:
                if ':' not in term:
                    query.append(term)
                else:
                    # Split on the first colon only: a value that itself
                    # contains ':' must not raise ValueError on unpacking.
                    name, val = term.split(':', 1)
                    if val:  # Allow to type non default types, e.g for SQL: type:FIEL*
                        if name == 'type':  # Make sure type value still makes sense for the source
                            term = '%s:%s' % (name, val.upper())
                            fq_type = entity_types
                        filterQueries.append(term)

            body = {'query': ' '.join(query) or '*'}
            if fq_type:
                filterQueries.append(
                    '{!tag=type} %s' %
                    ' OR '.join(['type:%s' % fq for fq in fq_type]))

            if get_navigator_hue_server_name():
                filterQueries.append('clusterName:%s' %
                                     get_navigator_hue_server_name())

            body['facetFields'] = facetFields or [
            ]  # Currently mandatory in API
            if facetPrefix:
                body['facetPrefix'] = facetPrefix
            if facetRanges:
                body['facetRanges'] = facetRanges
            if filterQueries:
                body['filterQueries'] = filterQueries
            if firstClassEntitiesOnly:
                body['firstClassEntitiesOnly'] = firstClassEntitiesOnly

            data = json.dumps(body)
            LOG.info(data)
            response = self._root.post(
                'interactive/entities?limit=%(limit)s&offset=%(offset)s' %
                pagination,
                data=data,
                contenttype=_JSON_CONTENT_TYPE,
                clear_cookies=True)

            # The page size above comes from configuration; ``limit`` is
            # applied here, on the permission-filtered results.
            response['results'] = list(
                islice(self._secure_results(response['results']),
                       limit))  # Apply Sentry perms

            return response
        except RestException:
            msg = 'Failed to search for entities with search query %s' % json.dumps(
                body)
            LOG.exception(msg)
            raise NavigatorApiException(msg)