예제 #1
0
def suggest(request_username, search_param_parser):
    """
    Suggest listings matching the parsed search string

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Side effect: when no limit was set on the parser,
    search_param_parser.limit is overwritten with constants.ES_SUGGEST_LIMIT
    before the query is built.

    Args:
        request_username(string): user the suggestions are filtered for
        search_param_parser(SearchParamParser): Parsed Request Search Object

    Returns:
        list of {'title': ..., 'id': ...} dicts, one per hit the user may
        see; an empty list when there is no search string or no hits
    """
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    elasticsearch_factory.check_elasticsearch()

    if search_param_parser.search_string is None:
        return []

    user_exclude_orgs = get_user_exclude_orgs(request_username)

    # Override Limit - fall back to the suggest limit if limit was not set
    if search_param_parser.limit_set is False:
        search_param_parser.limit = constants.ES_SUGGEST_LIMIT

    search_query = elasticsearch_util.make_search_query_obj(
        search_param_parser, exclude_agencies=user_exclude_orgs)
    # Only Retrieve ['title', 'security_marking', 'id'] fields from Elasticsearch for suggestions
    search_query['_source'] = ['title', 'security_marking', 'id']

    res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)

    # A missing or null inner hit list is treated the same as "no hits"
    hits = res.get('hits', {}).get('hits') or []

    hit_titles = []

    for hit in hits:
        source = hit.get('_source')
        if not source:
            # Without a document body there is no marking to check: exclude
            continue

        security_marking = source.get('security_marking')
        if not security_marking:
            logger.debug('Listing %s has no security_marking',
                         source.get('title'))
            # Already excluded; do not pass a missing marking to access control
            continue

        if not system_has_access_control(request_username, security_marking):
            continue

        hit_titles.append({'title': source['title'], 'id': source['id']})

    return hit_titles
예제 #2
0
def bulk_reindex():
    """
    Reindex Listing Data into an Elasticsearch Index

    Steps:
        Checks to see if elasticsearch connection is good
        Recreates the index mapping (via recreate_index_mapping)
        Serializes every Listing and bulk indexes the records
        Wait for the cluster health to turn yellow

    The bulk request is skipped when there are no listings, because
    Elasticsearch rejects a bulk request with an empty body.

    A bulk response that reports errors is only logged; no exception is
    raised for it here.

    To check index in elasticsearch:
        http://127.0.0.1:9200/appsmall/_search?size=10000&pretty
    """
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    logger.debug('Starting Indexing Process')
    elasticsearch_factory.check_elasticsearch()
    recreate_index_mapping()
    # Convert Listing Objects into Python Objects
    # Had to add order_by for test_essearch_is_enable to pass for both sqlite/postgresql
    # TODO: Investigate if results coming back from elasticsearch is order by 'Relevance score'
    all_listings = models.Listing.objects.order_by('id').all()
    serializer = ReadOnlyListingSerializer(all_listings, many=True)
    serializer_results = serializer.data

    # Bulk body alternates an action line and the document it applies to
    bulk_data = []

    for record in serializer_results:
        # Transform Serializer records into records for elasticsearch
        record_clean_obj = elasticsearch_util.prepare_clean_listing_record(
            record)

        bulk_data.append({
            'index': {
                '_index': settings.ES_INDEX_NAME,
                '_type': settings.ES_TYPE_NAME,
                '_id': record_clean_obj[settings.ES_ID_FIELD],
            }
        })
        bulk_data.append(record_clean_obj)

    if bulk_data:
        # Bulk index the data
        logger.debug('Bulk indexing listings...')
        res = es_client.bulk(index=settings.ES_INDEX_NAME,
                             body=bulk_data,
                             refresh=True)

        # A response without an 'errors' flag is treated as a failure
        if res.get('errors', True):
            logger.error('Error Bulk Indexing')
        else:
            logger.debug('Bulk Indexing Successful')
    else:
        logger.debug('No listings to index, skipping bulk request')

    logger.debug('Waiting for cluster to turn yellow')
    es_client.cluster.health(wait_for_status='yellow', request_timeout=20)
    logger.debug('Finish waiting for cluster to turn yellow')
def suggest(request_username, search_param_parser):
    """
    Suggest listings matching the parsed search string

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Side effect: when no limit was set on the parser,
    search_param_parser.limit is overwritten with constants.ES_SUGGEST_LIMIT
    before the query is built.

    Args:
        request_username(string): user the suggestions are filtered for
        search_param_parser(SearchParamParser): Parsed Request Search Object

    Returns:
        list of {'title': ..., 'id': ...} dicts, one per hit the user may
        see; an empty list when there is no search string or no hits
    """
    es_client = elasticsearch_factory.get_client()

    elasticsearch_factory.check_elasticsearch()

    if search_param_parser.search_string is None:
        return []

    user_exclude_orgs = get_user_exclude_orgs(request_username)

    # Override Limit - fall back to the suggest limit if limit was not set
    if search_param_parser.limit_set is False:
        search_param_parser.limit = constants.ES_SUGGEST_LIMIT

    search_query = elasticsearch_util.make_search_query_obj(search_param_parser, exclude_agencies=user_exclude_orgs)
    # Only Retrieve ['title', 'security_marking', 'id'] fields from Elasticsearch for suggestions
    search_query['_source'] = ['title', 'security_marking', 'id']

    res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)

    # A missing or null inner hit list is treated the same as "no hits"
    hits = res.get('hits', {}).get('hits') or []

    hit_titles = []

    for hit in hits:
        source = hit.get('_source')
        if not source:
            # Without a document body there is no marking to check: exclude
            continue

        security_marking = source.get('security_marking')
        if security_marking:
            has_access = system_has_access_control(request_username, security_marking)
        else:
            # Already excluded; do not pass a missing marking to access control
            has_access = False
            logger.debug('Listing %s has no security_marking', source.get('title'))

        if has_access:
            hit_titles.append({'title': source['title'], 'id': source['id']})

    return hit_titles
def bulk_reindex():
    """
    Reindex Listing Data into an Elasticsearch Index

    Steps:
        Checks to see if elasticsearch connection is good
        Recreates the index mapping (via recreate_index_mapping)
        Serializes every Listing and bulk indexes the records
        Wait for the cluster health to turn yellow

    The bulk request is skipped when there are no listings, because
    Elasticsearch rejects a bulk request with an empty body.

    A bulk response that reports errors is only logged; no exception is
    raised for it here.

    To check index in elasticsearch:
        http://127.0.0.1:9200/appsmall/_search?size=10000&pretty
    """
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    logger.debug('Starting Indexing Process')
    elasticsearch_factory.check_elasticsearch()
    recreate_index_mapping()
    # Convert Listing Objects into Python Objects
    # Had to add order_by for test_essearch_is_enable to pass for both sqlite/postgresql
    # TODO: Investigate if results coming back from elasticsearch is order by 'Relevance score'
    all_listings = models.Listing.objects.order_by('id').all()
    serializer = ReadOnlyListingSerializer(all_listings, many=True)
    serializer_results = serializer.data

    # Bulk body alternates an action line and the document it applies to
    bulk_data = []

    for record in serializer_results:
        # Transform Serializer records into records for elasticsearch
        record_clean_obj = elasticsearch_util.prepare_clean_listing_record(record)

        action = {
            'index': {
                '_index': settings.ES_INDEX_NAME,
                '_type': settings.ES_TYPE_NAME,
                '_id': record_clean_obj[settings.ES_ID_FIELD],
            }
        }
        bulk_data.extend((action, record_clean_obj))

    if not bulk_data:
        logger.debug('No listings to index, skipping bulk request')
    else:
        # Bulk index the data
        logger.debug('Bulk indexing listings...')
        res = es_client.bulk(index=settings.ES_INDEX_NAME, body=bulk_data, refresh=True)

        # A response without an 'errors' flag is treated as a failure
        if res.get('errors', True):
            logger.error('Error Bulk Indexing')
        else:
            logger.debug('Bulk Indexing Successful')

    logger.debug('Waiting for cluster to turn yellow')
    es_client.cluster.health(wait_for_status='yellow', request_timeout=20)
    logger.debug('Finish waiting for cluster to turn yellow')
예제 #5
0
    def initiate(self):
        """
        Check that Elasticsearch is up and refresh the recommendation data

        When ElasticsearchRecommender.is_data_old() reports stale data, the
        RECOMMEND_INDEX mapping is recreated and the data is loaded again
        through load_data_into_es_table(); otherwise nothing else happens.
        """
        elasticsearch_factory.check_elasticsearch()

        # Nothing to rebuild while the stored data is still current
        if not ElasticsearchRecommender.is_data_old():
            return

        index_name = ElasticsearchRecommender.RECOMMEND_INDEX
        index_mapping = ElasticsearchRecommender.get_index_mapping()
        elasticsearch_factory.recreate_index_mapping(index_name, index_mapping)
        ElasticsearchRecommender.load_data_into_es_table()
예제 #6
0
    def initiate(self):
        """
        Check that Elasticsearch is up and refresh the recommendation data

        When ElasticsearchRecommender.is_data_old() reports stale data, the
        settings.ES_RECOMMEND_USER index mapping is recreated and the data is
        loaded again through load_data_into_es_table().
        """
        elasticsearch_factory.check_elasticsearch()

        data_is_stale = ElasticsearchRecommender.is_data_old()
        if data_is_stale:
            index_name = settings.ES_RECOMMEND_USER
            index_mapping = ElasticsearchRecommender.get_index_mapping()
            elasticsearch_factory.recreate_index_mapping(index_name, index_mapping)
            ElasticsearchRecommender.load_data_into_es_table()
예제 #7
0
    def setUp(self):
        """
        Probe Elasticsearch and reindex the listings when it is reachable

        A failed check is recorded in self.es_failed / self.error_string
        instead of being raised, and the reindex is skipped in that case.
        """
        self.error_string = None
        self.es_failed = False
        try:
            elasticsearch_factory.check_elasticsearch()
        except Exception as err:
            # Remember why Elasticsearch is unavailable rather than failing here
            self.error_string = str(err)
            self.es_failed = True
        else:
            # Quiet the elasticsearch client logger before the bulk reindex
            es_logger = logging.getLogger('elasticsearch')
            es_logger.setLevel(logging.CRITICAL)
            model_access_es.bulk_reindex()
    def setUp(self):
        """
        setUp is invoked before each test method

        Probes Elasticsearch; a failed check is recorded in self.es_failed /
        self.error_string instead of being raised. When the check passes,
        the listings are bulk reindexed.
        """
        self.maxDiff = None
        self.error_string = None
        self.es_failed = False

        try:
            elasticsearch_factory.check_elasticsearch()
        except Exception as err:
            # Record the failure and skip the reindex
            self.error_string = str(err)
            self.es_failed = True
            return

        # Quiet the elasticsearch client logger before the bulk reindex
        logging.getLogger('elasticsearch').setLevel(logging.CRITICAL)
        model_access_es.bulk_reindex()
def search(request_username, search_param_parser):
    """
    Filter Listings
    Too many variations to cache results

    Users shall be able to search for listings'
     - title
     - description
     - description_short
     - tags__name

    Filter by
     - category
     - agency
     - listing types
     - is_508_compliant

    Users shall only see what they are authorized to see
      'is_private': false,
      'approval_status': 'APPROVED',
      'is_deleted': false,
      'is_enabled': true,
      'security_marking': 'UNCLASSIFIED',

    Sorted by Relevance
      'avg_rate': 0,
      'total_votes': 0,
      'total_rate5': 0,
      'total_rate4': 0,
      'total_rate3': 0,
      'total_rate2': 0,
      'total_rate1': 0,
      'total_reviews': 0,
      'is_featured': true,

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Args:
        request_username(str): username the results are filtered for
        search_param_parser(SearchParamParser): parameters

    Returns:
        dict with 'count', 'results', 'previous' and 'next' keys, plus
        'next_offset_prediction' when the adjusted count is not negative;
        an empty list when the response carries no 'hits' section

    Raises:
        Whatever es_client.search raises; the query is logged first.
    """
    elasticsearch_factory.check_elasticsearch()
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    user_exclude_orgs = get_user_exclude_orgs(request_username)
    search_query = elasticsearch_util.make_search_query_obj(search_param_parser, exclude_agencies=user_exclude_orgs)

    try:
        res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)
    except Exception:
        # Record the offending query, then let the original error propagate
        logger.error('Elasticsearch search failed for query: %s', json.dumps(search_query, indent=4))
        raise

    hits = res.get('hits', {})
    if not hits:
        return []
    # A 'hits' section without an inner hit list is treated as zero results
    inner_hits = hits.get('hits') or []

    # Image fields that get a 'url' entry added next to their 'id'
    image_keys_to_add_url = ('large_icon',
                             'small_icon',
                             'banner_icon',
                             'large_banner_icon')

    hit_titles = []
    excluded_count = 0

    for current_inner_hit in inner_hits:
        source = current_inner_hit.get('_source')
        if not source:
            # Without a document body there is no marking to check: exclude
            excluded_count += 1
            continue

        source['_score'] = current_inner_hit.get('_score')

        # Add URLs to icons
        for image_key in image_keys_to_add_url:
            image = source.get(image_key)
            if image is None:
                continue
            if search_param_parser.base_url:
                image['url'] = '{!s}/api/image/{!s}/'.format(search_param_parser.base_url, image['id'])
            else:
                image['url'] = '/api/image/{!s}/'.format(image['id'])

        security_marking = source.get('security_marking')
        if security_marking:
            has_access = system_has_access_control(request_username, security_marking)
        else:
            # Already excluded; do not pass a missing marking to access control
            has_access = False
            logger.debug('Listing %s has no security_marking', source.get('title'))

        if has_access:
            hit_titles.append(source)
        else:
            excluded_count += 1

    # Total Records in Elasticsearch
    final_count = hits.get('total')
    # Total Records minus what the user does not have access to see, this count should never be below zero
    # TODO: Figure out smarter logic for excluded_count compensation (rivera 11/14/2016)
    final_count_with_excluded = final_count - excluded_count

    final_results = {
        'count': final_count_with_excluded,
        'results': hit_titles,
        'previous': None,
        'next': None,
    }

    # if final_count_with_excluded < 0 then previous and next should be None
    if final_count_with_excluded < 0:
        return final_results

    previous_offset_prediction = search_param_parser.offset - search_param_parser.limit
    next_offset_prediction = search_param_parser.offset + search_param_parser.limit

    final_results['next_offset_prediction'] = next_offset_prediction

    # Previous URL - previous_offset_prediction is less than zero, previous should be None
    if previous_offset_prediction >= 0:
        final_results['previous'] = generate_link(search_param_parser, previous_offset_prediction)

    # Next URL
    if next_offset_prediction <= final_count_with_excluded:
        final_results['next'] = generate_link(search_param_parser, next_offset_prediction)

    return final_results
예제 #10
0
def search(request_username, search_param_parser):
    """
    Filter Listings
    Too many variations to cache results

    Users shall be able to search for listings'
     - title
     - description
     - description_short
     - tags__name

    Filter by
     - category
     - agency
     - listing types
     - is_508_compliant

    Users shall only see what they are authorized to see
      'is_private': false,
      'approval_status': 'APPROVED',
      'is_deleted': false,
      'is_enabled': true,
      'security_marking': 'UNCLASSIFIED',

    Sorted by Relevance
      'avg_rate': 0,
      'total_votes': 0,
      'total_rate5': 0,
      'total_rate4': 0,
      'total_rate3': 0,
      'total_rate2': 0,
      'total_rate1': 0,
      'total_reviews': 0,
      'is_featured': true,

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Args:
        request_username(str): username the results are filtered for
        search_param_parser(SearchParamParser): parameters

    Returns:
        dict with 'count', 'results', 'previous' and 'next' keys, plus
        'next_offset_prediction' when the adjusted count is not negative;
        an empty list when the response carries no 'hits' section

    Raises:
        Whatever es_client.search raises; the query is logged first.
    """
    elasticsearch_factory.check_elasticsearch()
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    user_exclude_orgs = get_user_exclude_orgs(request_username)
    search_query = elasticsearch_util.make_search_query_obj(
        search_param_parser, exclude_agencies=user_exclude_orgs)

    try:
        res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)
    except Exception:
        # Record the offending query, then let the original error propagate
        logger.error('Elasticsearch search failed for query: %s',
                     json.dumps(search_query, indent=4))
        raise

    hits = res.get('hits', {})
    if not hits:
        return []
    # A 'hits' section without an inner hit list is treated as zero results
    inner_hits = hits.get('hits') or []

    # Image fields that get a 'url' entry added next to their 'id'
    image_keys_to_add_url = (
        'large_icon', 'small_icon', 'banner_icon', 'large_banner_icon'
    )

    hit_titles = []
    excluded_count = 0

    for current_inner_hit in inner_hits:
        source = current_inner_hit.get('_source')
        if not source:
            # Without a document body there is no marking to check: exclude
            excluded_count += 1
            continue

        source['_score'] = current_inner_hit.get('_score')

        # Add URLs to icons
        for image_key in image_keys_to_add_url:
            image = source.get(image_key)
            if image is None:
                continue
            if search_param_parser.base_url:
                image['url'] = '{!s}/api/image/{!s}/'.format(
                    search_param_parser.base_url, image['id'])
            else:
                image['url'] = '/api/image/{!s}/'.format(image['id'])

        security_marking = source.get('security_marking')
        if not security_marking:
            logger.debug('Listing %s has no security_marking',
                         source.get('title'))
            # Already excluded; do not pass a missing marking to access control
            excluded_count += 1
            continue

        if not system_has_access_control(request_username,
                                         security_marking):
            excluded_count += 1
            continue

        hit_titles.append(source)

    # Total Records in Elasticsearch
    final_count = hits.get('total')
    # Total Records minus what the user does not have access to see, this count should never be below zero
    # TODO: Figure out smarter logic for excluded_count compensation (rivera 11/14/2016)
    final_count_with_excluded = final_count - excluded_count

    final_results = {
        'count': final_count_with_excluded,
        'results': hit_titles,
        'previous': None,
        'next': None,
    }

    # if final_count_with_excluded < 0 then previous and next should be None
    if final_count_with_excluded < 0:
        return final_results

    previous_offset_prediction = search_param_parser.offset - search_param_parser.limit
    next_offset_prediction = search_param_parser.offset + search_param_parser.limit

    final_results['next_offset_prediction'] = next_offset_prediction

    # Previous URL - previous_offset_prediction is less than zero, previous should be None
    if previous_offset_prediction >= 0:
        final_results['previous'] = generate_link(search_param_parser,
                                                  previous_offset_prediction)

    # Next URL
    if next_offset_prediction <= final_count_with_excluded:
        final_results['next'] = generate_link(search_param_parser,
                                              next_offset_prediction)

    return final_results