def suggest(request_username, search_param_parser):
    """
    Suggest

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Args:
        request_username(string)
        search_param_parser(SearchParamParser): Parsed Request Search Object

    Returns:
        listing titles in a list
    """
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    elasticsearch_factory.check_elasticsearch()

    if search_param_parser.search_string is None:
        return []

    user_exclude_orgs = get_user_exclude_orgs(request_username)

    # Override limit - only 15 results should come back if the limit was not set
    if search_param_parser.limit_set is False:
        search_param_parser.limit = constants.ES_SUGGEST_LIMIT

    search_query = elasticsearch_util.make_search_query_obj(search_param_parser, exclude_agencies=user_exclude_orgs)
    # Only retrieve the ['title', 'security_marking', 'id'] fields from Elasticsearch for suggestions
    search_query['_source'] = ['title', 'security_marking', 'id']

    # print(json.dumps(search_query, indent=4))  # Print statement for debugging output
    res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)

    hits = res.get('hits', {}).get('hits', None)
    if not hits:
        return []

    hit_titles = []

    for hit in hits:
        source = hit.get('_source')

        # Exclude listings that lack a security_marking or that the user
        # is not cleared to see
        exclude_bool = False
        if not source.get('security_marking'):
            exclude_bool = True
            logger.debug('Listing {0!s} has no security_marking'.format(source.get('title')))
        if not system_has_access_control(request_username, source.get('security_marking')):
            exclude_bool = True

        if exclude_bool is False:
            temp = {'title': source['title'], 'id': source['id']}
            hit_titles.append(temp)

    return hit_titles
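

# A minimal usage sketch for suggest(), assuming a parser object that exposes
# the three attributes the function reads (search_string, limit_set, limit).
# The stub class, username, and expected output below are hypothetical.
class StubSearchParamParser(object):
    def __init__(self, search_string):
        self.search_string = search_string
        self.limit_set = False  # not set, so suggest() applies ES_SUGGEST_LIMIT
        self.limit = None


suggestions = suggest('jsmith', StubSearchParamParser('air'))
# Expected shape: [{'title': 'Air Mail', 'id': 1}, ...] containing only
# listings whose security_marking the user is cleared to see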


def bulk_reindex():
    """
    Reindex Listing Data into an Elasticsearch Index

    Steps:
        Check that the Elasticsearch connection is good
        Remove the index if it already exists
        Create the index with its mapping
        Reindex the data
        Wait for the cluster health to turn yellow

    To check the index in Elasticsearch:
        http://127.0.0.1:9200/appsmall/_search?size=10000&pretty
    """
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    logger.debug('Starting Indexing Process')

    elasticsearch_factory.check_elasticsearch()
    recreate_index_mapping()

    # Convert Listing objects into Python objects
    # Had to add order_by for test_essearch_is_enable to pass for both sqlite/postgresql
    # TODO: Investigate whether results coming back from Elasticsearch are ordered by relevance score
    all_listings = models.Listing.objects.order_by('id').all()
    serializer = ReadOnlyListingSerializer(all_listings, many=True)
    serializer_results = serializer.data

    bulk_data = []
    for record in serializer_results:
        # Transform serializer records into records for Elasticsearch
        record_clean_obj = elasticsearch_util.prepare_clean_listing_record(record)

        op_dict = {
            'index': {
                '_index': settings.ES_INDEX_NAME,
                '_type': settings.ES_TYPE_NAME,
                '_id': record_clean_obj[settings.ES_ID_FIELD]
            }
        }

        bulk_data.append(op_dict)
        bulk_data.append(record_clean_obj)

    # Bulk index the data
    logger.debug('Bulk indexing listings...')
    res = es_client.bulk(index=settings.ES_INDEX_NAME, body=bulk_data, refresh=True)

    if res.get('errors', True):
        logger.error('Error Bulk Indexing')
    else:
        logger.debug('Bulk Indexing Successful')

    logger.debug('Waiting for cluster to turn yellow')
    es_client.cluster.health(wait_for_status='yellow', request_timeout=20)
    logger.debug('Finished waiting for cluster to turn yellow')
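

# For reference, the bulk_data list built above follows the Elasticsearch bulk
# API convention of alternating an action/metadata line with the document it
# indexes. A sketch of the resulting body (the type name and documents are
# illustrative, not the real settings values):
example_bulk_data = [
    {'index': {'_index': 'appsmall', '_type': 'listings', '_id': 1}},
    {'id': 1, 'title': 'Air Mail', 'security_marking': 'UNCLASSIFIED'},
    {'index': {'_index': 'appsmall', '_type': 'listings', '_id': 2}},
    {'id': 2, 'title': 'Bread Basket', 'security_marking': 'UNCLASSIFIED'},
]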


def initiate(self):
    """
    Make sure Elasticsearch is up and running, then build profiles for
    Elasticsearch recommendations
    """
    elasticsearch_factory.check_elasticsearch()

    is_data_old = ElasticsearchRecommender.is_data_old()
    if is_data_old:
        elasticsearch_factory.recreate_index_mapping(ElasticsearchRecommender.RECOMMEND_INDEX, ElasticsearchRecommender.get_index_mapping())
        ElasticsearchRecommender.load_data_into_es_table()


def initiate(self):
    """
    Make sure Elasticsearch is up and running, then build profiles for
    Elasticsearch recommendations
    """
    elasticsearch_factory.check_elasticsearch()

    if ElasticsearchRecommender.is_data_old():
        elasticsearch_factory.recreate_index_mapping(settings.ES_RECOMMEND_USER, ElasticsearchRecommender.get_index_mapping())
        ElasticsearchRecommender.load_data_into_es_table()
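

# The is_data_old() freshness check is defined elsewhere; one common pattern
# is comparing a stored refresh timestamp against a time-to-live. A minimal
# sketch of that idea only - the document id, doc_type, and TTL below are
# assumptions, not the project's actual implementation:
def is_data_old_sketch(es_client, index_name):
    """Return True when the recommendation data should be rebuilt"""
    import time
    stale_after_seconds = 24 * 60 * 60  # hypothetical TTL: rebuild once a day
    try:
        doc = es_client.get(index=index_name, doc_type='record', id='last_refresh')
        return (time.time() - doc['_source']['timestamp']) > stale_after_seconds
    except Exception:
        # A missing index or document means the data was never loaded
        return True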


def setUp(self):
    self.error_string = None
    self.es_failed = False

    try:
        elasticsearch_factory.check_elasticsearch()
    except Exception as err:
        self.error_string = str(err)
        self.es_failed = True

    if not self.es_failed:
        logging.getLogger('elasticsearch').setLevel(logging.CRITICAL)
        model_access_es.bulk_reindex()


def setUp(self):
    """
    setUp is invoked before each test method
    """
    self.maxDiff = None
    self.error_string = None
    self.es_failed = False

    try:
        elasticsearch_factory.check_elasticsearch()
    except Exception as err:
        self.error_string = str(err)
        self.es_failed = True

    if not self.es_failed:
        logging.getLogger('elasticsearch').setLevel(logging.CRITICAL)
        model_access_es.bulk_reindex()
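

# Tests in these classes would typically consult es_failed before touching the
# index. A sketch of that guard (the test name and body are illustrative):
def test_search_requires_es(self):
    if self.es_failed:
        self.skipTest('Elasticsearch not available: {0!s}'.format(self.error_string))
    # ... exercise search endpoints against the freshly reindexed data ...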


def search(request_username, search_param_parser):
    """
    Filter Listings
    Too many variations to cache results

    Users shall be able to search listings by
     - title
     - description
     - description_short
     - tags__name

    Filter by
     - category
     - agency
     - listing types
     - is_508_compliant

    Users shall only see what they are authorized to see
        'is_private': false,
        'approval_status': 'APPROVED',
        'is_deleted': false,
        'is_enabled': true,
        'security_marking': 'UNCLASSIFIED',

    Sorted by Relevance
        'avg_rate': 0,
        'total_votes': 0,
        'total_rate5': 0,
        'total_rate4': 0,
        'total_rate3': 0,
        'total_rate2': 0,
        'total_rate1': 0,
        'total_reviews': 0,
        'is_featured': true,

    It must respect restrictions
     - Private apps (apps only from user's agency)
     - User's max_classification_level

    Args:
        request_username(str): username
        search_param_parser(SearchParamParser): parameters
    """
    elasticsearch_factory.check_elasticsearch()
    # Create ES client
    es_client = elasticsearch_factory.get_client()

    user_exclude_orgs = get_user_exclude_orgs(request_username)
    search_query = elasticsearch_util.make_search_query_obj(search_param_parser, exclude_agencies=user_exclude_orgs)

    try:
        res = es_client.search(index=settings.ES_INDEX_NAME, body=search_query)
    except Exception as err:
        print(json.dumps(search_query, indent=4))  # Dump the failing query for debugging
        raise err

    hits = res.get('hits', {})
    inner_hits = hits.get('hits', None)
    if not hits:
        return []

    hit_titles = []
    excluded_count = 0

    for current_inner_hit in inner_hits:
        source = current_inner_hit.get('_source')
        source['_score'] = current_inner_hit.get('_score')

        # Add URLs to icons
        image_keys_to_add_url = ['large_icon', 'small_icon', 'banner_icon', 'large_banner_icon']
        for image_key in image_keys_to_add_url:
            if source.get(image_key) is not None:
                if search_param_parser.base_url:
                    source[image_key]['url'] = '{!s}/api/image/{!s}/'.format(search_param_parser.base_url, source[image_key]['id'])
                else:
                    source[image_key]['url'] = '/api/image/{!s}/'.format(source[image_key]['id'])

        exclude_bool = False
        if not source.get('security_marking'):
            exclude_bool = True
            logger.debug('Listing {0!s} has no security_marking'.format(source.get('title')))
        if not system_has_access_control(request_username, source.get('security_marking')):
            exclude_bool = True

        if exclude_bool is False:
            hit_titles.append(source)
        else:
            excluded_count = excluded_count + 1

    # Total records in Elasticsearch
    final_count = hits.get('total')
    # Total records minus what the user does not have access to see; this count should never be below zero
    # TODO: Figure out smarter logic for excluded_count compensation (rivera 11/14/2016)
    final_count_with_excluded = final_count - excluded_count

    final_results = {
        'count': final_count_with_excluded,
        'results': hit_titles
    }

    final_results['previous'] = None
    final_results['next'] = None

    # If final_count_with_excluded < 0 then previous and next should be None
    if final_count_with_excluded < 0:
        return final_results

    previous_offset_prediction = search_param_parser.offset - search_param_parser.limit
    next_offset_prediction = search_param_parser.offset + search_param_parser.limit

    final_results['next_offset_prediction'] = next_offset_prediction

    # Previous URL - if previous_offset_prediction is less than zero, previous should be None
    if previous_offset_prediction >= 0:
        final_results['previous'] = generate_link(search_param_parser, previous_offset_prediction)

    # Next URL
    if next_offset_prediction <= final_count_with_excluded:
        final_results['next'] = generate_link(search_param_parser, next_offset_prediction)

    return final_results
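

# A worked example of the pagination predictions at the end of search()
# (all numbers hypothetical):
offset, limit, final_count_with_excluded = 20, 10, 35

previous_offset_prediction = offset - limit  # 10 >= 0, so 'previous' is generated
next_offset_prediction = offset + limit      # 30 <= 35, so 'next' is generated

# At the first page (offset=0) the previous prediction is -10, so 'previous'
# stays None; near the end (offset=30) the next prediction is 40 > 35, so
# 'next' stays None.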