def excerpts(request, index_id):
    '''
    Return highlighted snippets ("excerpts") for a set of documents.

    Builds excerpts through the Sphinx BuildExcerpts API and, when
    settings.EXCERPTS_CACHE is enabled, caches responses in Redis via
    Cache(); a per-key lock keeps concurrent sessions from rebuilding
    the same cache entry at the same time.

    Request payload (via request_data):
        docs : dict {doc_id: text} or list [text, ...]  (required)
        q    : the query to highlight
        ttl  : optional cache expiry override (seconds)
        plus any of the Sphinx excerpt options below.
    '''
    cache = Cache()
    index_id = int(index_id)
    index = fetch_index_name(index_id)
    r = request_data(request)

    # Cache key = hash of index name + full request payload; the
    # per-index version() component allows bulk invalidation.
    cache_key = md5(index + json.dumps(r)).hexdigest()
    lock_key = 'lock:' + cache_key
    version = cache.version(index_id)
    cache_key = 'cache:excerpts:%s:%d:%s' % (cache_key, index_id, version)

    if 'docs' not in r:
        return R({})

    if settings.EXCERPTS_CACHE:
        try:
            response = cache.get(cache_key)
            if response is not None:
                return R(response, request, code = 200, serialize = False)
            # Cache miss: wait while another session holds the re-cache lock.
            start = time.time()
            lock = cache.get(lock_key)
            while lock is not None:
                lock = cache.get(lock_key)
                if (time.time() - start) > settings.CACHE_LOCK_TIMEOUT:
                    return E(message = 'Cache lock wait timeout exceeded')
            # The previous lock holder may have populated the cache meanwhile.
            response = cache.get(cache_key)
            if response is not None:
                return R(response, request, code = 200, serialize = False)
            # Otherwise acquire the lock for this session.
            cache.set(lock_key, 1, True, settings.CACHE_LOCK_TIMEOUT)  # expires with the lock timeout
        except Exception:
            return E(message = 'Error while examining excerpts cache')

    # Sphinx BuildExcerpts options with their defaults; request values
    # override them, coerced to the default's type.
    options = {
        "before_match"     : '<b>',
        "after_match"      : '</b>',
        "chunk_separator"  : '...',
        "limit"            : 256,
        "around"           : 5,
        "exact_phrase"     : False,
        "use_boundaries"   : False,
        "query_mode"       : True,
        "weight_order"     : False,
        "force_all_words"  : False,
        "limit_passages"   : 0,
        "limit_words"      : 0,
        "start_passage_id" : 1,
        "html_strip_mode"  : 'index',
        "allow_empty"      : False,
        "passage_boundary" : 'paragraph',
        "emit_zones"       : False,
    }
    for k, v in list(options.items()):
        if k in r:
            # BUG FIX: bool must be tested before int -- bool is a
            # subclass of int, so the original int-first order sent
            # boolean options through int(r[k]), which raises on
            # values such as 'true'/'false'.
            if isinstance(v, bool):
                # NOTE: bool() of any non-empty string is True
                # (including 'false') -- unchanged from the original
                # intent, but callers should pass real booleans.
                options[k] = bool(r[k])
            elif isinstance(v, int):
                options[k] = int(r[k])
            else:
                options[k] = r[k]

    # Per-request TTL override, else the configured default.
    if 'ttl' in r:
        cache_expiration = int(r['ttl'])
    else:
        cache_expiration = settings.EXCERPTS_CACHE_EXPIRE

    # Accept either {doc_id: text} or a plain list of texts, e.g.
    #   docs = { 838393: 'a document with lots of text',
    #            119996: 'another document with text' }
    if isinstance(r['docs'], dict):
        document_ids = list(r['docs'].keys())
        documents = list(r['docs'].values())
    elif isinstance(r['docs'], list):
        # Positional indexes stand in for document ids.
        document_ids = list(range(len(r['docs'])))
        documents = r['docs']
    else:
        return E(message = 'Documents are passed as a list or dictionary structure')
    del r['docs']  # free up some memory

    # Resolve the searchd host/port serving this index.
    ci = ConfigurationIndex.objects.filter(sp_index_id = index_id)[0]
    searchd_id = ConfigurationSearchd.objects.filter(
        sp_configuration_id = ci.sp_configuration_id
    )[0].sp_searchd_id

    # TODO: convert hard coded option ids to constants
    # option 138: listen port (the ':mysql41' listener is excluded)
    so = SearchdOption.objects.filter(
        sp_searchd_id = searchd_id,
        sp_option_id = 138,
    ).exclude(value__endswith = ':mysql41')
    sphinx_port = int(so[0].value)

    # option 188: listen host; fall back to localhost when unset.
    try:
        so = SearchdOption.objects.filter(
            sp_searchd_id = searchd_id,
            sp_option_id = 188,
        )
        if so:
            sphinx_host = so[0].value
        else:
            sphinx_host = 'localhost'
    except Exception:
        sphinx_host = 'localhost'

    try:
        cl = SphinxClient()
        cl.SetServer(host = sphinx_host, port = sphinx_port)
        excerpts = cl.BuildExcerpts(documents, index, r['q'], options)
        del documents
        if not excerpts:
            return E(message = 'Sphinx Excerpts Error: ' + cl.GetLastError())
        if settings.EXCERPTS_CACHE:
            # NOTE(review): the raw excerpt list is cached, while the
            # fresh path below returns a JSON-wrapped dict -- the cache
            # hit path therefore serves a different shape; confirm this
            # asymmetry is intentional.
            cache.set(cache_key, excerpts, True, cache_expiration, lock_key)
        excerpts = {
            'excerpts'  : dict(zip(document_ids, excerpts)),
            'cache-key' : cache_key,
        }
        return R(json.dumps(excerpts), request)
    except Exception as e:
        return E(message = 'Error while building excerpts ' + str(e))
def search(request, index_id):
    '''
    Search wrapper that builds and executes a SphinxQL SELECT.

    Assembles the statement from the request payload following:

        SELECT select_expr [, select_expr ...]
        FROM index [, index2 ...]
        [WHERE where_condition]
        [GROUP BY {col_name | expr_alias}]
        [WITHIN GROUP ORDER BY {col_name | expr_alias} {ASC | DESC}]
        [ORDER BY {col_name | expr_alias} {ASC | DESC} [, ...]]
        [LIMIT [offset,] row_count]
        [OPTION opt_name = opt_value [, ...]]

    Results (and SHOW META output) are cached in Redis when
    settings.SEARCH_CACHE is enabled, with the same lock-wait scheme
    used by excerpts().
    '''
    cache = Cache()
    index = fetch_index_name(index_id)
    r = request_data(request)

    if settings.SEARCH_CACHE:
        cache_key = md5(index + request.REQUEST['data']).hexdigest()
        lock_key = 'lock:' + cache_key
        version = cache.version(index_id)
        cache_key = 'cache:search:%s:%d:%s' % (cache_key, index_id, version)
        try:
            response = cache.get(cache_key)
            if response is not None:
                return R(response, 200, False)
            # Cache miss: wait while another session holds the re-cache lock.
            start = time.time()
            lock = cache.get(lock_key)
            while lock is not None:
                lock = cache.get(lock_key)
                if (time.time() - start) > settings.CACHE_LOCK_TIMEOUT:
                    return E(message = 'Cache lock wait timeout exceeded')
            # The previous lock holder may have populated the cache meanwhile.
            response = cache.get(cache_key)
            if response is not None:
                return R(response, 200, False)
            # Otherwise acquire the lock for this session.
            cache.set(lock_key, 1, True, settings.CACHE_LOCK_TIMEOUT)  # expires with the lock timeout
        except Exception:
            # Best-effort caching: fall through and run the query.
            pass

    # NOTE(review): option_mapping, sphinxql_list_options and
    # sphinxql_options are defined but not referenced below -- kept for
    # interface stability / TODO wiring against sp_index_option.
    option_mapping = {
        'mode' : {
            'extended' : SPH_MATCH_EXTENDED2,
            'boolean'  : SPH_MATCH_BOOLEAN,
            'all'      : SPH_MATCH_ALL,
            'phrase'   : SPH_MATCH_PHRASE,
            'fullscan' : SPH_MATCH_FULLSCAN,
            'any'      : SPH_MATCH_ANY,
        }
    }
    options = {
        'sortby'      : '',
        'mode'        : 'extended',
        'groupby'     : '',
        'groupsort'   : '',
        'offset'      : 0,
        'limit'       : 1000,
        'max_matches' : 0,
        'cutoff'      : 0,
        'fields'      : '*',
    }
    sphinxql_list_options = {
        'ranker' : [
            'proximity_bm25', 'bm25', 'none', 'wordcount', 'proximity',
            'matchany', 'fieldmask', 'sph04', 'expr', 'export',
        ],
        'idf'         : ['normalized', 'plain'],
        'sort_method' : ['pq', 'kbuffer'],
    }
    sphinxql_options = {
        'agent_query_timeout' : 10000,
        'boolean_simplify'    : 0,
        'comment'             : '',
        'cutoff'              : 0,
        'field_weights'       : '',
        'global_idf'          : '',
        'idf'                 : 'normalized',
        'index_weights'       : '',
        'max_matches'         : 10000,
        'max_query_time'      : 10000,
        'ranker'              : 'proximity_bm25',
        'retry_count'         : 2,
        'retry_delay'         : 100,
        'reverse_scan'        : 0,
        'sort_method'         : 'pq',
    }
    # Accept either numeric or keyword sort direction.
    order_direction = {
        '-1'   : 'DESC',
        'DESC' : 'DESC',
        '1'    : 'ASC',
        'ASC'  : 'ASC',
    }

    try:
        # Clause keyword -> request/sql dict key, in SphinxQL order.
        sql_sequence = [
            ('SELECT', 'fields'),
            ('FROM', 'indexes'),
            ('WHERE', 'where'),
            ('GROUP BY', 'group_by'),
            ('WITHIN GROUP ORDER BY', 'order_within_group'),
            ('ORDER BY', 'order_by'),
            ('LIMIT', 'limit'),
            ('OPTION', 'option'),
        ]
        sql = {}
        for sql_clause, key in sql_sequence:
            sql[key] = ''
            if key not in r:
                r[key] = ''

        # NOTE(review): no separator between the primary index and the
        # first extra index -- 'idx' + 'a,b' == 'idxa,b'. Preserved
        # as-is; confirm callers never pass extra indexes, or fix to
        # ','.join([index] + r['indexes']).
        sql['indexes'] = index + ','.join(r['indexes'])

        if isinstance(r['fields'], list):
            sql['fields'] = ','.join(r['fields'])
        else:
            sql['fields'] = options['fields']

        if r['group_by'] != '':
            # BUG FIX: read r['group_by'] -- the original read
            # r['groupby'], a key never defaulted above (KeyError).
            sql['group_by'] = r['group_by']

        if not isinstance(r['limit'], dict):
            r['limit'] = { 'offset' : '0', 'count' : options['limit'] }
        # BUG FIX: the formatted limit was never copied into sql[],
        # so the LIMIT clause was always dropped from the statement.
        sql['limit'] = '%(offset)s, %(count)s' % r['limit']

        # BUG FIX: order_direction is a dict; the original *called* it
        # (order_direction(...)), raising TypeError. Subscript instead.
        sql['order_by'] = ','.join([
            '%s %s' % (order[0], order_direction[order[1].upper()])
            for order in r['order_by']
        ])
        if r['order_within_group'] != '':
            sql['order_within_group'] = ','.join([
                '%s %s' % (order[0], order_direction[order[1].upper()])
                for order in r['order_within_group']
            ])

        # WHERE: dictionary e.g. { 'date_from' : [[ '>', 13445454350 ]] }
        # Values are bound as query parameters, never interpolated.
        where_clauses = []
        value_list = []
        if isinstance(r['where'], dict):
            for field, conditions in r['where'].items():
                for condition in conditions:
                    operator, value = condition
                    value_list.append(value)
                    where_clauses.append('%s%s%%s' % (field, operator))
        value_list.append(r['q'])
        # BUG FIX: 'MATCH(%%s)' escapes to a literal '%s' under the
        # DB-API paramstyle, leaving the bound r['q'] parameter
        # unconsumed; a single '%s' is the real placeholder.
        where_clauses.append('MATCH(%s)')
        # BUG FIX: conditions were joined with a bare space, producing
        # invalid SQL whenever more than one condition was present.
        sql['where'] = ' AND '.join(where_clauses)

        if isinstance(r['option'], dict):
            option_parts = []
            for option_name, option_value in r['option'].items():
                if isinstance(option_value, dict):
                    # Named-value option, e.g. field_weights=(a=1,b=2).
                    option_value = '(' + ','.join([
                        '%s = %s' % (k, option_value[k]) for k in option_value.keys()
                    ]) + ')'
                option_parts.append('%s = %s' % (option_name, option_value))
            sql['option'] = ','.join(option_parts)

        response = { 'results' : None, 'meta' : None }
        try:
            cursor = connections['sphinx:' + index].cursor()
            statement = ' '.join([
                clause[0] + ' ' + sql[clause[1]]
                for clause in sql_sequence if sql[clause[1]] != ''
            ])
            cursor.execute(statement, value_list)
            response['results'] = cursorfetchall(cursor)
        except Exception as e:
            error_message = 'Sphinx Search Query failed with error "%s"' % str(e)
            return E(message = error_message)

        # SHOW META is informational; ignore failures.
        try:
            cursor.execute('SHOW META')
            response['meta'] = cursorfetchall(cursor)
        except Exception:
            pass

        if settings.SEARCH_CACHE:
            # BUG FIX: the expiry constant was referenced unqualified
            # (NameError); excerpts() uses settings.EXCERPTS_CACHE_EXPIRE,
            # so qualify consistently.
            cache.set(cache_key, response, True, settings.SEARCH_CACHE_EXPIRE, lock_key)
    except Exception as e:
        return E(message = str(e))
    return R(response)