def longitudeQuery(selections, param_name):
    """
    Build the raw SQL sub-select for a longitude range constraint.

    Each (min, max) pair the user supplied is reduced to a midpoint and a
    half-width.  A max smaller than its min is treated as a range that wraps
    through 360.  The generated clause compares the midpoint against the
    parameter's column (numeric suffix stripped) and the half-width against
    the companion '<category>.d_<name>' column.

    selections -- dict of param_name -> list of values; both the
                  '<param>1' (min) and '<param>2' (max) entries are read
    param_name -- 'category.name' of either side of the range

    Returns (sql, params): the select statement with %s placeholders and
    the tuple of values to bind to them.  Pairs whose half-width is zero
    contribute no clause.

    Raises KeyError if either side is missing from selections or the two
    sides hold a different number of values; raises IndexError if
    param_name has no '.' separator.
    """
    name_parts = param_name.split('.')
    category, field = name_parts[0], name_parts[1]
    field_base = stripNumericSuffix(field)
    param_base = stripNumericSuffix(param_name)
    half_width_column = category + '.d_' + field_base

    lows = selections[param_base + '1']
    highs = selections[param_base + '2']
    if len(lows) != len(highs):
        # lopsided ranges: every longitude range needs both sides
        raise KeyError

    # the clause text is the same for every pair, only the bound values vary
    clause_template = ("(abs(abs(mod(%s - " + param_base +
                       " + 180., 360.)) - 180.) <= %s + " +
                       half_width_column + ")")

    where_parts = []
    bind_values = []
    for low, high in zip(lows, highs):
        # midpoint of the user's range, adding a full turn when it wraps
        if high >= low:
            midpoint = (low + high) / 2.
        else:
            midpoint = (low + high + 360.) / 2.
        # half-width is measured before the midpoint is folded back below 360
        half_width = midpoint - low
        if midpoint >= 360:
            midpoint = midpoint - 360.

        if half_width:
            where_parts.append(clause_template)
            bind_values.extend([midpoint, half_width])

    if category == 'obs_general':
        key_field = 'obs_general.id'
    else:
        key_field = 'obs_general_id'
    source_table = param_base.split('.')[0]

    sql = "select " + key_field + " from " + source_table + " where " + ' OR '.join(where_parts)
    return sql, tuple(bind_values)
def getRangeEndpoints(request, slug, fmt='json'):
    """
    returns valid range endpoints for field given selections and extras

    request -- the http request; request.GET carries the current search
    slug    -- slug of the range widget being hinted
    fmt     -- response format handed to responseFormats

    Returns the cached endpoints through responseFormats when a cache entry
    exists.  Raises Http404 when the slug is unknown or the table model has
    no manager.

    NOTE(review): a later definition with the same name appears further
    down in this source; if both live in one module the later one is the
    one that is actually bound -- confirm which is intended.
    """
    # we want results for this param as they would be without the param
    # itself constrained, so it is removed from the selections below
    update_metrics(request)

    param_info = search.views.get_param_info_by_slug(slug)
    if not param_info:
        # the slug lookup shown in this file returns False for unknown slugs;
        # without this guard the attribute access below raises AttributeError
        raise Http404("Does Not Exist")

    param_name = param_info.param_name()
    form_type = param_info.form_type
    table_name = param_info.category_name

    # bare field names (table prefix removed) for both sides of the range
    param_name1 = stripNumericSuffix(param_name.split('.')[1]) + '1'
    param_name2 = stripNumericSuffix(param_name.split('.')[1]) + '2'
    param_name_no_num = stripNumericSuffix(param_name1)

    # NOTE(review): Django's apps.get_model raises LookupError for an unknown
    # model, which is not handled here -- confirm whether that can happen
    table_model = apps.get_model('search', table_name.title().replace('_', ''))

    if form_type == 'RANGE' and '1' not in param_info.slug and '2' not in param_info.slug:
        # single column range: both sides read the same field
        param_name1 = param_name2 = param_name_no_num

    try:
        (selections, extras) = search.views.urlToSearchParams(request.GET)
        user_table = search.views.getUserQueryTable(selections, extras)
        has_selections = True
    except TypeError:
        selections = {}
        has_selections = False
        user_table = False

    # remove this param from the user's query if it's there, because
    # otherwise it will just return its own values.  selections is keyed by
    # the full 'table.field' param name, so the full names are removed as
    # well as the bare field names the original code tried.
    full_name_no_num = stripNumericSuffix(param_name)
    own_keys = (
        param_name1,
        param_name2,
        full_name_no_num,
        full_name_no_num + '1',
        full_name_no_num + '2',
    )
    for key in own_keys:
        selections.pop(key, None)

    if not selections:
        # nothing else is constrained, so there is no user search to key on
        has_selections = False
        user_table = False

    # cached already?
    cache_key = "rangeep" + param_name_no_num
    if user_table:
        cache_key += str(search.views.setUserSearchNo(selections, extras))

    # single lookup: a second cache.get could miss if the entry expired
    # between the check and the read
    range_endpoints = cache.get(cache_key)
    if range_endpoints is not None:
        return responseFormats(range_endpoints, fmt, template='mults.html')

    try:
        results = table_model.objects  # this is a count(*), group_by query
    except AttributeError as e:
        log.error(e)
        log.error("could not find table model for table_name: %s", table_name)
        raise Http404("Does Not Exist")
def get_param_info_by_param(param_name):
    """
    Look up the ParamInfo row for a 'category.name' style param_name.

    The exact name is tried first.  If no row matches, the name with its
    numeric suffix stripped is tried, because single column range queries
    do not carry the numeric suffix.

    Returns the ParamInfo instance, or False when neither lookup matches.
    Raises IndexError if param_name contains no '.'.
    """
    pieces = param_name.split('.')
    category = pieces[0]
    field = pieces[1]

    try:
        return ParamInfo.objects.get(category_name=category, name=field)
    except ParamInfo.DoesNotExist:
        pass

    # fall back to the suffix-less name used by single column ranges
    field_base = stripNumericSuffix(field)
    try:
        return ParamInfo.objects.get(category_name=category, name=field_base)
    except ParamInfo.DoesNotExist:
        return False
def get_param_info_by_slug(slug):
    """
    Look up the ParamInfo row for a url slug.

    Three spellings are tried in order, and the first match is returned:
      1. the slug with its numeric suffix stripped
      2. the slug exactly as given
      3. the slug with '1' appended -- qtypes for ranges come through
         without a number, which doesn't exist in param_info, so the
         lower side of the range is used instead

    Returns the ParamInfo instance, or False when the slug is not a query
    param at all.
    """
    candidates = (stripNumericSuffix(slug), slug, slug + '1')
    for candidate in candidates:
        try:
            return ParamInfo.objects.get(slug=candidate)
        except ParamInfo.DoesNotExist:
            continue

    # this is not a query param, ignore it
    return False
def constructQueryString(selections, extras):
    """
    Turn the user's selections into one raw SQL statement plus its params.

    Mult, range and string constraints are expressed as Django Q objects
    and compiled to SQL through the ObsGeneral model.  Longitude ranges
    (special_query == 'long') cannot be expressed that way, so each is
    built as a raw sub-select by longitudeQuery and appended to the
    compiled SQL as "obs_general.id in (<sub-select>)".

    selections -- dict of param_name -> list of values
    extras     -- dict; extras['qtypes'] (optional) maps a param name
                  without numeric suffix to its list of query types

    Returns (sql, params) with params a tuple, or False when a param_name
    has no ParamInfo or Django reports the query as an EmptyResultSet.

    Raises ValidationError when a longitude range has an empty side, and
    KeyError when one side of a longitude range is absent from selections.
    """
    all_qtypes = extras['qtypes'] if 'qtypes' in extras else []

    long_queries = []     # (sql, params) pairs, longitude queries are pure sql
    q_objects = []        # Q objects for everything django can express
    finished_ranges = []  # both sides of a range are handled at once

    from metadata.views import getMultName  # avoids circular import issue

    for param_name, value_list in selections.items():
        param_name_no_num = stripNumericSuffix(param_name)
        cat_name = param_name.split('.')[0]
        cat_model_name = ''.join(cat_name.lower().split('_'))

        param_info = get_param_info_by_param(param_name)
        if not param_info:
            log.error('constructQueryString: No param_info for %s', param_name)
            log.error('.. Selections: %s', str(selections))
            log.error('.. Extras: %s', str(extras))
            return False

        form_type = param_info.form_type
        special_query = param_info.special_query

        # qtypes the user supplied for this param, if any
        if param_name_no_num in all_qtypes:
            qtypes = all_qtypes[param_name_no_num]
        else:
            qtypes = []

        # MULTs: translate the chosen labels/values into mult table pks
        if form_type in settings.MULT_FORM_TYPES:
            mult_name = getMultName(param_name)
            model = apps.get_model('search', mult_name.title().replace('_', ''))
            matching_rows = model.objects.filter(
                Q(label__in=value_list) | Q(value__in=value_list)).values('pk')
            mult_values = [row['pk'] for row in matching_rows]

            if cat_name != 'obs_general':
                # follow the relation from obs_general to the category table
                lookup = "%s__%s__in" % (cat_model_name, mult_name)
            else:
                lookup = "%s__in" % mult_name
            q_objects.append(Q(**{lookup: mult_values}))

        # RANGE
        if form_type in settings.RANGE_FIELDS:
            # the first side seen builds the whole range, so the second
            # side is skipped when it comes around
            if param_name_no_num in finished_ranges:
                continue
            finished_ranges.append(param_name_no_num)

            if special_query == 'long':
                # both sides of the range must be defined by the user
                if selections[param_name_no_num + '1'] and selections[param_name_no_num + '2']:
                    long_queries.append(longitudeQuery(selections, param_name))
                else:
                    raise ValidationError
            else:
                q_objects.append(
                    range_query_object(selections, param_name, qtypes))

        # STRING
        if form_type == 'STRING':
            q_objects.append(
                string_query_object(param_name, value_list, qtypes))

    # let django generate the sql for everything it can express
    try:
        sql, params = ObsGeneral.objects.filter(
            *q_objects).values('pk').query.sql_with_params()
    except EmptyResultSet:
        return False

    # append any longitudinal queries to the query string
    if long_queries:
        if 'where' in sql.lower():
            sql = sql + ' AND obs_general.id in '
        else:
            sql = sql + ' where obs_general.id in '
        sql = sql + ' AND obs_general.id in '.join(
            [" (%s) " % long_sql for long_sql, _ in long_queries])

        # each sub-select contributes its own bind values, in the same
        # order the sub-selects were appended to the sql above
        params = list(params)
        for _, long_params in long_queries:
            params.extend(long_params)
        params = tuple(params)

    return sql, params
def range_query_object(selections, param_name, qtypes):
    """
    builds query for numeric ranges where 2 data columns represent min and
    max values, and also for single column ranges

    # just some text for searching this file
    any all only
    any / all / only
    any/all/only

    selections -- dict of param_name -> list of values; the '<param>1'
                  (min) and '<param>2' (max) entries are read, either may
                  be absent
    param_name -- 'table.field' name of either side of the range
    qtypes     -- list of query types ('any', 'all', 'only'), one per
                  range; a missing entry defaults to 'any'

    For the i-th (min, max) pair the expression is:
      all  -- column_min <= min AND column_max >= max
      only -- column_min >= min AND column_max <= max
      any  -- column_min <= max AND column_max >= min
    A bound that is missing (None or empty string) leaves out its half of
    the expression.  A bound of 0 is a real bound and is kept.

    Returns the Q object made by OR-ing the expressions of every pair, or
    False when param_name has no ParamInfo.  Raises TypeError (from reduce)
    when no pair yields any usable bound.
    """
    param_info = get_param_info_by_param(param_name)
    if not param_info:
        return False

    form_type = param_info.form_type
    table_name = param_info.category_name

    # we will define both sides of the query, so define those param names
    param_name_no_num = stripNumericSuffix(param_name)
    param_name_min = param_name_no_num + '1'
    param_name_max = param_name_no_num + '2'

    # grab min and max values from query selections object
    values_min = selections[param_name_min] if param_name_min in selections else []
    values_max = selections[param_name_max] if param_name_max in selections else []

    # for constructing the query, a single column range uses the same
    # column for both sides
    if is_single_column_range(param_name):
        param_name_min = param_name_max = param_name_no_num

    # to follow related models we need the lowercase model name in front of
    # the field, UNLESS the param is in the obs_general table itself
    field_min = param_name_min.split('.')[1]
    field_max = param_name_max.split('.')[1]
    if table_name != 'obs_general':
        relation_prefix = table_name.lower().replace('_', '') + '__'
        field_min = relation_prefix + field_min
        field_max = relation_prefix + field_max

    # if these are times convert values from time string to seconds
    if form_type == 'TIME':
        values_min = convertTimes(values_min)
        values_max = convertTimes(values_max)
        # The original code raised Exception("InvalidTimes") here but caught
        # it again with a bare except, so a None entry (presumably an
        # unparseable time -- confirm against convertTimes) never stopped
        # the query.  That best-effort behavior is kept, but made visible.
        if None in values_min or None in values_max:
            log.error(
                'range_query_object: invalid time value for %s, that bound is ignored',
                param_name_no_num)
            log.error('.. values_min: %s', str(values_min))
            log.error('.. values_max: %s', str(values_max))

    # a query can define multiple ranges for the same widget
    # (not currently implemented in UI)
    count = max(len(values_min), len(values_max))

    # only complain when qtypes were passed but do not cover every range;
    # passing none at all is the normal "any" default
    if qtypes and len(qtypes) < count:
        log.error(
            'Passed qtypes is shorter in length than longest range values list, defaulting to "any"'
        )
        log.error('.. values_min: %s', str(values_min))
        log.error('.. values_max: %s', str(values_max))
        log.error('.. qtypes: %s', str(qtypes))

    all_query_expressions = []  # these will be joined by OR
    for i in range(count):
        value_min = values_min[i] if i < len(values_min) else None
        value_max = values_max[i] if i < len(values_max) else None
        qtype = qtypes[i] if i < len(qtypes) else 'any'

        # explicit presence test: plain truthiness would discard a bound of 0
        has_min = value_min is not None and value_min != ''
        has_max = value_max is not None and value_max != ''

        # swap value_min and value_max if they were given in reverse order
        if has_min and has_max:
            (value_min, value_max) = sorted([value_min, value_max])

        q_exp1, q_exp2 = None, None
        if qtype == 'all':
            if has_min:
                # param_name_min <= value_min
                q_exp1 = Q(**{"%s__lte" % field_min: value_min})
            if has_max:
                # param_name_max >= value_max
                q_exp2 = Q(**{"%s__gte" % field_max: value_max})
        elif qtype == 'only':
            if has_min:
                # param_name_min >= value_min
                q_exp1 = Q(**{"%s__gte" % field_min: value_min})
            if has_max:
                # param_name_max <= value_max
                q_exp2 = Q(**{"%s__lte" % field_max: value_max})
        else:
            # defaults to qtype = any
            if has_max:
                # param_name_min <= value_max
                q_exp1 = Q(**{"%s__lte" % field_min: value_max})
            if has_min:
                # param_name_max >= value_min
                q_exp2 = Q(**{"%s__gte" % field_max: value_min})

        # put the two halves together as an "&" query; a pair with no
        # usable bound contributes nothing instead of a None entry
        if q_exp1 is not None and q_exp2 is not None:
            all_query_expressions.append(q_exp1 & q_exp2)
        elif q_exp1 is not None:
            all_query_expressions.append(q_exp1)
        elif q_exp2 is not None:
            all_query_expressions.append(q_exp2)

    # now we have all query expressions, join them with 'OR'
    return reduce(OR, all_query_expressions)
def urlToSearchParams(request_get):
    """
    OPUS lets users put nice readable things in the URL, like
    "planet=Jupiter" rather than "planet_id=3".

    This function takes the url params and translates them into a list
    that contains 2 dictionaries:

    the first dict is the user selections: keys are param_names of data
    columns in the data table, values are always lists and represent the
    user's selections

    the 2nd dict is any extras being passed by the user, like qtypes that
    define what types of queries will be performed for each param-value
    set in the first dict

    NOTE: pass request_get = request.GET to this func please
    (this func doesn't return an http response so unit tests freak if you
    pass it an http request :)

    example command line usage:

        from search.views import *
        from django.http import QueryDict
        q = QueryDict("planet=Saturn")
        (selections, extras) = urlToSearchParams(q)
        selections   ->  {'planet_id': [u'Saturn']}
        extras       ->  {'qtypes': {}}

    Keys that do not resolve to a ParamInfo are skipped.  Returns
    [selections, {'qtypes': qtypes}], or [{}, {}] when nothing was selected.
    Raises ValueError when a RANGE value is not a number.
    """
    selections = {}
    qtypes = {}

    for slug, raw_value in request_get.items():
        values = raw_value.strip(',').split(',')

        # "qtype-<slug>" is a statement of query type for <slug>
        is_qtype = slug.startswith('qtype')
        if is_qtype:
            slug_parts = slug.split('-')
            if len(slug_parts) < 2:
                # malformed qtype key, there is no slug to attach it to
                continue
            slug = slug_parts[1]

        param_info = get_param_info_by_slug(slug)
        if not param_info:
            # the param passed doesn't exist or is a user pref
            continue

        param_name = param_info.param_name()
        form_type = param_info.form_type
        param_name_no_num = stripNumericSuffix(param_name)

        if is_qtype:
            # use the value that came with this very key; looking the key up
            # again under the suffix-less slug fails whenever the url key
            # carries a numeric suffix
            qtypes[param_name_no_num] = values
            continue

        if form_type in settings.MULT_FORM_TYPES:
            # mult form types can be sorted to save duplicate queries
            selections[param_name] = sorted(values)
        elif raw_value:
            # no other form types can be sorted since their ordering
            # corresponds to qtype ordering
            if form_type == "RANGE":
                numbers = [float(value) for value in values]
                if param_name == param_name_no_num:
                    # single column range query: the side (last character of
                    # the slug) is appended to tell min from max
                    selections[param_name + slug[-1]] = numbers
                else:
                    # normal 2-column range query
                    selections[param_name] = numbers
            else:
                selections[param_name] = values

    if selections:
        return [selections, {'qtypes': qtypes}]
    return [{}, {}]
def getRangeEndpoints(request, slug, fmt='json'):
    """
    fetch range widget hinting data for widget defined by slug
    based on current search defined in request

    this is the valid range endpoints that appear in
    range widgets (green numbers)

    returns a dictionary like:

        { min: 63.592, max: 88.637, nulls: 2365}

    request -- the http request; request.GET carries the current search
    slug    -- slug of the range widget being hinted
    fmt     -- response format handed to responseFormats

    Returns the cached endpoints through responseFormats when a cache entry
    exists.  Raises Http404 when the slug is unknown or the table model has
    no manager.
    """
    # we want results for this param as they would be without the param
    # itself constrained, so it is removed from the selections below
    update_metrics(request)

    param_info = search.views.get_param_info_by_slug(slug)
    if not param_info:
        # the slug lookup shown in this file returns False for unknown slugs;
        # without this guard the attribute access below raises AttributeError
        raise Http404("Does Not Exist")

    param_name = param_info.param_name()
    form_type = param_info.form_type
    table_name = param_info.category_name

    # "param" is the field name, the param_name with the table_name stripped
    param1 = stripNumericSuffix(param_name.split('.')[1]) + '1'
    param2 = stripNumericSuffix(param_name.split('.')[1]) + '2'
    param_no_num = stripNumericSuffix(param1)

    # NOTE(review): Django's apps.get_model raises LookupError for an unknown
    # model, which is not handled here -- confirm whether that can happen
    table_model = apps.get_model('search', table_name.title().replace('_', ''))

    if form_type == 'RANGE' and '1' not in param_info.slug and '2' not in param_info.slug:
        param1 = param2 = param_no_num  # single column range query

    try:
        (selections, extras) = search.views.urlToSearchParams(request.GET)
        user_table = search.views.getUserQueryTable(selections, extras)
        has_selections = True
    except TypeError:
        selections = {}
        has_selections = False
        user_table = False

    # remove this param from the user's query if it is constrained
    # this keeps the green hinting numbers from reacting
    # to changes to its own field
    param_name_no_num = stripNumericSuffix(param_name)
    own_keys = (
        param_name_no_num,
        param_name_no_num + '1',
        param_name_no_num + '2',
    )
    for key in own_keys:
        selections.pop(key, None)

    if not selections:
        # nothing else is constrained, so there is no user search to key on
        has_selections = False
        user_table = False

    # cached already?
    cache_key = "rangeep" + param_no_num
    if user_table:
        cache_key += str(search.views.setUserSearchNo(selections, extras))

    # single lookup: a second cache.get could miss if the entry expired
    # between the check and the read
    range_endpoints = cache.get(cache_key)
    if range_endpoints is not None:
        return responseFormats(range_endpoints, fmt, template='mults.html')

    # no cache found, calculating..
    try:
        results = table_model.objects  # this is a count(*), group_by query
    except AttributeError as e:
        log.error("getRangeEndpoints threw: %s", str(e))
        log.error("Could not find table model for table_name: %s", table_name)
        raise Http404("Does Not Exist")