def read_xls_response(self, xls_response):
    """
    Decode an XLS document from DEPATISnet into a list of dictionaries.

    :param xls_response: file-like object yielding the raw XLS payload
    :return: list of dicts with keys pubnumber, pubdate, appdate, title,
             applicant, inventor; dates are converted from German format
             to ISO notation, empty dates become None.
    :raises KeyError: when a row lacks one of the expected columns
             (re-raised after logging).
    """
    data = excel_to_dict(xls_response.read())
    results = []
    for row in data:

        # Skip empty rows
        if not row:
            continue

        try:
            item = {
                'pubnumber': row['Publication number'],
                # Use explicit conditional expressions instead of the
                # fragile "x and f(x) or None" idiom, which would yield
                # None whenever f(x) is falsy.
                'pubdate': date_iso(from_german(row['Publication date'])) if row['Publication date'] else None,
                'appdate': date_iso(from_german(row['Application date'])) if row['Application date'] else None,
                'title': row['Title'],
                'applicant': row['Applicant/Owner'],
                'inventor': row['Inventor'],
            }
        except KeyError as ex:
            logger.error(
                'Could not decode row from DEPATISnet. row={row}, exception={exception}\n{trace}'
                .format(row=row, exception=ex, trace=_exception_traceback()))
            raise

        results.append(item)

    return results
def sip_published_data_crawl_handler(request):
    """Crawl published-data at SIP"""

    # XML query expression
    expression = request.params.get('expression', '')
    log.info('query raw: ' + expression)

    # Quote the expression when it is unspecific and still unquoted
    if should_be_quoted(expression):
        expression = '"%s"' % expression

    # constituents: abstract, biblio and/or full-cycle
    constituents = request.matchdict.get('constituents', 'full-cycle')

    # How many results to fetch per request
    chunksize = int(request.params.get('chunksize', '2500'))

    try:
        return sip_published_data_crawl(constituents, expression, chunksize)

    except Exception as ex:
        # Prefer the upstream-provided user message when available
        message = ex.user_info if hasattr(ex, 'user_info') else unicode(ex)
        request.errors.add('sip-crawl', 'crawl', message)
        log.error(request.errors)
        log.error(u'query="{0}", exception:\n{1}'.format(expression, _exception_traceback()))
def ops_published_data_crawl_handler(request):
    """Crawl published-data at OPS"""

    # constituents: abstract, biblio and/or full-cycle
    constituents = request.matchdict.get('constituents', 'full-cycle')
    # Leftover debug output removed; sibling handlers keep this commented out as well.
    #print('constituents:', constituents)

    # CQL query string
    query = request.params.get('expression', '')
    log.info('query raw: ' + query)

    # Transcode CQL query expression
    search = cql_prepare_query(query)

    # Propagate keywords to highlighting component
    keywords_to_response(request, search=search)

    log.info('query cql: ' + search.expression)

    # How many results to fetch per request
    chunksize = int(request.params.get('chunksize', '100'))

    try:
        result = ops_published_data_crawl(constituents, search.expression, chunksize)
        return result

    except Exception as ex:
        log.error(
            'OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.
            format(query, ex, _exception_traceback()))
        request.errors.add('ops-published-data-crawl', 'query', str(ex))
def depatech_published_data_crawl_handler(request):
    """Crawl published-data at MTC depa.tech"""

    # Get hold of query expression and filter
    query = SmartBunch({
        'expression': request.params.get('expression', ''),
        'filter': request.params.get('filter', ''),
    })
    log.info('query: {}'.format(query))

    # Quote the expression when it is unspecific and still unquoted
    if should_be_quoted(query.expression):
        query.expression = '"%s"' % query.expression

    # constituents: abstract, biblio and/or full-cycle
    constituents = request.matchdict.get('constituents', 'full-cycle')

    # How many results to fetch per request
    chunksize = int(request.params.get('chunksize', '5000'))

    try:
        return depatech_crawl(constituents, query, chunksize)

    except Exception as ex:
        request.errors.add('depatech-crawl', 'crawl', str(ex))
        log.error(request.errors)
        log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
def ops_published_data_crawl_handler(request): """Crawl published-data at OPS""" # constituents: abstract, biblio and/or full-cycle constituents = request.matchdict.get('constituents', 'full-cycle') print 'constituents:', constituents # CQL query string query = request.params.get('expression', '') log.info('query raw: ' + query) query_object, query = cql_prepare_query(query) propagate_keywords(request, query_object) log.info('query cql: ' + query) chunksize = int(request.params.get('chunksize', '100')) try: result = ops_published_data_crawl(constituents, query, chunksize) return result except Exception as ex: log.error( u'OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'. format(query, ex, _exception_traceback())) request.errors.add('ops-published-data-crawl', 'query', str(ex))
def depatisnet_published_data_crawl_handler(request):
    """Crawl published-data at DEPATISnet"""

    search, options = prepare_search(request)

    # Crawl in large chunks
    chunksize = 1000
    options.update({'limit': chunksize})

    try:
        return dpma_published_data_search(search.expression, options)

    except SyntaxError as ex:
        request.errors.add('depatisnet-search', 'expression', str(ex.msg))
        log.warn(request.errors)

    except Exception as ex:
        http_response = getattr(ex, 'http_response', None)
        # Bugfix: formerly referenced the undefined name "query" here, which
        # raised a NameError inside the handler and masked the real error.
        log.error(
            u'DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'
            .format(search.expression, ex, http_response, _exception_traceback()))
        message = u'An exception occurred while processing your query<br/>Reason: {}'.format(
            ex)
        request.errors.add('depatisnet-search', 'crawl', message)
def parse_expression_cql(self, expression):
    """
    Parse a CQL search expression, recompile it to apply number
    normalization, and store parser, expression and extracted keywords
    on this instance (self.cql_parser, self.expression, self.keywords,
    self.keywords_origin).
    """

    # Fixup query: Wrap into quotes if CQL expression is a) unspecific, b) contains spaces and c) is still unquoted
    if should_be_quoted(expression) and u'within' not in expression:
        expression = u'"%s"' % expression

    # Parse and recompile CQL query string to apply number normalization
    query_object = None
    try:

        # v1: Cheshire3 CQL parser
        #query_object = cql_parse(query)
        #query = query_object.toCQL().strip()

        # v2 pyparsing CQL parser
        query_object = CQL(expression, grammar=self.grammar, keyword_fields=self.keyword_fields).polish()
        query_recompiled = query_object.dumps()
        if query_recompiled:
            # Bugfix: compare against the original expression *before*
            # overwriting it — the previous code assigned first, so the
            # comparison was always False and this log never fired.
            if query_recompiled != expression:
                logger.info(
                    u'Recompiled search expression to "{query}"'.format(
                        query=query_recompiled))
            expression = query_recompiled

    except Exception as ex:
        # TODO: Can we get more details from diagnostic information to just stop here w/o propagating obviously wrong query to OPS?
        logger.warn(
            u'CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'
            .format(expression, ex, _exception_traceback()))

    self.cql_parser = query_object
    self.expression = expression

    if query_object:

        # Extract keywords, preferring the grammar's own extraction and
        # falling back to generic computation for grammars without it.
        keywords = []
        try:
            keywords = query_object.keywords()
            self.keywords_origin = 'grammar'
        except AttributeError:
            keywords = compute_keywords(query_object)
            self.keywords_origin = 'compute'

        # List of keywords should contain only unique items
        self.keywords = unique_sequence(keywords)
def handle_generic_exception(request, ex, backend_name, query):
    """
    Turn an arbitrary backend exception into a user-facing HTML message,
    logging full details. Re-raises cornice JSON errors untouched.
    """

    # Let cornice's own JSON errors propagate unchanged
    if isinstance(ex, cornice.util._JSONError):
        raise

    http_response = getattr(ex, 'http_response', None)

    # Qualified exception name, e.g. "pymongo.errors.ServerSelectionTimeoutError: ..."
    exception_class = ex.__class__
    module_name = exception_class.__module__
    class_name = exception_class.__name__
    reason = u'{}.{}: {}'.format(module_name, class_name, ex.message)

    # Explicit keyword arguments instead of the former "**locals()" trick
    log.critical(u'{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format(
        backend_name=backend_name, query=query, reason=reason,
        http_response=http_response, exception=_exception_traceback()))

    message = u'An exception occurred while processing your query.<br/>\nReason: {}<br/><br/>\n'.format(reason)
    if module_name == 'pymongo.errors':
        message += 'Error connecting to cache database. Please report this problem to us.'

    return message
def depatisnet_published_data_crawl_handler(request):
    """Crawl published-data at DEPATISnet"""

    # CQL query string
    query = request.params.get('expression', '')
    log.info('query raw: ' + query)

    # Transcode CQL query expression, applying number normalization
    query_object, query = cql_prepare_query(query)
    propagate_keywords(request, query_object)

    # Compute query options, like
    # - limit
    # - sorting
    # - whether to remove family members
    chunksize = 1000
    options = {'limit': chunksize}

    # propagate request parameters to search options parameters
    request_to_options(request, options)

    log.info('query cql: ' + query)

    try:
        return dpma_published_data_search(query, options)

    except SyntaxError as ex:
        request.errors.add('depatisnet-search', 'expression', str(ex.msg))
        log.warn(request.errors)

    except Exception as ex:
        http_response = getattr(ex, 'http_response', None)
        log.error(u'DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format(
            query, ex, http_response, _exception_traceback()))
        message = u'An exception occurred while processing your query<br/>Reason: {}'.format(ex)
        request.errors.add('depatisnet-search', 'crawl', message)
def config_parameters(self):
    """
    Assemble the configuration parameters exposed to the JavaScript domain:
    environment, settings, request parameters, user attributes and opaque
    (token-carried) parameters, merged with increasing precedence, then
    filtered and amended for the current vhost.
    """
    request = get_current_request()

    # prefix environment and settings in configuration model
    environment = dict_prefix_key(self.environment(), 'request.')
    setting_params = dict_prefix_key(self.config_settings(), 'setting.')

    request_params = dict(request.params)

    user_params = {}
    if request.user:

        # Formulate JS-domain settings
        user_params = dict_prefix_key({
            'modules': request.user.modules,
            'tags': request.user.tags}, 'user.')

        # Get representation of user attributes
        user_dict = json.loads(request.user.to_json())

        # Strip sensitive information
        if '_id' in user_dict:
            del user_dict['_id']
        if 'password' in user_dict:
            del user_dict['password']
        if 'upstream_credentials' in user_dict:
            del user_dict['upstream_credentials']

        # Add whole user attributes to JS-domain
        user_params['user'] = user_dict

    request_opaque = dict(request.opaque)
    request_opaque_meta = dict_prefix_key(dict(request.opaque_meta), 'opaque.meta.')

    # Derive a human-readable link expiry timestamp from the opaque token's
    # "exp" field; failures are logged but never fatal.
    try:
        unixtime = request.opaque_meta.get('exp')
        if unixtime:
            request_opaque['link_expires'] = datetime_isoformat(unixtime_to_datetime(int(unixtime)))
    except Exception as ex:
        log.error(
            'Could not compute opaque parameter link expiry time, unixtime=%s. '
            'Exception was: %s\n%s', unixtime, ex, _exception_traceback())

    # A. parameter firewall, INPUT

    # determine if we're in view-only mode by matching against the hostname
    # NOTE(review): request.headers.get('Host') can be None on a malformed
    # request, which would raise TypeError on the "in" checks below — confirm
    # upstream guarantees a Host header or guard with "or ''".
    host = request.headers.get('Host')
    isviewer = 'patentview' in host or 'viewer' in host or 'patview' in host

    # 1. don't allow "query" from outside on view-only domains
    if 'query' in request_params and isviewer:
        log.warn('parameter "query=%s" not allowed on this vhost, purging it', request_params['query'])
        del request_params['query']

    # B. merge parameters
    # 1. use "environment" as foundation (prefixed "request.")
    # 2. merge "settings" (prefixed "setting.")
    # 3. merge "opaque meta" parameters (prefixed "opaque.meta.")
    # 4. merge "request parameters"
    # 5. merge "user parameters"
    # 6. merge "opaque parameters" taking the highest precedence
    params = {}
    params['system'] = self.datasource_settings()
    params.update(environment)
    params.update(setting_params)
    params.update(request_opaque_meta)
    params.update(request_params)
    params.update(user_params)
    params.update(request_opaque)

    # C. parameter firewall, OUTPUT

    # remove "opaque parameter"
    if 'op' in params:
        del params['op']

    # D. special customizations

    # 0. Vendor
    params['vendor'] = self.vendor.name

    # 1. On patentview domains, limit access to liveview mode only
    params['isviewer'] = isviewer
    if isviewer:
        params['mode'] = 'liveview'

    # 2. Compute whether data sources are enabled
    params['datasources_enabled'] = []
    for datasource in self.registry.datasource_settings.datasources:
        if self.is_datasource_enabled(datasource):
            params['datasources_enabled'].append(datasource)

    # E. backward-compat amendments
    # Rename "ship_*" keys to "ship-*".
    # NOTE(review): mutating params while iterating items() relies on
    # Python 2 returning a list copy; under Python 3 this would raise
    # RuntimeError — iterate over list(params.items()) when porting.
    for key, value in params.items():
        if key.startswith('ship_'):
            newkey = key.replace('ship_', 'ship-')
            params[newkey] = value
            del params[key]

    return params
def pair_to_solr(cls, key, value, modifiers=None):
    """
    Translate a single (key, value) search criterion into an IFI CLAIMS
    Solr query fragment. Returns {'query': expression} on success,
    {'error': True, 'message': ...} on translation failure, or None for
    unknown keys.
    """
    # Map the generic key to the datasource-specific index field name
    try:
        fieldname = cls.datasource_indexnames[key]
    except KeyError:
        return

    expression = None
    format = u'{0}:{1}'

    # ------------------------------------------
    #   value mogrifiers
    # ------------------------------------------
    if key == 'patentnumber':
        # TODO: parse more sophisticated to make things like "EP666666 or EP666667" or "?query=pn%3AEP666666&datasource=ifi" possible
        # TODO: use different normalization flavor for IFI, e.g. JP01153210A will not work as JPH01153210A, which is required by OPS
        value = normalize_patent(value, for_ops=False)

    elif key == 'pubdate':

        """
        - pd:[19800101 TO 19851231]
        - pd:[* TO 19601231]
        - pdyear:[1980 TO 1985]
        - pdyear:[* TO 1960]
        """

        try:
            parsed = False

            # e.g. 1991 — a bare year queries the "pdyear" field
            if len(value) == 4 and value.isdigit():
                fieldname = 'pdyear'
                parsed = True

            # e.g. 1990-2014, 1990 - 2014
            value = year_range_to_within(value)

            # e.g.
            # within 1978,1986
            # within 1900,2009-08-20
            # within 2009-08-20,2011-03-03
            if 'within' in value:
                within_dates = parse_date_within(value)

                # NOTE(review): "value" is shadowed by the Python 2 list
                # comprehension variable here (Py2 leaks it into this scope);
                # harmless in this branch since "expression" is set below,
                # but worth renaming when touching this code.
                elements_are_years = all([len(value) == 4 and value.isdigit() for value in within_dates.values()])
                if elements_are_years:
                    fieldname = 'pdyear'
                else:
                    # Normalize both bounds to YYYYMMDD
                    if within_dates['startdate']:
                        within_dates['startdate'] = parse_date_universal(within_dates['startdate']).format('YYYYMMDD')
                    if within_dates['enddate']:
                        within_dates['enddate'] = parse_date_universal(within_dates['enddate']).format('YYYYMMDD')

                # Open-ended bounds become Solr wildcards
                if not within_dates['startdate']:
                    within_dates['startdate'] = '*'
                if not within_dates['enddate']:
                    within_dates['enddate'] = '*'

                expression = '{fieldname}:[{startdate} TO {enddate}]'.format(fieldname=fieldname, **within_dates)

            elif not parsed:
                # Plain single date
                value_date = parse_date_universal(value)
                if value_date:
                    value = value_date.format('YYYYMMDD')
                else:
                    raise ValueError(value)

        except Exception as ex:
            message = 'IFI CLAIMS query: Invalid date or range expression "{0}". Reason: {1}.'.format(value, ex)
            logger.warn(message + '\nException was:\n{0}'.format(_exception_traceback()))
            return {'error': True, 'message': message}

    elif key == 'inventor' or key == 'applicant':
        # Quote name values containing spaces, unless they are boolean expressions
        if not has_booleans(value) and should_be_quoted(value):
            value = u'"{0}"'.format(value)

    elif key == 'class':

        # v1: Naive implementation can only handle single values
        #value = ifi_convert_class(value)

        # v2: Advanced implementation can handle expressions on field "class"
        # Translate class expression from "H04L12/433 or H04L12/24"
        # to "(ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224)"
        try:

            # Put value into parenthesis, to properly capture expressions
            if value:
                value = u'({value})'.format(value=value)

            # Parse value as simple query expression
            query_object = CQL(cql=value)

            # Rewrite all patent classifications in query expression ast from OPS format to IFI format
            rewrite_classes_ifi(query_object, format, fieldname)

            # Serialize into appropriate upstream datasource query expression syntax
            expression = query_object.dumps()

        except pyparsing.ParseException as ex:
            return {'error': True, 'message': '<pre>' + str(ex.explanation) + '</pre>'}

    # ------------------------------------------
    #   surround with parentheses
    # ------------------------------------------
    if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']:
        if has_booleans(value) and not should_be_quoted(value) and not '{!complexphrase' in value:
            value = u'({0})'.format(value)

    # ------------------------------------------
    #   expression formatter
    # ------------------------------------------
    # Serialize into appropriate upstream datasource query expression syntax
    if not expression:
        # Pass complexphrase fulltext expressions through verbatim
        if key == 'fulltext' and '{!complexphrase' in value:
            expression = value
        else:
            expression = format_expression(format, fieldname, value)
    #print 'expression:', expression

    # ------------------------------------------
    #   final polishing
    # ------------------------------------------

    # Solr(?) syntax: boolean operators must be uppercase
    if has_booleans(expression):
        boolis = [' or ', ' and ', ' not ']
        for booli in boolis:
            expression = expression.replace(booli, booli.upper())

    return {'query': expression}
def ificlaims_download_multi(numberlist, formats):
    """
    Download documents for multiple patent numbers in multiple formats
    from IFI CLAIMS.

    :param numberlist: iterable of patent numbers
    :param formats: iterable of format specifiers, optionally with
                    colon-separated modifiers, e.g. "xml", "json:pretty"
    :return: dict with "report" (per-number availability/counts) and
             "results" (list of downloaded document attribute dicts)
    """
    logger.info(
        'ificlaims_download_multi: numberlist={numberlist}, formats={formats}'.
        format(**locals()))

    report = OrderedDict()
    results = []
    for number in numberlist:

        report.setdefault(number, OrderedDict({'format': OrderedDict()}))

        for format in formats:

            format_parts = format.split(u':')

            # decode modifiers, e.g. "json:pretty" -> format "json", modifier "pretty"
            if len(format_parts) == 1:
                format_real = format
                modifiers = []
            else:
                format_real = format_parts[0]
                modifiers = format_parts[1:]

            # initialize availability status
            report[number]['format'][format_real] = False

            # compute options
            options = {}
            if 'pretty' in modifiers:
                options['pretty'] = True

            # collect nested documents, i.e. multiple drawings
            if format_real in ['tif', 'png']:

                count = 0

                # First drawing; a failure skips this format entirely
                try:
                    result_first = ificlaims_download_single(
                        number, format_real, options)
                except Exception as ex:
                    logger.error('IFI: {ex}\n{traceback}'.format(
                        ex=ex, traceback=_exception_traceback()))
                    continue

                if result_first:
                    report[number]['format'][format_real] = True
                    report[number]['ucid'] = result_first.ucid
                    report[number]['ucid-natural'] = result_first.ucid_natural
                    results.append(result_first.__dict__)
                    count += 1

                    # fetch more drawings until exhaust
                    # NOTE(review): hard upper bound of 49 additional drawings — confirm this is intentional
                    for seq in range(2, 50):
                        options['seq'] = seq
                        try:
                            result_next = ificlaims_download_single(
                                number, format_real, options)
                        except Exception as ex:
                            logger.error('IFI: {ex}\n{traceback}'.format(
                                ex=ex, traceback=_exception_traceback()))
                            break
                        if not result_next:
                            break
                        results.append(result_next.__dict__)
                        count += 1

                # Record how many drawings were fetched for this number/format
                report[number].setdefault('count', OrderedDict())
                report[number]['count'][format_real] = count

            else:
                # Single-document formats
                try:
                    result_single = ificlaims_download_single(
                        number, format_real, options)
                except Exception as ex:
                    logger.error('IFI: {ex}\n{traceback}'.format(
                        ex=ex, traceback=_exception_traceback()))
                    continue

                if result_single:
                    report[number]['format'][format_real] = True
                    report[number]['ucid'] = result_single.ucid
                    report[number]['ucid-natural'] = result_single.ucid_natural
                    results.append(result_single.__dict__)

    response = {
        'report': report,
        'results': results,
    }
    return response
def cql_prepare_query(query, grammar=None, keyword_fields=None):
    """
    Parse a CQL search expression and recompile it, applying patent number
    normalization. Returns a 2-tuple (query_object, query_string); on parse
    errors, query_object is None and the original string is returned.
    """

    grammar_name = grammar.__name__ if grammar else u'default'
    log.info(u'Parsing search expression "{query}" with grammar "{grammar}"'.format(
        query=query, grammar=grammar_name))

    # Fall back to the combined OPS + DEPATISnet keyword fields
    if not keyword_fields:
        keyword_fields = ops_keyword_fields + DpmaDepatisnetAccess.keyword_fields

    # Fixup query: wrap into quotes if CQL string is a) unspecific, b) contains spaces and c) is still unquoted
    if should_be_quoted(query) and u'within' not in query:
        query = u'"%s"' % query

    # Parse and recompile CQL query string to apply number normalization
    query_object = None
    try:
        # pyparsing-based CQL parser
        query_object = CQL(query, grammar=grammar, keyword_fields=keyword_fields).polish()
        recompiled = query_object.dumps()
        if recompiled:
            query = recompiled

    except Exception as ex:
        # TODO: can we get more details from diagnostic information to just stop here w/o propagating obviously wrong query to OPS?
        log.warn(u'CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback()))

    return query_object, query
def pair_to_sip_xml(cls, key, value, modifiers):
    """
    Translate a single (key, value) search criterion into a SIP XML query
    fragment. Returns {'query': xml} (optionally with 'keywords'),
    {'error': True, 'message': ...} on translation failure, or an empty
    dict for unhandled keys.
    """
    # reformat modifiers to lower booleans
    # {u'fulltext': {u'claim': True, u'abstract': True, u'description': True, u'title': True}
    # ->
    # {u'fulltext': {u'claim': 'true', u'abstract': 'true', u'description': 'true', u'title': 'true'}
    # NOTE(review): mutates the caller's "modifiers" dict in place — confirm callers don't reuse it
    for modifier_field, modifier_values in modifiers.iteritems():
        if type(modifiers[modifier_field]) is types.DictionaryType:
            for modifier_name, modifier_value in modifiers[
                    modifier_field].iteritems():
                modifiers[modifier_field][modifier_name] = str(
                    modifier_value).lower()
        elif type(modifiers[modifier_field]) is types.BooleanType:
            modifiers[modifier_field] = str(
                modifiers[modifier_field]).lower()

    xml_part = None
    keywords = None
    if key == 'pubdate':
        try:
            if len(value) == 4 and value.isdigit():
                # e.g. 1978 — expand a bare year to a full-year range
                value = u'within {year}-01-01,{year}-12-31'.format(
                    year=value)

            # e.g. 1990-2014, 1990 - 2014
            value = year_range_to_within(value)

            if 'within' in value:
                # NOTE(review): bare except — swallows all errors from parse_date_within
                try:
                    within_dates = parse_date_within(value)
                except:
                    raise ValueError('Could not parse "within" expression')

                # Expand bare years in either bound to full dates
                if len(within_dates['startdate']
                       ) == 4 and within_dates['startdate'].isdigit():
                    within_dates[
                        'startdate'] = within_dates['startdate'] + '-01-01'

                if len(within_dates['enddate']
                       ) == 4 and within_dates['enddate'].isdigit():
                    within_dates[
                        'enddate'] = within_dates['enddate'] + '-12-31'

                # Choose a template matching which bounds are present
                if all(within_dates.values()):
                    template = cls.sip_xml_expression_templates[key][
                        'both']
                elif within_dates['startdate']:
                    template = cls.sip_xml_expression_templates[key][
                        'startdate']

                # API not capable of handling "enddate"-only attribute
                #elif within_dates['enddate']:
                #    template = cls.sip_xml_expression_templates[key]['enddate']

                else:
                    raise ValueError(
                        'SIP cannot handle date ranges with end date only')

                xml_part = template.format(
                    startdate=iso_to_german(within_dates['startdate']),
                    enddate=iso_to_german(within_dates['enddate']))

            else:
                # Single date: use it for both bounds
                template = cls.sip_xml_expression_templates[key]['both']
                xml_part = template.format(startdate=iso_to_german(value),
                                           enddate=iso_to_german(value))

        except Exception as ex:
            message = 'SIP query: Invalid date or range expression "{0}". Reason: {1}'.format(
                value, ex)
            logger.warn(
                message +
                ' Exception was: {0}'.format(_exception_traceback()))
            return {'error': True, 'message': message}

    elif key == 'country':
        # Offices may only be OR-concatenated
        if ' and ' in value.lower():
            message = 'SIP query: Concatenating offices with "AND" would yield zero results'
            logger.warn(message)
            return {'error': True, 'message': message}

        entries = re.split(' or ', value, flags=re.IGNORECASE)
        entries = [entry.strip() for entry in entries]
        ccids = []
        for country in entries:
            country = country.upper()
            # Resolve the country code to SIP's internal ccid
            sip_country = SipCountry.objects(cc=country).first()
            if sip_country:
                sip_ccid = sip_country.ccid
                ccids.append(sip_ccid)
            else:
                message = 'SIP query: Country "{0}" could not be resolved'.format(
                    country)
                logger.warn(message)
                return {'error': True, 'message': message}

        if ccids:
            xml_part = '<country>\n' + '\n'.join([
                '<ccid>{ccid}</ccid>'.format(ccid=ccid) for ccid in ccids
            ]) + '\n</country>'

    elif key == 'class':
        try:
            expression = SipCqlClass(value)
            xml_part = expression.dumpxml()

            # debugging
            #print '-' * 42
            #print pretty_print(xml_part)

        except ClassDecodingError as ex:
            return {'error': True, 'message': str(ex)}
        except pyparsing.ParseException as ex:
            return {
                'error': True,
                'message': '<pre>' + str(ex.explanation) + '</pre>'
            }

    elif key == 'fulltext':
        """
        parse cql subexpression (possible fields are ti, ab, de, cl, bi) and map to SIP syntax
        """
        try:
            expression = SipCqlFulltext(value, modifiers=modifiers.get(key, {}))
            xml_part = expression.dumpxml()
            keywords = expression.keywords()

            # debugging
            #print '-' * 42
            #print pretty_print(xml_part)

        except FulltextDecodingError as ex:
            return {'error': True, 'message': unicode(ex)}
        except pyparsing.ParseException as ex:
            return {
                'error': True,
                'message': u'<pre>' + ex.explanation + '</pre>'
            }
        except SyntaxError as ex:
            return {
                'error': True,
                'message': u'<pre>' + unicode(ex) + '</pre>'
            }

    elif key in cls.sip_xml_expression_templates:
        # Generic template-driven keys
        template = cls.sip_xml_expression_templates[key]
        if key == 'patentnumber':
            value = value.upper()
        xml_part = template.format(key=key,
                                   value=value.strip(),
                                   **modifiers.get(key, {}))

    else:
        logger.warn('SIP query: Could not handle pair {0}={1}'.format(
            key, value))

    response = {}
    if xml_part:
        response = {'query': xml_part}
    if keywords:
        response.update({'keywords': keywords})
    return response
def pair_to_elasticsearch(cls, key, value, modifiers=None):
    """
    Translate a single (key, value) search criterion into a depa.tech
    Elasticsearch/Lucene query fragment. Returns {'query': expression}
    on success, {'error': True, 'message': ...} on translation failure,
    or None for unknown keys.
    """
    # Map the generic key to the datasource-specific index field name
    try:
        fieldname = cls.datasource_indexnames[key]
    except KeyError:
        return

    expression = None
    format = u'{0}:{1}'

    # ------------------------------------------
    #   value mogrifiers
    # ------------------------------------------
    if key == 'patentnumber':

        # Transform into distinct fields PC, DE, KI
        #if has_booleans(value):
        #    value = '({})'.format(value)

        expression_parts = []

        # Publication number
        patent = split_patent_number(value)
        patent_normalized = normalize_patent(patent, for_ops=False)
        if patent_normalized:
            patent = patent_normalized
        if patent:
            subexpression = u'PC:{country} AND DE:{number}'.format(
                **patent)
            if patent['kind']:
                subexpression += u' AND KI:{kind}'.format(**patent)
            expression_parts.append(u'({})'.format(subexpression))

        # Application number
        subexpression = u'AN:{}'.format(value)
        expression_parts.append(subexpression)
        # NOTE(review): this join is redundant — it is overwritten by the
        # identical join after the priority-number part below.
        expression = u' OR '.join(expression_parts)

        # Priority number
        subexpression = u'NP:{}'.format(value)
        expression_parts.append(subexpression)
        expression = u' OR '.join(expression_parts)

    elif key == 'pubdate':

        """
        - DP:[19800101 TO 19851231]
        - DP:[* TO 19601231]
        """

        try:
            parsed = False

            # e.g. 1991 — expand a bare year to a full-year range
            if len(value) == 4 and value.isdigit():
                value = u'within {}0101,{}1231'.format(value, value)

            # e.g. 1990-2014, 1990 - 2014
            value = year_range_to_within(value)

            # e.g.
            # within 1978,1986
            # within 1900,2009-08-20
            # within 2009-08-20,2011-03-03
            if 'within' in value:
                within_dates = parse_date_within(value)

                # Normalize bounds to YYYYMMDD; open-ended bounds become wildcards
                if within_dates['startdate']:
                    if len(within_dates['startdate']) == 4:
                        within_dates['startdate'] += '0101'
                    within_dates['startdate'] = parse_date_universal(
                        within_dates['startdate']).format('YYYYMMDD')
                else:
                    within_dates['startdate'] = '*'

                if within_dates['enddate']:
                    if len(within_dates['enddate']) == 4:
                        within_dates['enddate'] += '1231'
                    within_dates['enddate'] = parse_date_universal(
                        within_dates['enddate']).format('YYYYMMDD')
                else:
                    within_dates['enddate'] = '*'

                expression = '{fieldname}:[{startdate} TO {enddate}]'.format(
                    fieldname=fieldname, **within_dates)

            elif not parsed:
                # Plain single date
                value_date = parse_date_universal(value)
                if value_date:
                    value = value_date.format('YYYYMMDD')
                else:
                    raise ValueError(value)

        except Exception as ex:
            message = 'depatech query: Invalid date or range expression "{0}". Reason: {1}.'.format(
                value, ex)
            logger.warn(
                message +
                ' Exception was: {0}'.format(_exception_traceback()))
            return {'error': True, 'message': message}

    elif key == 'inventor' or key == 'applicant':
        # Quote name values containing spaces, unless they are boolean expressions
        if not has_booleans(value) and should_be_quoted(value):
            value = u'"{0}"'.format(value)

    elif key == 'class':

        # v1: Naive implementation can only handle single values
        #value = lucene_convert_class(value)

        # v2: Advanced implementation can handle expressions on field "class"
        # Translate class expression from "H04L12/433 or H04L12/24"
        # to "(ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224)"
        try:

            # Put value into parenthesis, to properly capture expressions
            if value:
                value = u'({value})'.format(value=value)

            # Parse value as simple query expression
            query_object = CQL(cql=value)

            # Rewrite all patent classifications in query expression ast from OPS format to Lucene format
            rewrite_classes_lucene(query_object, format, fieldname)

            # Serialize into appropriate upstream datasource query expression syntax
            expression = query_object.dumps()

        except pyparsing.ParseException as ex:
            return {
                'error': True,
                'message': '<pre>' + str(ex.explanation) + '</pre>'
            }

    elif key == 'country':
        value = value.upper()

    # ------------------------------------------
    #   surround with parentheses
    # ------------------------------------------
    if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']:
        if has_booleans(value) and not should_be_quoted(value):
            value = u'({0})'.format(value)

    # ------------------------------------------
    #   expression formatter
    # ------------------------------------------
    # Serialize into appropriate upstream datasource query expression syntax
    if not expression:
        expression = format_expression(format, fieldname, value)
    #print 'expression:', expression

    # ------------------------------------------
    #   final polishing
    # ------------------------------------------

    # Solr(?) syntax: boolean operators must be uppercase
    if has_booleans(expression):
        boolis = [' or ', ' and ', ' not ']
        for booli in boolis:
            expression = expression.replace(booli, booli.upper())

    return {'query': expression}