def test_smart_split(self):
    """Exercise text.smart_split() against a table of (input, expected) pairs,
    covering quoted phrases, embedded/escaped quotes, unbalanced quotes, and
    a lazy string (regression test for #20231)."""
    testdata = [
        ('This is "a person" test.',
            ['This', 'is', '"a person"', 'test.']),
        ('This is "a person\'s" test.',
            ['This', 'is', '"a person\'s"', 'test.']),
        ('This is "a person\\"s" test.',
            ['This', 'is', '"a person\\"s"', 'test.']),
        ('"a \'one', ['"a', "'one"]),
        ('all friends\' tests', ['all', 'friends\'', 'tests']),
        ('url search_page words="something else"',
            ['url', 'search_page', 'words="something else"']),
        ("url search_page words='something else'",
            ['url', 'search_page', "words='something else'"]),
        ('url search_page words "something else"',
            ['url', 'search_page', 'words', '"something else"']),
        ('url search_page words-"something else"',
            ['url', 'search_page', 'words-"something else"']),
        ('url search_page words=hello',
            ['url', 'search_page', 'words=hello']),
        ('url search_page words="something else',
            ['url', 'search_page', 'words="something', 'else']),
        ("cut:','|cut:' '",
            ["cut:','|cut:' '"]),
        (lazystr("a b c d"),  # Test for #20231
            ['a', 'b', 'c', 'd']),
    ]
    for test, expected in testdata:
        self.assertEqual(list(text.smart_split(test)), expected)
def build_search(searchstring, filters, max_query_length=4, combine=operator.and_):
    """Build a composite Q() filter from a free-text search string.

    Each whitespace/quote-aware word from *searchstring* is OR-ed across all
    ORM lookups in *filters*, and the per-word groups are folded together
    with *combine* (AND by default).

    :param searchstring: raw user query, tokenized with smart_split()
    :param filters: iterable of ORM lookup strings, e.g. 'name__icontains'
    :param max_query_length: cap on the number of words used
    :param combine: binary function joining per-word Q groups
    :return: a models.Q instance (empty Q() for an empty search string)
    """
    count = 0
    search_filter = models.Q()
    for word in text_utils.smart_split(searchstring):
        # Strip surrounding quotes; an unterminated quote only loses the
        # leading quote character.
        if word[0] in ['"', "'"]:
            if word[0] == word[-1]:
                word = word[1:-1]
            else:
                word = word[1:]
        # NOTE(review): `>` lets max_query_length + 1 words through
        # (count runs 0..max inclusive) — confirm the off-by-one is intended.
        if count > max_query_length:
            break
        local_filter = models.Q()
        for f in filters:
            local_filter |= models.Q(**{f: word})
        search_filter = combine(search_filter, local_filter)
        count += 1
    return search_filter
def test_smart_split(self):
    """Table-driven test of text.smart_split(): quoted phrases, escaped and
    unbalanced quotes, key="value" forms, and lazy strings (#20231)."""
    testdata = [
        ('This is "a person" test.',
            ['This', 'is', '"a person"', 'test.']),
        ('This is "a person\'s" test.',
            ['This', 'is', '"a person\'s"', 'test.']),
        ('This is "a person\\"s" test.',
            ['This', 'is', '"a person\\"s"', 'test.']),
        ('"a \'one', ['"a', "'one"]),
        ('all friends\' tests', ['all', 'friends\'', 'tests']),
        ('url search_page words="something else"',
            ['url', 'search_page', 'words="something else"']),
        ("url search_page words='something else'",
            ['url', 'search_page', "words='something else'"]),
        ('url search_page words "something else"',
            ['url', 'search_page', 'words', '"something else"']),
        ('url search_page words-"something else"',
            ['url', 'search_page', 'words-"something else"']),
        ('url search_page words=hello',
            ['url', 'search_page', 'words=hello']),
        ('url search_page words="something else',
            ['url', 'search_page', 'words="something', 'else']),
        ("cut:','|cut:' '",
            ["cut:','|cut:' '"]),
        (
            lazystr("a b c d"),  # Test for #20231
            ['a', 'b', 'c', 'd']),
    ]
    for test, expected in testdata:
        self.assertEqual(list(text.smart_split(test)), expected)
def parse_git_command(string):
    """Parse a git-style command line into (command, args).

    The first word selects a parser from the module-level ``_parsers`` table;
    the tail is tokenized with smart_split() and string literals are
    unescaped.  URL-bearing commands are validated via ``_check_url``.

    :raises Error: for unsupported commands, or for ``tag -a`` without ``-m``.
    """
    command, _, tail = string.lstrip().partition(' ')
    if command == 'help':
        # `help` takes free-form words; no argparse involved.
        return command, tail.split()
    try:
        parser = _parsers[command]
    except KeyError:
        raise Error('Command "%s" is not supported.' % command)
    args = []
    for arg in smart_split(tail):
        try:
            # Drop surrounding quotes / unescape; non-literals pass through.
            arg = unescape_string_literal(arg)
        except ValueError:
            pass
        args.append(arg)
    namespace = parser.parse_args(args)
    # Per-command post-validation of positional arguments.
    if command in ('push', 'pull', 'fetch'):
        for url in namespace.pos:
            _check_url(url)
    elif command == 'remote':
        if namespace.pos and namespace.pos[0] == 'add':
            _check_url(namespace.pos[-1])
    elif command == 'tag':
        if namespace.a and namespace.m is None:
            raise Error('The -m option is required if -a is used.')
    return command, args
def do_imageid(dummy, token):
    """Renders an Imagetag by ImageID. See pydoc huimages for information about
    ImageID.

    Syntax::

        {% imageid <imageid> <size> [nolink] [urlonly] %}

    See huimages for further Documentation on size.

    Example::

        {% imageid spare.bild "320x240" %}
        <a class="imagelink" href="http://i.hdimg.net/o/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg"><img src="http://i.hdimg.net/320x240/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg" /></a>
        {% imageid "DEQOQMIPRJPPTAKLE3NQY5BTMMNTFGG201" "320x240" nolink %}
        <img src="http://i.hdimg.net/320x240/DEQOQMIPRJPPTAKLE3NQY5BTMMNTFGG201.jpeg" />
        {% imageid spare.bild.name "150x150!" urlonly %}
        http://i.hdimg.net/150x150!/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg
    """
    options = {'urlonly': False, 'nolink': False, 'size': '1024x768'}
    try:
        tokens = list(smart_split(token.contents))
        dummy, obj = tokens[:2]
        if len(tokens) > 2:
            # The first extra token is taken as the size; note it may also be
            # re-matched as a flag by the loop below.
            options['size'] = tokens[2].strip('"\'')
        for option in tokens[2:]:
            if option.strip('"\'') == 'urlonly':
                options['urlonly'] = True
            if option.strip('"\'') == 'nolink':
                options['nolink'] = True
    except ValueError:
        # Python 2 raise syntax preserved (this module is Python 2 code).
        raise template.TemplateSyntaxError, "%r tag requires a single argument" % token.contents.split()[0]
    return ImageLink(obj, options)
def split_text_query(query):
    """Tokenize *query* and drop stopwords — unless doing so would leave
    nothing, in which case the full token list is returned unchanged."""
    tokens = list(smart_split(query))
    useful = []
    for token in tokens:
        if token not in STOPWORDS:
            useful.append(token)
    if useful:
        return useful
    return tokens
def transmogrify(parser, token):
    """Template tag: parse an image url plus a sequence of transmogrify
    actions into a MogrifyNode.  (Python 2: consumes the smart_split()
    generator with .next().)"""
    bits = smart_split(token.contents)
    tagname = bits.next()
    try:
        imageurl = bits.next()
    except StopIteration:
        raise template.TemplateSyntaxError("%r tag requires at least the image url" % tagname)
    # Parse the actions into a list of (action, arg) tuples.
    # The "border" and "letterbox" actions are a special case: they take two arguments.
    actions = []
    for action in bits:
        if action not in ACTIONS:
            raise template.TemplateSyntaxError("Unknown action in %r tag: %r" % (tagname, action))
        if action in ["border", "letterbox"]:
            param1 = bits.next()
            color = bits.next()
            # Accept colors written with or without a leading '#'.
            color = color.lstrip("#")
            actions.append((action, param1, color))
        else:
            actions.append((action, bits.next()))
    # No actions is an error
    if not actions:
        raise template.TemplateSyntaxError("%r tag requires at least one action" % (tagname))
    return MogrifyNode(imageurl, actions)
def transmogrify(parser, token):
    """Template tag: parse '{% transmogrify <url> <action> <arg> ... %}' into
    a MogrifyNode.  (Python 2: consumes the smart_split() generator with
    .next().)"""
    bits = smart_split(token.contents)
    tagname = bits.next()
    try:
        imageurl = bits.next()
    except StopIteration:
        raise template.TemplateSyntaxError(
            "%r tag requires at least the image url" % tagname)
    # Parse the actions into a list of (action, arg) tuples.
    # The "border" and "letterbox" actions are a special case: they take two arguments.
    actions = []
    for action in bits:
        if action not in ACTIONS:
            raise template.TemplateSyntaxError("Unknown action in %r tag: %r" % (tagname, action))
        if action in ["border", "letterbox"]:
            param1 = bits.next()
            color = bits.next()
            # Accept colors written with or without a leading '#'.
            color = color.lstrip("#")
            actions.append((action, param1, color))
        else:
            actions.append((action, bits.next()))
    # No actions is an error
    if not actions:
        raise template.TemplateSyntaxError(
            "%r tag requires at least one action" % (tagname))
    return MogrifyNode(imageurl, actions)
def appquery(self, djp):
    '''This function implements the search query.

    The query is build using the search fields specifies in
    :attr:`djpcms.views.appsite.ModelApplication.search_fields`.
    It returns a queryset.
    '''
    qs = super(SearchView,self).appquery(djp)
    request = djp.request
    slist = self.appmodel.opts.search_fields
    # Pull the search text from whichever dict matches the request method.
    if request.method == 'GET':
        data = dict(request.GET.items())
    else:
        data = dict(request.POST.items())
    search_string = data.get(self.search_text,None)
    if slist and search_string:
        bits = smart_split(search_string)
        #bits = search_string.split(' ')
        for bit in bits:
            bit = isexact(bit)
            if not bit:
                continue
            # OR this term across every search field, then AND (via `&`)
            # the per-term querysets together.
            or_queries = [Q(**{construct_search(field_name): bit})
                          for field_name in slist]
            other_qs = QuerySet(self.appmodel.modelsearch())
            other_qs.dup_select_related(qs)
            other_qs = other_qs.filter(reduce(operator.or_, or_queries))
            qs = qs & other_qs
    return qs
def _media_tags_process(groups):
    """Render a wiki-style media tag ([[photo ...]], [[video ...]],
    [[pdf ...]], or a bare file reference) from a regex match.

    Returns rendered HTML, or u'' when the referenced object is missing.
    (Python 2 module: uses `raise E, msg`, `has_key`, and u'' literals.)
    """
    params = list(smart_split(groups.group('text')))
    if params[0] == 'photo':
        if not has_photologue:
            # Fail loudly in DEBUG, silently in production.
            if settings.DEBUG:
                raise Exception, '[[photo]] tag found but photologue app not installed.'
            else:
                return u''
        photo = _get_photo(_strip(params[1]))
        return photo and _render_photo(photo, params[2:]) or u''
    if params[0] == 'video':
        video = _get_video(_strip(params[1]))
        return video and _render_video(video, params[2:]) or u''
    if params[0] == 'pdf':
        pdf = _get_pdf(_strip(params[1]))
        return pdf and _render_pdf(pdf, params[2:]) or u''
    # Anything else: treat the first token as a file path and the remaining
    # tokens as size / title / caption options.
    file = _strip(params[0])
    opts = {}
    for param in params[1:]:
        param = _strip(param)
        m = SIZE_RE.match(param)
        if m:
            opts['width'] = m.group('width')
            opts['height'] = m.group('height')
        elif opts.has_key('title'):
            # A second bare token becomes the caption.
            opts['caption'] = param
        else:
            opts['title'] = param
    return _render_media_tag(file, opts)
def do_render_object(parser, token):
    """
    {% render_object object template_name [context] %}
    """
    pieces = list(smart_split(token.contents))
    argument_count = len(pieces)
    # Tag name + 2 required args, plus one optional context argument.
    if argument_count != 3 and argument_count != 4:
        raise TemplateSyntaxError(_('wrong arguments to render_object'))
    return RenderObjectNode(*pieces[1:])
def do_render_collection(parser, token):
    """
    {% render_collection objects template_name [context [item_context]] %}
    """
    pieces = list(smart_split(token.contents))
    count = len(pieces)
    # Tag name + 2 required args, plus up to two optional context arguments.
    if not 3 <= count <= 5:
        raise TemplateSyntaxError(_('wrong arguments to render_collection'))
    return RenderCollectionNode(*pieces[1:])
def get_text_query_bits(self, query_string):
    """Split the query into bits and drop stopwords; if every bit was a
    stopword, fall back to the unfiltered list."""
    all_bits = list(smart_split(query_string))
    kept = [b for b in all_bits if b not in self.STOPWORD_LIST]
    return kept or all_bits
def parse(cls, terms):
    """Tokenize *terms*, unescape quoted tokens, and drop empty tokens and
    stop words.

    NOTE(review): on Python 3, map()/filter() are lazy, so this returns a
    filter object rather than a list — confirm callers only iterate it once.
    """
    if not terms:
        return []
    # Only tokens that start with a quote are run through the unescaper.
    unescaped = map(
        lambda x: unescape_string_literal(x) if x and x[0] in '"\'' else x,
        smart_split(terms))
    safe_terms = filter(lambda x: x and x not in cls.stop_words, unescaped)
    return safe_terms
def get_options(self):
    """Parse ``self.options`` (a smart_split-able key=value string) into a
    dict of smart_str keys/values.

    :raises Exception: when a token does not match ``kw_pat``.
    """
    options = {}
    bits = iter(smart_split(self.options))
    for bit in bits:
        m = kw_pat.match(bit)
        if not m:
            # @todo - add custom validation - this should never happen
            # FIX: the original message was "Invalid preset option: " % bit —
            # with no %s placeholder, the % raised TypeError instead of
            # surfacing the intended Exception text.
            raise Exception("Invalid preset option: %s" % bit)
        key = smart_str(m.group('key'))
        value = smart_str(m.group('value'))
        options[key] = value
    return options
def extract_terms(raw):
    """
    Extraction based on spaces, understands double and single quotes.

    Returns a list of strings.
    """
    terms = list(smart_split(raw))
    for i, term in enumerate(terms):
        try:
            # FIX: the original passed the whole list (`terms`) instead of the
            # current token (`term`), so no token was ever unescaped.
            # (Also removed a leftover Python-2 debug `print terms`.)
            terms[i] = unescape_string_literal(term)
        except ValueError:
            # Token was not a quoted string literal; keep it as-is.
            pass
    return terms
def split_text_query(query):
    """Filter stopwords, but only if there are also other words. """
    # NOTE(review): in the original (pre-collapse) formatting this literal was
    # likely wrapped across lines; entries split from it would then carry
    # embedded whitespace (e.g. ' had') and never match a clean token —
    # confirm against the original source.
    stopwords = '''a,am,an,and,as,at,be,by,can,did,do,for,get,got, had,has,he,her,him,his,how,i,if,in,is,it,its,let,may,me, my,no,nor,not,of,off,on,or,our,own,say,says,she,so,than, that,the,them,then,they,this,to,too,us,was,we,were,what, when,who,whom,why,will,yet,you,your'''.split(',')
    split_query = list(smart_split(query))
    filtered_query = [word for word in split_query if word not in stopwords]
    # Fall back to the unfiltered tokens when everything was a stopword.
    return filtered_query if len(filtered_query) else split_query
def no_param_shortcut(parser, token):
    """ Shortcut to transmogrify thumbnail """
    # (Python 2: consumes the smart_split() generator with .next().)
    bits = smart_split(token.contents)
    tagname = bits.next()
    try:
        imageurl = bits.next()
    except StopIteration:
        raise template.TemplateSyntaxError("%r tag requires at least the image url" % tagname)
    # The tag name itself doubles as the (argument-less) action.
    return MogrifyNode(imageurl, [(tagname, ), ])
def do_render(parser, token):
    """Freestyle render with everything.

    {% render template_name key=value key1=value1 ... %}

    Features:

    * template_name and each value may be a string literal or a variable name
    * if a string value has a `_` prefix (_"...") it is treated as
      translatable (presumably gettext — TODO confirm)
    * inherits the entire parent context, overriding it with the given keys

    (Docstring translated from Polish.)
    """
    bits = list(smart_split(token.contents))
    if len(bits) < 2:
        raise TemplateSyntaxError(_('not enough arguments to render'))
    return RenderNode(bits[1:])
def split_contents(self):
    """Split ``self.contents`` with smart_split(), re-joining tokens inside a
    translation marker (``_("...")`` / ``_('...')``) into a single piece.
    (Python 2: uses iterator .next().)"""
    split = []
    bits = iter(smart_split(self.contents))
    for bit in bits:
        # Handle translation-marked template pieces
        if bit.startswith('_("') or bit.startswith("_('"):
            sentinal = bit[2] + ')'  # closing quote + paren (var name sic)
            trans_bit = [bit]
            while not bit.endswith(sentinal):
                bit = bits.next()
                trans_bit.append(bit)
            bit = ' '.join(trans_bit)
        split.append(bit)
    return split
def split_contents(self):
    """Tokenize ``self.contents`` like smart_split(), but keep a whole
    translation marker (``_("...")`` / ``_('...')``) as one token."""
    tokens = smart_split(self.contents)
    pieces = []
    for token in tokens:
        # A token opening a translation marker is merged with the tokens that
        # follow it, until the closing quote+paren is seen.
        if token.startswith(('_("', "_('")):
            closing = token[2] + ")"
            collected = [token]
            while not token.endswith(closing):
                token = next(tokens)
                collected.append(token)
            token = " ".join(collected)
        pieces.append(token)
    return pieces
def clean_keywords(self):
    """
    Validates that no search keyword is shorter than 3 characters.

    Leading '+'/'-' operators and surrounding matching quotes are not
    counted toward a keyword's length.

    :raises forms.ValidationError: when any keyword is too short.
    :return: the unmodified 'keywords' cleaned data.
    """
    for keyword in smart_split(self.cleaned_data['keywords']):
        keyword_len = len(keyword)
        if keyword[0] in ('+', '-'):
            keyword_len = keyword_len - 1
        elif keyword[0] == '"' and keyword[-1] == '"' or \
                keyword[0] == "'" and keyword[-1] == "'":
            keyword_len = keyword_len - 2
        if keyword_len < 3:
            # FIX: corrected the user-facing typo "minimun" -> "minimum".
            raise forms.ValidationError('Keywords must be a minimum of 3 characters long.')
    return self.cleaned_data['keywords']
def split_contents(self):
    """Tokenize ``self.contents``, keeping each ``_("...")``/``_('...')``
    translation span intact as a single token."""
    result = []
    stream = iter(smart_split(self.contents))
    for piece in stream:
        if piece.startswith(('_("', "_('")):
            # Pull further tokens until the marker's closing quote+paren.
            terminator = piece[2] + ")"
            buffered = [piece]
            while not piece.endswith(terminator):
                piece = next(stream)
                buffered.append(piece)
            piece = " ".join(buffered)
        result.append(piece)
    return result
def get_text_query_bits(self, query_string):
    """Tokenize the query and drop stopwords, unless that would discard
    every token — then return the full token list."""
    every_bit = list(smart_split(query_string))
    useful_bits = [bit for bit in every_bit if bit not in self.STOPWORD_LIST]
    if useful_bits:
        return useful_bits
    return every_bit
def get_plugin_index_data(base_plugin, request):
    """Collect searchable text fragments for a CMS plugin.

    Gathers tokens from the instance's declared ``search_fields`` and,
    unless full-text indexing is disabled via a ``search_fulltext``
    attribute, from the plugin's rendered output.

    :return: list of text tokens (empty for an empty plugin).
    """
    text_bits = []
    instance, plugin_type = base_plugin.get_plugin_instance()
    if instance is None:
        # this is an empty plugin
        return text_bits
    if hasattr(instance, 'search_fulltext'):
        # check if the plugin instance has search enabled
        search_contents = instance.search_fulltext
    elif hasattr(base_plugin, 'search_fulltext'):
        # now check in the base plugin instance (CMSPlugin)
        search_contents = base_plugin.search_fulltext
    elif hasattr(plugin_type, 'search_fulltext'):
        # last check in the plugin class (CMSPluginBase)
        search_contents = plugin_type.search_fulltext
    else:
        # enable by default
        search_contents = True
    for field in getattr(instance, 'search_fields', []):
        field_content = strip_tags(getattr(instance, field, ''))
        if field_content:
            field_content = force_unicode(field_content)
            text_bits.extend(smart_split(field_content))
    if search_contents:
        plugin_contents = instance.render_plugin(
            context=RequestContext(request))
        if plugin_contents:
            plugin_contents = strip_tags(plugin_contents)
            text_bits.extend(smart_split(plugin_contents))
    return text_bits
def postgres_search(query, fields=['content']):
    """Full-text search over Post title+content using Postgres search.

    Each smart_split() token of *query* becomes a SearchQuery; the tokens
    are OR-ed together and matched against a fixed title+content vector.

    NOTE: the ``fields`` parameter is currently unused — the vector is
    hard-coded to title and content (kept for signature compatibility).

    :return: queryset of matching Post objects.
    """
    query_string = query.strip()
    vector = SearchVector('title') + SearchVector('content')
    # OR together one SearchQuery per whitespace/quote-aware token.
    # (FIX: removed a leftover debug print and dead commented-out code.)
    filters = reduce(SearchQuery.__or__,
                     [SearchQuery(s) for s in smart_split(query_string)])
    results = Post.objects.annotate(search=vector).filter(search=filters)
    return results
def no_param_shortcut(parser, token):
    """ Shortcut to transmogrify thumbnail """
    # (Python 2: consumes the smart_split() generator with .next().)
    bits = smart_split(token.contents)
    tagname = bits.next()
    try:
        imageurl = bits.next()
    except StopIteration:
        raise template.TemplateSyntaxError(
            "%r tag requires at least the image url" % tagname)
    # The tag name itself doubles as the (argument-less) action.
    return MogrifyNode(imageurl, [ (tagname, ), ])
def get_plugin_index_data(base_plugin, request):
    """Collect searchable text fragments for a CMS plugin: tokens from the
    instance's ``search_fields`` plus (unless disabled via a
    ``search_fulltext`` attribute) tokens from the rendered plugin output.

    :return: list of text tokens (empty for an empty plugin).
    """
    text_bits = []
    instance, plugin_type = base_plugin.get_plugin_instance()
    if instance is None:
        # this is an empty plugin
        return text_bits
    if hasattr(instance, 'search_fulltext'):
        # check if the plugin instance has search enabled
        search_contents = instance.search_fulltext
    elif hasattr(base_plugin, 'search_fulltext'):
        # now check in the base plugin instance (CMSPlugin)
        search_contents = base_plugin.search_fulltext
    elif hasattr(plugin_type, 'search_fulltext'):
        # last check in the plugin class (CMSPluginBase)
        search_contents = plugin_type.search_fulltext
    else:
        # enable by default
        search_contents = True
    for field in getattr(instance, 'search_fields', []):
        field_content = strip_tags(getattr(instance, field, ''))
        if field_content:
            field_content = force_unicode(field_content)
            text_bits.extend(smart_split(field_content))
    if search_contents:
        plugin_contents = instance.render_plugin(context=RequestContext(request))
        if plugin_contents:
            plugin_contents = strip_tags(plugin_contents)
            text_bits.extend(smart_split(plugin_contents))
    return text_bits
def split_proximity(text):
    """Yield Postgres tsquery fragments built from free text.

    Multi-word / quoted tokens become proximity phrases
    (``' a <-> b '``); single words become prefix matches (``word:*``).
    Bare ``or`` tokens are skipped (presumably handled by the caller —
    TODO confirm).
    """
    # TODO: add not / - option
    # Normalize: drop wildcard/colon characters, unify quotes to double.
    text = text.replace("*", "").replace(":", "").replace("'", '"')
    tokens = smart_split(text)
    for t in tokens:
        t_cl = clean(t, lang="de", lower=False, no_punct=False)
        t_cl_p = clean(t, lang="de", lower=False, no_punct=True)
        if t_cl.lower() == "or":
            continue
        if " " in t or '"' in t_cl:
            # Quoted or multi-word token: emit a <-> proximity phrase.
            yield "' " + t_cl_p.replace(" ", " <-> ") + " '"
        else:
            # Single word: emit a prefix match.
            yield t_cl_p + ":*"
def postgres_search(query, fields=None):
    """Full-text search over Post using Postgres search.

    A SearchVector is built over *fields*; each smart_split() token of
    *query* becomes a SearchQuery and the tokens are OR-ed together.

    :param query: raw search string.
    :param fields: non-empty iterable of model field names to search.
    :raises ValueError: when *fields* is empty/None (FIX: the original
        iterated ``None`` and died with an opaque TypeError).
    :return: queryset of matching Post objects.
    """
    if not fields:
        raise ValueError("postgres_search() requires a non-empty 'fields' iterable.")
    query_string = query.strip()
    vector = reduce(SearchVector.__add__, [SearchVector(f) for f in fields])
    # OR together one SearchQuery per whitespace/quote-aware token.
    filters = reduce(SearchQuery.__or__,
                     [SearchQuery(s) for s in smart_split(query_string)])
    results = Post.objects.annotate(search=vector).filter(search=filters)
    logger.info("Preform postgres search.")
    return results
def get_search_results(self, request, queryset, search_term): """ Returns a tuple containing a queryset to implement the search, and a boolean indicating if the results may contain duplicates. """ # Apply keyword searches. def construct_search(field_name): if field_name.startswith('^'): return "%s__istartswith" % field_name[1:] elif field_name.startswith('='): return "%s__iexact" % field_name[1:] elif field_name.startswith('@'): return "%s__search" % field_name[1:] else: return "%s__icontains" % field_name # Group using quotes def unescape_string_literal_if_possible(bit): try: return unescape_string_literal(bit) except ValueError: return bit use_distinct = False search_fields = self.get_search_fields(request) if search_fields and search_term: search_term_list = [ unescape_string_literal_if_possible(bit) for bit in smart_split(search_term) ] orm_lookups = [ construct_search(str(search_field)) for search_field in search_fields ] for bit in search_term_list: or_queries = [ models.Q(**{orm_lookup: bit}) for orm_lookup in orm_lookups ] queryset = queryset.filter(reduce(operator.or_, or_queries)) if not use_distinct: for search_spec in orm_lookups: if lookup_needs_distinct(self.opts, search_spec): use_distinct = True break return queryset, use_distinct
def test_smart_split(self):
    """Table-driven test of text.smart_split(): quoted phrases, escaped and
    unbalanced quotes, key="value" forms, and lazy strings (#20231)."""
    testdata = [
        ('This is "a person" test.', ["This", "is", '"a person"', "test."]),
        ('This is "a person\'s" test.', ["This", "is", '"a person\'s"', "test."]),
        ('This is "a person\\"s" test.', ["This", "is", '"a person\\"s"', "test."]),
        ("\"a 'one", ['"a', "'one"]),
        ("all friends' tests", ["all", "friends'", "tests"]),
        ('url search_page words="something else"', ["url", "search_page", 'words="something else"']),
        ("url search_page words='something else'", ["url", "search_page", "words='something else'"]),
        ('url search_page words "something else"', ["url", "search_page", "words", '"something else"']),
        ('url search_page words-"something else"', ["url", "search_page", 'words-"something else"']),
        ("url search_page words=hello", ["url", "search_page", "words=hello"]),
        ('url search_page words="something else', ["url", "search_page", 'words="something', "else"]),
        ("cut:','|cut:' '", ["cut:','|cut:' '"]),
        (lazystr("a b c d"), ["a", "b", "c", "d"]),  # Test for #20231
    ]
    for test, expected in testdata:
        self.assertEqual(list(text.smart_split(test)), expected)
def planet_post_list(parser, token):
    """
    Render a list of posts using the planet/list.html template.

    Params:
        limit: limit to this number of entries
        tag: select only Posts that matches this tag
        category: select only Posts that belongs to Feeds under this Category
        template: render using a different template

    Examples:
        {% planet_post_list with limit=10 tag=tag %}
        {% planet_post_list with tag="Redis" %}
        {% planet_post_list with category="PyPy" %}
    """
    bits = list(smart_split(token.contents))
    len_bits = len(bits)
    kwargs = {}
    if len_bits > 1:
        if bits[1] != 'with':
            raise TemplateSyntaxError(
                _("if given, fourth argument to %s tag must be 'with'")
                % bits[0])
        # Parse the remaining name=value options.
        for i in range(2, len_bits):
            try:
                name, value = bits[i].split('=')
                if name in ('tag', 'category', 'template', 'limit'):
                    kwargs[str(name)] = value
                else:
                    # Not caught below: TemplateSyntaxError is not ValueError.
                    raise TemplateSyntaxError(
                        _("%(tag)s tag was given an invalid option: '%(option)s'"
                          ) % {
                            'tag': bits[0],
                            'option': name,
                        })
            except ValueError:
                # Raised by split('=') unpacking when there is no '='.
                raise TemplateSyntaxError(
                    _("%(tag)s tag was given a badly formatted option: '%(option)s'"
                      ) % {
                        'tag': bits[0],
                        'option': bits[i],
                    })
    return PlanetPostList(**kwargs)
def split_contents(self):
    """Split ``self.contents`` on whitespace.

    1. Uses smart_split, so content enclosed in quotes ('" ') is not split.
    2. Translation markers are handled correctly, e.g.
       '_("a" "b" "c")' ==> '_("a" "b" "c")' stays one piece instead of
       being broken into three.

    :return: list of strings

    (Docstring translated from Chinese.)
    """
    split = []
    bits = smart_split(self.contents)
    for bit in bits:
        # Handle translation-marked template pieces
        if bit.startswith(('_("', "_('")):
            sentinel = bit[2] + ')'  # closing quote + paren
            trans_bit = [bit]
            while not bit.endswith(sentinel):
                bit = next(bits)
                trans_bit.append(bit)
            bit = ' '.join(trans_bit)
        split.append(bit)
    return split
def get_text_search_query(self, query_string):
    """Build a Q object from *query_string*: each smart_split() term is
    OR-ed across ``self.Meta.search_fields`` and the per-term groups are
    AND-ed together.

    NOTE(review): with an empty query string, the final reduce() over an
    empty list raises TypeError — confirm callers guard against that.
    """
    filters = []

    def construct_search(field_name):
        # Field-name prefixes select the ORM lookup:
        # '^' istartswith, '=' iexact, '@' fulltext (MySQL only), else icontains.
        if field_name.startswith('^'):
            return "%s__istartswith" % field_name[1:]
        elif field_name.startswith('='):
            return "%s__iexact" % field_name[1:]
        elif field_name.startswith('@'):
            if settings.DATABASE_ENGINE == 'mysql':
                return "%s__search" % field_name[1:]
            else:
                return "%s__icontains" % field_name[1:]
        else:
            return "%s__icontains" % field_name

    for bit in smart_split(query_string):
        or_queries = [Q(**{construct_search(str(field_name)): bit})
                      for field_name in self.Meta.search_fields]
        filters.append(reduce(Q.__or__, or_queries))
    return reduce(Q.__and__, filters)
def sql_search(query, fields=None):
    """search_fields example: ['name', 'category__name', '@description', '=id'] """
    # Tokenize the query and drop stop words.
    terms = [t for t in smart_split(query.strip()) if t not in STOP]
    per_term = []
    for term in terms:
        # OR this term across every searched field...
        alternatives = [Q(**{f"{name}__icontains": term}) for name in fields]
        per_term.append(reduce(Q.__or__, alternatives))
    # ...then AND the per-term groups; no terms means match nothing.
    if per_term:
        combined = reduce(Q.__and__, per_term)
    else:
        combined = Q(pk=None)
    results = Post.objects.filter(combined)
    logger.info("Preform sql lite search.")
    return results
def deserialize_options(option_string):
    """
    Split out options from option_string and return them as a dict

    Raise ThumbnailParseError if any syntax errors are discovered.
    """
    parsed = {}
    for token in smart_split(option_string):
        match = kw_pat.match(token)
        if match is None:
            raise ThumbnailParseError("Invalid thumbnail option: %s" % token)
        key = smart_str(match.group("key"))
        value = smart_str(match.group("value")).strip("\"'")
        # "crop" values get extra validation — parse_crop raises
        # ThumbnailParseError when the geometry is invalid.
        if key == "crop":
            parse_crop(value, [0, 0], [0, 0])
        parsed[key] = value
    return parsed
def generate_phrases():
    """Rebuild the Phrase table from Incident titles/descriptions.

    Extracts German-noun tokens, counts 1..3-gram phrases across all
    incidents, keeps the top half as Phrase rows, and links each incident
    to the phrases it contains.
    """
    Phrase.objects.all().delete()
    incis = []
    res = []
    c = Counter()
    for inci in tqdm(Incident.objects.all(), desc="generating ngrams / phrases"):
        all_ngrams = []
        for t in [inci.title, inci.description]:
            if not t or len(t) == 0:
                continue
            # Strip quotes so smart_split does not glue quoted spans together.
            tokens = t.replace('"', "").replace("'", "")
            tokens = list(smart_split(tokens))
            tokens = [t for t in tokens if t.lower() not in STOP_WORDS]
            tokens = list(find_german_nouns(tokens))
            if len(tokens) == 0:
                continue
            for i in range(1, min(4, len(tokens) + 1)):
                ngrams = find_ngrams(tokens, i)
                # Sort words within each n-gram so word order is ignored.
                ngrams = [" ".join(sorted(x)) for x in ngrams]
                c.update(ngrams)
                all_ngrams += ngrams
        res.append(all_ngrams)
        incis.append(inci)
    # Keep only the top half of observed n-grams as "real" phrases.
    real_phrases = c.most_common(len(c) // 2)
    phrase_list = [Phrase(option=x[0], count=x[1]) for x in real_phrases]
    Phrase.objects.bulk_create(phrase_list)
    phrase_dict = {x.option: x for x in phrase_list}
    phrase_set = set(x[0] for x in real_phrases)
    for i, array_ph in tqdm(enumerate(res), total=len(res), desc="adding phrases to incidents"):
        real_ph = [phrase_dict[x] for x in array_ph if x in phrase_set]
        incis[i].phrases.add(*real_ph)
    Phrase.objects.sync()
def do_command(self, e, c, cmd, from_nick):
    """Dispatch a chat-bot command string (echo / count / search / help).

    Any unexpected failure is reported to the user and logged with a
    traceback (deliberate top-level boundary handler).
    """
    try:
        if cmd == 'echo':
            self.reply(e, "Wow, there is an echo here")
        if cmd.startswith('count'):
            ans = ""
            words = cmd.split(" ")
            while True:
                mcmd = words.pop(0)
                model = words.pop(0)
                if model == 'torrents':
                    qs = Torrent.objects.all()
                #if model=='anime':
                #    qs = Anime.objects.all()
                if not words:
                    break
                if mcmd == 'count':
                    ans += '%s %s ' % (qs.count(), model)
            self.reply(e, ans)
        if cmd.startswith('search'):
            from django.utils.text import smart_split
            # FIX: str has no .lsplit() — the original always fell into the
            # generic error handler.  Use split(" ", 1)[1] to take the whole
            # query after the command word (TODO confirm intent).  Also
            # materialize smart_split()'s generator so len() works.
            query = list(smart_split(cmd.split(" ", 1)[1]))
            if len(query) == 1:
                pass
        if cmd == 'help':
            self.say_private(from_nick, """Commands:
help Display this message.
search "query" search the tracker""")
        else:
            self.reply(e, "I don't understand '%s'." % (cmd))
    except:
        self.reply(e, "Ow, you hurt my brain.")
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
def planet_post_list(__, token):
    """
    Render a list of posts using the planet/list.html template.

    Params:
        limit: limit to this number of entries
        tag: select only Posts that matches this tag
        category: select only Posts that belongs to Feeds under this Category
        template: render using a different template
        hidden: show also hidden posts

    Examples:
        {% planet_post_list with limit=10 tag=tag %}
        {% planet_post_list with tag="Redis" %}
        {% planet_post_list with category="PyPy" hidden="True" %}
    """
    bits = list(smart_split(token.contents))
    len_bits = len(bits)
    kwargs = {}
    if len_bits > 1:
        if bits[1] != 'with':
            raise TemplateSyntaxError(_("if given, fourth argument to %s tag must be 'with'") % bits[0])
        # Parse the remaining name=value options.
        for i in range(2, len_bits):
            try:
                name, value = bits[i].split('=')
                if name in ('tag', 'category', 'template', 'limit', 'hidden', 'days', 'page'):
                    kwargs[str(name)] = value
                else:
                    # Not caught below: TemplateSyntaxError is not ValueError.
                    raise TemplateSyntaxError(_("%(tag)s tag was given an invalid option: '%(option)s'") % {
                        'tag': bits[0],
                        'option': name,
                    })
            except ValueError:
                # Raised by split('=') unpacking when there is no '='.
                raise TemplateSyntaxError(_("%(tag)s tag was given a badly formatted option: '%(option)s'") % {
                    'tag': bits[0],
                    'option': bits[i],
                })
    return PlanetPostList(**kwargs)
def test_smart_split(self):
    """Table-driven test of text.smart_split() using subTest() per case:
    quoted phrases, escaped/unbalanced quotes, key="value" forms, and a
    lazy string (#20231)."""
    testdata = [
        ('This is "a person" test.', ["This", "is", '"a person"', "test."]),
        ('This is "a person\'s" test.', ["This", "is", '"a person\'s"', "test."]),
        ('This is "a person\\"s" test.', ["This", "is", '"a person\\"s"', "test."]),
        ("\"a 'one", ['"a', "'one"]),
        ("all friends' tests", ["all", "friends'", "tests"]),
        (
            'url search_page words="something else"',
            ["url", "search_page", 'words="something else"'],
        ),
        (
            "url search_page words='something else'",
            ["url", "search_page", "words='something else'"],
        ),
        (
            'url search_page words "something else"',
            ["url", "search_page", "words", '"something else"'],
        ),
        (
            'url search_page words-"something else"',
            ["url", "search_page", 'words-"something else"'],
        ),
        ("url search_page words=hello", ["url", "search_page", "words=hello"]),
        (
            'url search_page words="something else',
            ["url", "search_page", 'words="something', "else"],
        ),
        ("cut:','|cut:' '", ["cut:','|cut:' '"]),
        (lazystr("a b c d"), ["a", "b", "c", "d"]),  # Test for #20231
    ]
    for test, expected in testdata:
        with self.subTest(value=test):
            self.assertEqual(list(text.smart_split(test)), expected)
def split_terms(s):
    """Tokenize *s* with smart_split(), trim quotes/spaces from each token,
    and drop tokens that end up empty."""
    trimmed = (token.strip("'\" ") for token in smart_split(s))
    return filter(None, trimmed)
def _extract_terms(self, query):
    """Tokenize *query* and normalize every token with self._clean_term()."""
    return list(map(self._clean_term, smart_split(query)))
def apply_queryset_options(self, queryset):
    """
    Interprets the datatable options.

    Options requiring manual massaging of the queryset are handled here.  The
    output of this method should be treated as a list, since complex options
    might convert it out of the original queryset form.
    """
    options = self._get_datatable_options()

    # These will hold residue queries that cannot be handled in at the database level.  Anything
    # in these variables by the end will be handled manually (read: less efficiently)
    sort_fields = []
    searches = []

    # This count is for the benefit of the frontend datatables.js
    total_initial_record_count = queryset.count()

    if options['ordering']:
        db_fields, sort_fields = split_real_fields(self.model, options['ordering'])
        queryset = queryset.order_by(*db_fields)

    if options['search']:
        db_fields, searches = filter_real_fields(self.model, options['columns'],
                                                 key=get_first_orm_bit)
        db_fields.extend(options['search_fields'])

        queries = []  # Queries generated to search all fields for all terms
        # NOTE(review): on Python 3, map() yields a one-shot iterator —
        # confirm search_terms is only consumed by the loop below.
        search_terms = map(lambda q: q.strip("'\" "), smart_split(options['search']))

        for term in search_terms:
            term_queries = []  # Queries generated to search all fields for this term
            # Every concrete database lookup string in 'columns' is followed to its
            # trailing field descriptor.  For example, "subdivision__name" terminates in
            # a CharField.  The field type determines how it is probed for search.
            for column in db_fields:
                column = get_field_definition(column)
                for component_name in column.fields:
                    field_queries = []  # Queries generated to search this database field for the search term
                    field = resolve_orm_path(self.model, component_name)
                    if field.choices:
                        # Query the database for the database value rather than display value
                        choices = field.get_flatchoices()
                        length = len(choices)
                        database_values = []
                        display_values = []
                        for choice in choices:
                            database_values.append(choice[0])
                            display_values.append(choice[1].lower())
                        for i in range(length):
                            if term.lower() in display_values[i]:
                                field_queries = [{component_name + '__iexact': database_values[i]}]
                    elif isinstance(field, tuple(FIELD_TYPES['text'])):
                        field_queries = [{component_name + '__icontains': term}]
                    elif isinstance(field, tuple(FIELD_TYPES['date'])):
                        try:
                            date_obj = dateutil.parser.parse(term)
                        except ValueError:
                            # This exception is theoretical, but it doesn't seem to raise.
                            pass
                        except TypeError:
                            # Failed conversions can lead to the parser adding ints to None.
                            pass
                        except OverflowError:
                            # Catches OverflowError: signed integer is greater than maximum
                            pass
                        else:
                            field_queries.append({component_name: date_obj})

                        # Add queries for more granular date field lookups
                        try:
                            numerical_value = int(term)
                        except ValueError:
                            pass
                        else:
                            if datetime.MINYEAR < numerical_value < datetime.MAXYEAR - 1:
                                field_queries.append({component_name + '__year': numerical_value})
                            if 0 < numerical_value <= 12:
                                field_queries.append({component_name + '__month': numerical_value})
                            if 0 < numerical_value <= 31:
                                field_queries.append({component_name + '__day': numerical_value})
                    elif isinstance(field, tuple(FIELD_TYPES['boolean'])):
                        if term.lower() in ('true', 'yes'):
                            term = True
                        elif term.lower() in ('false', 'no'):
                            term = False
                        else:
                            continue
                        field_queries = [{component_name: term}]
                    elif isinstance(field, tuple(FIELD_TYPES['integer'])):
                        try:
                            field_queries = [{component_name: int(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['float'])):
                        try:
                            field_queries = [{component_name: float(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['ignored'])):
                        pass
                    else:
                        raise ValueError("Unhandled field type for %s (%r) in search." % (component_name, type(field)))

                    # print field_queries

                    # Append each field inspection for this term
                    term_queries.extend(map(lambda q: Q(**q), field_queries))
            # Append the logical OR of all field inspections for this term
            if len(term_queries):
                queries.append(reduce(operator.or_, term_queries))
        # Apply the logical AND of all term inspections
        if len(queries):
            queryset = queryset.filter(reduce(operator.and_, queries))

    # TODO: Remove "and not searches" from this conditional, since manual searches won't be done
    if not sort_fields and not searches:
        # We can shortcut and speed up the process if all operations are database-backed.
        object_list = queryset
        if options['search']:
            object_list._dtv_unpaged_total = queryset.count()
        else:
            object_list._dtv_unpaged_total = total_initial_record_count
    else:
        object_list = ObjectListResult(queryset)

        # # Manual searches
        # # This is broken until it searches all items in object_list previous to the database
        # # sort. That represents a runtime load that hits every row in code, rather than in the
        # # database. If enabled, this would cripple performance on large datasets.
        # if options['i_walk_the_dangerous_line_between_genius_and_insanity']:
        #     length = len(object_list)
        #     for i, obj in enumerate(reversed(object_list)):
        #         keep = False
        #         for column_info in searches:
        #             column_index = options['columns'].index(column_info)
        #             rich_data, plain_data = self.get_column_data(column_index, column_info, obj)
        #             for term in search_terms:
        #                 if term.lower() in plain_data.lower():
        #                     keep = True
        #                     break
        #             if keep:
        #                 break
        #
        #         if not keep:
        #             removed = object_list.pop(length - 1 - i)
        #             # print column_info
        #             # print data
        #             # print '===='

        # Sort the results manually for whatever remaining sort options are left over
        def data_getter_orm(field_name):
            # Key function following a double-underscore ORM path via getattr.
            def key(obj):
                try:
                    return reduce(getattr, [obj] + field_name.split('__'))
                except (AttributeError, ObjectDoesNotExist):
                    return None
            return key

        def data_getter_custom(i):
            # Key function using the column's rendered plain value.
            def key(obj):
                rich_value, plain_value = self.get_column_data(i, options['columns'][i], obj)
                return plain_value
            return key

        # Sort the list using the manual sort fields, back-to-front.  `sort` is a stable
        # operation, meaning that multiple passes can be made on the list using different
        # criteria.  The only catch is that the passes must be made in reverse order so that
        # the "first" sort field with the most priority ends up getting applied last.
        for sort_field in sort_fields[::-1]:
            if sort_field.startswith('-'):
                reverse = True
                sort_field = sort_field[1:]
            else:
                reverse = False

            if sort_field.startswith('!'):
                key_function = data_getter_custom
                sort_field = int(sort_field[1:])
            else:
                key_function = data_getter_orm

            try:
                object_list.sort(key=key_function(sort_field), reverse=reverse)
            except TypeError as err:
                log.error("Unable to sort on {0} - {1}".format(sort_field, err))

        object_list._dtv_unpaged_total = len(object_list)

    object_list._dtv_total_initial_record_count = total_initial_record_count
    return object_list
def split_contents(self):
    """Tokenize ``self.contents`` on whitespace, keeping quoted substrings intact."""
    tokens = smart_split(self.contents)
    return list(tokens)
def test_smart_split(self):
    """smart_split() must split on whitespace while keeping quoted groups together.

    Each case is (input, index, expected): when index is None the full token list
    is compared, otherwise only the token at that position (mirroring the original
    spot-check assertions).
    """
    cases = [
        (r'''This is "a person" test.''', None,
         [u'This', u'is', u'"a person"', u'test.']),
        (r'''This is "a person's" test.''', 2, u'"a person\'s"'),
        (r'''This is "a person\"s" test.''', 2, u'"a person\\"s"'),
        ('''"a 'one''', None, [u'"a', u"'one"]),
        (r'''all friends' tests''', 1, "friends'"),
        (u'url search_page words="something else"', None,
         [u'url', u'search_page', u'words="something else"']),
        (u"url search_page words='something else'", None,
         [u'url', u'search_page', u"words='something else'"]),
        (u'url search_page words "something else"', None,
         [u'url', u'search_page', u'words', u'"something else"']),
        (u'url search_page words-"something else"', None,
         [u'url', u'search_page', u'words-"something else"']),
        (u'url search_page words=hello', None,
         [u'url', u'search_page', u'words=hello']),
        (u'url search_page words="something else', None,
         [u'url', u'search_page', u'words="something', u'else']),
        ("cut:','|cut:' '", None, [u"cut:','|cut:' '"]),
    ]
    for value, index, expected in cases:
        tokens = list(smart_split(value))
        if index is None:
            self.assertEqual(tokens, expected)
        else:
            self.assertEqual(tokens[index], expected)
def apply_queryset_options(self, queryset):
    """
    Interprets the datatable options.

    Options requiring manual massaging of the queryset are handled here.  The output of
    this method should be treated as a list, since complex options might convert it out
    of the original queryset form.
    """
    options = self._get_datatable_options()

    # These will hold residue queries that cannot be handled at the database level.
    # Anything left in these variables by the end will be handled manually
    # (read: less efficiently).
    sort_fields = []
    searches = []

    # This count is for the benefit of the frontend datatables.js
    total_initial_record_count = queryset.count()

    if options['ordering']:
        db_fields, sort_fields = split_real_fields(self.model, options['ordering'])
        queryset = queryset.order_by(*db_fields)

    if options['search']:
        db_fields, searches = filter_real_fields(self.model, options['columns'],
                                                 key=get_first_orm_bit)
        db_fields.extend(options['search_fields'])

        queries = []  # Queries generated to search all fields for all terms
        search_terms = map(lambda q: q.strip("'\" "), smart_split(options['search']))

        for term in search_terms:
            term_queries = []  # Queries generated to search all fields for this term
            # Every concrete database lookup string in 'columns' is followed to its
            # trailing field descriptor.  For example, "subdivision__name" terminates in
            # a CharField.  The field type determines how it is probed for search.
            for column in db_fields:
                column = get_field_definition(column)
                for component_name in column.fields:
                    field_queries = []  # Queries probing this field for the search term
                    field = resolve_orm_path(self.model, component_name)
                    if isinstance(field, (models.CharField, models.TextField,
                                          models.FileField)):
                        field_queries = [{component_name + '__icontains': term}]
                    elif isinstance(field, models.DateField):
                        try:
                            date_obj = dateutil.parser.parse(term)
                        except ValueError:
                            # This exception is theoretical, but it doesn't seem to raise.
                            pass
                        except TypeError:
                            # Failed conversions can lead to the parser adding ints to None.
                            pass
                        else:
                            field_queries.append({component_name: date_obj})

                        # Add queries for more granular date field lookups
                        try:
                            numerical_value = int(term)
                        except ValueError:
                            pass
                        else:
                            if 0 < numerical_value < 3000:
                                field_queries.append(
                                    {component_name + '__year': numerical_value})
                            if 0 < numerical_value <= 12:
                                field_queries.append(
                                    {component_name + '__month': numerical_value})
                            if 0 < numerical_value <= 31:
                                field_queries.append(
                                    {component_name + '__day': numerical_value})
                    elif isinstance(field, models.BooleanField):
                        # Use a local value instead of rebinding ``term``: clobbering the
                        # loop variable made later fields in this pass see True/False
                        # (and a second BooleanField crashed on ``True.lower()``).
                        if term.lower() in ('true', 'yes'):
                            bool_value = True
                        elif term.lower() in ('false', 'no'):
                            bool_value = False
                        else:
                            continue  # Not a recognizable boolean term for this field
                        field_queries = [{component_name: bool_value}]
                    elif isinstance(field, (models.IntegerField, models.AutoField)):
                        try:
                            field_queries = [{component_name: int(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, (models.FloatField, models.DecimalField)):
                        try:
                            field_queries = [{component_name: float(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, models.ForeignKey):
                        pass  # Relations are not directly searchable
                    else:
                        raise ValueError("Unhandled field type for %s (%r) in search." % (
                            component_name, type(field)))

                    # Append each field inspection for this term
                    term_queries.extend(map(lambda q: Q(**q), field_queries))
            # Append the logical OR of all field inspections for this term
            if len(term_queries):
                queries.append(reduce(operator.or_, term_queries))
        # Apply the logical AND of all term inspections
        if len(queries):
            queryset = queryset.filter(reduce(operator.and_, queries))

    # TODO: Remove "and not searches" from this conditional, since manual searches
    # won't be done.
    if not sort_fields and not searches:
        # We can shortcut and speed up the process if all operations are database-backed.
        object_list = queryset
        object_list._dtv_unpaged_total = queryset.count()
    else:
        object_list = ObjectListResult(queryset)

        # NOTE: Manual (in-Python) filtering of the ``searches`` residue is deliberately
        # not implemented: it would have to inspect every row in Python rather than in
        # the database, crippling performance on large datasets.

        # Sort the results manually for whatever remaining sort options are left over.
        def data_getter_orm(field_name):
            # Key function following a Django ORM path such as "relation__field".
            def key(obj):
                try:
                    return reduce(getattr, [obj] + field_name.split('__'))
                except (AttributeError, ObjectDoesNotExist):
                    return None
            return key

        def data_getter_custom(i):
            # Key function using the column's rendered plain-text value.
            def key(obj):
                rich_value, plain_value = self.get_column_data(i, options['columns'][i], obj)
                return plain_value
            return key

        # Sort the list using the manual sort fields, back-to-front.  `sort` is a stable
        # operation, meaning that multiple passes can be made on the list using different
        # criteria.  The only catch is that the passes must be made in reverse order so
        # that the "first" sort field with the most priority ends up getting applied last.
        for sort_field in sort_fields[::-1]:
            if sort_field.startswith('-'):
                reverse = True
                sort_field = sort_field[1:]
            else:
                reverse = False

            if sort_field.startswith('!'):
                # "!N" marks a custom (non-ORM) column index, sorted by rendered value.
                key_function = data_getter_custom
                sort_field = int(sort_field[1:])
            else:
                key_function = data_getter_orm

            try:
                object_list.sort(key=key_function(sort_field), reverse=reverse)
            except TypeError as err:
                log.error("Unable to sort on {0} - {1}".format(sort_field, err))

        object_list._dtv_unpaged_total = len(object_list)

    object_list._dtv_total_initial_record_count = total_initial_record_count
    return object_list
def _buildSearchQueries(self, db_fields, search_query, is_regex):
    """Build a list of Q objects implementing ``search_query`` over ``db_fields``.

    When ``is_regex`` is true, a single case-insensitive regex lookup is OR-ed across
    every text-type field.  Otherwise the query string is split into quote-aware
    terms, and each term yields the logical OR of per-field lookups chosen by the
    field's type.  The caller is expected to AND the returned queries together.
    """
    queries = []

    if is_regex:
        field_queries = []
        for column in db_fields:
            column = get_field_definition(column)
            for component_name in column.fields + column.search_fields:
                field = resolve_orm_path(self.model, component_name)
                if isinstance(field, tuple(FIELD_TYPES['text'])):
                    field_queries.append(Q(**{component_name + u'__iregex': search_query}))
        # reduce() over an empty sequence raises TypeError; skip when the column set
        # contains no text-type fields (mirrors the guard in the non-regex branch).
        if field_queries:
            queries.append(reduce(operator.or_, field_queries))
    else:
        search_terms = map(lambda q: q.strip("'\" "), smart_split(search_query))

        for term in search_terms:
            term_queries = []  # Queries generated to search all fields for this term
            # Every concrete database lookup string in 'columns' is followed to its
            # trailing field descriptor.  For example, "subdivision__name" terminates in
            # a CharField.  The field type determines how it is probed for search.
            for column in db_fields:
                column = get_field_definition(column)
                for component_name in column.fields + column.search_fields:
                    field_queries = []  # Queries probing this field for the search term
                    field = resolve_orm_path(self.model, component_name)
                    if isinstance(field, tuple(FIELD_TYPES['text'])):
                        field_queries = [{component_name + '__icontains': term}]
                    elif isinstance(field, tuple(FIELD_TYPES['date'])):
                        try:
                            date_obj = dateutil.parser.parse(term)
                        except ValueError:
                            # This exception is theoretical, but it doesn't seem to raise.
                            pass
                        except TypeError:
                            # Failed conversions can lead to the parser adding ints to None.
                            pass
                        else:
                            field_queries.append({component_name: date_obj})

                        # Add queries for more granular date field lookups
                        try:
                            numerical_value = int(term)
                        except ValueError:
                            pass
                        else:
                            if 0 < numerical_value < 3000:
                                field_queries.append(
                                    {component_name + '__year': numerical_value})
                            if 0 < numerical_value <= 12:
                                field_queries.append(
                                    {component_name + '__month': numerical_value})
                            if 0 < numerical_value <= 31:
                                field_queries.append(
                                    {component_name + '__day': numerical_value})
                    elif isinstance(field, tuple(FIELD_TYPES['boolean'])):
                        # Use a local value instead of rebinding ``term``: clobbering the
                        # loop variable made later fields in this pass see True/False
                        # (and a second boolean field crashed on ``True.lower()``).
                        if term.lower() in ('true', 'yes'):
                            bool_value = True
                        elif term.lower() in ('false', 'no'):
                            bool_value = False
                        else:
                            continue  # Not a recognizable boolean term for this field
                        field_queries = [{component_name: bool_value}]
                    elif isinstance(field, tuple(FIELD_TYPES['integer'])):
                        try:
                            field_queries = [{component_name: int(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['float'])):
                        try:
                            field_queries = [{component_name: float(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['ignored'])):
                        pass
                    else:
                        raise ValueError("Unhandled field type for %s (%r) in search." % (
                            component_name, type(field)))

                    # Append each field inspection for this term
                    term_queries.extend(map(lambda q: Q(**q), field_queries))
            # Append the logical OR of all field inspections for this term
            if len(term_queries):
                queries.append(reduce(operator.or_, term_queries))

    return queries
def get_cleaned_bits(data):
    """Coerce *data* to text, strip any HTML tags, and tokenize it for searching."""
    plain_text = strip_tags(force_unicode(data))
    return smart_split(plain_text)
def _buildSearchQueries(self, db_fields, search_query, is_regex, column_search=False):
    """Build a list of Q objects implementing ``search_query`` over ``db_fields``.

    When ``is_regex`` is true, a single case-insensitive regex lookup is OR-ed across
    every text-type field.  Otherwise the query string is split into quote-aware
    terms, and each term yields the logical OR of per-field lookups chosen by the
    field's type.  ``column_search`` is True when searching a single column; choice
    fields then require an exact match on the database value instead of a substring
    match on the display value.  The caller is expected to AND the returned queries.
    """
    queries = []

    if is_regex:
        field_queries = []
        for column in db_fields:
            column = get_field_definition(column)
            for component_name in column.fields + column.search_fields:
                field = resolve_orm_path(self.get_model(), component_name)
                if isinstance(field, tuple(FIELD_TYPES['text'])):
                    field_queries.append(
                        Q(**{component_name + u'__iregex': search_query}))
        # reduce() over an empty sequence raises TypeError; skip when the column set
        # contains no text-type fields (mirrors the guard in the non-regex branch).
        if field_queries:
            queries.append(reduce(operator.or_, field_queries))
    else:
        search_terms = map(lambda q: q.strip("'\" "), smart_split(search_query))

        for term in search_terms:
            term_queries = []  # Queries generated to search all fields for this term
            # Every concrete database lookup string in 'columns' is followed to its
            # trailing field descriptor.  For example, "subdivision__name" terminates in
            # a CharField.  The field type determines how it is probed for search.
            for column in db_fields:
                column = get_field_definition(column)
                for component_name in column.fields + column.search_fields:
                    field_queries = []  # Queries probing this field for the search term
                    field = resolve_orm_path(self.get_model(), component_name)
                    field_method_name = 'search_' + field.name
                    if hasattr(self, field_method_name):
                        # Call field-specific method to get the field queries
                        field_queries.append(
                            getattr(self, field_method_name)(field, term,
                                                             component_name))
                    elif field.choices:
                        # Query the database for the database value rather than the
                        # display value.
                        database_values, display_values = zip(*field.get_flatchoices())
                        string_database_values = [
                            unicode(value).lower() for value in database_values
                        ]
                        display_values = [
                            unicode(value).lower() for value in display_values
                        ]
                        all_values = zip(display_values, string_database_values,
                                         database_values)
                        search_term = term.lower()
                        if column_search:
                            # Column search: match exactly against the database value.
                            for display_value, string_database_value, database_value in all_values:
                                if search_term == string_database_value:
                                    field_queries.append({
                                        component_name + '__exact': database_value
                                    })
                        else:
                            # Global search: substring-match against the display value.
                            for display_value, string_database_value, database_value in all_values:
                                if search_term in display_value:
                                    field_queries.append({
                                        component_name + '__exact': database_value
                                    })
                    elif isinstance(field, tuple(FIELD_TYPES['text'])):
                        field_queries = [{component_name + '__icontains': term}]
                    elif isinstance(field, tuple(FIELD_TYPES['date'])):
                        try:
                            date_obj = dateutil.parser.parse(term)
                        except ValueError:
                            # This exception is theoretical, but it doesn't seem to raise.
                            pass
                        except TypeError:
                            # Failed conversions can lead to the parser adding ints to None.
                            pass
                        except OverflowError:
                            # Catches OverflowError: signed integer is greater than maximum
                            pass
                        else:
                            field_queries.append({component_name: date_obj})

                        # Add queries for more granular date field lookups
                        try:
                            numerical_value = int(term)
                        except ValueError:
                            pass
                        else:
                            if datetime.MINYEAR < numerical_value < datetime.MAXYEAR - 1:
                                field_queries.append({
                                    component_name + '__year': numerical_value
                                })
                            if 0 < numerical_value <= 12:
                                field_queries.append({
                                    component_name + '__month': numerical_value
                                })
                            if 0 < numerical_value <= 31:
                                field_queries.append({
                                    component_name + '__day': numerical_value
                                })
                    elif isinstance(field, tuple(FIELD_TYPES['boolean'])):
                        # Use a local value instead of rebinding ``term``: clobbering the
                        # loop variable made later fields in this pass see True/False
                        # (and a second boolean field crashed on ``True.lower()``).
                        if term.lower() in ('true', 'yes'):
                            bool_value = True
                        elif term.lower() in ('false', 'no'):
                            bool_value = False
                        else:
                            continue  # Not a recognizable boolean term for this field
                        field_queries = [{component_name: bool_value}]
                    elif isinstance(field, tuple(FIELD_TYPES['integer'])):
                        try:
                            field_queries = [{component_name: int(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['float'])):
                        try:
                            field_queries = [{component_name: float(term)}]
                        except ValueError:
                            pass
                    elif isinstance(field, tuple(FIELD_TYPES['ignored'])):
                        pass
                    else:
                        raise ValueError(
                            "Unhandled field type for %s (%r) in search."
                            % (component_name, type(field)))

                    # Append each field inspection for this term
                    term_queries.extend(map(lambda q: Q(**q), field_queries))
            # Append the logical OR of all field inspections for this term
            if len(term_queries):
                queries.append(reduce(operator.or_, term_queries))

    return queries