Esempi in Python per smart_split, esempi in Python per django.utils.text.smart_split

Esempio n. 1

0

Mostra file

File: test_text.py Progetto: carlio/django

 def test_smart_split(self):
     """Check that smart_split() yields the expected tokens for each sample."""
     cases = (
         # Quoted phrases (plain, with an apostrophe, with an escaped quote)
         # are expected to come back as a single token, quotes included.
         ('This is "a person" test.',
          ['This', 'is', '"a person"', 'test.']),
         ('This is "a person\'s" test.',
          ['This', 'is', '"a person\'s"', 'test.']),
         ('This is "a person\\"s" test.',
          ['This', 'is', '"a person\\"s"', 'test.']),
         # Unterminated quotes fall back to whitespace splitting.
         ('"a \'one',
          ['"a', "'one"]),
         ('all friends\' tests',
          ['all', 'friends\'', 'tests']),
         # key="value" style arguments.
         ('url search_page words="something else"',
          ['url', 'search_page', 'words="something else"']),
         ("url search_page words='something else'",
          ['url', 'search_page', "words='something else'"]),
         ('url search_page words "something else"',
          ['url', 'search_page', 'words', '"something else"']),
         ('url search_page words-"something else"',
          ['url', 'search_page', 'words-"something else"']),
         ('url search_page words=hello',
          ['url', 'search_page', 'words=hello']),
         ('url search_page words="something else',
          ['url', 'search_page', 'words="something', 'else']),
         ("cut:','|cut:' '",
          ["cut:','|cut:' '"]),
         # Test for #20231
         (lazystr("a b c d"),
          ['a', 'b', 'c', 'd']),
     )
     for source, tokens in cases:
         produced = list(text.smart_split(source))
         self.assertEqual(produced, tokens)

Esempio n. 2

0

Mostra file

File: utils.py Progetto: tinashe13/plan

def build_search(searchstring,
                 filters,
                 max_query_length=4,
                 combine=operator.and_):
    """Build a ``Q`` filter from the words of ``searchstring``.

    Every word yielded by ``smart_split`` is matched against each lookup in
    ``filters`` (the lookups are OR-ed together); the per-word filters are
    then merged into the result with ``combine``. A leading quote character,
    and the trailing one when it is the same character, is removed from a
    word before it is used.
    """
    combined = models.Q()

    for position, term in enumerate(text_utils.smart_split(searchstring)):
        opening = term[0]
        if opening in ('"', "'"):
            term = term[1:-1] if opening == term[-1] else term[1:]

        # NOTE(review): the comparison is ``>``, so the first
        # ``max_query_length + 1`` words are used - confirm this is intended.
        if position > max_query_length:
            break

        per_term = models.Q()
        for lookup in filters:
            per_term |= models.Q(**{lookup: term})

        combined = combine(combined, per_term)

    return combined

Esempio n. 3

0

Mostra file

File: test_text.py Progetto: raghava4u/Rdjango

 def test_smart_split(self):
     """Run smart_split() over a table of inputs and compare the tokens."""
     samples = (
         ('This is "a person" test.',
          ['This', 'is', '"a person"', 'test.']),
         ('This is "a person\'s" test.',
          ['This', 'is', '"a person\'s"', 'test.']),
         ('This is "a person\\"s" test.',
          ['This', 'is', '"a person\\"s"', 'test.']),
         ('"a \'one',
          ['"a', "'one"]),
         ('all friends\' tests',
          ['all', 'friends\'', 'tests']),
         ('url search_page words="something else"',
          ['url', 'search_page', 'words="something else"']),
         ("url search_page words='something else'",
          ['url', 'search_page', "words='something else'"]),
         ('url search_page words "something else"',
          ['url', 'search_page', 'words', '"something else"']),
         ('url search_page words-"something else"',
          ['url', 'search_page', 'words-"something else"']),
         ('url search_page words=hello',
          ['url', 'search_page', 'words=hello']),
         ('url search_page words="something else',
          ['url', 'search_page', 'words="something', 'else']),
         ("cut:','|cut:' '",
          ["cut:','|cut:' '"]),
         # Test for #20231
         (lazystr("a b c d"),
          ['a', 'b', 'c', 'd']),
     )
     for raw, wanted in samples:
         got = list(text.smart_split(raw))
         self.assertEqual(got, wanted)

Esempio n. 4

0

Mostra file

File: git.py Progetto: akshell/chatlanian

def parse_git_command(string):
    """Split a git command line into ``(command, args)`` and validate it.

    ``help`` is returned immediately with its whitespace-split tail. Any
    other command must have an entry in ``_parsers``; its arguments are
    tokenised with ``smart_split`` and unquoted where they are valid string
    literals, then run through the command's parser.

    Raises ``Error`` for an unsupported command, and for ``tag -a`` without
    ``-m``.
    """
    command, _, tail = string.lstrip().partition(' ')
    if command == 'help':
        return command, tail.split()
    try:
        parser = _parsers[command]
    except KeyError:
        raise Error('Command "%s" is not supported.' % command)

    def _unquote(raw):
        # Tokens that are not quoted literals are passed through untouched.
        try:
            return unescape_string_literal(raw)
        except ValueError:
            return raw

    args = [_unquote(raw) for raw in smart_split(tail)]
    namespace = parser.parse_args(args)

    if command in ('push', 'pull', 'fetch'):
        for url in namespace.pos:
            _check_url(url)
    elif command == 'remote':
        positional = namespace.pos
        if positional and positional[0] == 'add':
            _check_url(positional[-1])
    elif command == 'tag':
        if namespace.a and namespace.m is None:
            raise Error('The -m option is required if -a is used.')
    return command, args

Esempio n. 5

0

Mostra file

File: hudjango.py Progetto: hudora/huDjango

def do_imageid(dummy, token):
    """Renders an Imagetag by ImageID. See pydoc huimages for information about ImageID.

       Syntax: {% imageid <imageid> <size> [nolink] [urlonly] %}

       See huimages for further Documentation on size.

       Example::

           {% imageid spare.bild "320x240" %}
           <a class="imagelink" href="http://i.hdimg.net/o/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg"><img src="http://i.hdimg.net/320x240/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg" /></a>

           {% imageid "DEQOQMIPRJPPTAKLE3NQY5BTMMNTFGG201" "320x240" nolink %}
           <img src="http://i.hdimg.net/320x240/DEQOQMIPRJPPTAKLE3NQY5BTMMNTFGG201.jpeg" />

           {% imageid spare.bild.name "150x150!" urlonly %}
           http://i.hdimg.net/150x150!/TZGBY4PX2BXMKXXGNMYHZBVYZXSJBOLT01.jpeg

       Raises ``template.TemplateSyntaxError`` when the tag has no argument
       after its name.
    """
    options = {'urlonly': False, 'nolink': False, 'size': '1024x768'}
    tokens = list(smart_split(token.contents))
    try:
        # Exactly two leading tokens are needed: the tag name and the object.
        dummy, obj = tokens[:2]
    except ValueError:
        # Call form of ``raise`` is valid on both Python 2 and Python 3.
        raise template.TemplateSyntaxError(
            "%r tag requires a single argument" % token.contents.split()[0])

    if len(tokens) > 2:
        options['size'] = tokens[2].strip('"\'')
    # The size token is scanned too, exactly as before; it only matters when
    # it happens to spell one of the flags.
    for option in tokens[2:]:
        flag = option.strip('"\'')
        if flag in ('urlonly', 'nolink'):
            options[flag] = True
    return ImageLink(obj, options)

Esempio n. 6

0

Mostra file

File: search.py Progetto: zsutx2005/biostar-engine

def split_text_query(query):
    """Tokenise ``query`` and drop stopwords, unless nothing would remain."""
    tokens = list(smart_split(query))
    meaningful = [token for token in tokens if token not in STOPWORDS]

    if meaningful:
        return meaningful
    # Only stopwords were given: fall back to the unfiltered tokens.
    return tokens

Esempio n. 7

0

Mostra file

File: transmogrifiers.py Progetto: ericmoritz/Transmogrify

def transmogrify(parser, token):
    """Compile the tag contents into a ``MogrifyNode``.

    The tag contents are read as: tag name, image url, then one or more
    actions, each followed by its argument. ``border`` and ``letterbox``
    take two arguments (a parameter and a colour; a leading ``#`` is
    stripped from the colour).

    Raises ``template.TemplateSyntaxError`` when the image url is missing,
    an action is unknown, an action lacks its argument(s), or no action is
    given at all.
    """
    # ``next()`` works on Python 2.6+ and Python 3; ``.next()`` is Python 2 only.
    bits = iter(smart_split(token.contents))
    tagname = next(bits)
    try:
        imageurl = next(bits)
    except StopIteration:
        raise template.TemplateSyntaxError("%r tag requires at least the image url" % tagname)

    # Parse the actions into a list of (action, arg) tuples.
    # The "border" and "letterbox" actions are a special case: they take two arguments.
    actions = []
    for action in bits:
        if action not in ACTIONS:
            raise template.TemplateSyntaxError("Unknown action in %r tag: %r" % (tagname, action))
        try:
            if action in ("border", "letterbox"):
                param1 = next(bits)
                color = next(bits).lstrip("#")
                actions.append((action, param1, color))
            else:
                actions.append((action, next(bits)))
        except StopIteration:
            # Previously a truncated tag leaked a bare StopIteration.
            raise template.TemplateSyntaxError(
                "%r tag is missing an argument for action %r" % (tagname, action))

    # No actions is an error
    if not actions:
        raise template.TemplateSyntaxError("%r tag requires at least one action" % (tagname))

    return MogrifyNode(imageurl, actions)

Esempio n. 8

0

Mostra file

File: transmogrifiers.py Progetto: natgeosociety/Transmogrify

def transmogrify(parser, token):
    """Compile the tag contents into a ``MogrifyNode``.

    The tag contents are read as: tag name, image url, then one or more
    actions, each followed by its argument. ``border`` and ``letterbox``
    take two arguments (a parameter and a colour; a leading ``#`` is
    stripped from the colour).

    Raises ``template.TemplateSyntaxError`` when the image url is missing,
    an action is unknown, an action lacks its argument(s), or no action is
    given at all.
    """
    # ``next()`` works on Python 2.6+ and Python 3; ``.next()`` is Python 2 only.
    bits = iter(smart_split(token.contents))
    tagname = next(bits)
    try:
        imageurl = next(bits)
    except StopIteration:
        raise template.TemplateSyntaxError(
            "%r tag requires at least the image url" % tagname)

    # Parse the actions into a list of (action, arg) tuples.
    # The "border" and "letterbox" actions are a special case: they take two arguments.
    actions = []
    for action in bits:
        if action not in ACTIONS:
            raise template.TemplateSyntaxError("Unknown action in %r tag: %r" %
                                               (tagname, action))
        try:
            if action in ("border", "letterbox"):
                param1 = next(bits)
                color = next(bits).lstrip("#")
                actions.append((action, param1, color))
            else:
                actions.append((action, next(bits)))
        except StopIteration:
            # Previously a truncated tag leaked a bare StopIteration.
            raise template.TemplateSyntaxError(
                "%r tag is missing an argument for action %r" %
                (tagname, action))

    # No actions is an error
    if not actions:
        raise template.TemplateSyntaxError(
            "%r tag requires at least one action" % (tagname))

    return MogrifyNode(imageurl, actions)

Esempio n. 9

0

Mostra file

File: appview.py Progetto: strogo/djpcms

    def appquery(self, djp):
        '''Run the search query and return the resulting queryset.

        The search text is read from the request data (GET or POST) under
        the ``self.search_text`` key and split with ``smart_split``. For
        every non-empty term, a queryset filtered on any of the model's
        ``search_fields`` is intersected with the running result.
        '''
        result = super(SearchView,self).appquery(djp)
        request = djp.request
        field_names = self.appmodel.opts.search_fields
        params = request.GET if request.method == 'GET' else request.POST
        data = dict(params.items())
        search_string = data.get(self.search_text,None)
        if not (field_names and search_string):
            return result

        for raw_term in smart_split(search_string):
            term = isexact(raw_term)
            if not term:
                continue
            # One Q object per search field; any of them may match the term.
            alternatives = [Q(**{construct_search(name): term}) for name in field_names]
            matches = QuerySet(self.appmodel.modelsearch())
            matches.dup_select_related(result)
            matches = matches.filter(reduce(operator.or_, alternatives))
            result = result & matches
        return result

Esempio n. 10

0

Mostra file

File: media_helpers.py Progetto: itavor/itavor_lib

    def _media_tags_process(groups):
        """Render one media tag from its matched text.

        ``groups`` must expose ``group('text')`` (presumably a regex match
        object - confirm against the caller). The text is split with
        ``smart_split``; a first token of ``photo``, ``video`` or ``pdf``
        selects the matching renderer, anything else is treated as a media
        file followed by optional size / title / caption parameters.

        Returns the rendered markup, or ``u''`` when the referenced object
        cannot be found (or photologue is missing and DEBUG is off).
        """
        params = list(smart_split(groups.group('text')))

        if params[0] == 'photo':
            if not has_photologue:
                if settings.DEBUG:
                    # Call form of ``raise`` is valid on Python 2 and 3.
                    raise Exception('[[photo]] tag found but photologue app not installed.')
                else:
                    return u''
            photo = _get_photo(_strip(params[1]))
            return photo and _render_photo(photo, params[2:]) or u''

        if params[0] == 'video':
            video = _get_video(_strip(params[1]))
            return video and _render_video(video, params[2:]) or u''

        if params[0] == 'pdf':
            pdf = _get_pdf(_strip(params[1]))
            return pdf and _render_pdf(pdf, params[2:]) or u''

        # Renamed from ``file`` so the builtin is not shadowed.
        media_file = _strip(params[0])
        opts = {}
        for param in params[1:]:
            param = _strip(param)
            m = SIZE_RE.match(param)
            if m:
                opts['width'] = m.group('width')
                opts['height'] = m.group('height')
            elif 'title' in opts:
                # The first free-text parameter is the title, later ones the caption.
                opts['caption'] = param
            else:
                opts['title'] = param
        return _render_media_tag(media_file, opts)

Esempio n. 11

0

Mostra file

File: git.py Progetto: akshell/chatlanian

def parse_git_command(string):
    """Parse a git command line and return ``(command, args)``.

    ``help`` short-circuits with its whitespace-split tail. Other commands
    are looked up in ``_parsers``; the tail is tokenised by ``smart_split``,
    quoted literals are unescaped, and the arguments are validated by the
    command's parser plus a few command-specific checks.

    Raises ``Error`` for an unsupported command, and for ``tag -a`` without
    ``-m``.
    """
    command, _, tail = string.lstrip().partition(' ')
    if command == 'help':
        return command, tail.split()
    try:
        parser = _parsers[command]
    except KeyError:
        raise Error('Command "%s" is not supported.' % command)

    def _literal_or_raw(piece):
        # Pieces that are not quoted literals are kept exactly as split.
        try:
            return unescape_string_literal(piece)
        except ValueError:
            return piece

    args = [_literal_or_raw(piece) for piece in smart_split(tail)]
    namespace = parser.parse_args(args)

    if command in ('push', 'pull', 'fetch'):
        for url in namespace.pos:
            _check_url(url)
    elif command == 'remote':
        positional = namespace.pos
        if positional and positional[0] == 'add':
            _check_url(positional[-1])
    elif command == 'tag':
        if namespace.a and namespace.m is None:
            raise Error('The -m option is required if -a is used.')
    return command, args

Esempio n. 12

0

Mostra file

File: render.py Progetto: kamil/dyw

def do_render_object(parser, token):
    """
    {% render_object object template_name [context] %}

    Builds a ``RenderObjectNode`` from the tag arguments; the tag must have
    two or three arguments after its name.
    """
    pieces = list(smart_split(token.contents))
    if not 3 <= len(pieces) <= 4:
        raise TemplateSyntaxError(_('wrong arguments to render_object'))

    arguments = pieces[1:]
    return RenderObjectNode(*arguments)

Esempio n. 13

0

Mostra file

File: render.py Progetto: kamil/dyw

def do_render_collection(parser, token):
    """
    {% render_collection objects template_name [context [item_context]] %}

    Builds a ``RenderCollectionNode`` from the tag arguments; the tag must
    have between two and four arguments after its name.
    """
    pieces = list(smart_split(token.contents))
    if not 3 <= len(pieces) <= 5:
        raise TemplateSyntaxError(_('wrong arguments to render_collection'))

    arguments = pieces[1:]
    return RenderCollectionNode(*arguments)

Esempio n. 14

0

Mostra file

File: forms.py Progetto: pvleite/TG

    def get_text_query_bits(self, query_string):
        """Split the query and drop stopwords, unless nothing would remain."""
        tokens = list(smart_split(query_string))
        useful = [token for token in tokens if token not in self.STOPWORD_LIST]

        # An empty filtered list means the query was all stopwords, so the
        # unfiltered tokens are returned instead.
        return useful or tokens

Esempio n. 15

0

Mostra file

File: managers.py Progetto: camswords/python-django-surf-conditions

    def parse(cls, terms):
        """Split ``terms`` into a list of search terms.

        The input is tokenised with ``smart_split``; tokens starting with a
        quote are unescaped, and empty results as well as entries of
        ``cls.stop_words`` are dropped.

        Always returns a list (the previous ``map``/``filter`` chain
        returned a lazy iterator on Python 3, unlike the ``[]`` early
        return).
        """
        if not terms:
            return []

        safe_terms = []
        for term in smart_split(terms):
            if term and term[0] in '"\'':
                try:
                    term = unescape_string_literal(term)
                except ValueError:
                    # Unbalanced quote: keep the raw token rather than
                    # failing the whole search.
                    pass
            if term and term not in cls.stop_words:
                safe_terms.append(term)

        return safe_terms

Esempio n. 16

0

Mostra file

File: models.py Progetto: powderflask/sorl-thumbnail

 def get_options(self):
     """Parse ``self.options`` into a dict of option name -> value.

     The option string is split with ``smart_split`` and every piece must
     match ``kw_pat``, whose ``key`` and ``value`` groups are converted
     with ``smart_str``.

     Raises ``Exception`` for a piece that does not match ``kw_pat``.
     """
     options = {}
     for bit in smart_split(self.options):
         m = kw_pat.match(bit)
         if not m:  # @todo - add custom validation - this should never happen
             # The message needs a placeholder; without one the ``%``
             # operator raised TypeError instead of this exception.
             raise Exception("Invalid preset option: %s" % bit)
         key = smart_str(m.group('key'))
         value = smart_str(m.group('value'))
         options[key] = value
     return options

Esempio n. 17

0

Mostra file

File: search.py Progetto: kanimozhimurugan/Nxt4

def extract_terms(raw):
    """Split ``raw`` on spaces, honouring double and single quotes.

    Each token from ``smart_split`` is unescaped when it is a quoted string
    literal; other tokens are kept as they are. Returns a list of strings.
    """
    terms = []
    for term in smart_split(raw):
        try:
            # Unescape the individual term (the whole list used to be
            # passed here by mistake).
            term = unescape_string_literal(term)
        except ValueError:
            # Not a quoted literal: keep the token unchanged.
            pass
        terms.append(term)
    return terms

Esempio n. 18

0

Mostra file

def split_text_query(query):
    """Filter stopwords, but only if there are also other words.

    Returns the ``smart_split`` tokens of ``query`` without stopwords, or
    all tokens when the query consists of stopwords only.
    """
    raw_stopwords = '''a,am,an,and,as,at,be,by,can,did,do,for,get,got,
        had,has,he,her,him,his,how,i,if,in,is,it,its,let,may,me,
        my,no,nor,not,of,off,on,or,our,own,say,says,she,so,than,
        that,the,them,then,they,this,to,too,us,was,we,were,what,
        when,who,whom,why,will,yet,you,your'''
    # Strip each entry: the literal spans several lines, so the word after
    # every line break would otherwise keep its newline and indentation and
    # never match.
    stopwords = frozenset(word.strip() for word in raw_stopwords.split(','))
    split_query = list(smart_split(query))
    filtered_query = [word for word in split_query if word not in stopwords]

    return filtered_query if filtered_query else split_query

Esempio n. 19

0

Mostra file

File: search_functions.py Progetto: dipapaspyros/opencolibri

def extract_terms(raw):
    """Split ``raw`` on spaces, honouring double and single quotes.

    Each token from ``smart_split`` is unescaped when it is a quoted string
    literal; other tokens are kept as they are. Returns a list of strings.
    """
    terms = []
    for term in smart_split(raw):
        try:
            # Unescape the individual term (the whole list used to be
            # passed here by mistake).
            term = unescape_string_literal(term)
        except ValueError:
            # Not a quoted literal: keep the token unchanged.
            pass
        terms.append(term)
    return terms

Esempio n. 20

0

Mostra file

File: transmogrifiers.py Progetto: callowayproject/Transmogrify

def no_param_shortcut(parser, token):
    """
    Shortcut to transmogrify thumbnail

    The tag name itself becomes the single, argument-less action applied to
    the image url. Raises ``template.TemplateSyntaxError`` when the image
    url is missing.
    """
    # ``next()`` works on Python 2.6+ and Python 3; ``.next()`` is Python 2 only.
    bits = iter(smart_split(token.contents))
    tagname = next(bits)
    try:
        imageurl = next(bits)
    except StopIteration:
        raise template.TemplateSyntaxError("%r tag requires at least the image url" % tagname)

    return MogrifyNode(imageurl, [(tagname, ), ])

Esempio n. 21

0

Mostra file

File: utils.py Progetto: parksandwildlife/it-assets

def split_text_query(query):
    """Filter stopwords, but only if there are also other words.

    Returns the ``smart_split`` tokens of ``query`` without stopwords, or
    all tokens when the query consists of stopwords only.
    """
    raw_stopwords = '''a,am,an,and,as,at,be,by,can,did,do,for,get,got,
        had,has,he,her,him,his,how,i,if,in,is,it,its,let,may,me,
        my,no,nor,not,of,off,on,or,our,own,say,says,she,so,than,
        that,the,them,then,they,this,to,too,us,was,we,were,what,
        when,who,whom,why,will,yet,you,your'''
    # Strip each entry: the literal spans several lines, so the word after
    # every line break would otherwise keep its newline and indentation and
    # never match.
    stopwords = frozenset(word.strip() for word in raw_stopwords.split(','))
    split_query = list(smart_split(query))
    filtered_query = [word for word in split_query if word not in stopwords]

    return filtered_query if filtered_query else split_query

Esempio n. 22

0

Mostra file

File: render.py Progetto: kamil/dyw

def do_render(parser, token):
    """Free-form render tag.

    {% render template_name key=value key1=value1 ... %}

    Features (translated from the original notes):
     * template_name and the values may be given as a string or as a
       variable name
     * a string value with the ``_`` prefix (``_"text"``) is handled
       specially - presumably as a translatable string; TODO confirm
     * the whole parent context is inherited and overridden by the given
       keys
    """
    pieces = list(smart_split(token.contents))
    arguments = pieces[1:]
    if not arguments:
        # Only the tag name (or nothing) was supplied.
        raise TemplateSyntaxError(_('not enough arguments to render'))

    return RenderNode(arguments)

Esempio n. 23

0

Mostra file

 def split_contents(self):
     """Split ``self.contents`` into bits, keeping ``_("...")`` pieces whole.

     ``smart_split`` may cut a translation-marked string at its inner
     spaces; such pieces are re-joined with single spaces until the closing
     quote-plus-parenthesis is reached. Returns a list of strings.
     """
     split = []
     bits = iter(smart_split(self.contents))
     for bit in bits:
         # Handle translation-marked template pieces
         if bit.startswith(('_("', "_('")):
             # The closing marker uses the same quote character as the opening.
             sentinel = bit[2] + ')'
             trans_bit = [bit]
             while not bit.endswith(sentinel):
                 # ``next()`` works on Python 2.6+ and 3; ``.next()`` is Python 2 only.
                 bit = next(bits)
                 trans_bit.append(bit)
             bit = ' '.join(trans_bit)
         split.append(bit)
     return split

Esempio n. 24

0

Mostra file

File: base.py Progetto: elisarocha/django-tdd-course

 def split_contents(self):
     """Return the bits of ``self.contents``, with ``_("...")`` pieces kept whole.

     Pieces that start a translation-marked string are extended with the
     following bits (joined by single spaces) until one ends with the
     matching quote and closing parenthesis.
     """
     pieces = []
     tokens = smart_split(self.contents)
     for token in tokens:
         if token.startswith(('_("', "_('")):
             # Translation-marked piece: gather bits up to the closing marker.
             closing = token[2] + ")"
             collected = [token]
             while not collected[-1].endswith(closing):
                 collected.append(next(tokens))
             token = " ".join(collected)
         pieces.append(token)
     return pieces

Esempio n. 25

0

Mostra file

File: forms.py Progetto: tsoporan/tehorng

 def clean_keywords(self):
     """
     Validates that no search keyword is shorter than 3 characters.

     A leading ``+`` or ``-``, or a matching pair of surrounding quotes, is
     not counted towards the keyword length. Returns the unchanged
     ``keywords`` value; raises ``forms.ValidationError`` otherwise.
     """
     keywords = self.cleaned_data['keywords']
     for keyword in smart_split(keywords):
         keyword_len = len(keyword)
         if keyword[0] in ('+', '-'):
             keyword_len -= 1
         elif keyword[0] in ('"', "'") and keyword[-1] == keyword[0]:
             keyword_len -= 2
         if keyword_len < 3:
             # Typo fixed in the user-facing message ("minimun").
             raise forms.ValidationError('Keywords must be a minimum of 3 characters long.')
     return keywords

Esempio n. 26

0

Mostra file

File: base.py Progetto: yogidtu/django

 def split_contents(self):
     """Return the bits of ``self.contents``, with ``_("...")`` pieces kept whole.

     Pieces that start a translation-marked string are extended with the
     following bits (joined by single spaces) until one ends with the
     matching quote and closing parenthesis.
     """
     pieces = []
     tokens = iter(smart_split(self.contents))
     for token in tokens:
         if token.startswith(('_("', "_('")):
             # Translation-marked piece: gather bits up to the closing marker.
             closing = token[2] + ")"
             collected = [token]
             while not collected[-1].endswith(closing):
                 collected.append(next(tokens))
             token = " ".join(collected)
         pieces.append(token)
     return pieces

Esempio n. 27

0

Mostra file

File: base.py Progetto: maraujop/django-old

 def split_contents(self):
     """Split ``self.contents`` into bits, keeping ``_("...")`` pieces whole.

     ``smart_split`` may cut a translation-marked string at its inner
     spaces; such pieces are re-joined with single spaces until the closing
     quote-plus-parenthesis is reached. Returns a list of strings.
     """
     split = []
     bits = iter(smart_split(self.contents))
     for bit in bits:
         # Handle translation-marked template pieces
         if bit.startswith(('_("', "_('")):
             # The closing marker uses the same quote character as the opening.
             sentinel = bit[2] + ')'
             trans_bit = [bit]
             while not bit.endswith(sentinel):
                 # ``next()`` works on Python 2.6+ and 3; ``.next()`` is Python 2 only.
                 bit = next(bits)
                 trans_bit.append(bit)
             bit = ' '.join(trans_bit)
         split.append(bit)
     return split

Esempio n. 28

0

Mostra file

 def clean_keywords(self):
     """
     Validates that no search keyword is shorter than 3 characters.

     A leading ``+`` or ``-``, or a matching pair of surrounding quotes, is
     not counted towards the keyword length. Returns the unchanged
     ``keywords`` value; raises ``forms.ValidationError`` otherwise.
     """
     keywords = self.cleaned_data['keywords']
     for keyword in smart_split(keywords):
         keyword_len = len(keyword)
         if keyword[0] in ('+', '-'):
             keyword_len -= 1
         elif keyword[0] in ('"', "'") and keyword[-1] == keyword[0]:
             keyword_len -= 2
         if keyword_len < 3:
             # Typo fixed in the user-facing message ("minimun").
             raise forms.ValidationError('Keywords must be a minimum of 3 characters long.')
     return keywords

Esempio n. 29

0

Mostra file

File: search.py Progetto: AntLink/AntLinkProject

    def get_text_query_bits(self, query_string):
        """Split the query and drop stopwords, unless nothing would remain."""
        tokens = list(smart_split(query_string))
        useful = [token for token in tokens if token not in self.STOPWORD_LIST]

        # An empty filtered list means the query was all stopwords, so the
        # unfiltered tokens are returned instead.
        return useful if useful else tokens

Esempio n. 30

0

Mostra file

File: helpers.py Progetto: leo-naeka/aldryn-search

def get_plugin_index_data(base_plugin, request):
    """Collect the searchable text bits of a plugin.

    Returns a list of tokens (split with ``smart_split``) taken from the
    tag-stripped values of the instance's ``search_fields`` and, when
    full-text search is enabled for the plugin, from its rendered output.
    An empty plugin (no instance) yields an empty list.
    """
    collected = []
    instance, plugin_type = base_plugin.get_plugin_instance()

    if instance is None:
        # this is an empty plugin
        return collected

    # ``search_fulltext`` is looked up on the plugin instance first, then on
    # the base plugin (CMSPlugin), then on the plugin class (CMSPluginBase);
    # when none defines it, rendered content is indexed by default.
    for holder in (instance, base_plugin, plugin_type):
        if hasattr(holder, 'search_fulltext'):
            search_contents = holder.search_fulltext
            break
    else:
        search_contents = True

    for field in getattr(instance, 'search_fields', []):
        field_content = strip_tags(getattr(instance, field, ''))
        if not field_content:
            continue
        collected.extend(smart_split(force_unicode(field_content)))

    if search_contents:
        rendered = instance.render_plugin(
            context=RequestContext(request))
        if rendered:
            collected.extend(smart_split(strip_tags(rendered)))

    return collected

Esempio n. 31

0

Mostra file

File: search.py Progetto: nithylsairam/myforum-master

def postgres_search(query, fields=('content',)):
    """Full-text search over ``Post`` title and content.

    ``query`` is stripped and split with ``smart_split``; one ``SearchQuery``
    per term is OR-ed together and matched against a ``SearchVector`` built
    from ``title`` and ``content``. ``fields`` is accepted but not used by
    this implementation (its default is now an immutable tuple).

    Returns a queryset; an empty or whitespace-only query returns an empty
    queryset instead of failing in ``reduce()``.
    """
    query_string = query.strip()
    terms = list(smart_split(query_string))
    if not terms:
        # reduce() without an initial value raises TypeError on no terms.
        return Post.objects.none()

    vector = SearchVector('title') + SearchVector('content')

    # One SearchQuery per term, combined with OR.
    filters = reduce(SearchQuery.__or__,
                     [SearchQuery(s) for s in terms])

    # NOTE(review): debug output kept as-is; consider routing it to logging.
    print(filters, "filters", "*" * 10)

    results = Post.objects.annotate(search=vector).filter(search=filters)

    return results

Esempio n. 32

0

Mostra file

File: transmogrifiers.py Progetto: natgeosociety/Transmogrify

def no_param_shortcut(parser, token):
    """
    Shortcut to transmogrify thumbnail

    The tag name itself becomes the single, argument-less action applied to
    the image url. Raises ``template.TemplateSyntaxError`` when the image
    url is missing.
    """
    # ``next()`` works on Python 2.6+ and Python 3; ``.next()`` is Python 2 only.
    bits = iter(smart_split(token.contents))
    tagname = next(bits)
    try:
        imageurl = next(bits)
    except StopIteration:
        raise template.TemplateSyntaxError(
            "%r tag requires at least the image url" % tagname)

    return MogrifyNode(imageurl, [
        (tagname, ),
    ])

Esempio n. 33

0

Mostra file

File: helpers.py Progetto: Madec/aldryn-search

def get_plugin_index_data(base_plugin, request):
    """Collect the searchable text bits of a plugin.

    Returns a list of tokens (split with ``smart_split``) taken from the
    tag-stripped values of the instance's ``search_fields`` and, when
    full-text search is enabled for the plugin, from its rendered output.
    An empty plugin (no instance) yields an empty list.
    """
    collected = []
    instance, plugin_type = base_plugin.get_plugin_instance()

    if instance is None:
        # this is an empty plugin
        return collected

    # ``search_fulltext`` is looked up on the plugin instance first, then on
    # the base plugin (CMSPlugin), then on the plugin class (CMSPluginBase);
    # when none defines it, rendered content is indexed by default.
    for holder in (instance, base_plugin, plugin_type):
        if hasattr(holder, 'search_fulltext'):
            search_contents = holder.search_fulltext
            break
    else:
        search_contents = True

    for field in getattr(instance, 'search_fields', []):
        field_content = strip_tags(getattr(instance, field, ''))
        if not field_content:
            continue
        collected.extend(smart_split(force_unicode(field_content)))

    if search_contents:
        rendered = instance.render_plugin(context=RequestContext(request))
        if rendered:
            collected.extend(smart_split(strip_tags(rendered)))

    return collected

Esempio n. 34

0

Mostra file

File: search.py Progetto: tatortrechts/api.tatortrechts.de

def split_proximity(text):
    """Yield one search expression per token of ``text``.

    ``*`` and ``:`` are removed and single quotes become double quotes
    before the text is split with ``smart_split``. A token equal to "or"
    (case-insensitive, after cleaning) is skipped. Multi-word or quoted
    tokens are emitted as a quoted phrase whose words are joined by
    `` <-> ``; every other token gets a ``:*`` suffix.
    """
    # TODO: add not / - option
    sanitized = text.replace("*", "").replace(":", "").replace("'", '"')

    for token in smart_split(sanitized):
        cleaned = clean(token, lang="de", lower=False, no_punct=False)
        cleaned_no_punct = clean(token, lang="de", lower=False, no_punct=True)

        if cleaned.lower() == "or":
            continue

        is_phrase = " " in token or '"' in cleaned
        if not is_phrase:
            yield cleaned_no_punct + ":*"
            continue

        phrase = cleaned_no_punct.replace(" ", " <-> ")
        yield "' " + phrase + " '"

Esempio n. 35

0

Mostra file

File: search.py Progetto: ashikin0112/biostar-central

def postgres_search(query, fields=None):
    """Full-text search over ``Post`` using the given model fields.

    ``query`` is stripped and split with ``smart_split``; one ``SearchQuery``
    per term is OR-ed together and matched against the sum of one
    ``SearchVector`` per entry in ``fields``.

    When ``fields`` is omitted or empty, ``title`` and ``content`` are used
    (the vector this function previously hard-coded, per the old
    commented-out line) instead of failing on iterating ``None``. An empty
    or whitespace-only query returns an empty queryset instead of failing
    in ``reduce()``.
    """
    if not fields:
        fields = ('title', 'content')

    query_string = query.strip()
    terms = list(smart_split(query_string))
    if not terms:
        # reduce() without an initial value raises TypeError on no terms.
        return Post.objects.none()

    vector = reduce(SearchVector.__add__, [SearchVector(f) for f in fields])

    # One SearchQuery per term, combined with OR.
    filters = reduce(SearchQuery.__or__,
                     [SearchQuery(s) for s in terms])

    results = Post.objects.annotate(search=vector).filter(search=filters)
    logger.info("Preform postgres search.")

    return results

Esempio n. 36

0

Mostra file

    def get_search_results(self, request, queryset, search_term):
        """
        Returns a tuple containing a queryset to implement the search,
        and a boolean indicating if the results may contain duplicates.
        """
        # Search-field prefix -> ORM lookup suffix; no prefix means icontains.
        prefix_lookups = {
            '^': 'istartswith',
            '=': 'iexact',
            '@': 'search',
        }

        def construct_search(field_name):
            suffix = prefix_lookups.get(field_name[:1])
            if suffix is None:
                return "%s__icontains" % field_name
            return "%s__%s" % (field_name[1:], suffix)

        def unescape_string_literal_if_possible(bit):
            # Quoted bits are unescaped; anything else is used verbatim.
            try:
                return unescape_string_literal(bit)
            except ValueError:
                return bit

        search_fields = self.get_search_fields(request)
        if not (search_fields and search_term):
            return queryset, False

        search_term_list = [
            unescape_string_literal_if_possible(bit)
            for bit in smart_split(search_term)
        ]
        orm_lookups = [
            construct_search(str(search_field))
            for search_field in search_fields
        ]
        # Every term must match at least one of the search fields.
        for bit in search_term_list:
            or_queries = [
                models.Q(**{orm_lookup: bit}) for orm_lookup in orm_lookups
            ]
            queryset = queryset.filter(reduce(operator.or_, or_queries))

        use_distinct = any(
            lookup_needs_distinct(self.opts, search_spec)
            for search_spec in orm_lookups
        )
        return queryset, use_distinct

Esempio n. 37

0

Mostra file

File: test_text.py Progetto: isotoma/django

 def test_smart_split(self):
     """Compare smart_split() output against the expected token lists."""
     cases = (
         ('This is "a person" test.', ["This", "is", '"a person"', "test."]),
         ('This is "a person\'s" test.', ["This", "is", '"a person\'s"', "test."]),
         ('This is "a person\\"s" test.', ["This", "is", '"a person\\"s"', "test."]),
         ("\"a 'one", ['"a', "'one"]),
         ("all friends' tests", ["all", "friends'", "tests"]),
         ('url search_page words="something else"', ["url", "search_page", 'words="something else"']),
         ("url search_page words='something else'", ["url", "search_page", "words='something else'"]),
         ('url search_page words "something else"', ["url", "search_page", "words", '"something else"']),
         ('url search_page words-"something else"', ["url", "search_page", 'words-"something else"']),
         ("url search_page words=hello", ["url", "search_page", "words=hello"]),
         ('url search_page words="something else', ["url", "search_page", 'words="something', "else"]),
         ("cut:','|cut:' '", ["cut:','|cut:' '"]),
         # Test for #20231
         (lazystr("a b c d"), ["a", "b", "c", "d"]),
     )
     for source, tokens in cases:
         produced = list(text.smart_split(source))
         self.assertEqual(produced, tokens)

Esempio n. 38

0

Mostra file

File: planet_tags.py Progetto: linkedblogs/django-planet

def planet_post_list(parser, token):
    """
    Render a list of posts using the planet/list.html template.

    Params:
        limit: limit to this number of entries
        tag: select only Posts that matches this tag
        category: select only Posts that belongs to Feeds under this Category
        template: render using a different template

    Examples:
        {% planet_post_list with limit=10 tag=tag %}
        {% planet_post_list with tag="Redis" %}
        {% planet_post_list with category="PyPy" %}

    The tag contents are tokenized with smart_split. Every token after
    ``with`` must look like ``name=value``; the raw value text (quotes
    included) is forwarded as a keyword argument to PlanetPostList.

    Raises TemplateSyntaxError when the token after the tag name is not
    ``with``, when an option has no ``=``, or when the option name is not
    one of the supported ones.
    """
    allowed_options = ('tag', 'category', 'template', 'limit')
    bits = list(smart_split(token.contents))
    kwargs = {}
    if len(bits) > 1:
        tag_name = bits[0]
        # NOTE(review): the message says "fourth argument" although the check
        # is on the token right after the tag name; left untouched because it
        # is a translatable message id.
        if bits[1] != 'with':
            raise TemplateSyntaxError(
                _("if given, fourth argument to %s tag must be 'with'") %
                tag_name)
        for bit in bits[2:]:
            # Split on the first '=' only, so a value that itself contains
            # '=' (e.g. tag="a=b") is accepted instead of being reported as
            # badly formatted.
            name, separator, value = bit.partition('=')
            if not separator:
                raise TemplateSyntaxError(
                    _("%(tag)s tag was given a badly formatted option: '%(option)s'"
                      ) % {
                          'tag': tag_name,
                          'option': bit,
                      })
            if name not in allowed_options:
                raise TemplateSyntaxError(
                    _("%(tag)s tag was given an invalid option: '%(option)s'"
                      ) % {
                          'tag': tag_name,
                          'option': name,
                      })
            kwargs[str(name)] = value

    return PlanetPostList(**kwargs)

Esempio n. 39

0

Mostra file

File: base.py Progetto: lleej/django-study

 def split_contents(self):
     """
     Split ``self.contents`` on spaces and return the pieces as a list of strings.

     1. smart_split is used, so text enclosed in ' or " is not split.
     2. Translation-marked strings are kept whole. For example,
        '_("a" "b" "c")' stays '_("a" "b" "c")' instead of becoming three pieces.

     :return: list of strings
     """
     pieces = []
     tokens = smart_split(self.contents)
     for token in tokens:
         # A token opening with _(" or _(' starts a translation-marked piece.
         if token.startswith(('_("', "_('")):
             # The piece ends at the same quote character followed by ')'.
             closing = token[2] + ')'
             group = [token]
             # Pull further tokens from the same iterator until the closing one.
             while not group[-1].endswith(closing):
                 group.append(next(tokens))
             token = ' '.join(group)
         pieces.append(token)
     return pieces

Esempio n. 40

0

Mostra file

File: simple_search.py Progetto: ivh/TmyCMS

    def get_text_search_query(self, query_string):
        """
        Build a Q object that matches every term of ``query_string``.

        ``query_string`` is tokenized with smart_split. Each term must match
        at least one of ``self.Meta.search_fields`` (the per-field lookups are
        OR-ed together), and all terms must match (the per-term conditions are
        AND-ed together).

        A search field may carry a one-character prefix selecting the lookup:
        ``^`` -> istartswith, ``=`` -> iexact, ``@`` -> search on MySQL and
        icontains otherwise; no prefix -> icontains.

        Returns an empty ``Q()`` when the query contains no terms.
        """
        def construct_search(field_name):
            if field_name.startswith('^'):
                return "%s__istartswith" % field_name[1:]
            elif field_name.startswith('='):
                return "%s__iexact" % field_name[1:]
            elif field_name.startswith('@'):
                if settings.DATABASE_ENGINE == 'mysql':
                    return "%s__search" % field_name[1:]
                else:
                    return "%s__icontains" % field_name[1:]
            else:
                return "%s__icontains" % field_name

        # The lookups depend only on the configured fields, so build them once
        # instead of once per search term.
        lookups = [construct_search(str(field_name)) for field_name in self.Meta.search_fields]

        filters = [
            reduce(Q.__or__, [Q(**{lookup: bit}) for lookup in lookups])
            for bit in smart_split(query_string)
        ]
        if not filters:
            # reduce() without an initial value raises TypeError on an empty
            # list, which is what an empty or whitespace-only query produces.
            return Q()
        return reduce(Q.__and__, filters)

Esempio n. 41

0

Mostra file

File: search.py Progetto: ashikin0112/biostar-central

def sql_search(query, fields=None):
    """
    Return the Post queryset matching every term of ``query``.

    ``query`` is stripped and tokenized with smart_split; tokens found in
    STOP are dropped. Each remaining term must be contained
    (``__icontains``) in at least one of ``fields``, and all terms must match.

    ``fields`` is an iterable of field names, e.g.
    ['name', 'category__name']. Names are used verbatim; no prefix characters
    are interpreted here.

    When there are no usable terms, or no fields were given, the filter is
    ``Q(pk=None)``.
    """
    # The default is None; normalize it so the default call cannot fail with
    # "NoneType is not iterable" as soon as the query has a term.
    fields = list(fields) if fields else []

    terms = [s for s in smart_split(query.strip()) if s not in STOP]

    filters = []
    if fields:
        for bit in terms:
            queries = [
                Q(**{f"{field_name}__icontains": bit}) for field_name in fields
            ]
            filters.append(reduce(Q.__or__, queries))

    condition = reduce(Q.__and__, filters) if filters else Q(pk=None)

    results = Post.objects.filter(condition)
    logger.info("Preform sql lite search.")
    return results

Esempio n. 42

0

Mostra file

File: models.py Progetto: powderflask/cms-thumbnail

def deserialize_options(option_string):
    """
    Parse ``option_string`` into a dict of option keys and values.

    The string is tokenized with smart_split and every token must match
    ``kw_pat``, which supplies the "key" and "value" groups. Surrounding
    quote characters are stripped from each value.

    Raises ThumbnailParseError for a token that does not match ``kw_pat``.
    """
    parsed = {}
    for token in smart_split(option_string):
        match = kw_pat.match(token)
        if not match:
            raise ThumbnailParseError("Invalid thumbnail option: %s" % token)

        name = smart_str(match.group("key"))
        raw_value = smart_str(match.group("value"))
        cleaned = raw_value.strip("\"'")

        # "crop" values are passed through parse_crop for validation; it is
        # expected to raise ThumbnailParseError when the value is invalid.
        if name == "crop":
            parse_crop(cleaned, [0, 0], [0, 0])

        parsed[name] = cleaned
    return parsed

Esempio n. 43

0

Mostra file

File: autocomplete.py Progetto: tatortrechts/api.tatortrechts.de

def generate_phrases():
    """
    Rebuild all Phrase rows from the titles and descriptions of every Incident.

    Steps:
      1. Delete the existing phrases.
      2. For each incident, tokenize title and description (quotes removed,
         stop words dropped, filtered through find_german_nouns) and collect
         the 1- to 3-grams, each stored as its sorted words joined by a space.
      3. Keep the more frequent half of the distinct n-grams as Phrase rows.
      4. Attach to each incident the kept phrases it produced.
    """
    Phrase.objects.all().delete()

    incidents = []
    ngrams_per_incident = []
    counter = Counter()

    for incident in tqdm(Incident.objects.all(),
                         desc="generating ngrams / phrases"):
        collected = []
        for text in (incident.title, incident.description):
            if not text:
                continue
            unquoted = text.replace('"', "").replace("'", "")
            words = [
                word for word in smart_split(unquoted)
                if word.lower() not in STOP_WORDS
            ]
            words = list(find_german_nouns(words))

            if not words:
                continue
            # n-gram sizes 1..3, capped by the number of available words.
            for size in range(1, min(4, len(words) + 1)):
                joined = [" ".join(sorted(gram)) for gram in find_ngrams(words, size)]
                counter.update(joined)
                collected.extend(joined)
        ngrams_per_incident.append(collected)
        incidents.append(incident)

    # Only the most common half of the distinct n-grams become phrases.
    kept = counter.most_common(len(counter) // 2)
    phrases = [Phrase(option=option, count=count) for option, count in kept]
    Phrase.objects.bulk_create(phrases)
    by_option = {phrase.option: phrase for phrase in phrases}
    kept_options = set(option for option, _count in kept)

    for incident, ngram_list in tqdm(zip(incidents, ngrams_per_incident),
                                     total=len(ngrams_per_incident),
                                     desc="adding phrases to incidents"):
        matched = [by_option[gram] for gram in ngram_list if gram in kept_options]
        incident.phrases.add(*matched)

    Phrase.objects.sync()

Esempio n. 44

0

Mostra file

File: commands.py Progetto: twoolie/ProjectNarwhal

    def do_command(self, e, c, cmd, from_nick):
        try:
            if cmd == 'echo':
                self.reply(e, "Wow, there is an echo here")
            
            if cmd.startswith('count'):
                ans =""
                words = cmd.split(" ")
                while True:
                    mcmd = words.pop(0)
                    model = words.pop(0)
                    if model=='torrents':
                        qs = Torrent.objects.all()
                    #if model=='anime':
                    #    qs = Anime.objects.all()
                    
                    if not words:
                        break
                
                if mcmd == 'count':
                    ans += '%s %s '% (qs.count(), model)
                self.reply(e, ans)
            
            if cmd.startswith('search'):
                from django.utils.text import smart_split
                query = smart_split(cmd.lsplit(" ")[1])
                if len(query) == 1:
                    pass
                
                
            if cmd == 'help':
                self.say_private(from_nick, 
"""Commands:
    help             Display this message.
    search "query"   search the tracker""")
            else:
                self.reply(e, "I don't understand '%s'."%(cmd))
        except:
            self.reply(e, "Ow, you hurt my brain.")
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)

Esempio n. 45

0

Mostra file

File: planet_tags.py Progetto: alexey-sveshnikov/django-planet

def planet_post_list(__, token):
    """
    Render a list of posts using the planet/list.html template.

    Params:
        limit: limit to this number of entries
        tag: select only Posts that matches this tag
        category: select only Posts that belongs to Feeds under this Category
        template: render using a different template
        hidden: show also hidden posts

    Examples:
        {% planet_post_list with limit=10 tag=tag %}
        {% planet_post_list with tag="Redis" %}
        {% planet_post_list with category="PyPy" hidden="True" %}

    The options ``days`` and ``page`` are accepted as well and forwarded like
    the others. The first parameter is unused.

    Raises TemplateSyntaxError when the token after the tag name is not
    ``with``, when an option has no ``=``, or when the option name is not
    one of the supported ones.
    """
    allowed_options = ('tag', 'category', 'template', 'limit', 'hidden',
                       'days', 'page')
    bits = list(smart_split(token.contents))
    kwargs = {}
    if len(bits) > 1:
        tag_name = bits[0]
        # NOTE(review): the message says "fourth argument" although the check
        # is on the token right after the tag name; left untouched because it
        # is a translatable message id.
        if bits[1] != 'with':
            raise TemplateSyntaxError(_("if given, fourth argument to %s tag must be 'with'") % tag_name)
        for bit in bits[2:]:
            # Split on the first '=' only, so a value that itself contains
            # '=' (e.g. tag="a=b") is accepted instead of being reported as
            # badly formatted.
            name, separator, value = bit.partition('=')
            if not separator:
                raise TemplateSyntaxError(_("%(tag)s tag was given a badly formatted option: '%(option)s'") % {
                    'tag': tag_name,
                    'option': bit,
                })
            if name not in allowed_options:
                raise TemplateSyntaxError(_("%(tag)s tag was given an invalid option: '%(option)s'") % {
                    'tag': tag_name,
                    'option': name,
                })
            kwargs[str(name)] = value

    return PlanetPostList(**kwargs)

Esempio n. 46

0

Mostra file

File: test_text.py Progetto: thibaudcolas/django

 def test_smart_split(self):
     """Run text.smart_split over a table of inputs, one subTest per input."""
     # Each case is (input, tokens expected from list(text.smart_split(input))).
     cases = (
         # Quoted phrases, including embedded and escaped quotes, stay in one token.
         ('This is "a person" test.', ["This", "is", '"a person"', "test."]),
         ('This is "a person\'s" test.', ["This", "is", '"a person\'s"', "test."]),
         ('This is "a person\\"s" test.', ["This", "is", '"a person\\"s"', "test."]),
         # Unbalanced quotes do not glue the following words together.
         ("\"a 'one", ['"a', "'one"]),
         ("all friends' tests", ["all", "friends'", "tests"]),
         # key=value style arguments with quoted values.
         ('url search_page words="something else"', ["url", "search_page", 'words="something else"']),
         ("url search_page words='something else'", ["url", "search_page", "words='something else'"]),
         ('url search_page words "something else"', ["url", "search_page", "words", '"something else"']),
         ('url search_page words-"something else"', ["url", "search_page", 'words-"something else"']),
         ("url search_page words=hello", ["url", "search_page", "words=hello"]),
         ('url search_page words="something else', ["url", "search_page", 'words="something', "else"]),
         ("cut:','|cut:' '", ["cut:','|cut:' '"]),
         # Test for #20231
         (lazystr("a b c d"), ["a", "b", "c", "d"]),
     )
     for source, tokens in cases:
         with self.subTest(value=source):
             produced = list(text.smart_split(source))
             self.assertEqual(produced, tokens)

Esempio n. 47

0

Mostra file

File: utils.py Progetto: HelleIrina/plan

def build_search(searchstring, filters, max_query_length=4,
                 combine=operator.and_):
    """
    Build a Q object that searches ``filters`` for the words of ``searchstring``.

    Params:
        searchstring: raw search text, tokenized with smart_split
        filters: iterable of lookup names (keyword names for models.Q)
        max_query_length: maximum number of words taken from the search text
        combine: binary callable used to merge the per-word conditions
            (defaults to operator.and_)

    For each word, one condition per lookup is built and OR-ed together;
    the per-word conditions are merged with ``combine``. A leading quote is
    removed from a word, together with the trailing one when it matches.

    Returns the combined models.Q (an empty Q when no word is used).
    """
    count = 0
    search_filter = models.Q()

    for word in text_utils.smart_split(searchstring):
        # Stop once max_query_length words were used. The former
        # "count > max_query_length" test let one extra word through.
        if count >= max_query_length:
            break

        if word[0] in ('"', "'"):
            if word[0] == word[-1]:
                word = word[1:-1]
            else:
                word = word[1:]

        # A bare quote or an empty quoted string leaves nothing to search for.
        if not word:
            continue

        local_filter = models.Q()
        for f in filters:
            local_filter |= models.Q(**{f: word})

        search_filter = combine(search_filter, local_filter)
        count += 1

    return search_filter

Esempio n. 48

0

Mostra file

File: utils.py Progetto: pivotal-energy-solutions/django-datatable-view

def split_terms(s):
    """Split ``s`` with smart_split, strip quotes and spaces from each term, drop empty terms."""
    stripped = (term.strip("'\" ") for term in smart_split(s))
    return filter(None, stripped)

Esempio n. 49

0

Mostra file

File: views.py Progetto: GustavoCruz12/educacao

 def _extract_terms(self, query):
     """Return the smart_split words of ``query``, each passed through self._clean_term."""
     cleaned = []
     for word in smart_split(query):
         cleaned.append(self._clean_term(word))
     return cleaned

Esempio n. 50

0

Mostra file

File: views.py Progetto: JsseL/django-datatable-view

    def apply_queryset_options(self, queryset):
        """
        Interprets the datatable options.

        Options requiring manual massaging of the queryset are handled here.  The output of this
        method should be treated as a list, since complex options might convert it out of the
        original queryset form.

        Ordering and searching are applied in the database where possible; sort fields that
        are not database-backed are applied in Python afterwards.

        The returned object carries two extra attributes: ``_dtv_total_initial_record_count``
        (row count before searching) and ``_dtv_unpaged_total`` (row count of the result).

        Raises ValueError when a searched field has a type that none of the FIELD_TYPES groups
        checked here covers.

        """

        options = self._get_datatable_options()

        # These will hold residue queries that cannot be handled in at the database level.  Anything
        # in these variables by the end will be handled manually (read: less efficiently)
        sort_fields = []
        searches = []

        # This count is for the benefit of the frontend datatables.js
        total_initial_record_count = queryset.count()

        if options['ordering']:
            db_fields, sort_fields = split_real_fields(self.model, options['ordering'])
            queryset = queryset.order_by(*db_fields)

        if options['search']:
            db_fields, searches = filter_real_fields(self.model, options['columns'],
                                                     key=get_first_orm_bit)
            db_fields.extend(options['search_fields'])

            queries = []  # Queries generated to search all fields for all terms
            search_terms = map(lambda q: q.strip("'\" "), smart_split(options['search']))

            for term in search_terms:
                term_lower = term.lower()
                term_queries = []  # Queries generated to search all fields for this term
                # Every concrete database lookup string in 'columns' is followed to its trailing field descriptor.  For example, "subdivision__name" terminates in a CharField.  The field type determines how it is probed for search.
                for column in db_fields:
                    column = get_field_definition(column)
                    for component_name in column.fields:
                        field_queries = []  # Queries generated to search this database field for the search term

                        field = resolve_orm_path(self.model, component_name)
                        if field.choices:
                            # Query the database for the database value rather than display value.
                            # Every choice whose label contains the term adds a lookup; assigning
                            # the list here would keep only the last matching choice.
                            for database_value, display_value in field.get_flatchoices():
                                if term_lower in display_value.lower():
                                    field_queries.append({component_name + '__iexact': database_value})

                        elif isinstance(field, tuple(FIELD_TYPES['text'])):
                            field_queries = [{component_name + '__icontains': term}]
                        elif isinstance(field, tuple(FIELD_TYPES['date'])):
                            try:
                                date_obj = dateutil.parser.parse(term)
                            except ValueError:
                                # This exception is theoretical, but it doesn't seem to raise.
                                pass
                            except TypeError:
                                # Failed conversions can lead to the parser adding ints to None.
                                pass
                            except OverflowError:
                                # Catches OverflowError: signed integer is greater than maximum
                                pass
                            else:
                                field_queries.append({component_name: date_obj})

                            # Add queries for more granular date field lookups
                            try:
                                numerical_value = int(term)
                            except ValueError:
                                pass
                            else:
                                if datetime.MINYEAR < numerical_value < datetime.MAXYEAR - 1:
                                    field_queries.append({component_name + '__year': numerical_value})
                                if 0 < numerical_value <= 12:
                                    field_queries.append({component_name + '__month': numerical_value})
                                if 0 < numerical_value <= 31:
                                    field_queries.append({component_name + '__day': numerical_value})
                        elif isinstance(field, tuple(FIELD_TYPES['boolean'])):
                            # Keep the parsed flag in its own name.  Rebinding `term` here would
                            # hand a bool to every field still to be probed for this term.
                            if term_lower in ('true', 'yes'):
                                bool_value = True
                            elif term_lower in ('false', 'no'):
                                bool_value = False
                            else:
                                continue

                            field_queries = [{component_name: bool_value}]
                        elif isinstance(field, tuple(FIELD_TYPES['integer'])):
                            try:
                                field_queries = [{component_name: int(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, tuple(FIELD_TYPES['float'])):
                            try:
                                field_queries = [{component_name: float(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, tuple(FIELD_TYPES['ignored'])):
                            pass
                        else:
                            raise ValueError("Unhandled field type for %s (%r) in search." % (component_name, type(field)))

                        # Append each field inspection for this term
                        term_queries.extend(Q(**q) for q in field_queries)
                # Append the logical OR of all field inspections for this term
                if term_queries:
                    queries.append(reduce(operator.or_, term_queries))
            # Apply the logical AND of all term inspections
            if queries:
                queryset = queryset.filter(reduce(operator.and_, queries))

        # TODO: Remove "and not searches" from this conditional, since manual searches won't be done
        if not sort_fields and not searches:
            # We can shortcut and speed up the process if all operations are database-backed.
            object_list = queryset
            if options['search']:
                object_list._dtv_unpaged_total = queryset.count()
            else:
                object_list._dtv_unpaged_total = total_initial_record_count
        else:
            object_list = ObjectListResult(queryset)

            # # Manual searches
            # # This is broken until it searches all items in object_list previous to the database
            # # sort. That represents a runtime load that hits every row in code, rather than in the
            # # database. If enabled, this would cripple performance on large datasets.
            # if options['i_walk_the_dangerous_line_between_genius_and_insanity']:
            #     length = len(object_list)
            #     for i, obj in enumerate(reversed(object_list)):
            #         keep = False
            #         for column_info in searches:
            #             column_index = options['columns'].index(column_info)
            #             rich_data, plain_data = self.get_column_data(column_index, column_info, obj)
            #             for term in search_terms:
            #                 if term.lower() in plain_data.lower():
            #                     keep = True
            #                     break
            #             if keep:
            #                 break
            #
            #         if not keep:
            #             removed = object_list.pop(length - 1 - i)
            #             # print column_info
            #             # print data
            #             # print '===='

            # Sort the results manually for whatever remaining sort options are left over
            def data_getter_orm(field_name):
                def key(obj):
                    try:
                        return reduce(getattr, [obj] + field_name.split('__'))
                    except (AttributeError, ObjectDoesNotExist):
                        return None
                return key

            def data_getter_custom(i):
                def key(obj):
                    rich_value, plain_value = self.get_column_data(i, options['columns'][i], obj)
                    return plain_value
                return key

            # Sort the list using the manual sort fields, back-to-front.  `sort` is a stable
            # operation, meaning that multiple passes can be made on the list using different
            # criteria.  The only catch is that the passes must be made in reverse order so that
            # the "first" sort field with the most priority ends up getting applied last.
            for sort_field in sort_fields[::-1]:
                if sort_field.startswith('-'):
                    reverse = True
                    sort_field = sort_field[1:]
                else:
                    reverse = False

                # A '!' prefix marks a column index handled by data_getter_custom.
                if sort_field.startswith('!'):
                    key_function = data_getter_custom
                    sort_field = int(sort_field[1:])
                else:
                    key_function = data_getter_orm

                try:
                    object_list.sort(key=key_function(sort_field), reverse=reverse)
                except TypeError as err:
                    log.error("Unable to sort on {0} - {1}".format(sort_field, err))

            object_list._dtv_unpaged_total = len(object_list)

        object_list._dtv_total_initial_record_count = total_initial_record_count
        return object_list

Esempio n. 51

0

Mostra file

def split_terms(s):
    """Return the smart_split terms of ``s`` with quotes and spaces stripped, skipping empty ones."""
    def _strip(term):
        return term.strip("'\" ")

    return filter(None, map(_strip, smart_split(s)))

Esempio n. 52

0

Mostra file

File: __init__.py Progetto: johnnypenn/advisoryscan

 def split_contents(self):
     """Return the smart_split pieces of ``self.contents`` as a list."""
     return [piece for piece in smart_split(self.contents)]

Esempio n. 53

0

Mostra file

 def _extract_terms(self, query):
     """Split ``query`` with smart_split and clean each word with self._clean_term."""
     words = smart_split(query)
     return list(map(self._clean_term, words))

Esempio n. 54

0

Mostra file

    def test_smart_split(self):
        """Check smart_split on quoted, escaped, key=value and unterminated inputs."""

        def split(value):
            # Materialize the tokens so they can be compared and indexed.
            return list(smart_split(value))

        expect = self.assertEqual

        # Quoted phrases stay in one token, with embedded or escaped quotes.
        expect(split(r'''This is "a person" test.'''),
               [u'This', u'is', u'"a person"', u'test.'])
        expect(split(r'''This is "a person's" test.''')[2],
               u'"a person\'s"')
        expect(split(r'''This is "a person\"s" test.''')[2],
               u'"a person\\"s"')

        # Unbalanced quotes do not glue the following words together.
        expect(split('''"a 'one'''), [u'"a', u"'one"])
        expect(split(r'''all friends' tests''')[1], "friends'")

        # key=value style arguments with quoted values.
        expect(split(u'url search_page words="something else"'),
               [u'url', u'search_page', u'words="something else"'])
        expect(split(u"url search_page words='something else'"),
               [u'url', u'search_page', u"words='something else'"])
        expect(split(u'url search_page words "something else"'),
               [u'url', u'search_page', u'words', u'"something else"'])
        expect(split(u'url search_page words-"something else"'),
               [u'url', u'search_page', u'words-"something else"'])
        expect(split(u'url search_page words=hello'),
               [u'url', u'search_page', u'words=hello'])

        # An unterminated quote is split at the space.
        expect(split(u'url search_page words="something else'),
               [u'url', u'search_page', u'words="something', u'else'])

        expect(split("cut:','|cut:' '"), [u"cut:','|cut:' '"])

Esempio n. 55

0

Mostra file

File: views.py Progetto: jacobvalenta/django-datatable-view

    def apply_queryset_options(self, queryset):
        """
        Interprets the datatable options.

        Options requiring manual massaging of the queryset are handled here.  The output of this
        method should be treated as a list, since complex options might convert it out of the
        original queryset form.

        Ordering and searching are applied in the database where possible; sort fields that
        are not database-backed are applied in Python afterwards.

        The returned object carries two extra attributes: ``_dtv_total_initial_record_count``
        (row count before searching) and ``_dtv_unpaged_total`` (row count of the result).

        Raises ValueError when a searched field has a type not handled here.

        """

        options = self._get_datatable_options()

        # These will hold residue queries that cannot be handled in at the database level.  Anything
        # in these variables by the end will be handled manually (read: less efficiently)
        sort_fields = []
        searches = []

        # This count is for the benefit of the frontend datatables.js
        total_initial_record_count = queryset.count()

        if options['ordering']:
            db_fields, sort_fields = split_real_fields(self.model, options['ordering'])
            queryset = queryset.order_by(*db_fields)

        if options['search']:
            db_fields, searches = filter_real_fields(self.model, options['columns'],
                                                     key=get_first_orm_bit)
            db_fields.extend(options['search_fields'])

            queries = []  # Queries generated to search all fields for all terms
            search_terms = map(lambda q: q.strip("'\" "), smart_split(options['search']))

            for term in search_terms:
                term_lower = term.lower()
                term_queries = []  # Queries generated to search all fields for this term
                # Every concrete database lookup string in 'columns' is followed to its trailing field descriptor.  For example, "subdivision__name" terminates in a CharField.  The field type determines how it is probed for search.
                for column in db_fields:
                    column = get_field_definition(column)
                    for component_name in column.fields:
                        field_queries = []  # Queries generated to search this database field for the search term

                        field = resolve_orm_path(self.model, component_name)

                        if isinstance(field, (models.CharField, models.TextField, models.FileField)):
                            field_queries = [{component_name + '__icontains': term}]
                        elif isinstance(field, models.DateField):
                            try:
                                date_obj = dateutil.parser.parse(term)
                            except ValueError:
                                # This exception is theoretical, but it doesn't seem to raise.
                                pass
                            except TypeError:
                                # Failed conversions can lead to the parser adding ints to None.
                                pass
                            else:
                                field_queries.append({component_name: date_obj})

                            # Add queries for more granular date field lookups
                            try:
                                numerical_value = int(term)
                            except ValueError:
                                pass
                            else:
                                if 0 < numerical_value < 3000:
                                    field_queries.append({component_name + '__year': numerical_value})
                                if 0 < numerical_value <= 12:
                                    field_queries.append({component_name + '__month': numerical_value})
                                if 0 < numerical_value <= 31:
                                    field_queries.append({component_name + '__day': numerical_value})
                        elif isinstance(field, models.BooleanField):
                            # Keep the parsed flag in its own name.  Rebinding `term` here would
                            # hand a bool to every field still to be probed for this term.
                            if term_lower in ('true', 'yes'):
                                bool_value = True
                            elif term_lower in ('false', 'no'):
                                bool_value = False
                            else:
                                continue

                            field_queries = [{component_name: bool_value}]
                        elif isinstance(field, (models.IntegerField, models.AutoField)):
                            try:
                                field_queries = [{component_name: int(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, (models.FloatField, models.DecimalField)):
                            try:
                                field_queries = [{component_name: float(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, models.ForeignKey):
                            pass
                        else:
                            raise ValueError("Unhandled field type for %s (%r) in search." % (component_name, type(field)))

                        # Append each field inspection for this term
                        term_queries.extend(Q(**q) for q in field_queries)
                # Append the logical OR of all field inspections for this term
                if term_queries:
                    queries.append(reduce(operator.or_, term_queries))
            # Apply the logical AND of all term inspections
            if queries:
                queryset = queryset.filter(reduce(operator.and_, queries))

        # TODO: Remove "and not searches" from this conditional, since manual searches won't be done
        if not sort_fields and not searches:
            # We can shortcut and speed up the process if all operations are database-backed.
            object_list = queryset
            object_list._dtv_unpaged_total = queryset.count()
        else:
            object_list = ObjectListResult(queryset)

            # # Manual searches
            # # This is broken until it searches all items in object_list previous to the database
            # # sort. That represents a runtime load that hits every row in code, rather than in the
            # # database. If enabled, this would cripple performance on large datasets.
            # if options.i_walk_the_dangerous_line_between_genius_and_insanity:
            #     length = len(object_list)
            #     for i, obj in enumerate(reversed(object_list)):
            #         keep = False
            #         for column_info in searches:
            #             column_index = options.columns.index(column_info)
            #             rich_data, plain_data = self.get_column_data(column_index, column_info, obj)
            #             for term in search_terms:
            #                 if term.lower() in plain_data.lower():
            #                     keep = True
            #                     break
            #             if keep:
            #                 break
            #
            #         if not keep:
            #             removed = object_list.pop(length - 1 - i)
            #             # print column_info
            #             # print data
            #             # print '===='

            # Sort the results manually for whatever remaining sort options are left over
            def data_getter_orm(field_name):
                def key(obj):
                    try:
                        return reduce(getattr, [obj] + field_name.split('__'))
                    except (AttributeError, ObjectDoesNotExist):
                        return None
                return key

            def data_getter_custom(i):
                def key(obj):
                    rich_value, plain_value = self.get_column_data(i, options['columns'][i], obj)
                    return plain_value
                return key

            # Sort the list using the manual sort fields, back-to-front.  `sort` is a stable
            # operation, meaning that multiple passes can be made on the list using different
            # criteria.  The only catch is that the passes must be made in reverse order so that
            # the "first" sort field with the most priority ends up getting applied last.
            for sort_field in sort_fields[::-1]:
                if sort_field.startswith('-'):
                    reverse = True
                    sort_field = sort_field[1:]
                else:
                    reverse = False

                # A '!' prefix marks a column index handled by data_getter_custom.
                if sort_field.startswith('!'):
                    key_function = data_getter_custom
                    sort_field = int(sort_field[1:])
                else:
                    key_function = data_getter_orm

                try:
                    object_list.sort(key=key_function(sort_field), reverse=reverse)
                except TypeError as err:
                    log.error("Unable to sort on {0} - {1}".format(sort_field, err))

            object_list._dtv_unpaged_total = len(object_list)

        object_list._dtv_total_initial_record_count = total_initial_record_count
        return object_list

Esempio n. 56

0

Mostra file

File: __init__.py Progetto: jonaustin/advisoryscan

 def split_contents(self):
     """Return the pieces that ``smart_split`` produces for ``self.contents``, as a list."""
     pieces = smart_split(self.contents)
     return [piece for piece in pieces]

Esempio n. 57

0

Mostra file

File: views.py Progetto: jpugsley/django-datatable-view

    def _buildSearchQueries( self, db_fields, search_query, is_regex ):
        queries = []

        if is_regex:
            field_queries = []
            for column in db_fields:
                column = get_field_definition( column )
                for component_name in column.fields + column.search_fields:
                    field = resolve_orm_path( self.model, component_name )
                    if isinstance( field, tuple( FIELD_TYPES['text'] ) ):
                        field_queries.append( Q( **{ component_name + u'__iregex' : search_query } ) )
            queries.append( reduce( operator.or_, field_queries ) )
        else:
            search_terms = map( lambda q: q.strip( "'\" " ), smart_split( search_query ) )
            for term in search_terms:
                term_queries = [ ]  # Queries generated to search all fields for this term
                # Every concrete database lookup string in 'columns' is followed to its trailing field descriptor.  For example, "subdivision__name" terminates in a CharField.  The field type determines how it is probed for search.
                for column in db_fields:
                    column = get_field_definition( column )
                    for component_name in column.fields + column.search_fields:
                        field_queries = [ ]  # Queries generated to search this database field for the search term
                        field = resolve_orm_path( self.model, component_name )
                        if isinstance( field, tuple( FIELD_TYPES[ 'text' ] ) ):
                            field_queries = [ { component_name + '__icontains': term } ]
                        elif isinstance( field, tuple( FIELD_TYPES[ 'date' ] ) ):
                            try:
                                date_obj = dateutil.parser.parse( term )
                            except ValueError:
                                # This exception is theoretical, but it doesn't seem to raise.
                                pass
                            except TypeError:
                                # Failed conversions can lead to the parser adding ints to None.
                                pass
                            else:
                                field_queries.append( { component_name: date_obj } )

                            # Add queries for more granular date field lookups
                            try:
                                numerical_value = int( term )
                            except ValueError:
                                pass
                            else:
                                if 0 < numerical_value < 3000:
                                    field_queries.append( { component_name + '__year': numerical_value } )
                                if 0 < numerical_value <= 12:
                                    field_queries.append( { component_name + '__month': numerical_value } )
                                if 0 < numerical_value <= 31:
                                    field_queries.append( { component_name + '__day': numerical_value } )
                        elif isinstance( field, tuple( FIELD_TYPES[ 'boolean' ] ) ):
                            if term.lower( ) in ('true', 'yes'):
                                term = True
                            elif term.lower( ) in ('false', 'no'):
                                term = False
                            else:
                                continue

                            field_queries = [ { component_name: term } ]
                        elif isinstance( field, tuple( FIELD_TYPES[ 'integer' ] ) ):
                            try:
                                field_queries = [ { component_name: int( term ) } ]
                            except ValueError:
                                pass
                        elif isinstance( field, tuple( FIELD_TYPES[ 'float' ] ) ):
                            try:
                                field_queries = [ { component_name: float( term ) } ]
                            except ValueError:
                                pass
                        elif isinstance( field, tuple( FIELD_TYPES[ 'ignored' ] ) ):
                            pass
                        else:
                            raise ValueError(
                                "Unhandled field type for %s (%r) in search." % (component_name, type( field )) )

                        # print field_queries

                        # Append each field inspection for this term
                        term_queries.extend( map( lambda q: Q( **q ), field_queries ) )
                # Append the logical OR of all field inspections for this term
                if len( term_queries ):
                    queries.append( reduce( operator.or_, term_queries ) )

        return queries

Esempio n. 58

0

Mostra file

File: helpers.py Progetto: softformance/aldryn-search

def get_cleaned_bits(data):
    """Split ``data`` into bits after coercing it to text and stripping tags.

    ``data`` is passed through ``force_unicode`` and then ``strip_tags``;
    whatever ``smart_split`` returns for that text is handed back unchanged.
    """
    return smart_split(strip_tags(force_unicode(data)))

Esempio n. 59

0

Mostra file

File: views.py Progetto: nicobav/django-datatable-view

    def _buildSearchQueries(self,
                            db_fields,
                            search_query,
                            is_regex,
                            column_search=False):
        queries = []

        if is_regex:
            field_queries = []
            for column in db_fields:
                column = get_field_definition(column)
                for component_name in column.fields + column.search_fields:
                    field = resolve_orm_path(self.get_model(), component_name)
                    if isinstance(field, tuple(FIELD_TYPES['text'])):
                        field_queries.append(
                            Q(**{component_name + u'__iregex': search_query}))
            queries.append(reduce(operator.or_, field_queries))
        else:
            search_terms = map(lambda q: q.strip("'\" "),
                               smart_split(search_query))
            for term in search_terms:
                term_queries = [
                ]  # Queries generated to search all fields for this term
                # Every concrete database lookup string in 'columns' is followed to its trailing field descriptor.  For example, "subdivision__name" terminates in a CharField.  The field type determines how it is probed for search.
                for column in db_fields:
                    column = get_field_definition(column)
                    for component_name in column.fields + column.search_fields:
                        field_queries = [
                        ]  # Queries generated to search this database field for the search term
                        field = resolve_orm_path(self.get_model(),
                                                 component_name)

                        field_method_name = 'search_' + field.name
                        if hasattr(self, field_method_name):
                            # Call field specific method to get the field queries
                            field_queries.append(
                                getattr(self,
                                        field_method_name)(field, term,
                                                           component_name))
                        elif field.choices:
                            # Query the database for the database value rather than display value
                            database_values, display_values = zip(
                                *field.get_flatchoices())
                            string_database_values = [
                                unicode(value).lower()
                                for value in database_values
                            ]
                            display_values = [
                                unicode(value).lower()
                                for value in display_values
                            ]

                            all_values = zip(display_values,
                                             string_database_values,
                                             database_values)

                            search_term = term.lower()

                            # If searching a specific column then match against the database value
                            if column_search:
                                for display_value, string_database_value, database_value in all_values:
                                    if search_term == string_database_value:
                                        field_queries.append({
                                            component_name + '__exact':
                                            database_value
                                        })
                            # If searching globally then match against the display value
                            else:
                                for display_value, string_database_value, database_value in all_values:
                                    if search_term in display_value:
                                        field_queries.append({
                                            component_name + '__exact':
                                            database_value
                                        })

                        elif isinstance(field, tuple(FIELD_TYPES['text'])):
                            field_queries = [{
                                component_name + '__icontains':
                                term
                            }]
                        elif isinstance(field, tuple(FIELD_TYPES['date'])):
                            try:
                                date_obj = dateutil.parser.parse(term)
                            except ValueError:
                                # This exception is theoretical, but it doesn't seem to raise.
                                pass
                            except TypeError:
                                # Failed conversions can lead to the parser adding ints to None.
                                pass
                            except OverflowError:
                                # Catches OverflowError: signed integer is greater than maximum
                                pass
                            else:
                                field_queries.append(
                                    {component_name: date_obj})

                            # Add queries for more granular date field lookups
                            try:
                                numerical_value = int(term)
                            except ValueError:
                                pass
                            else:
                                if datetime.MINYEAR < numerical_value < datetime.MAXYEAR - 1:
                                    field_queries.append({
                                        component_name + '__year':
                                        numerical_value
                                    })
                                if 0 < numerical_value <= 12:
                                    field_queries.append({
                                        component_name + '__month':
                                        numerical_value
                                    })
                                if 0 < numerical_value <= 31:
                                    field_queries.append({
                                        component_name + '__day':
                                        numerical_value
                                    })
                        elif isinstance(field, tuple(FIELD_TYPES['boolean'])):
                            if term.lower() in ('true', 'yes'):
                                term = True
                            elif term.lower() in ('false', 'no'):
                                term = False
                            else:
                                continue

                            field_queries = [{component_name: term}]
                        elif isinstance(field, tuple(FIELD_TYPES['integer'])):
                            try:
                                field_queries = [{component_name: int(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, tuple(FIELD_TYPES['float'])):
                            try:
                                field_queries = [{component_name: float(term)}]
                            except ValueError:
                                pass
                        elif isinstance(field, tuple(FIELD_TYPES['ignored'])):
                            pass
                        else:
                            raise ValueError(
                                "Unhandled field type for %s (%r) in search." %
                                (component_name, type(field)))

                        # print field_queries

                        # Append each field inspection for this term
                        term_queries.extend(
                            map(lambda q: Q(**q), field_queries))
                # Append the logical OR of all field inspections for this term
                if len(term_queries):
                    queries.append(reduce(operator.or_, term_queries))

        return queries