Esempio n. 1
0
def _group_words(chars, chinese_only=False):
    obj_list = []
    loop = 0        
    skip = 0

    for x in chars:
        
        if skip != 0:
            skip -= 1
            loop += 1
            continue
        
        obj = {
             'chars': x,
             'wordset': loop,   
        }
                
        nc = False
                
        # IS IT A LINEBREAK
        if nc == False and x == '\n':
            obj['is_linebreak'] = True
            nc = True

        # IS IT A SPACE    
        if nc == False and x == ' ':
            obj['is_space'] = True
            nc = True

        # IS IT PUNCTUATION
        if nc == False and _is_punctuation(x):
            obj['is_punctuation'] = True 
            nc = True
    
        
        # IS IT A NUMBER?          
        if nc == False and _is_number(x):
            obj['is_number'] = True
            number = True
            num = x
            while number == True:
            
                # if the next character is also a number, add it to this one
                try:
                    next = chars[loop+1]
                except:
                    break
                
                if _is_number(next):
                    num = "%s%s" % (num, next)
                    chars.pop(loop+1)

                else:
                    break
                            

            obj['chars'] = num
            nc = True
        
        
        
        
        # IS THE CHARACTER ENGLISH?            
        if nc == False and _is_english(x):            
            obj['is_english'] = True
            english = True
            eng_word = x
            while english == True:
            
                # IF THE NEXT CHAR IS ENGLISH, LETS BUILD THE ENGLISH WORD
                try:
                    next = chars[loop+1]
                except:
                    break
                
                if _is_english(next):
                    eng_word = "%s%s" % (eng_word, next)
                    chars.pop(loop+1)

                else:
                    break
                            

            obj['chars'] = eng_word
            nc = True
        
        # IF THE CHARACTER IS NOT CHINESE
        if nc == True:
            if chinese_only == False:
                obj_list.append(obj)
                
            loop += 1
            continue

        search_string = [x,]
                
        # THIS LOOP WILL BUILD OUR CHINESE WORD - GUESSING WE WON'T HAVE MANY MORE THAN 10 CHARS
        for i in range(1,10):
            try:
                next_chars = chars[loop+i]
                if _is_punctuation(next_chars):
                    next_chars = None
                    break
                else:
                    search_string.append(next_chars)
            except:
                break
        
        
        r_server = _get_redis()
        r = False   
        
        
        while r == False and len(search_string) > 0:            
            
            key = "ZH:%sC:%s" % ( len(search_string), "".join(search_string))
            r = r_server.exists(key)
            
            if r:
                break
            else:
                try:
                    search_string.pop()
                except IndexError:
                    pass
        

                
        # initialise a ChineseWord object and add it to our object_list
        the_string = "".join(search_string)
        word = ChineseWord(chars=the_string)
        obj_list.append(word)
        
        
        # tells us how many characters need to be skipped before we start searching again
        # because maybe this word included the subsequent 3 chars, so let's not searhc them
        # again
        skip += (len(search_string)-1)
        loop += 1
        
     
    return obj_list  
Esempio n. 2
0
def search(request, search_string=None, title='Search', words=None):
    """Dispatch a search request to the matching result page.

    The search string is taken from the URL argument or, when that is
    missing, from a POSTed ``SearchForm``.  Without either, the plain search
    page is rendered.  Otherwise the string is routed, in this order, to the
    ambiguous-word page, the pinyin search, the English search, or split into
    word tokens and shown as a word list (a single token redirects to the
    ``single_word`` URL).

    NOTE: ``locals()`` is handed to the render helpers (presumably as the
    template context), so the local variable names in this function should
    not be renamed without checking the templates.

    :param request: the incoming HTTP request.
    :param search_string: search text from the URL; underscores stand for spaces.
    :param title: not used in the body; reaches the render helpers via ``locals()``.
    :param words: pre-grouped word tokens; computed from ``search_string`` if falsy.
    """
    r_server = _get_redis()

    # replace search string underscores with spaces
    if search_string:
        search_string = search_string.strip().replace('_', ' ')

    # Treat an empty or all-whitespace string exactly like a missing one,
    # so it is handled by the "no search string" branches below.
    if not search_string:
        search_string = None

    if search_string is None:
        # HANDLES EMPTY OR NULL SEARCH STRING
        if request.method != 'POST':
            form = SearchForm()
            return _render(request, 'website/search.html', locals())

        # POST REQUEST: TAKE THE SEARCH STRING FROM THE FORM
        form = SearchForm(request.POST)
        if not form.is_valid():
            # POST AND NO SEARCH STRING - SHOW THEM THE PLAIN JANE SEARCH PAGE
            form = SearchForm()
            return _render(request, 'website/search.html', locals())

        search_string = form.cleaned_data['char']

    # HANDLES AN AMBIGUOUS SEARCH
    if _is_ambiguous(search_string):
        message = messages.AMBIGUOUS_WORD
        return render(request, 'problem.html', locals())

    if r_server.exists((settings.PINYIN_WORD_KEY % _pinyin_to_ascii(search_string))):
        return _pinyin_search(request, search_string)

    if _is_english(search_string):
        return _english_search(request, search_string)

    if not words:
        things = _split_unicode_chrs(search_string)
        words = _group_words(things)

    # IF THE USER WAS LOGGED IN, RECORD IT IN THEIR 'SAVED WORDS'
    if request.user.is_authenticated():
        for x in words:
            # _group_words returns plain dicts for spaces, punctuation,
            # numbers and English runs; those have no ``chars`` attribute
            # and are not recorded.
            if not hasattr(x, 'chars'):
                continue
            word_searched.send(
                sender=word_searched,
                word=x.chars,
                time=datetime.datetime.now(),
                user_id=request.user.email
            )

    # if there's only 1 word, take us straight to the single word definition
    if len(words) == 1:
        word = words[0]
        url = reverse('single_word', args=[word])
        return HttpResponseRedirect(url)

    return _render(request, 'website/wordlist.html', locals())