Beispiel #1
0
    def _get_random(self, number=1, chars=None, length='*', pinyin=None):
        """Return random dictionary entries, optionally of a fixed length.

        Scans Redis for keys matching ``ZH:<length>C:*``, keeps a bounded
        number of the matches, shuffles them, and decodes the JSON that
        ``_search_redis`` returns for each chosen key.

        Args:
            number: How many entries to return.
            chars: If given, ``len(chars)`` replaces ``length``.
            length: Value substituted into the key pattern; the default
                ``'*'`` leaves the length part as a wildcard.
            pinyin: Unused; kept so existing callers keep working.

        Returns:
            When ``number == 1``, a single decoded entry, or ``None`` if no
            key matched the pattern. Otherwise a list of decoded entries,
            which is shorter than ``number`` when fewer keys matched.
        """
        if chars:
            length = len(chars)

        r_server = _get_redis()
        pattern = "ZH:%sC:*" % length

        # Only a bounded prefix of the scan is sampled so a huge keyspace is
        # never walked in full. The bound grows with ``number`` so a large
        # request is not starved by the default sample size.
        limit = max(number, 21)
        candidates = []
        for key in r_server.scan_iter(pattern):
            candidates.append(key)
            if len(candidates) >= limit:
                break

        # The one-argument form works on every Python version; the optional
        # second argument was removed in Python 3.11.
        random.shuffle(candidates)

        if number == 1:
            if not candidates:
                return None
            return json.loads(_search_redis(candidates[0]))

        # Slicing (rather than indexing) tolerates fewer matches than asked
        # for; max() keeps a negative ``number`` from slicing off the tail.
        chosen = candidates[:max(number, 0)]
        return [json.loads(_search_redis(key)) for key in chosen]
Beispiel #2
0
def text(request, hashkey=None, words=None):
    """Render the reader page for a stored text, creating the record if needed.

    When ``hashkey`` is not given, a random 5-character key is generated and
    a new Redis hash is written under ``text:<hashkey>`` holding the user,
    an empty title, ``words`` (as ``chars``), a timestamp, the hash and an
    empty url. The record is then looked up again; if it cannot be found the
    "problem" template is rendered (the snippet variant for AJAX requests).
    Otherwise the stored characters are split and grouped for display.

    NOTE: every template here is rendered with ``locals()``, so each local
    variable name in this function is part of the template context. Renaming
    or removing a local changes what the templates receive.

    Args:
        request: The incoming request object.
        hashkey: Identifier of an existing text; a new one is created when
            it is empty or missing.
        words: Text stored as ``chars`` when a new record is created.

    Returns:
        The rendered response for the text page, the page snippet, or the
        problem page.
    """
    
    if not hashkey:
        # No key supplied: invent one and persist a fresh record for it.
        hashkey = ''.join(random.choice(string.ascii_lowercase + string.digits) for x in range(5))
        key = "text:%s" % hashkey
    
        if request.user.is_authenticated():
            user = request.user.email
        else:
            # Placeholder owner for anonymous visitors.
            user = '******'
            
        mapping = {
            'user': user,
            'title': '', 
            'chars': words,
            'timestamp': time.time(),
            'hash': hashkey,
            'url' : '',
        }
        
        # ADD IT TO REDIS
        r_server = _get_redis()
        r_server.hmset(key, mapping)
                        
    else:
        key = 'text:%s' % hashkey
    
    
    # NOTE(review): the lookup=False call is presumably a cheap existence
    # check before the full fetch -- confirm against _search_redis.
    obj = None
    if _search_redis(key, lookup=False):
        obj = _search_redis(key)
    
    if not obj:
        # Nothing stored under this key: show the problem page instead.
        if request.is_ajax():
            html = render_to_string('website/problem_snippet.html', locals())
            return HttpResponse(html)
        
        return _render(request, 'website/problem.html', locals())
    
    title = 'Article'
    try:
        url = urlparse(obj['url']).netloc
    except KeyError:
        # Record has no 'url' field: ``url`` stays undefined, so it is simply
        # absent from the locals() handed to the templates below.
        pass
        
    chars = obj['chars'].decode('utf-8') # because redis stores things as strings...
    things = _split_unicode_chrs(chars)
    obj_list = _group_words(things) 
    
    
    # Not used in this function body; it reaches the templates via locals().
    list_template = 'creader/text_page_snippet.html' 
    
    # A ``page`` query parameter means only the list snippet is wanted.
    if request.GET.get('page'):
        template = 'creader/text_page_snippet.html'
        return render_to_response(template, locals())
        
    return _render(request, 'creader/text.html', locals())
Beispiel #3
0
 def __init__(self, words=None):
     """Load the English dictionary entry for *words* from Redis.

     The lookup key is built from ``settings.ENGLISH_WORD_KEY`` using the
     number of underscore-separated parts in ``words`` and ``words`` with
     spaces replaced by underscores.

     Attributes are always initialised, so an instance created without
     ``words``, or for a phrase that has no stored entry, can still be
     inspected safely:

     * ``key`` -- the Redis key that was looked up (``None`` without words)
     * ``english`` -- the stored ``english`` value, or ``None``
     * ``characters`` -- one ``ChineseWord`` per item of the stored
       ``chars`` list, or an empty list

     Args:
         words: The English word or phrase to look up.
     """
     # Defaults first: without them a missing entry left the instance with
     # no ``english``/``characters`` attributes at all.
     self.key = None
     self.english = None
     self.characters = []

     # The declared default is None, which cannot be split into a key.
     if words is None:
         return

     self.key = settings.ENGLISH_WORD_KEY % (len(words.split('_')), words.replace(' ', '_'))
     word = _search_redis(self.key)
     if not word:
         return

     word = json.loads(word)
     self.english = word['english']
     self.characters = [ChineseWord(chars=x) for x in word['chars']]
Beispiel #4
0
 def __init__(self, chars=None):
     """Populate the instance from the Redis entry stored for *chars*.

     Nothing is looked up and no attribute is set when ``chars`` is empty
     or ``None``. Otherwise the key is built from
     ``settings.CHINESE_WORD_KEY`` with the length of ``chars`` and
     ``chars`` itself, and the JSON found there supplies ``chars``,
     ``meanings`` and the optional ``starts_with`` / ``contains`` values.

     NOTE(review): the result of ``_search_redis`` is passed straight to
     ``json.loads``; presumably a missing key makes this raise -- confirm.

     Args:
         chars: The Chinese word to load.
     """
     if not chars:
         return

     size = len(chars)
     self.key = settings.CHINESE_WORD_KEY % (size, chars)

     entry = json.loads(_search_redis(self.key))
     self.chars = entry['chars']
     self.length = size
     self.meanings = entry['meanings']
     self.starts_with = entry.get('starts_with')
     self.contains = entry.get('contains')
    def handle_noargs(self, **options):
        """Rebuild the Chinese (``ZH:*``) and pinyin (``PY:*``) keys in Redis.

        Reads the dictionary file at ``settings.DICT_FILE_LOCATION`` line by
        line. Lines starting with ``#`` and blank lines are skipped. For
        every other line three kinds of Redis entries are written:

        * a measure-word entry per classifier found in a ``CL:`` meaning,
          listing the words that use it;
        * a pinyin entry listing the words sharing that pronunciation;
        * a character entry holding the word and its pronunciation/meaning
          pairs.

        Prints a running item count and a final summary.
        """
        # Example line:
        # 一事無成 一事无成 [yi1 shi4 wu2 cheng2] /to have achieved nothing/to be a total failure/to get nowhere/

        # Open the file before wiping anything so a missing or unreadable
        # dictionary does not leave Redis emptied; ``with`` also closes the
        # handle if a line fails to parse.
        with open(settings.DICT_FILE_LOCATION) as dict_file:
            # EMPTY ALL ZH + PY KEYS
            self._del_keys('ZH:*')
            self._del_keys('PY:*')

            # One connection and one converter serve the whole import;
            # neither depends on the line being processed.
            r_server = _get_redis()
            converter = ReadingFactory()

            item_count = 0
            for line in dict_file:
                if line.startswith("#") or not line.strip():
                    continue

                # GATHER ALL THE MAIN VARIABLES
                fields = line.split()
                numbered_pinyin = line[(line.index('[') + 1):line.index(']')]
                tonal_pinyin = converter.convert(
                    numbered_pinyin, 'Pinyin', 'Pinyin',
                    sourceOptions={'toneMarkType': 'numbers', 'yVowel': 'v',
                                   'missingToneMark': 'fifth'})
                meanings = line[(line.index('/') + 1):line.rindex('/')]
                # The second column is used as the word; commas are dropped.
                characters = fields[1].replace(',', '')

                # GET AND CLEAN THE MEASURE WORD
                mws = None
                if "CL:" in meanings:
                    # Build a new list of the meanings to keep instead of
                    # popping from the list being iterated, which would skip
                    # the element following each removed one.
                    kept_meanings = []
                    for val in meanings.split('/'):
                        if "CL:" not in val:
                            kept_meanings.append(val)
                            continue

                        mws = []
                        for x in val.replace('CL:', '').split(','):
                            # Drop the bracketed reading; partition() leaves
                            # the text intact when there is no '['.
                            x = x.partition('[')[0]
                            # Of a "first|second" pair keep the second form.
                            if '|' in x:
                                x = x[(x.index('|') + 1):]

                            # ADD THE MEASURE WORD ENTRY
                            mws_key = settings.MEASURE_WORD_KEY % x
                            if r_server.exists(mws_key):
                                values = json.loads(_search_redis(mws_key))
                                values['chars'].append(characters)
                            else:
                                values = {'chars': [characters]}
                            r_server.set(mws_key, json.dumps(values))

                            mws.append(x)
                    meanings = "/".join(kept_meanings)

                # Count characters rather than UTF-8 bytes: dividing the byte
                # length by three is wrong for any word containing ASCII or
                # other non-3-byte characters.
                char_key = settings.CHINESE_WORD_KEY % (
                    len(smart_unicode(characters)), characters)

                # CREATE THE PRONUNCIATION/MEANING PAIR
                pair = {
                    'pinyin': tonal_pinyin,
                    'pinyin_numbered': _normalize_pinyin(numbered_pinyin),
                    'meaning': meanings,
                    'measure_words': mws,
                }

                # ADD THE PINYIN ENTRY
                py_key = settings.PINYIN_WORD_KEY % _pinyin_to_ascii(numbered_pinyin)
                if r_server.exists(py_key):
                    values = json.loads(_search_redis(py_key))
                    if smart_unicode(characters) not in values:
                        values.append(characters)
                else:
                    values = [characters]
                r_server.set(py_key, json.dumps(values))

                # ADD THE CHINESE CHARACTER ENTRY
                if r_server.exists(char_key):
                    values = json.loads(_search_redis(char_key))
                    values['meanings'].append(pair)
                else:
                    values = {
                        'chars': characters,
                        'meanings': [pair],
                    }
                r_server.set(char_key, json.dumps(values))

                item_count += 1
                print(item_count)

        print("%s Chinese items added" % item_count)
    def handle_noargs(self, **options):
        """Rebuild the English (``EN:*``) lookup keys in Redis.

        Deletes every existing ``EN:*`` key, then reads the dictionary file
        at ``settings.DICT_FILE_LOCATION``. Lines starting with ``#`` and
        blank lines are skipped. Each ``/``-separated meaning of a line is
        reduced to a short index phrase: bracketed text is removed, meanings
        containing an entry of ``EXCLUSIONS`` are dropped, a leading "to "
        is stripped from short infinitives, "see ..." references ending in a
        non-letter are dropped and "..." marks are removed. Phrases of at
        most three space-separated parts are stored under
        ``EN:<parts>W:<phrase lower-cased>`` with the original meaning and
        the list of words that carry it.

        Prints each key, a running item count and final summaries.
        """
        # EXAMPLE: 一中一台 [yi1 Zhong1 yi1 Tai2] /first meaning/second meaning/

        def strip_enclosed(text, opener, closer):
            """Remove the first opener..closer span, then stray bracket chars.

            Text missing either bracket is returned unchanged. The span is
            cut out by position, so bracket content that also occurs
            elsewhere in the text is left alone there.
            """
            start = text.find(opener)
            end = text.find(closer)
            if start == -1 or end == -1:
                return text
            if end > start:
                text = text[:start] + text[end + 1:]
            return text.replace(opener, '').replace(closer, '')

        # The file is opened before anything is deleted, so an unreadable
        # dictionary aborts the command without emptying the index; ``with``
        # closes the handle even when a line fails to parse.
        with open(settings.DICT_FILE_LOCATION) as dict_file:
            r_server = _get_redis()

            # EMPTY ALL EN KEYS FROM THE DATABASE
            item_count = 0
            for old_key in r_server.keys('EN:*'):
                r_server.delete(old_key)
                item_count += 1
            print("Deleted %s items" % item_count)

            # NOW LETS START
            item_count = 0
            for line in dict_file:
                if line.startswith("#") or not line.strip():
                    continue

                # GATHER ALL THE MAIN VARIABLES
                characters = line.split()[1]
                meanings = line[(line.index('/') + 1):line.rindex('/')]

                # CREATE AN INDEX: strip as much noise as possible from each
                # definition to get close to a single word we can index on.
                for x in meanings.split('/'):
                    # REMOVE ANYTHING BETWEEN ROUND, THEN SQUARE BRACKETS
                    ns = strip_enclosed(x, '(', ')')
                    ns = strip_enclosed(ns, '[', ']')

                    # IGNORE THE MEANING IF IT CONTAINS AN EXCLUDED PHRASE
                    if any(phrase in ns for phrase in EXCLUSIONS):
                        continue

                    # IF THE MEANING IS NOW EMPTY, IGNORE IT
                    ns = ns.strip()
                    if ns == '':
                        continue

                    # DEAL WITH INFINITIVE VERBS LIKE "TO DO" WITH 2 WORDS
                    if len(ns.split(' ')) <= 3 and ns.startswith('to '):
                        ns = ns.split(' ', 1)[1]

                    # REMOVE ITEMS LIKE "SEE XYZ"
                    if ns.split(' ')[0] == 'see' and ns[-1] not in string.ascii_letters:
                        continue

                    # THERE'S ALSO SOME ANNOYING "..." MARKS TOO
                    ns = ns.replace('...', '')

                    # FOR NOW, JUST ADD SHORT ITEMS (AT MOST 3 PARTS)
                    parts = ns.split(' ')
                    if len(parts) > 3:
                        continue

                    key = "EN:%sW:%s" % (len(parts), ns.lower())
                    print(key)
                    if r_server.exists(key):
                        values = json.loads(_search_redis(key))
                        values['characters'].append(characters)
                    else:
                        # The untouched meaning is stored, not the reduced
                        # index phrase.
                        values = {
                            'english': x,
                            'characters': [characters],
                        }
                    r_server.set(key, json.dumps(values))

                    item_count += 1
                    print(item_count)

        print("%s English dictionary items added" % item_count)