def _get_random(self, number=1, chars=None, length='*', pinyin=None):
    """
    Return randomly chosen dictionary entries from Redis.

    Keys matching ``ZH:<length>C:*`` are scanned, a bounded pool of them is
    shuffled, and the JSON stored under the chosen keys is decoded.

    :param number: how many entries are wanted.
    :param chars: when truthy, its ``len()`` replaces ``length``.
    :param length: value placed in the key pattern; the default ``'*'``
        matches any length.
    :param pinyin: accepted for interface compatibility; not used here.
    :returns: a single decoded entry when ``number == 1`` (``None`` if no key
        matched); otherwise a list of at most ``number`` decoded entries,
        which is empty for ``number <= 0`` or when nothing matched.
    """
    if chars:
        length = len(chars)
    pattern = "ZH:%sC:*" % length

    # Keep the scan bounded (21 keys, as before) but never collect fewer keys
    # than the caller asked for, so large requests can actually be served.
    pool_size = max(21, number)

    r_server = _get_redis()
    candidates = []
    for found in r_server.scan_iter(pattern):
        candidates.append(found)
        if len(candidates) >= pool_size:
            break

    # shuffle() already uses random.random by default; passing it explicitly
    # is redundant and is rejected by newer Python versions.
    random.shuffle(candidates)

    if number == 1:
        if not candidates:
            # Nothing matched the pattern: report "no word" instead of
            # failing with an IndexError.
            return None
        return json.loads(_search_redis(candidates[0]))

    # Slicing returns what is available when fewer than `number` keys exist.
    wanted = candidates[:max(number, 0)]
    return [json.loads(_search_redis(found)) for found in wanted]
def text(request, hashkey=None, words=None):
    """
    Render the page for a stored text, creating the stored record first when
    no ``hashkey`` is supplied.

    Without a ``hashkey`` a new 5-character key (lowercase letters and digits)
    is generated and a hash is written to Redis under ``text:<hashkey>``
    containing the submitting user, ``words`` and a timestamp. The record is
    then looked up again and rendered.

    NOTE: every render call below receives ``locals()`` as its context, so the
    local variable names in this function are visible to the templates.
    Renaming or removing a local can change what the templates see.

    :param request: the current HTTP request.
    :param hashkey: identifier of an existing text, or a falsy value to create one.
    :param words: the text to store when a new record is created.
    :returns: the problem page (or its snippet for AJAX requests) when no
        record is found; otherwise the text page, or only the page snippet
        when the request has a truthy ``page`` query parameter.
    """
    if not hashkey:
        # No hash given: mint a new key and store a fresh record.
        hashkey = ''.join(random.choice(string.ascii_lowercase + string.digits) for x in range(5))
        key = "text:%s" % hashkey
        if request.user.is_authenticated():
            user = request.user.email
        else:
            # Placeholder stored for users who are not logged in.
            user = '******'
        mapping = {
            'user': user,
            'title': '',
            'chars': words,
            'timestamp': time.time(),
            'hash': hashkey,
            'url' : '',
        }
        # Add it to Redis.
        r_server = _get_redis()
        r_server.hmset(key, mapping)
    else:
        key = 'text:%s' % hashkey
    obj = None
    # NOTE(review): lookup=False presumably only checks that the key exists,
    # with the second call fetching the stored value -- confirm against
    # _search_redis.
    if _search_redis(key, lookup=False):
        obj = _search_redis(key)
    if not obj:
        # Nothing stored under this key: show the problem page, or only its
        # snippet when the request came in via AJAX.
        if request.is_ajax():
            html = render_to_string('website/problem_snippet.html', locals())
            return HttpResponse(html)
        return _render(request, 'website/problem.html', locals())
    title = 'Article'
    try:
        url = urlparse(obj['url']).netloc
    except KeyError:
        # Record has no 'url' field; `url` is simply left out of the context.
        pass
    chars = obj['chars'].decode('utf-8') # because redis stores things as strings...
    # NOTE(review): the two helpers below presumably split the text into
    # characters and group them into words -- confirm against their definitions.
    things = _split_unicode_chrs(chars)
    obj_list = _group_words(things)
    list_template = 'creader/text_page_snippet.html'
    if request.GET.get('page'):
        # A page was requested: return only the snippet, not the full page.
        template = 'creader/text_page_snippet.html'
        return render_to_response(template, locals())
    return _render(request, 'creader/text.html', locals())
def __init__(self, words=None):
    """
    Load an English dictionary entry and the Chinese words attached to it.

    :param words: the English word or phrase; words may be separated by
        spaces or underscores. ``None`` produces an empty object.

    After construction the instance always has:

    * ``key`` -- the Redis key that was looked up (``None`` without ``words``)
    * ``english`` -- the stored English text, or ``None`` if nothing was found
    * ``characters`` -- a list of ``ChineseWord`` objects (empty if nothing
      was found)
    """
    # Define every attribute up front so a missing entry leaves a usable,
    # empty object instead of one that raises AttributeError on access.
    self.key = None
    self.english = None
    self.characters = []

    if words is None:
        return

    # Normalise the separator first so the word count in the key agrees with
    # the underscore-separated text in the key ("to do" and "to_do" both
    # count as two words).
    normalised = words.replace(' ', '_')
    self.key = settings.ENGLISH_WORD_KEY % (len(normalised.split('_')), normalised)

    word = _search_redis(self.key)
    if not word:
        return

    word = json.loads(word)
    self.english = word['english']
    self.characters = [ChineseWord(chars=x) for x in word['chars']]
def __init__(self, chars=None):
    """
    Populate the word from its Redis entry.

    When ``chars`` is falsy nothing is looked up and no attributes are set.
    Otherwise the entry stored under the ``CHINESE_WORD_KEY`` for ``chars``
    is decoded from JSON and copied onto the instance.
    """
    if not chars:
        return

    size = len(chars)
    self.key = settings.CHINESE_WORD_KEY % (size, chars)
    entry = json.loads(_search_redis(self.key))

    self.chars = entry['chars']
    self.length = size
    self.meanings = entry['meanings']
    # Optional fields: absent keys become None.
    self.starts_with = entry.get('starts_with')
    self.contains = entry.get('contains')
def handle_noargs(self, **options):
    """
    Rebuild the Chinese-word, pinyin and measure-word entries in Redis from
    the dictionary file at ``settings.DICT_FILE_LOCATION``.

    Every line that does not start with ``#`` is expected to look like::

        一事無成 一事无成 [yi1 shi4 wu2 cheng2] /to have achieved nothing/to get nowhere/

    The second whitespace-separated field is used as the word, the text in
    square brackets as numbered pinyin and the text between the first and
    last ``/`` as the meanings. For each line:

    * meanings containing ``CL:`` are removed from the meaning text and their
      measure words are recorded under ``settings.MEASURE_WORD_KEY``;
    * the word is added to the list stored under ``settings.PINYIN_WORD_KEY``;
    * a pinyin/meaning pair is appended to the entry stored under
      ``settings.CHINESE_WORD_KEY``.

    All existing ``ZH:*`` and ``PY:*`` keys are deleted first. A malformed
    line (missing brackets or slashes) raises ``ValueError``/``IndexError``
    and aborts the import.
    """
    # EMPTY ALL ZH + PY KEYS
    self._del_keys('ZH:*')
    self._del_keys('PY:*')

    item_count = 0

    # The context manager closes the file even if a line fails to parse.
    with open(settings.DICT_FILE_LOCATION) as dict_file:
        # One Redis client and one pinyin converter serve the whole import;
        # neither depends on the line being processed.
        r_server = _get_redis()
        factory = ReadingFactory()

        for line in dict_file:
            if line.startswith("#"):
                continue

            # GATHER ALL THE MAIN VARIABLES
            fields = line.split()
            numbered_pinyin = line[(line.index('[') + 1):line.index(']')]
            tonal_pinyin = factory.convert(
                numbered_pinyin, 'Pinyin', 'Pinyin',
                sourceOptions={
                    'toneMarkType': 'numbers',
                    'yVowel': 'v',
                    'missingToneMark': 'fifth',
                })
            meanings = line[(line.index('/') + 1):line.rindex('/')]

            # REMOVE ALL THE UGLY CHARACTERS
            characters = fields[1].replace(',', '')

            # GET AND CLEAN THE MEASURE WORD
            mws = None
            if "CL:" in meanings:
                mws = []
                # Build a new list of the meanings to keep rather than popping
                # from the list being iterated, which skipped the segment
                # following each removed one.
                kept_meanings = []
                for val in meanings.split('/'):
                    if "CL:" not in val:
                        kept_meanings.append(val)
                        continue
                    for x in val.replace('CL:', '').split(','):
                        # Drop the "[pinyin]" suffix, then keep only the part
                        # after "|" when two forms are given.
                        x = x[:x.index('[')]
                        if '|' in x:
                            x = x[(x.index('|') + 1):]

                        # ADD THE MEASURE WORDS ENTRY
                        mws_key = settings.MEASURE_WORD_KEY % x
                        if r_server.exists(mws_key):
                            values = json.loads(_search_redis(mws_key))
                            values['chars'].append(characters)
                        else:
                            values = {'chars': [characters]}
                        r_server.set(mws_key, json.dumps(values))
                        mws.append(x)
                meanings = "/".join(kept_meanings)

            # Count characters, not bytes: dividing the byte length by three
            # is only right when every character takes three bytes, and
            # miscounts words containing ASCII letters or digits.
            char_count = len(smart_unicode(characters))
            char_key = settings.CHINESE_WORD_KEY % (char_count, characters)

            # CREATE THE PRONUNCIATION/MEANING PAIR
            pair = {
                'pinyin': tonal_pinyin,
                'pinyin_numbered': _normalize_pinyin(numbered_pinyin),
                'meaning': meanings,
                'measure_words': mws,
            }

            # ADD THE PINYIN ENTRY
            py_key = settings.PINYIN_WORD_KEY % _pinyin_to_ascii(numbered_pinyin)
            if r_server.exists(py_key):
                values = json.loads(_search_redis(py_key))
                # Decoded JSON holds unicode strings, so compare as unicode.
                if smart_unicode(characters) not in values:
                    values.append(characters)
            else:
                values = [characters]
            r_server.set(py_key, json.dumps(values))

            # ADD THE CHINESE CHARACTER ENTRY
            if r_server.exists(char_key):
                values = json.loads(_search_redis(char_key))
                values['meanings'].append(pair)
            else:
                values = {
                    'chars': characters,
                    'meanings': [pair],
                }
            r_server.set(char_key, json.dumps(values))

            item_count += 1
            print(item_count)

    print("%s Chinese items added" % item_count)
def handle_noargs(self, **options):
    """
    Rebuild the English index (``EN:*`` keys) in Redis from the dictionary
    file at ``settings.DICT_FILE_LOCATION``.

    Every line that does not start with ``#`` is expected to look like::

        一中一台 [yi1 Zhong1 yi1 Tai2] /first meaning/second meaning/

    The second whitespace-separated field is used as the word. Each meaning
    between the first and last ``/`` is reduced to a short index phrase:

    * the first ``(...)`` and ``[...]`` group is removed, along with any
      leftover brackets of that kind;
    * meanings containing a phrase from ``EXCLUSIONS`` are skipped;
    * a leading ``to `` is dropped from phrases of at most three words;
    * ``see ...`` references that do not end in an ASCII letter are skipped;
    * ``...`` markers are removed.

    Phrases of at most three words are stored under
    ``EN:<word count>W:<lowercased phrase>`` as JSON holding the original
    meaning (``english``) and the list of words it maps to (``characters``).

    All existing ``EN:*`` keys are deleted first. A line with fewer than two
    fields or without two ``/`` raises ``IndexError``/``ValueError``.
    """
    # Open the file before deleting anything, so a missing dictionary file
    # does not wipe the existing index. The context manager closes the file
    # even if a line fails to parse.
    with open(settings.DICT_FILE_LOCATION) as dict_file:
        r_server = _get_redis()

        # EMPTY ALL EN KEYS FROM THE DATABASE
        item_count = 0
        for stale_key in r_server.keys('EN:*'):
            r_server.delete(stale_key)
            item_count += 1
        print("Deleted %s items" % item_count)

        # NOW LETS START
        item_count = 0
        for line in dict_file:
            if line.startswith("#"):
                continue

            # Only the word and its meanings are indexed here; the pinyin
            # part of the line is not needed.
            characters = line.split()[1]
            meanings = line[(line.index('/') + 1):line.rindex('/')]

            for x in meanings.split('/'):
                ns = x  # working copy; `x` is stored untouched as 'english'

                # REMOVE ANYTHING BETWEEN ROUND AND SQUARE BRACKETS
                for opener, closer in (('(', ')'), ('[', ']')):
                    start = ns.find(opener)
                    end = ns.find(closer)
                    if start == -1 or end == -1:
                        # Without both brackets the text is left alone.
                        continue
                    if start < end:
                        # Cut out exactly the bracketed span. Replacing the
                        # inner text by value would also delete it wherever
                        # else it occurs ("person(s)" -> "peron").
                        ns = ns[:start] + ns[end + 1:]
                    # Remove any brackets that are still left.
                    ns = ns.replace(opener, '').replace(closer, '')

                # IGNORE THE MEANING IF IT CONTAINS AN EXCLUDED PHRASE
                if any(phrase in ns for phrase in EXCLUSIONS):
                    continue

                # IF THE MEANING IS NOW EMPTY, IGNORE IT
                ns = ns.strip()
                if ns == '':
                    continue

                # DEAL WITH INFINITIVE VERBS LIKE "TO DO" WITH 2 WORDS
                if len(ns.split(' ')) <= 3 and ns.startswith('to '):
                    ns = ns.split(' ', 1)[1]

                # REMOVE ITEMS LIKE "SEE XYZ"
                if ns.split(' ')[0] == 'see' and ns[-1] not in string.ascii_letters:
                    continue

                # THERE'S ALSO SOME ANNOYING "..." MARKS TOO
                if "..." in ns:
                    ns = ns.replace('...', '').strip()
                    if not ns:
                        # Nothing but the marker: no phrase to index.
                        continue

                # FOR NOW, JUST ADD ITEMS WITH UP TO 3 WORDS
                word_count = len(ns.split(' '))
                if word_count <= 3:
                    key = "EN:%sW:%s" % (word_count, ns.lower())
                    print(key)
                    if r_server.exists(key):
                        values = json.loads(_search_redis(key))
                        values['characters'].append(characters)
                    else:
                        values = {
                            'english': x,
                            'characters': [characters],
                        }
                    r_server.set(key, json.dumps(values))
                    item_count += 1
                    print(item_count)

    print("%s English dictionary items added" % item_count)