def test_word_guess_category():
    """Saving a Word should fill in ``category`` based on its text."""
    # (text stored on the Word, category expected after save)
    expectations = (
        ('word', 'word'),
        ('come up with', 'phrasal_verb'),
        ('get over', 'phrasal_verb'),
        ('to put it mildly', 'phrase'),
    )

    for text, expected_category in expectations:
        entry = Word()
        entry.word = text
        entry.save()
        assert entry.category == expected_category
def test_saving_and_retrieving_items(self):
    """Items saved against a single Word can be read back with their data."""
    parent = Word()
    parent.save()

    # Persist two items, in this order, both pointing at the same Word.
    for text in ('kaizen', 'genki'):
        item = Item()
        item.text = text
        item.word = parent
        item.save()

    self.assertEqual(Word.objects.first(), parent)

    stored_items = Item.objects.all()
    self.assertEqual(2, stored_items.count())

    first_stored = stored_items[0]
    second_stored = stored_items[1]
    self.assertEqual(first_stored.text, 'kaizen')
    self.assertEqual(second_stored.text, 'genki')
    self.assertEqual(first_stored.word, parent)
    self.assertEqual(second_stored.word, parent)
def handle(self, *args, **options):
    """Rebuild the Word table from the files in ``settings.STATIC_AUDIO``.

    All existing Word rows are deleted first. Then one Word is saved for
    every regular file found directly inside the directory:

    * ``word`` is the file name up to (not including) its first dot,
    * ``transcription`` is the placeholder ``'[...]'``,
    * ``audio_path`` is the file name joined onto ``settings.STATIC_AUDIO``.
    """
    Word.objects.all().delete()

    # Collect the regular files up front; sub-directories are ignored.
    audio_files = []
    for entry in listdir(settings.STATIC_AUDIO):
        if isfile(join(settings.STATIC_AUDIO, entry)):
            audio_files.append(entry)

    for file_name in audio_files:
        Word(
            word=file_name.split('.')[0],
            transcription='[...]',
            audio_path=os.path.join(settings.STATIC_AUDIO, file_name),
        ).save()
def handle(self, *args, **options):
    """Bulk-insert Word rows from the tab-separated ``lexicon`` file.

    The file is looked up in the same directory as this module. Its first
    line is skipped (presumably a header row -- confirm against the data
    file). Every remaining non-blank line is split on tabs and padded with
    empty strings to ten columns, which map, in order, to: ``word``,
    ``prefix``, ``stem``, ``tone``, ``POS``, ``word_class``, ``gloss``,
    ``no``, ``note`` and ``dialect``.

    Raises:
        ValueError: if the eighth column (``no``) of a row is not an
            integer literal, including when it is missing or empty.
    """
    lexicon_path = path.join(path.dirname(path.abspath(__file__)), 'lexicon')
    with open(lexicon_path, 'r') as f:
        lines = f.readlines()[1:]

    words = []
    for line in lines:
        # readlines() keeps the line terminator; without stripping it the
        # newline would be stored as part of the row's last column.
        line = line.rstrip('\r\n')
        if not line.strip():
            # A blank line carries no data and would otherwise fail on
            # int('') below.
            continue

        fields = line.split("\t")
        # Short rows are padded so every column index below exists.
        fields += [''] * (10 - len(fields))

        words.append(Word(
            word=fields[0],
            prefix=fields[1],
            stem=fields[2],
            tone=fields[3],
            POS=fields[4],
            word_class=fields[5],
            gloss=fields[6],
            no=int(fields[7]),
            note=fields[8],
            dialect=fields[9],
        ))

    Word.objects.bulk_create(words)
# Collapse the input rows into one dict per distinct word text, summing the
# frequencies of rows that repeat the same word.
words = {}
for row in reader:
    text = row['Word']
    frequency = int(row['Frequency'])
    if text in words:
        words[text]['frequency'] += frequency
    else:
        words[text] = dict(text=text, language=english, frequency=frequency)

# Highest frequency first.
words = sorted(words.values(), key=lambda w: w['frequency'], reverse=True)

# Word rows already stored, keyed by their text. Membership is tested on
# this dict (constant time) instead of scanning a list for every word.
existing_words_objects = {word.text: word for word in Word.objects.all()}
existing_words_text = list(existing_words_objects)

# Attach a Word object to every entry, creating and saving the missing ones.
# raw_words collects exactly the newly created objects, in ranking order.
raw_words = []
for word in words:
    if word['text'] in existing_words_objects:
        word['word'] = existing_words_objects[word['text']]
    else:
        word['word'] = Word(text=word['text'])
        word['word'].save()
        raw_words.append(word['word'])

# Rank is the 1-based position in the frequency ordering.
for rank, word in enumerate(words, start=1):
    word['rank'] = rank

for word in words:
    print(word['rank'], word['text'], word['frequency'])

# Unsaved WordEntry objects, one per ranked word.
word_entries = [
    WordEntry(word=word['word'], language=english,
              frequency=word['frequency'], rank=word['rank'])
    for word in words
]
def _save_data_to_db(word, spelling, raw_json):
    """Create a Word from the given values and save it.

    ``word`` is stored as ``value``, ``spelling`` as ``spelling`` and
    ``raw_json`` as ``raw_od_article``. Nothing is returned.
    """
    record = Word()
    assignments = (
        ('value', word),
        ('spelling', spelling),
        ('raw_od_article', raw_json),
    )
    for attribute, content in assignments:
        setattr(record, attribute, content)
    record.save()
line = f.readline() # 每次读取一行 lst = line.split() # 切分成列表 size = len(lst) if not line: break # 至少应含有两项:名称和释义 # 找到第一个释义项的位置,释义一定含有"."因为要指明词汇类型,如"v." k = 0 for k in range(size): if lst[k].find(".") != -1: break # 从第一个释义项之前的所有项合在一起是name name = "" for i in range(k): if i != 0: name += " " name += lst[i] # 从第一个释义项到结尾合在一起是explian explain = "" for i in range(k, size): if i != k: explain += " " explain += lst[i] # 保存到数据库 word = Word() word.name = name word.explain = explain word.save() print("生成完成,见words_word表")
def __yield_words(self, dictionary):
    """Lazily produce one Word per entry of ``dictionary``.

    Each key is passed as the Word's ``lemma`` and the value stored under
    that key as its ``description``.
    """
    for lemma, description in dictionary.items():
        yield Word(lemma=lemma, description=description)