def _vocabularyStudying(self):
    """Mark every currently selected word as 'studying' (status 0).

    Iterates self.items (the selected tree rows); column 0 of each row's
    values holds the word itself.
    """
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    for item in self.items:
        print("_vocabularyStudying", self.tree.item(item, "text"))
        values = self.tree.item(item, "values")
        sqlVocab.update_word_status(values[0], 0)
def _vocabularyIgnored(self):
    """Mark every currently selected word as 'ignored' (status -1).

    Iterates self.items (the selected tree rows); column 0 of each row's
    values holds the word itself.
    """
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    for item in self.items:
        # Fixed debug label: was the misspelled "_vocabularyInorge".
        print("_vocabularyIgnored", self.tree.item(item, "text"))
        values = self.tree.item(item, "values")
        sqlVocab.update_word_status(values[0], -1)
def _uk_pron(self):
    """Play the UK pronunciation MP3 for each selected word.

    Looks up a cached sound path in the db first; otherwise scrapes the
    Cambridge dictionary (trying the word as-is, then its WordNet lemma),
    downloads the MP3, and caches its path before playing it.
    """
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    for item in self.items:
        print("_uk_pron", self.tree.item(item, "text"))
        values = self.tree.item(item, "values")
        sound_dir, = sqlVocab.get_uk_sound(values[0])
        if not sound_dir:
            # Try the surface form first, then the lemma (e.g. "running" -> "run").
            uks = self._get_uks_link_mp3_cambridge(values[0], values[0], item)
            if not uks:
                words = sqlVocab.query_words_with_sql("word = '{}'".format(values[0]))
                wn_tag = self.penn_to_wn(words[0][3])
                uks = self._get_uks_link_mp3_cambridge(
                    values[0], WordNetLemmatizer().lemmatize(values[0], wn_tag), item)
            if uks:
                sound_dir = self._download_mp3_cambridge(uks[0], 'uk_pron')
                if os.path.exists(sound_dir):
                    sqlVocab.update_uk_sound(values[0], sound_dir)
        # Guard: sound_dir may still be None/empty when no MP3 link was found;
        # os.path.exists(None) would raise TypeError.
        if sound_dir and os.path.exists(sound_dir):
            from pygame import mixer
            mixer.init()
            mixer.music.load(sound_dir)
            mixer.music.play()
            if len(self.items) > 1:
                import time
                time.sleep(2)  # let the clip play before starting the next word
def _get_uks_link_mp3_cambridge(self, org_word, word, item):
    """Scrape the Cambridge dictionary page for *word*.

    Updates the tree's '#2' column and (when found) the db with the UK IPA
    pronunciation of *org_word*, and returns the list of UK MP3 URLs found
    on the page (possibly empty).
    """
    BASE_URL = 'http://dictionary.cambridge.org/dictionary/english/'
    url = BASE_URL + word
    print(url)
    # Renamed from `html` to avoid confusion with the lxml.html module and
    # the comprehension variable below.
    page = requests.get(url).content
    doc = lxml.html.fromstring(page)
    uks = doc.xpath("//span[@class='sound audio_play_button pron-icon uk']/@data-src-mp3")
    uks_pron_html = doc.xpath(
        "//*[@id='dataset-british']/div[1]/div[2]/div/div/div[1]"
        "/span[@class='uk']/span[@class='pron']/span[@class='ipa']")
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    uks_pron = [node.text_content() for node in uks_pron_html]
    prons = u'/' + u'/,/'.join(uks_pron) + u'/'
    self.tree.set(item, '#2', prons)
    # Only persist when at least one pronunciation was actually scraped.
    if len(uks_pron) > 0:
        sqlVocab.update_uk_pron(org_word, prons)
    return uks
def _create_treeview(self, parent):
    """Build the vocabulary Treeview with scrollbars inside *parent*.

    Data columns are taken from the vocabulary table's schema (e.g. word,
    status, vietnamese, japanese, study_date, sentence); fonts and tags
    are configured for row styling.
    """
    f = ttk.Frame(parent)
    f.pack(side=tk.TOP, fill=tk.BOTH, expand=tk.Y)

    # Column layout mirrors the db schema so new columns show up automatically.
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    col_names = sqlVocab.get_col_names()
    self.dataCols = tuple(col_names)

    # create the tree and scrollbars
    self.tree = ttk.Treeview(columns=self.dataCols)
    ysb = ttk.Scrollbar(orient=tk.VERTICAL, command=self.tree.yview)
    xsb = ttk.Scrollbar(orient=tk.HORIZONTAL, command=self.tree.xview)
    self.tree['yscroll'] = ysb.set
    self.tree['xscroll'] = xsb.set

    # '#0' is the built-in row-number column; data columns are generated.
    self.tree.heading('#0', text='#', anchor=tk.E)
    self.tree.column('#0', stretch=0, width=50, anchor=tk.E)
    for col in self.dataCols:
        self.tree.heading(col, text=col, anchor=tk.W)
        self.tree.column(col, stretch=0, width=160)

    # add tree and scrollbars to frame
    self.tree.grid(in_=f, row=0, column=0, sticky=tk.NSEW)
    ysb.grid(in_=f, row=0, column=1, sticky=tk.NS)
    xsb.grid(in_=f, row=1, column=0, sticky=tk.EW)

    # set frame resizing priorities
    f.rowconfigure(0, weight=1)
    f.columnconfigure(0, weight=1)

    # create fonts and tags used for per-row highlighting
    self.normal = tkfont.Font(family='Consolas', size=10)
    self.boldfont = tkfont.Font(family='Consolas', size=10, weight='bold')
    self.whacky = tkfont.Font(family='Jokerman', size=10)
    self.tree.tag_configure('normal', font=self.normal)
    self.tree.tag_configure('timedout', background='pink', font=self.boldfont)
    self.tree.tag_configure('whacky', background='lightgreen', font=self.whacky)
def _query_vocabulary_data(self, sql):
    """Clear the tree and reload self.data from the vocabulary table.

    *sql* is the WHERE-clause fragment passed to query_words_with_sql.
    self.data maps a 1-based row number to the raw word row.
    """
    self.tree.delete(*self.tree.get_children())
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    words = sqlVocab.query_words_with_sql(sql)
    # enumerate replaces the original manual `num += 1` counter;
    # keys stay 1-based.
    self.data = {num: w for num, w in enumerate(words, start=1)}
def _delete_vocabulary(self):
    """Delete the word in the currently selected row (self.item) from the
    vocabulary table."""
    values = self.tree.item(self.item, "values")
    print(values[0])  # column 0 holds the word
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    # Removed unused locals (db, tbl) and the unused return binding.
    sqlVocab.delete_word(values[0])
def _edit_vocabulary(self):
    """Open an EntryWindow pre-filled with the first db row matching the
    word in the currently selected tree row (self.item)."""
    values = self.tree.item(self.item, "values")
    print(values[0])  # column 0 holds the word
    db = 'studyenglish.db'
    tbl = 'vocabulary'
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    # NOTE(review): the word is interpolated straight into SQL — acceptable
    # for trusted local data, but a parameterized query would be safer.
    words = sqlVocab.query_words_with_sql("word = '{}'".format(values[0]))
    # Only the first matching row is edited (was a for-loop with break).
    if words:
        root = tk.Tk()
        EntryWindow(root, *[db, tbl, words[0]])
def _build_vocabulary_data(self, status, study_date=None):
    """Reload self.data (1-based row number -> word row) for *status*.

    When *study_date* is not None, the query is additionally restricted
    by study date.
    """
    self.tree.delete(*self.tree.get_children())
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    if study_date is None:  # `is None`, not `== None`
        words = sqlVocab.query_words_with_status(status)
    else:
        # NOTE(review): study_date is effectively only a flag here — the
        # query always uses TODAY's date, not study_date itself. Behavior
        # preserved as-is; confirm whether study_date should be passed.
        words = sqlVocab.query_words_with_status_and_date(
            status, strftime("%Y-%m-%d", gmtime()))
    self.data = {num: w for num, w in enumerate(words, start=1)}
def main():
    """Tokenize document.txt into sentences and words, inserting each new
    lower-cased word into the vocabulary table with status 1 and its
    source sentence, then commit and close the db."""
    sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # Context manager so the file handle is closed (the original leaked it).
    with open('document.txt') as fh:
        text = fh.read()
    sents = sent_tokenizer.tokenize(text)
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    for sent in sents:
        tokens = nltk.word_tokenize(sent)
        vocab = sorted(set(w.lower() for w in tokens))
        for v in vocab:
            if not sqlVocab.check_existed_word(v):
                sqlVocab.insert_vocabulary(
                    v, 1, "", "", strftime("%Y-%m-%d", gmtime()), sent)
    sqlVocab.commit()
    sqlVocab.close()
def nature_language_processing(self):
    """POS-tag the text widget contents and load the tokens into the db.

    Each new lower-cased token is inserted with its POS tag, status -2,
    today's date, and counts of 1; existing tokens get their counts bumped.
    Finally refreshes the word list and destroys this window.
    """
    sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    text = st.get(1.0, END)  # read the whole text widget
    sents = sent_tokenizer.tokenize(text)
    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    sqlVocab.clear_local_count()
    for sent in sents:
        tokens = nltk.word_tokenize(sent)
        for v, t in nltk.pos_tag(tokens):
            word = v.lower()
            if not sqlVocab.check_existed_word(word):
                sqlVocab.insert_vocabulary(
                    word, "", "", t, "", "", sent, -2,
                    strftime("%Y-%m-%d", gmtime()), 1, 1)
            else:
                sqlVocab.update_word_count(word, 1, 1)
    # Removed unused whole-text tokenization and the commented-out
    # FreqDist-based frequency update.
    sqlVocab.commit()
    sqlVocab.close()
    self.show_all_words()
    self.master.destroy()