Example #1
0
 def _vocabularyStudying(self):
     """Set status 0 on the word of every row listed in ``self.items``.

     The word is read from the first value column of each tree row and
     written through ``SqliteVocabulary.update_word_status``.
     NOTE(review): status 0 presumably means "studying" -- confirm against
     SqliteVocabulary.
     """
     vocab_db = SqliteVocabulary("studyenglish.db", "vocabulary")
     for row in self.items:
         # Debug trace of the row label before it is updated.
         print("_vocabularyStudying", self.tree.item(row, "text"))
         word = self.tree.item(row, "values")[0]
         vocab_db.update_word_status(word, 0)
     """sqlVocabStudying = SqliteVocabulary("studyenglish.db", "vocabulary_studying")
Example #2
0
 def _vocabularyIgnored(self):
     """Set status -1 on the word of every row listed in ``self.items``.

     The word is read from the first value column of each tree row and
     written through ``SqliteVocabulary.update_word_status``.
     NOTE(review): status -1 presumably means "ignored" -- confirm against
     SqliteVocabulary.
     """
     sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
     for item in self.items:
         # Debug trace; the label now matches the method name (it used to be
         # misspelled "_vocabularyInorge", which made log searches miss it).
         print("_vocabularyIgnored", self.tree.item(item, "text"))
         values = self.tree.item(item, "values")
         sqlVocab.update_word_status(values[0], -1)
     """sqlVocabIgnored = SqliteVocabulary("studyenglish.db", "vocabulary_ignored")
Example #3
0
    def _uk_pron(self):
        """Play the UK pronunciation of each word listed in ``self.items``.

        For every row the stored sound path is looked up first.  When none is
        stored, the Cambridge page is queried for the word itself and, if that
        yields no audio link, for its lemmatized form.  The first link found
        is downloaded and the resulting path is saved with
        ``update_uk_sound``.  The file is then played through pygame's mixer;
        when several rows are processed there is a 2 second pause after each
        one.
        """
        sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
        for item in self.items:
            print("_uk_pron", self.tree.item(item,"text"))
            values = self.tree.item(item,"values")
            word = values[0]

            sound_dir, = sqlVocab.get_uk_sound(word)
            if not sound_dir:
                uks = self._get_uks_link_mp3_cambridge(word, word, item)
                if not uks:
                    # Double embedded apostrophes so tokens such as "'s" or
                    # "n't" do not terminate the SQL string literal early.
                    escaped = str(word).replace("'", "''")
                    words = sqlVocab.query_words_with_sql("word = '{}'".format(escaped))
                    # Only retry with the lemma when the word's row was found.
                    if words:
                        # presumably column 3 holds the Penn Treebank POS tag -- TODO confirm
                        wn_tag = self.penn_to_wn(words[0][3])
                        uks = self._get_uks_link_mp3_cambridge(word, WordNetLemmatizer().lemmatize(word, wn_tag), item)
                if uks:
                    sound_dir = self._download_mp3_cambridge(uks[0], 'uk_pron')
                    if sound_dir and os.path.exists(sound_dir):
                        sqlVocab.update_uk_sound(word, sound_dir)

            # sound_dir may still be empty/None when nothing was stored or
            # found; os.path.exists(None) would raise TypeError.
            if sound_dir and os.path.exists(sound_dir):
                from pygame import mixer
                mixer.init()
                mixer.music.load(sound_dir)
                mixer.music.play()
                if len(self.items)>1:
                    import time
                    time.sleep(2) # delays for 2 seconds
Example #4
0
    def _get_uks_link_mp3_cambridge(self, org_word, word, item):
        """Fetch the Cambridge dictionary page for *word* and extract UK audio data.

        Args:
            org_word: word under which the pronunciation text is stored in
                the vocabulary table.
            word: spelling appended to the dictionary URL (may be a lemma of
                ``org_word``).
            item: tree row whose column ``#2`` receives the pronunciation
                text.

        Returns:
            The list of ``data-src-mp3`` attribute values of the UK audio
            buttons found on the page (empty when there are none).
        """
        BASE_URL = 'http://dictionary.cambridge.org/dictionary/english/'
        url = BASE_URL + word
        print(url)
        # A timeout keeps a stalled connection from blocking the caller forever.
        page = requests.get(url, timeout=10).content
        tree = lxml.html.fromstring(page)
        uks = tree.xpath("//span[@class='sound audio_play_button pron-icon uk']/@data-src-mp3")

        # IPA spans of the UK pronunciation inside the British dataset block.
        uks_pron_html = tree.xpath("//*[@id='dataset-british']/div[1]/div[2]/div/div/div[1]/span[@class='uk']/span[@class='pron']/span[@class='ipa']")
        uks_pron = [node.text_content() for node in uks_pron_html]

        # Only touch the tree and the database when something was found, so an
        # existing value is never replaced by the empty "//" placeholder.
        if uks_pron:
            prons = u'/' + u'/,/'.join(uks_pron) + u'/'
            self.tree.set(item,'#2',prons)
            sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
            sqlVocab.update_uk_pron(org_word, prons)

        return uks
Example #5
0
    def _create_treeview(self, parent):
        """Create the vocabulary Treeview, its scrollbars, fonts and row tags.

        A frame is packed into *parent*; the tree and both scrollbars are
        gridded inside it.  The data columns are the column names reported by
        the vocabulary table and are stored in ``self.dataCols``.
        """
        container = ttk.Frame(parent)
        container.pack(side=tk.TOP, fill=tk.BOTH, expand=tk.Y)

        # Columns come from the database rather than from a hard-coded tuple.
        vocab_db = SqliteVocabulary("studyenglish.db", "vocabulary")
        self.dataCols = tuple(vocab_db.get_col_names())

        # Tree plus vertical/horizontal scrollbars, wired in both directions.
        self.tree = ttk.Treeview(columns=self.dataCols)
        vbar = ttk.Scrollbar(orient=tk.VERTICAL, command=self.tree.yview)
        hbar = ttk.Scrollbar(orient=tk.HORIZONTAL, command=self.tree.xview)
        self.tree['yscroll'] = vbar.set
        self.tree['xscroll'] = hbar.set

        # Row-number column heading.
        self.tree.heading('#0', text='#', anchor=tk.E)
        """self.tree.heading('word',  text='word',  anchor=tk.W)
                                self.tree.heading('vietnamese',     text='vietnamese',      anchor=tk.W)
                                self.tree.heading('japanese',     text='japanese', anchor=tk.W)
                                self.tree.heading('sentence', text='sentence', anchor=tk.W)
                                self.tree.heading('status',   text='status',   anchor=tk.E)
                                self.tree.heading('study_date', text='study_date', anchor=tk.W)"""

        # Row-number column geometry.
        self.tree.column('#0', stretch=0, width=50, anchor=tk.E)
        """self.tree.column('word',  stretch=0, width=160)
                                self.tree.column('status',   stretch=0, width=50, anchor=tk.E)
                                self.tree.column('vietnamese',     stretch=0, width=160)
                                self.tree.column('japanese',     stretch=0, width=160)
                                self.tree.column('study_date', stretch=0, width=100)
                                self.tree.column('sentence', stretch=0, width=300)"""

        # Every data column gets the same left-anchored heading and fixed width.
        for name in self.dataCols:
            self.tree.heading(name, text=name, anchor=tk.W)
            self.tree.column(name, stretch=0, width=160)

        # Place the widgets inside the frame.
        self.tree.grid(in_=container, row=0, column=0, sticky=tk.NSEW)
        vbar.grid(in_=container, row=0, column=1, sticky=tk.NS)
        hbar.grid(in_=container, row=1, column=0, sticky=tk.EW)

        # Let the tree cell absorb all resizing.
        container.rowconfigure(0, weight=1)
        container.columnconfigure(0, weight=1)

        # Fonts used by the row tags below.
        self.normal = tkfont.Font(family='Consolas', size=10)
        self.boldfont = tkfont.Font(family='Consolas', size=10, weight='bold')
        self.whacky = tkfont.Font(family='Jokerman', size=10)

        self.tree.tag_configure('normal', font=self.normal)
        self.tree.tag_configure('timedout', background='pink', font=self.boldfont)
        self.tree.tag_configure('whacky', background='lightgreen', font=self.whacky)
Example #6
0
    def _query_vocabulary_data(self, sql):
        """Clear the tree and reload ``self.data`` from a vocabulary query.

        *sql* is passed unchanged to ``SqliteVocabulary.query_words_with_sql``.
        The returned rows are stored in ``self.data`` keyed by their 1-based
        position in the result.
        """
        self.tree.delete(*self.tree.get_children())
        self.data = {}

        rows = SqliteVocabulary("studyenglish.db", "vocabulary").query_words_with_sql(sql)
        for position, row in enumerate(rows, start=1):
            self.data[position] = row
Example #7
0
 def _delete_vocabulary(self):
     """Delete the word shown in the tree row ``self.item`` from the vocabulary table.

     The word is the first value column of that row; it is printed and then
     passed to ``SqliteVocabulary.delete_word``.
     """
     word = self.tree.item(self.item, "values")[0]
     print(word)
     db = 'studyenglish.db'
     tbl = 'vocabulary'
     SqliteVocabulary(db, tbl).delete_word(word)
Example #8
0
 def _edit_vocabulary(self):
     """Open an ``EntryWindow`` for the word shown in the tree row ``self.item``.

     The word (first value column of the row) is looked up in the vocabulary
     table and the first matching record is handed to ``EntryWindow`` in a
     new ``tk.Tk`` root.  Nothing is opened when no record matches.
     """
     values = self.tree.item(self.item,"values")
     print(values[0])
     db = 'studyenglish.db'
     tbl = 'vocabulary'
     sqlVocab = SqliteVocabulary(db, tbl)
     # Double embedded apostrophes so words such as "'s" or "n't" do not
     # terminate the SQL string literal early and break the query.
     escaped = str(values[0]).replace("'", "''")
     words = sqlVocab.query_words_with_sql("word = '{}'".format(escaped))
     for w in words:
         # Only the first matching record is edited.
         root = tk.Tk()
         entry_window = EntryWindow(root, db, tbl, w)
         break
Example #9
0
    def _build_vocabulary_data(self, status, study_date=None):
        """Clear the tree and reload ``self.data`` with words of a given status.

        Args:
            status: status value passed to the vocabulary query.
            study_date: acts only as a flag.  When it is ``None`` all words
                with *status* are loaded; otherwise the query is additionally
                restricted to today's date (UTC, ``%Y-%m-%d``).  The value
                itself is not used.

        The returned rows are stored in ``self.data`` keyed by their 1-based
        position in the result.
        """
        self.tree.delete(*self.tree.get_children())
        self.data = {}

        sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
        if study_date is None:
            words = sqlVocab.query_words_with_status(status)
        else:
            today = strftime("%Y-%m-%d", gmtime())
            words = sqlVocab.query_words_with_status_and_date(status, today)

        for num, w in enumerate(words, 1):
            self.data[num] = w
Example #10
0
def main():
    """Import every new word of ``document.txt`` into the vocabulary table.

    The text is split into sentences with the punkt tokenizer; each sentence
    is word-tokenized and lower-cased.  Every word not yet present in the
    table is inserted together with today's date (UTC) and the sentence it
    was found in.  Changes are committed once at the end; the database
    handle is closed even when an error interrupts the import.
    """
    sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # Context manager so the file handle is released right after reading.
    with open('document.txt') as document:
        text = document.read()
    sents = sent_tokenizer.tokenize(text)

    sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
    try:
        for sent in sents:
            # Unique lower-cased tokens of this sentence, in sorted order.
            vocab = sorted({w.lower() for w in nltk.word_tokenize(sent)})

            for v in vocab:
                if not sqlVocab.check_existed_word(v):
                    sqlVocab.insert_vocabulary(v, 1, "", "", strftime("%Y-%m-%d", gmtime()), sent)

        sqlVocab.commit()
    finally:
        sqlVocab.close()
Example #11
0
        def nature_language_processing(self):
            """Tokenize the entered text and record every word in the vocabulary table.

            The text is read from ``st`` (a name taken from the enclosing
            scope -- presumably a Tk text widget, confirm against the
            caller).  It is split into sentences and each sentence is
            POS-tagged.  A word not yet in the table is inserted with its
            tag, the sentence, status -2, today's date (UTC) and counts of 1;
            a known word gets ``update_word_count(word, 1, 1)``.  Afterwards
            the word list is refreshed and the master window is destroyed.
            """
            sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
            text = st.get(1.0, END)
            sents = sent_tokenizer.tokenize(text)

            sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
            try:
                sqlVocab.clear_local_count()
                for sent in sents:
                    tagged = nltk.pos_tag(nltk.word_tokenize(sent))

                    for v, t in tagged:
                        # Words are stored lower-cased; normalize once per token.
                        word = v.lower()
                        if not sqlVocab.check_existed_word(word):
                            sqlVocab.insert_vocabulary(word, "", "", t, "", "", sent, -2, strftime("%Y-%m-%d", gmtime()), 1, 1)
                        else:
                            sqlVocab.update_word_count(word, 1, 1)
                # The FreqDist-based frequency update is intentionally not run here.
                sqlVocab.commit()
            finally:
                # Release the database handle even if tagging or a query fails.
                sqlVocab.close()

            self.show_all_words()
            self.master.destroy()