Example #1
0
    def _get_uks_link_mp3_cambridge(self, org_word, word, item):
        """Scrape UK audio links and IPA pronunciations for ``word``.

        Downloads the Cambridge Dictionary page for ``word``, writes the UK
        IPA transcription(s) found on it into column ``'#2'`` of ``self.tree``
        for ``item`` and, when at least one transcription was found, hands
        them to ``SqliteVocabulary.update_uk_pron`` keyed by ``org_word``.

        Args:
            org_word: Key passed to ``SqliteVocabulary.update_uk_pron``.
            word: Text appended to the dictionary base URL.
            item: Row identifier passed to ``self.tree.set``.

        Returns:
            The ``data-src-mp3`` attribute values of the UK audio buttons
            found on the page (an empty list when there are none).

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched, including when the request times out.
        """
        BASE_URL = 'http://dictionary.cambridge.org/dictionary/english/'
        url = BASE_URL + word
        print(url)

        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would block the caller for good.
        page = requests.get(url, timeout=10).content
        doc = lxml.html.fromstring(page)

        uks = doc.xpath(
            "//span[@class='sound audio_play_button pron-icon uk']"
            "/@data-src-mp3")

        # Anchored to the 'dataset-british' section rather than matching every
        # span[@class='uk'] in the document.
        ipa_nodes = doc.xpath(
            "//*[@id='dataset-british']/div[1]/div[2]/div/div/div[1]"
            "/span[@class='uk']/span[@class='pron']/span[@class='ipa']")
        uks_pron = [node.text_content() for node in ipa_nodes]

        # Each transcription is wrapped in slashes: /a/,/b/.  With nothing
        # found the column is blanked instead of showing a bare '//'.
        if uks_pron:
            prons = u'/' + u'/,/'.join(uks_pron) + u'/'
        else:
            prons = u''
        self.tree.set(item, '#2', prons)

        if uks_pron:
            # Only touch the database when there is something to store.
            sqlVocab = SqliteVocabulary("studyenglish.db", "vocabulary")
            sqlVocab.update_uk_pron(org_word, prons)

        return uks