def init_list():
    """Load the saved word-book entries from the XML file at ``xmlpath``.

    The file is read in full and parsed with ``Selector``.  Each
    ``//wordbook/item`` element becomes one ``XmlItem`` constructed from the
    text of its ``<name>`` child, with ``create_time``, ``access_time`` and
    ``score`` assigned as attributes afterwards.

    Returns:
        A list of ``XmlItem`` objects, in the order the selector yields the
        items.  The list is empty when the file cannot be read or is empty.

    Raises:
        IndexError: if an item has no text for one of the four fields
            (``.extract()[0]`` on an empty result).
        ValueError: if a timestamp does not match ``%Y-%m-%d %H:%M:%S`` or
            the score text is not an integer.
    """
    # Kept as a function-local import, as in the original code.
    from models import XmlItem

    time_format = '%Y-%m-%d %H:%M:%S'

    # Best-effort read: any failure to open or read the file is treated the
    # same as an empty word book.  The context manager closes the handle on
    # every path, so no manual close() (and no swallowed NameError when
    # open() itself fails) is needed.
    try:
        with open(xmlpath, 'r') as f:
            xml = f.read()
    except Exception:
        xml = ''

    temp = []
    if not xml:
        return temp

    def first_text(node, field):
        # Text of the first <field> child; raises IndexError if absent.
        return node.xpath('./%s/text()' % field).extract()[0]

    xxs = Selector(text=xml)
    for item in xxs.xpath('//wordbook/item'):
        one = XmlItem(first_text(item, 'name'))
        one.create_time = datetime.strptime(
            first_text(item, 'create_time'), time_format)
        one.access_time = datetime.strptime(
            first_text(item, 'access_time'), time_format)
        one.score = int(first_text(item, 'score'))
        temp.append(one)
    return temp
def init_list():
    """Load the saved word-book entries from the XML file at ``xmlpath``.

    The file is read in full and parsed with ``Selector``.  Each
    ``//wordbook/item`` element becomes one ``XmlItem`` constructed from the
    text of its ``<name>`` child, with ``create_time``, ``access_time`` and
    ``score`` assigned as attributes afterwards.

    Returns:
        A list of ``XmlItem`` objects, in the order the selector yields the
        items.  The list is empty when the file cannot be read or is empty.

    Raises:
        IndexError: if an item has no text for one of the four fields
            (``.extract()[0]`` on an empty result).
        ValueError: if a timestamp does not match ``%Y-%m-%d %H:%M:%S`` or
            the score text is not an integer.
    """
    # Kept as a function-local import, as in the original code.
    from models import XmlItem

    time_format = '%Y-%m-%d %H:%M:%S'

    # Best-effort read: a word book that cannot be opened or read is treated
    # as empty.  ``except Exception`` (rather than a bare ``except``) lets
    # KeyboardInterrupt and SystemExit propagate, and the context manager
    # closes the handle on every path without a manual close().
    try:
        with open(xmlpath, 'r') as f:
            xml = f.read()
    except Exception:
        xml = ''

    temp = []
    if not xml:
        return temp

    def first_text(node, field):
        # Text of the first <field> child; raises IndexError if absent.
        return node.xpath('./%s/text()' % field).extract()[0]

    xxs = Selector(text=xml)
    for item in xxs.xpath('//wordbook/item'):
        one = XmlItem(first_text(item, 'name'))
        one.create_time = datetime.strptime(
            first_text(item, 'create_time'), time_format)
        one.access_time = datetime.strptime(
            first_text(item, 'access_time'), time_format)
        one.score = int(first_text(item, 'score'))
        temp.append(one)
    return temp
def query(self, word):
    """Look up ``word`` on dict.youdao.com and scrape the result page.

    Args:
        word: the term to search for.  It is appended to the search URL
            as-is; no URL-encoding is applied here.

    Returns:
        * ``''`` when ``self.get_html`` returns nothing for the URL.
        * A string when the page yields no meanings: the text of the
          "error-typo" heading and of its first link, joined by a newline,
          or ``''`` if either one is missing.
        * Otherwise a dict with the keys ``name`` (``word`` as given) and
          ``phonetic``, ``meaning``, ``example`` (each UTF-8 encoded).
    """
    url = "http://dict.youdao.com/search?tab=chn&keyfrom=dict.top&q="
    url += word

    before_fetching = datetime.now()
    html = self.get_html(url)
    if not html:
        return ''
    time_fetching = datetime.now() - before_fetching

    # Single-argument print(...) produces the same output whether print is
    # a statement or a function, so these lines run on either interpreter.
    print('')
    print(word)
    print(before_fetching.strftime('%Y/%m/%d %H:%M:%S'))
    print('time_fetching %.2f' % time_fetching.total_seconds())

    hxs = Selector(text=html)

    # Phonetic transcription: first text node of each inner span, if any.
    phonetic_parts = []
    for span in hxs.xpath('//div[@id="phrsListTab"]/h2[1]/div[1]/span'):
        texts = span.xpath('./span/text()').extract()
        if texts:
            phonetic_parts.append(texts[0])
    phonetic = ''.join(phonetic_parts)

    # Meanings: one line per <li>.  An <li> without its own text contributes
    # nothing (checked explicitly instead of catching every exception).
    capitalized = word.capitalize()
    meaning_lines = []
    for li in hxs.xpath(
            '//div[@id="phrsListTab"]/div[@class="trans-container"]/ul/li'):
        texts = li.xpath('./text()').extract()
        line = texts[0] + '\n' if texts else ''
        # Skip entries that mention the capitalized word together with
        # u'人名' ("personal name").
        if capitalized in line and u'人名' in line:
            continue
        meaning_lines.append(line)
    meaning = ''.join(meaning_lines)

    if not meaning:
        # No translation found: report the page's spelling suggestion, which
        # needs both the heading and at least one suggested link.
        print('wrong word ?')
        headings = hxs.xpath(
            '//div[@class="error-typo"]/h4/text()').extract()
        suggestions = hxs.xpath(
            '//div[@class="error-typo"]//a/text()').extract()
        if headings and suggestions:
            return headings[0].strip() + '\n' + suggestions[0]
        return ''

    # Bilingual examples: first paragraph then second paragraph of each
    # <li>, one per line.
    example_lines = []
    for li in hxs.xpath('//div[@id="bilingual"]/ul/li'):
        example_en = li.xpath('./p[1]/span//text()').extract()
        example_cn = li.xpath('./p[2]/span//text()').extract()
        example_lines.append(''.join(example_en) + '\n')
        example_lines.append(''.join(example_cn) + '\n')
    example = ''.join(example_lines)

    return {
        'name': word,
        'phonetic': phonetic.encode('utf8'),
        'meaning': meaning.encode('utf8'),
        'example': example.encode('utf8'),
    }
def query(self):
    """Fetch the lookup page for ``self.word`` and dispatch to a parser.

    The URL comes from ``get_url(self.word.value)`` and the page from
    ``self.get_html``.  A blank line, the word, the start timestamp and the
    fetch duration are printed before parsing.

    Returns:
        ``''`` when no HTML was obtained; otherwise whatever
        ``self.query_cn`` (when ``self.word.lang == 'cn'``) or
        ``self.query_en`` returns for the parsed page.
    """
    started = datetime.now()
    page = self.get_html(get_url(self.word.value))
    if not page:
        return ''
    elapsed = datetime.now() - started

    print()
    print(self.word.value)
    print(started.strftime('%Y/%m/%d %H:%M:%S'))
    # Fall back to the whole-seconds attribute if total_seconds() fails.
    try:
        print('time_fetching %.2f' % elapsed.total_seconds())
    except Exception:
        print('time_fetching %.2f' % elapsed.seconds)

    hxs = Selector(text=page)
    if self.word.lang != 'cn':
        return self.query_en(hxs)
    return self.query_cn(hxs)
def query(self, word):
    """Look up ``word`` on dict.youdao.com and scrape the result page.

    Args:
        word: the term to search for.  It is appended to the search URL
            as-is; no URL-encoding is applied here.

    Returns:
        * ``''`` when ``self.get_html`` returns nothing for the URL.
        * A string when the page yields no meanings: the text of the
          "error-typo" heading and of its first link, joined by a newline,
          or ``''`` if either one is missing.
        * Otherwise a dict with the keys ``name`` (``word`` as given) and
          ``phonetic``, ``meaning``, ``example`` (each UTF-8 encoded).
    """
    url = "http://dict.youdao.com/search?tab=chn&keyfrom=dict.top&q="
    url += word

    before_fetching = datetime.now()
    html = self.get_html(url)
    if not html:
        return ''
    time_fetching = datetime.now() - before_fetching

    # Only the missing-method case is handled: timedelta objects without
    # total_seconds() fall back to the whole-seconds attribute.
    try:
        elapsed_seconds = time_fetching.total_seconds()
    except AttributeError:
        elapsed_seconds = time_fetching.seconds

    # Single-argument print(...) produces the same output whether print is
    # a statement or a function, so these lines run on either interpreter.
    print('')
    print(word)
    print(before_fetching.strftime('%Y/%m/%d %H:%M:%S'))
    print('time_fetching %.2f' % elapsed_seconds)

    hxs = Selector(text=html)

    # Phonetic transcription: first text node of each inner span, if any.
    phonetic_parts = []
    for span in hxs.xpath('//div[@id="phrsListTab"]/h2[1]/div[1]/span'):
        texts = span.xpath('./span/text()').extract()
        if texts:
            phonetic_parts.append(texts[0])
    phonetic = ''.join(phonetic_parts)

    # Meanings: one line per <li>.  An <li> without its own text contributes
    # nothing (checked explicitly instead of catching every exception).
    capitalized = word.capitalize()
    meaning_lines = []
    for li in hxs.xpath(
            '//div[@id="phrsListTab"]/div[@class="trans-container"]/ul/li'):
        texts = li.xpath('./text()').extract()
        line = texts[0] + '\n' if texts else ''
        # Skip entries that mention the capitalized word together with
        # u'人名' ("personal name").
        if capitalized in line and u'人名' in line:
            continue
        meaning_lines.append(line)
    meaning = ''.join(meaning_lines)

    if not meaning:
        # No translation found: report the page's spelling suggestion, which
        # needs both the heading and at least one suggested link.
        print('wrong word ?')
        headings = hxs.xpath(
            '//div[@class="error-typo"]/h4/text()').extract()
        suggestions = hxs.xpath(
            '//div[@class="error-typo"]//a/text()').extract()
        if headings and suggestions:
            return headings[0].strip() + '\n' + suggestions[0]
        return ''

    # Bilingual examples: first paragraph then second paragraph of each
    # <li>, one per line.
    example_lines = []
    for li in hxs.xpath('//div[@id="bilingual"]/ul/li'):
        example_en = li.xpath('./p[1]/span//text()').extract()
        example_cn = li.xpath('./p[2]/span//text()').extract()
        example_lines.append(''.join(example_en) + '\n')
        example_lines.append(''.join(example_cn) + '\n')
    example = ''.join(example_lines)

    return {
        'name': word,
        'phonetic': phonetic.encode('utf8'),
        'meaning': meaning.encode('utf8'),
        'example': example.encode('utf8'),
    }