def test_query_normal(self, Record):
    """A wiktionary query should store only the cleaned first definition."""
    raw = '{"en":[{"definitions":[{"definition":"string"}]}]}'
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query('mock')
    Record.assert_called_with(
        word='mock',
        content='{"definition": "string"}',
        source='wiktionary',
    )
def test_query_normal(self, Record):
    """Querying a word should save the parsed page content as a Record."""
    self.dict._get_raw = Mock(return_value=self.raw_html)
    self.dict.query(self.word, self.timeout)
    # Round-trip through json so formatting matches what query() emits.
    expected_content = json.dumps(json.loads(self.content))
    Record.assert_called_with(
        word=self.word,
        content=expected_content,
        source='spanish',
    )
def test_query_normal(self, Record):
    """A successful urban query should store the raw JSON verbatim."""
    raw = '{"mock": true}'
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query('mock')
    Record.assert_called_with(word='mock', content=raw, source='urban')
def test_query_normal(self, Record):
    """An oxford query should pass the raw API response straight to Record."""
    self.dict._get_raw = Mock(return_value=SAMPLE_RESPONSE)
    self.dict._get_app_key = Mock(return_value=('id', 'key'))
    self.dict.query('string')
    Record.assert_called_with(
        word='string',
        content=SAMPLE_RESPONSE,
        source='oxford',
    )
def test_query_normal(self, Record):
    """A jisho query should store the JSON payload unchanged."""
    serialized = json.dumps(self.content)
    self.content_json = serialized
    self.dict._get_raw = Mock(return_value=serialized)
    self.dict.query('Japanese', timeout=5)
    Record.assert_called_with(
        word='Japanese',
        content=serialized,
        source='jisho',
    )
def save(self, query_record: Record, word: str):
    """Persist *query_record*, rewriting the cached row only if it changed."""
    cached = self.query_db_cache(word)
    if cached is None:
        # No cached row for this word/provider yet: insert a fresh one.
        query_record.save(force_insert=True)
        return
    # Compare parsed JSON so that formatting differences alone
    # don't force a database write.
    if json.loads(cached.content) != json.loads(query_record.content):
        cached.content = query_record.content
        cached.save()
def test_query_normal(self, Record):
    """The wiktionary parser should reshape the API JSON into zdict's schema."""
    raw = (
        '{"en":[{"partOfSpeech":"part_of_speech",'
        ' "definitions":[{"definition":"definition","examples":["example"]}]}]}'
    )
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query('mock')
    expected = [{
        "part_of_speech": "part_of_speech",
        "definitions": [
            {"definition": "definition", "examples": ["example"]},
        ],
    }]
    Record.assert_called_with(
        word='mock',
        content=json.dumps(expected),
        source='wiktionary',
    )
def test_show(self):
    """show() should render a MoE record without raising."""
    sample = '''
    {
        "heteronyms": [{
            "bopomofo": "ㄧㄢˋ",
            "bopomofo2": "yàn",
            "definitions": [{
                "def": "假的、偽造的。",
                "example": ["如:「贗品」。"],
                "quote": ["..."],
                "type": "形",
                "synonyms": "尛",
                "antonyms": "萌"
            }],
            "pinyin": "yàn"
        }],
        "non_radical_stroke_count": 15,
        "radical": "貝",
        "stroke_count": 22,
        "title": "贗"
    }
    '''
    record = Record(word='贗', content=sample, source=self.dict.provider)
    # Smoke test only: rendering must not raise.
    self.dict.show(record)
def query(self, word: str):
    """Query Wiktionary for *word* and return a Record with its first definition.

    Raises:
        NotFoundError: when the API reports an error, returns no 'en'
            section, or returns empty definition lists.
    """
    try:
        content = self._get_raw(word)
    except QueryError as exception:
        raise NotFoundError(exception.word)

    content = json.loads(content)

    try:
        # Get the first definition string from the JSON payload.
        definition = content['en'][0]['definitions'][0]['definition']
    except (KeyError, IndexError):
        # The API can return JSON without an 'en' section (KeyError) or
        # with empty entry/definition lists (IndexError); the original
        # code only caught KeyError and crashed on empty lists.
        raise NotFoundError(word)

    # Strip HTML tags from the definition string.
    definition = BeautifulSoup(definition, "html.parser").text
    record = Record(
        word=word,
        content=json.dumps({'definition': definition}),
        source=self.provider,
    )
    return record
def query_db_cache(self, word: str) -> Record:
    """Return the cached Record for *word* from this provider, or None."""
    try:
        return Record.get(word=word, source=self.provider)
    except Record.DoesNotExist:
        # Cache miss — caller will perform a live query.
        return None
def query(self, word: str):
    """Provider template: fetch *word*'s page and build a Record.

    The parsing section is intentionally left for concrete providers
    to fill in; `content` is serialized to JSON and stored.
    """
    webpage = self._get_raw(word)
    soup = BeautifulSoup(webpage, "html.parser")
    content = {}
    # Parse `data` and fill the information you need into `content`
    #
    # Use
    # ```
    # except AttributeError:
    #     raise NotFoundError(word)
    # ```
    # while the word users try to query is not found on this dictionary.
    if self.args.verbose:
        # For verbose mode
        pass
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def test_show(self):
    """show() should render a wiktionary record without raising."""
    sample = '''
    [{"part_of_speech":"part_of_speech",
    "definitions":[{"definition": "definition","examples":["example"]}]}]
    '''
    record = Record(word="string", content=sample, source=self.dict.provider)
    # Smoke test only: rendering must not raise.
    self.dict.show(record)
def query(self, word: str):
    """Query Yahoo Dictionary for *word* and return a versioned Record.

    Delegates the actual HTML parsing to parse_summary / parse_explain /
    parse_verbose and stamps the content with a format version number.

    Raises:
        NotFoundError: when the page lacks a summary block or explain list.
    """
    webpage = self._get_raw(word)
    data = bs4.BeautifulSoup(webpage, "html.parser")
    content = {}
    # Please bump version if the format changes again.
    # the `show` function will act with respect to version number.
    content['version'] = 2
    # Here are details of each version.
    #
    # The original one, in the old era, there wasn't any concept of
    # version number:
    # content = {
    #     'word': ...,
    #     'pronounce': ...,
    #     'sound': (optional),
    #     'explain': [...],
    #     'verbose': [...],
    # }
    #
    # Version 2, yahoo dictionary content is provided by Dy.eye
    # at that moment:
    # content = {
    #     'version': 2,
    #     'summary': {
    #         'word': ...,
    #         'pronounce': [('KK', '...'), (...)],  // optional. e.g. 'google'
    #         'explain': [(optional)],  # 'hospitalized' is summary-only
    #         'grammar': [(optional)],
    #     },
    #     'explain': [...],
    #     'verbose': [(optional)],
    # }

    # Construct summary (required)
    try:
        content['summary'] = self.parse_summary(data, word)
    except AttributeError:
        # Missing summary block means the word was not found.
        raise NotFoundError(word)
    # Handle explain (required)
    try:
        content['explain'] = self.parse_explain(data)
    except IndexError:
        raise NotFoundError(word)
    # Extract verbose (optional)
    content['verbose'] = self.parse_verbose(data)
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def test_query_normal(self, Record):
    """Wiktionary JSON should be normalized before being saved as a Record."""
    raw = (
        '{"en":[{"partOfSpeech":"part_of_speech",'
        ' "definitions":[{"definition":"definition","examples":["example"]}]}]}'
    )
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query('mock')
    expected = [{
        "part_of_speech": "part_of_speech",
        "definitions": [{
            "definition": "definition",
            "examples": ["example"],
        }],
    }]
    Record.assert_called_with(
        word='mock', content=json.dumps(expected), source='wiktionary')
def test_show(self):
    """show() should render a Yandex record without raising."""
    sample = '''
    {
        "code": 200,
        "lang": "ru-en",
        "text": ["house"]
    }
    '''
    record = Record(word='дом', content=sample, source=self.dict.provider)
    # Smoke test only: rendering must not raise.
    self.dict.show(record)
def query(self, word: str):
    """Query the API for *word*; raise NotFoundError when no data comes back.

    Stores the raw response string as the Record content.
    """
    content = self._get_raw(word)
    content_json = json.loads(content)
    # Treat a missing *or* empty 'data' field as "word not found" instead
    # of letting a KeyError escape on a malformed response.
    if not content_json.get('data'):
        raise NotFoundError(word)
    record = Record(
        word=word,
        content=content,
        source=self.provider,
    )
    return record
def query(self, word: str):
    """Query Wiktionary and build a Record listing each part of speech
    with its definitions and optional examples (HTML stripped).

    Raises:
        NotFoundError: on API errors or when no 'en' section exists.
    """
    try:
        content = self._get_raw(word)
    except QueryError as exception:
        raise NotFoundError(exception.word)

    content = json.loads(content)

    try:
        # Keep only the English entries of the payload.
        content = content['en']
    except KeyError:
        # API can return JSON that does not contain the 'en' language.
        raise NotFoundError(word)

    # The list that becomes the Record's content.
    r_content = []
    for i, d in enumerate(content):
        # Which part of speech the following definitions refer to.
        r_content.append({'part_of_speech': d['partOfSpeech']})
        r_content[i]['definitions'] = []
        for j, d2 in enumerate(d['definitions']):
            # Strip HTML tags from the definition text.
            definition = BeautifulSoup(d2['definition'], "html.parser").text
            r_content[i]['definitions'].append({'definition': definition})
            # Examples are optional in the API response; a plain membership
            # test replaces the old try/except-KeyError/else dance.
            if 'examples' in d2:
                r_content[i]['definitions'][j]['examples'] = [
                    BeautifulSoup(ex, "html.parser").text
                    for ex in d2['examples']
                ]

    record = Record(
        word=word,
        content=json.dumps(r_content),
        source=self.provider,
    )
    return record
def query(self, word: str):
    """Return a Record holding the raw response, or raise NotFoundError."""
    content = self._get_raw(word)
    # The site embeds this marker in the page when the word is unknown.
    if "no_results" in content:
        raise NotFoundError(word)
    return Record(
        word=word,
        content=content,
        source=self.provider,
    )
def query(self, word: str):
    """Fetch the raw content for *word* and wrap it in a Record."""
    try:
        raw = self._get_raw(word)
    except QueryError as exception:
        # Map transport-level failures onto the dictionary-level error.
        raise NotFoundError(exception.word)
    return Record(
        word=word,
        content=raw,
        source=self.provider,
    )
def query(self, word: str):
    """Query Urban Dictionary; raise NotFoundError when there are no entries.

    The raw JSON string is stored as the Record content.
    """
    content_str = self._get_raw(word)
    content_dict = json.loads(content_str)
    # An empty (or missing) 'list' means the word has no definitions.
    # `.get` also guards against a response without the key, where the
    # old `content_dict['list'] == []` check raised KeyError.
    if not content_dict.get('list'):
        raise NotFoundError(word)
    record = Record(
        word=word,
        content=content_str,
        source=self.provider,
    )
    return record
def test_show(self):
    """show() should render an urban record without raising."""
    sample = '''
    {
        "list": [
            {
                "word": "mock",
                "definition": "Mock",
                "example": "..."
            }
        ]
    }
    '''
    record = Record(word='mock', content=sample, source='urban')
    # Smoke test only: rendering must not raise.
    self.dict.show(record)
def get_pyjoke(pyjokes, word: str):
    """Return a Record with a random pyjoke containing *word*, or None."""
    if not pyjokes:
        # pyjokes module not available.
        return

    def normalized_tokens(joke):
        # Strip every non-alphanumeric character from each token so that
        # punctuation doesn't prevent a match.
        return (''.join(ch for ch in tok if ch.isalnum())
                for tok in joke.split())

    candidates = [j for j in pyjokes.get_jokes()
                  if word in normalized_tokens(j)]
    if not candidates:
        return
    return Record(word=word, content=random.choice(candidates),
                  source='pyjokes')
def test_show(self):
    """show() should render a Taiwanese MoE record without raising."""
    sample = '''
    {
        "h": [{
            "T": "bo̍k-ní",
            "_": "928",
            "d": [{
                "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~ ...",
                "type": "`名~"
            }]
        }],
        "t": "`木~`耳~"
    }
    '''
    record = Record(word='木耳', content=sample, source=self.dict.provider)
    # Smoke test only: rendering must not raise.
    self.dict.show(record)
def query(self, word: str):
    """Query the Oxford API with app credentials and return a Record.

    HTTP error codes are mapped to human-readable messages before
    NotFoundError is raised.
    """
    try:
        app_id, app_key = self._get_app_key()
        headers = {'app_id': app_id, 'app_key': app_key}
        content = self._get_raw(word, headers=headers)
    except QueryError as exception:
        message = self.status_code.get(exception.status_code,
                                       'Some bad thing happened')
        self.color.print('Oxford: ' + message, 'red')
        raise NotFoundError(exception.word)
    return Record(
        word=word,
        content=content,
        source=self.provider,
    )
def query(self, word: str):
    """Scrape translation pairs, grouped by source, from the result table."""
    requests.packages.urllib3.disable_warnings()
    # The site's certificate chain is unreliable, hence verify=False.
    content = self._get_raw(word, verify=False)
    data = {"title": word, "sources": defaultdict(list)}
    soup = BeautifulSoup(content, "html.parser")
    for row in soup.find_all("tr", {"class": "dash"}):
        source_name = row.find(
            "td", attrs={"class": "sourceW"}).find("a").text.strip()
        english = row.find("td", attrs={"class": "ennameW"}).text.strip()
        chinese = row.find("td", attrs={"class": "zhtwnameW"}).text.strip()
        data["sources"][source_name].append((english, chinese))
    if not data["sources"]:
        raise NotFoundError(word)
    return Record(
        word=word,
        content=json.dumps(data),
        source=self.provider,
    )
def query(self, word: str):
    """Query Yandex translate; non-200 response codes become NotFoundError."""
    try:
        content = self._get_raw(word)
    except QueryError as exception:
        raise NotFoundError(exception.word)
    status = json.loads(content).get('code')
    if status != 200:
        # https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/#codes
        message = self.status_code.get(
            status, 'Some bad thing happened with Yandex')
        print('Yandex: ' + message)
        raise NotFoundError(word)
    return Record(
        word=word,
        content=content,
        source=self.provider,
    )
def test_query_normal(self, Record):
    """An empty JSON body should still be saved as a 'moe' Record."""
    self.dict._get_raw = Mock(return_value='{}')
    self.dict.query('萌')
    Record.assert_called_with(word='萌', content='{}', source='moe')
def test_query_normal(self, Record):
    """A query with an explicit timeout should still create the Record."""
    self.dict._get_raw = Mock(return_value='{}')
    self.dict.query('萌', timeout=666)
    Record.assert_called_with(word='萌', content='{}', source='moe')
def test_query_normal(self, Record):
    """A code-200 response should be stored verbatim as a yandex Record."""
    raw = '{"code":200,"lang":"ru-en","text":["house"]}'
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query('дом')
    Record.assert_called_with(word='дом', content=raw, source='yandex')
def query(self, word: str):
    """Scrape SpanishDict's neodict card for *word*.

    Builds content = {'word': ..., 'explains': [[speech, [[category,
    explains], ...]], ...]} and returns it serialized in a Record.

    NOTE(review): the traversal relies on the exact child structure of
    the '#dictionary-neodict-*' markup; any site redesign breaks it.

    Raises:
        NotFoundError: when neither language card nor headword is present.
    """
    webpage = self._get_raw(word)
    soup = BeautifulSoup(webpage, "html.parser")
    content = {}
    # The card exists under one of two ids depending on query direction.
    en_css = "#dictionary-neodict-en"
    es_css = "#dictionary-neodict-es"
    card = soup.select_one(en_css) or soup.select_one(es_css)
    if card is None:
        raise NotFoundError(word)
    word_css = "div > div:nth-child(1) > span"
    word_element = card.select_one(word_css)
    if word_element is None:
        raise NotFoundError(word)
    content['word'] = word_element.text
    '''
    COPULAR VERB                                # speech
                                                # categories_card
    1. (used to express a permanent quality)    # category_text
                                                # explanation
    a. ser                                      # index
                                                # examples
                                                # example
    The ocean is blue. El océano es azul.
    2. (used to express a temporary state)
    a. estar
    I'm not in a good mood today. Hoy no estoy de buen humor.
    The sky is cloudy. El cielo está nublado.
    ...
    (Another speech if it has.)
    '''
    speech_pattern = "div > div:nth-child(2)"
    # "#dictionary-neodict-en > div > div:nth-child(2)"
    # Start to grab
    content['explains'] = []
    speech = card.select_one(speech_pattern)
    while speech:
        result = []
        # Each speech node has exactly a title element and a categories card.
        speech_text, categories_card = speech.children
        speech_text_element = speech_text.find(['a', 'span'])
        content['explains'].append([speech_text_element.text, result])
        for category in categories_card.children:
            category_text_element, explanations_card = category.children
            category_text = category_text_element.text
            explains = []
            for explanation in explanations_card.children:
                for _ in explanation.children:
                    # Everything but the last child forms the index text;
                    # the last child holds the example sentences.
                    index_elements, examples = (_.contents[:-1],
                                                _.contents[-1])
                    index = ' '.join([
                        _.text.strip() for _ in index_elements if _ != ' '
                    ])
                    if (not examples) and index:
                        # Translation without example sentences.
                        explains.append((index, ))
                        continue
                    sentences = []
                    for example in examples:
                        t = example.find_all()
                        # Should be only 3 elements
                        # [text, —, text]
                        '''
                        When Spanish => English, it will show Spanish first
                        When English => Spanish, it will show English first
                        So, the variables below are not definitely
                        '''
                        sentences.append((t[0].text, t[2].text))
                    explains.append((index, sentences))
            result.append([category_text, explains])
        speech = speech.next_sibling
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def query(self, word: str):
    """Scrape Yahoo dictionary results for *word*.

    Builds content with keys: 'word', optional 'pronounce' and 'sound',
    'explain' (main results) and 'verbose' (variation + additional results).

    Raises:
        NotFoundError: when the page has no '#term' element.
    """
    webpage = self._get_raw(word)
    data = BeautifulSoup(webpage, "html.parser")
    content = {}
    # handle record.word
    try:
        content['word'] = data.find('span', id='term').text
    except AttributeError:
        raise NotFoundError(word)
    # handle pronounce ("KK[...]"-style pairs)
    pronu_value = data.find('span', id='pronunciation_pos').text
    if pronu_value:
        content['pronounce'] = []
        # Raw string: the old plain literal relied on '\w' surviving as-is,
        # which now triggers an invalid-escape warning.
        for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value):
            content['pronounce'].append(match.group(1, 2))
    # handle sound
    pronu_sound = data.find(class_='proun_sound')
    if pronu_sound:
        content['sound'] = [
            ('mp3', pronu_sound.find(class_='source', attrs={
                'data-type': 'audio/mpeg'
            }).attrs['data-src']),
            ('ogg', pronu_sound.find(class_='source', attrs={
                'data-type': 'audio/ogg'
            }).attrs['data-src']),
        ]
    # Handle explain
    main_explanations = data.find(
        class_='dd algo explain mt-20 lst DictionaryResults')
    if main_explanations:
        main_explanations = itertools.zip_longest(
            main_explanations.find_all(class_='compTitle mb-10'),
            main_explanations.find_all(
                class_='compArticleList mb-15 ml-10',
            ))
    else:
        main_explanations = ""
    content['explain'] = []
    for part_of_speech, meaning in main_explanations:
        node = [part_of_speech.text] if part_of_speech else ['']
        for item in meaning.find_all('li', class_='ov-a'):
            pack = [item.find('h4').text]
            for example in (tag for tag in item.find_all('span')
                            if 'line-height: 17px;' not in tag['style']):
                sentence = ''
                for w in example.contents:
                    if w.name == 'b':
                        # Emphasize the queried word.
                        sentence += '*' + w.text + '*'
                    else:
                        try:
                            sentence += w
                        except Exception:
                            # Non-string children can't be concatenated;
                            # skip them. (Was a bare ``except:`` which also
                            # swallowed KeyboardInterrupt/SystemExit.)
                            pass
                pack.append((sentence.strip()))
            node.append(pack)
        content['explain'].append(node)
    # verbose info
    part_of_speech_list, meaning_list = [], []
    content['verbose'] = []
    variation_explanations = data.find(
        class_='dd algo variation fst DictionaryResults')
    if variation_explanations:
        part_of_speech_list.extend(
            variation_explanations.find_all(class_='compTitle'))
        meaning_list.extend(
            variation_explanations.find_all(class_='compArticleList'))
    additional_explanations = data.find(
        class_='dd algo othersNew lst DictionaryResults')
    if additional_explanations:
        part_of_speech_list.extend(
            additional_explanations.find_all(class_='compTitle mt-26'))
        meaning_list.extend(
            additional_explanations.find_all(class_='compArticleList'))
    more_explanations = itertools.zip_longest(part_of_speech_list,
                                              meaning_list)
    for part_of_speech, meaning in more_explanations:
        node = [part_of_speech.text] if part_of_speech else ['']
        if meaning:
            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]
                for example in (
                        tag for tag in item.find_all('span')
                        if 'line-height: 17px;' not in tag['style']):
                    sentence = ''
                    for w in example.contents:
                        if w.name == 'b':
                            sentence += '*' + w.text + '*'
                        else:
                            try:
                                sentence += w
                            except Exception:
                                # See note above: skip non-string children.
                                pass
                    pack.append((sentence.strip()))
                node.append(pack)
        content['verbose'].append(node)
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def complete(self, text, state):
    """Readline completer: return successive cached words starting with *text*.

    state == 0 starts a new query over the Record cache; subsequent calls
    walk the same iterator. Returns None when candidates are exhausted —
    the old code let StopIteration escape, which readline treats as an
    error and silently aborts completion.
    """
    if state == 0:
        # new query
        self.records = iter(
            Record.select().where(Record.word.startswith(text)))
    try:
        return next(self.records).word
    except StopIteration:
        return None
def test_query_normal(self, Record):
    """A query with timeout should store the raw JSON as an urban Record."""
    raw = '{"mock": true}'
    self.dict._get_raw = Mock(return_value=raw)
    self.dict.query("mock", timeout=666)
    Record.assert_called_with(word="mock", content=raw, source="urban")
def query(self, word: str):
    """Query iTaigi for *word*, collecting basic and related words.

    Raises:
        NotFoundError: when the response has no "列表" entries.
    """
    webpage = self._get_raw(word)
    soup = BeautifulSoup(webpage, "html.parser")
    response = json.loads(soup.text)

    # Not Found
    if not response.get("列表"):
        raise NotFoundError(word)

    # Show Chinese word from iTaigi instead of user input if possible.
    with suppress(KeyError, IndexError):
        word = response["列表"][0]["外語資料"]

    def collect(raw_words):
        # Build a {text, pronounce[, sentences]} dict for each raw word.
        collected = []
        for raw_word in raw_words:
            entry = {}
            text = self._get_word_text(raw_word)
            entry['text'] = text
            pronounce = self._get_word_pronounce(raw_word)
            entry['pronounce'] = pronounce
            if self.args.verbose:
                entry['sentences'] = self._get_word_sentences(text, pronounce)
            collected.append(entry)
        return collected

    content = {}
    # NOTE: the former ``try: ... except Exception: raise`` wrappers around
    # these lookups were no-ops and were removed; a KeyError/IndexError
    # still propagates exactly as before.
    content['basic_words'] = collect(response["列表"][0]["新詞文本"])

    # Fix issue-452 for iTaigi testings:
    # iTaigi returns basic_words in random order. Since basic_words is a
    # list, sort it before saving into the database or the unit tests
    # (and cached content comparisons) would be non-deterministic.
    # (The sort key no longer shadows the ``word`` parameter.)
    content['basic_words'].sort(key=lambda entry: entry['text'])

    content['related_words'] = collect(response["其他建議"])

    # Save content with word and provider.
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def dump(pattern=r'^.*$'):
    """List every cached word whose full text matches *pattern*."""
    matcher = re.compile(pattern)
    return [record.word
            for record in Record.select()
            if matcher.fullmatch(record.word)]
def query(self, word: str):
    """Scrape SpanishDict's entry card for *word*.

    Builds content = {'word': ..., 'explains': [[speech, [[context,
    explains], ...]], ...]} and returns it serialized in a Record.

    Raises:
        NotFoundError: when the page has no dictionary entry or headword.
    """
    webpage = self._get_raw(word)
    data = BeautifulSoup(webpage, "html.parser")
    content = {}
    card = data.find('div', attrs={'class': 'card'})
    entry = card.find(  # just get the first one
        attrs={'class': 'dictionary-entry'})
    if not entry:
        raise NotFoundError(word)
    content['explains'] = []
    # word can be existing in both English & Spanish
    word_element = (card.find(attrs={'id': 'headword-en'})
                    or card.find(attrs={'id': 'headword-es'}))
    if word_element is None:
        raise NotFoundError(word)
    content['word'] = word_element.text
    # CSS classes differ between the 'neodict' (Spanish->English) and
    # 'neoharrap' (English->Spanish) layouts; both are tried below.
    pattern1 = {'class': 'dictionary-neodict-indent-1'}
    pattern2 = {'class': 'dictionary-neodict-indent-2'}
    pattern3 = {'class': 'dictionary-neodict-indent-3'}
    pattern_order = {'class': 'dictionary-neodict-translation'}
    pattern_example = {'class': 'dictionary-neodict-example'}
    pattern1_en = {'class': 'dictionary-neoharrap-indent-1'}
    pattern2_en = {'class': 'dictionary-neoharrap-indent-2'}
    pattern_order_en = {'class': 'dictionary-neoharrap-translation'}
    speeches = card.find_all(attrs={'class': 'part_of_speech'})
    for (speech, category) in zip(
            speeches,
            entry.find_all(attrs=pattern1)
            or entry.find_all(attrs=pattern1_en)):
        result = []
        content['explains'].append([speech.text, result])
        context = category.find(attrs={'class': 'context'}).text
        explains = []
        for explain in (category.find_all(attrs=pattern2)
                        or category.find_all(attrs=pattern2_en)):
            orders = (explain.find_all(attrs=pattern_order)
                      or explain.find_all(attrs=pattern_order_en))
            if orders:
                # e.g.
                #
                # ('a. forgiveness', 'b. pardon (law)')
                #
                indices = tuple(
                    map(lambda x: x.text.replace('\xa0', ' ').strip(),
                        orders))
            else:
                continue
            examples = explain.find_all(attrs=pattern3)
            for (example, index) in zip(examples, indices):
                t = tuple(example.find(attrs=pattern_example))
                (spanish, english) = (t[0].text, t[2].text)
                explains.append((index, spanish, english))
            if (not examples) and (len(indices) > 0):
                # Translations without example sentences.
                for index in indices:
                    explains.append((index, ))
        result.append([context, explains])
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record
def query(self, word: str):
    """Scrape Yahoo dictionary results for *word*.

    Builds content with keys: 'word', optional 'pronounce' and 'sound',
    'explain' (main results) and 'verbose' (variation + additional results).

    Raises:
        NotFoundError: when the page has no '#term' element.
    """
    webpage = self._get_raw(word)
    data = BeautifulSoup(webpage, "html.parser")
    content = {}
    # handle record.word
    try:
        content['word'] = data.find('span', id='term').text
    except AttributeError:
        raise NotFoundError(word)
    # handle pronounce ("KK[...]"-style pairs)
    pronu_value = data.find('span', id='pronunciation_pos').text
    if pronu_value:
        content['pronounce'] = []
        # Raw string: the old plain literal relied on '\w' surviving as-is,
        # which now triggers an invalid-escape warning.
        for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value):
            content['pronounce'].append(match.group(1, 2))
    # handle sound: a hidden span embeds JSON with audio types and urls
    proun_sound = data.find(
        'span',
        style="display: none;",
        id="iconStyle",
        class_="tri",
        title="http://product.dreye.com.tw/",
    )
    if proun_sound:
        content['sound'] = {}
        d = json.loads(proun_sound.text)
        sound_types_and_urls = (d.get('sound_url_1', []) +
                                d.get('sound_url_2', []))
        sound_accents = (d.get('sound_type_1', []) +
                         d.get('sound_type_2', []))
        for sound_type_and_url, sound_accent in zip(
                sound_types_and_urls, sound_accents):
            if sound_type_and_url:
                sound_type, sound_url = list(sound_type_and_url.items())[0]
                content['sound'].setdefault(sound_type, {}).setdefault(
                    sound_accent, []).append(sound_url)
    # Handle explain
    main_explanations = data.find(
        class_='dd algo explain mt-20 lst DictionaryResults')
    if main_explanations:
        main_explanations = itertools.zip_longest(
            main_explanations.find_all(class_='compTitle mb-10'),
            main_explanations.find_all(
                class_='compArticleList mb-15 ml-10',
            ))
    else:
        main_explanations = ""
    content['explain'] = []
    for part_of_speech, meaning in main_explanations:
        node = [part_of_speech.text] if part_of_speech else ['']
        for item in meaning.find_all('li', class_='ov-a'):
            pack = [item.find('h4').text]
            for example in (
                    tag for tag in item.find_all('span')
                    if 'line-height: 17px;' not in tag.get('style', {})):
                sentence = ''
                for w in example.contents:
                    if w.name == 'b':
                        # Emphasize the queried word.
                        sentence += '*' + w.text + '*'
                    else:
                        try:
                            sentence += w
                        except Exception:
                            # Non-string children can't be concatenated;
                            # skip them.
                            pass
                pack.append((sentence.strip()))
            node.append(pack)
        content['explain'].append(node)
    # verbose info
    part_of_speech_list, meaning_list = [], []
    content['verbose'] = []
    variation_explanations = data.find(
        class_='dd algo variation fst DictionaryResults')
    if variation_explanations:
        part_of_speech_list.extend(
            variation_explanations.find_all(class_='compTitle'))
        meaning_list.extend(
            variation_explanations.find_all(class_='compArticleList'))
    additional_explanations = data.find(
        class_='dd algo othersNew lst DictionaryResults')
    if additional_explanations:
        part_of_speech_list.extend(
            additional_explanations.find_all(class_='compTitle mt-26'))
        meaning_list.extend(
            additional_explanations.find_all(class_='compArticleList'))
    more_explanations = itertools.zip_longest(part_of_speech_list,
                                              meaning_list)
    for part_of_speech, meaning in more_explanations:
        node = [part_of_speech.text] if part_of_speech else ['']
        if meaning:
            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]
                # Use tag.get('style', {}) here too: the explain loop above
                # already guards against spans without a style attribute,
                # while this loop used tag['style'] and raised KeyError.
                for example in (
                        tag for tag in item.find_all('span')
                        if 'line-height: 17px;' not in tag.get('style', {})):
                    sentence = ''
                    for w in example.contents:
                        if w.name == 'b':
                            sentence += '*' + w.text + '*'
                        else:
                            try:
                                sentence += w
                            except Exception:
                                pass
                    pack.append((sentence.strip()))
                node.append(pack)
        content['verbose'].append(node)
    record = Record(
        word=word,
        content=json.dumps(content),
        source=self.provider,
    )
    return record