Python Recordの例、zdict.models.Record Pythonの例

コード例 #1

0

ファイルを表示

 def test_query_normal(self, Record):
     """query() must build a Record with the expected word/content/source."""
     raw_response = '{"en":[{"definitions":[{"definition":"string"}]}]}'
     self.dict._get_raw = Mock(return_value=raw_response)

     self.dict.query('mock')

     expected_kwargs = {
         'word': 'mock',
         'content': '{"definition": "string"}',
         'source': 'wiktionary',
     }
     Record.assert_called_with(**expected_kwargs)

コード例 #2

0

ファイルを表示

ファイル: test_spanish.py プロジェクト: wdv4758h/zdict

 def test_query_normal(self, Record):
     """query() must pass the fixture content, re-serialised, to Record."""
     self.dict._get_raw = Mock(return_value=self.raw_html)

     self.dict.query(self.word, self.timeout)

     # Round-trip the fixture through json so the expectation carries
     # json.dumps' own formatting.
     expected_content = json.dumps(json.loads(self.content))
     Record.assert_called_with(
         word=self.word, content=expected_content, source='spanish',
     )

コード例 #3

0

ファイルを表示

ファイル: test_urban.py プロジェクト: ilcic/zdict

 def test_query_normal(self, Record):
     """query() must forward the raw response text unchanged to Record."""
     self.dict._get_raw = Mock(return_value='{"mock": true}')

     self.dict.query('mock')

     expected_kwargs = dict(
         word='mock', content='{"mock": true}', source='urban',
     )
     Record.assert_called_with(**expected_kwargs)

コード例 #4

0

ファイルを表示

ファイル: test_oxford.py プロジェクト: zdict/zdict

    def test_query_normal(self, Record):
        """query() must wrap the raw sample response in a Record."""
        # Stub both the HTTP fetch and the credential lookup.
        self.dict._get_raw = Mock(return_value=SAMPLE_RESPONSE)
        self.dict._get_app_key = Mock(return_value=('id', 'key'))

        self.dict.query('string')

        expected_kwargs = {
            'word': 'string',
            'content': SAMPLE_RESPONSE,
            'source': 'oxford',
        }
        Record.assert_called_with(**expected_kwargs)

コード例 #5

0

ファイルを表示

ファイル: test_jisho.py プロジェクト: wdv4758h/zdict

 def test_query_normal(self, Record):
     """query() must store the serialised fixture content as-is."""
     serialised = json.dumps(self.content)
     self.content_json = serialised
     self.dict._get_raw = Mock(return_value=serialised)

     self.dict.query('Japanese', timeout=5)

     Record.assert_called_with(
         word='Japanese', content=self.content_json, source='jisho',
     )

コード例 #6

0

ファイルを表示

ファイル: test_oxford.py プロジェクト: zhangf911/zdict

    def test_query_normal(self, Record):
        """A normal query hands the untouched sample response to Record."""
        stubbed_fetch = Mock(return_value=SAMPLE_RESPONSE)
        stubbed_key = Mock(return_value=('id', 'key'))
        self.dict._get_raw = stubbed_fetch
        self.dict._get_app_key = stubbed_key

        self.dict.query('string')

        Record.assert_called_with(
            word='string', content=SAMPLE_RESPONSE, source='oxford',
        )

コード例 #7

0

ファイルを表示

ファイル: dictionary.py プロジェクト: zdict/zdict

    def save(self, query_record: Record, word: str):
        """Store *query_record*, updating the cached row for *word* if needed.

        When ``query_db_cache(word)`` returns None the record is inserted.
        Otherwise the cached row's content is overwritten only when its
        decoded JSON differs from the decoded JSON of the new record.
        """
        cached = self.query_db_cache(word)

        if cached is None:
            query_record.save(force_insert=True)
            return

        # Compare decoded JSON values, not the raw strings.
        cached_content = json.loads(cached.content)
        fresh_content = json.loads(query_record.content)
        if cached_content == fresh_content:
            return

        cached.content = query_record.content
        cached.save()

コード例 #8

0

ファイルを表示

ファイル: dictionary.py プロジェクト: ilcic/zdict

    def save(self, query_record: Record, word: str):
        """Insert *query_record*, or refresh the cached row for *word*.

        A missing cache entry leads to a forced insert. An existing entry
        is rewritten only if its decoded JSON content is not equal to the
        decoded JSON content of *query_record*.
        """
        existing = self.query_db_cache(word)

        if existing is not None:
            # Equality is checked on the parsed JSON, not on the text.
            unchanged = (
                json.loads(existing.content)
                == json.loads(query_record.content)
            )
            if not unchanged:
                existing.content = query_record.content
                existing.save()
        else:
            query_record.save(force_insert=True)

コード例 #9

0

ファイルを表示

ファイル: test_wiktionary.py プロジェクト: zdict/zdict

 def test_query_normal(self, Record):
     """query() must reshape the API payload into the stored list format."""
     raw_response = '''
     {"en":[{"partOfSpeech":"part_of_speech",
     "definitions":[{"definition":"definition","examples":["example"]}]}]}
     '''
     self.dict._get_raw = Mock(return_value=raw_response)

     self.dict.query('mock')

     expected_definition = {
         "definition": "definition",
         "examples": ["example"],
     }
     expected_entry = {
         "part_of_speech": "part_of_speech",
         "definitions": [expected_definition],
     }
     Record.assert_called_with(
         word='mock',
         content=json.dumps([expected_entry]),
         source='wiktionary',
     )

コード例 #10

0

ファイルを表示

ファイル: test_moe.py プロジェクト: zhengda/zdict

    def test_show(self):
        """Smoke-test show() with a hand-written sample payload."""
        payload = '''
        {
            "heteronyms": [{
                "bopomofo": "ㄧㄢˋ",
                "bopomofo2": "yàn",
                "definitions": [{
                    "def": "假的、偽造的。",
                    "example": ["如：「贗品」。"],
                    "quote": ["..."],
                    "type": "形",
                    "synonyms": "尛",
                    "antonyms": "萌"
                }],
                "pinyin": "yàn"
            }],
            "non_radical_stroke_count": 15,
            "radical": "貝",
            "stroke_count": 22,
            "title": "贗"
        }
        '''
        record = Record(word='贗', content=payload, source=self.dict.provider)

        # No assertion: the test passes as long as show() does not raise.
        self.dict.show(record)

コード例 #11

0

ファイルを表示

    def query(self, word: str):
        """Fetch *word* and return a Record holding its first definition.

        The raw response is decoded as JSON, the first definition of the
        first 'en' entry is extracted, its HTML markup is stripped, and the
        result is stored as ``{"definition": ...}``.

        Raises:
            NotFoundError: if ``_get_raw`` raises QueryError, or if the
                response has no first English definition (missing key or
                empty list).
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        payload = json.loads(raw)

        try:
            # Get the first definition string from JSON.
            definition = payload['en'][0]['definitions'][0]['definition']
        except (KeyError, IndexError):
            # The API can return JSON without an 'en' language, and an empty
            # 'en' or 'definitions' list is just as much "not found".
            raise NotFoundError(word)

        # Clean the definition string from HTML tags.
        plain_definition = BeautifulSoup(definition, "html.parser").text

        record = Record(
            word=word,
            content=json.dumps({'definition': plain_definition}),
            source=self.provider,
        )

        return record

コード例 #12

0

ファイルを表示

ファイル: dictionary.py プロジェクト: ilcic/zdict

 def query_db_cache(self, word: str) -> Record:
     """Return the stored Record for *word* from this provider, or None."""
     try:
         return Record.get(word=word, source=self.provider)
     except Record.DoesNotExist:
         # No stored row for this word/provider pair.
         return None

コード例 #13

0

ファイルを表示

    def query(self, word: str):
        """Template query: fetch the page for *word* and return a Record.

        As written, ``content`` stays empty; the parsing step is left to be
        filled in.
        """
        html = self._get_raw(word)
        soup = BeautifulSoup(html, "html.parser")
        content = {}

        # Parse `soup` and fill the information you need into `content`.
        #
        # Use
        # ```
        # except AttributeError:
        #    raise NotFoundError(word)
        # ```
        # when the word the user queried is not found in this dictionary.

        if self.args.verbose:
            # For verbose mode
            pass

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

コード例 #14

0

ファイルを表示

ファイル: dictionary.py プロジェクト: zdict/zdict

 def query_db_cache(self, word: str) -> Record:
     """Look up *word* for this provider; None when no row exists."""
     try:
         cached = Record.get(word=word, source=self.provider)
     except Record.DoesNotExist:
         cached = None
     return cached

コード例 #15

0

ファイルを表示

    def test_show(self):
        """Smoke-test show() with a sample list-format payload."""
        payload = '''
        [{"part_of_speech":"part_of_speech",
        "definitions":[{"definition": "definition","examples":["example"]}]}]
        '''

        record = Record(
            word="string", content=payload, source=self.dict.provider,
        )
        # No assertion: the test passes as long as show() does not raise.
        self.dict.show(record)

コード例 #16

0

ファイルを表示

    def query(self, word: str):
        """Parse the page for *word* into a version-2 content Record.

        Raises:
            NotFoundError: if ``parse_summary`` raises AttributeError or
                ``parse_explain`` raises IndexError.
        """
        page = bs4.BeautifulSoup(self._get_raw(word), "html.parser")

        # Please bump the version if the format changes again;
        # the `show` function acts according to the version number.
        #
        # Details of each version:
        #
        # The original format, from before version numbers existed:
        # content = {
        #     'word': ...,
        #     'pronounce': ...,
        #     'sound': (optional),
        #     'explain': [...],
        #     'verbose': [...],
        # }
        #
        # Version 2, yahoo dictionary content is provided by Dy.eye
        # at that moment:
        # content = {
        #     'version': 2,
        #     'summary': {
        #         'word': ...,
        #         'pronounce': [('KK', '...'), (...)],  // optional.
        #                                               // e.g. 'google'
        #         'explain': [(optional)],  # 'hospitalized' is summary-only
        #         'grammar': [(optional)],
        #     },
        #     'explain': [...],
        #     'verbose': [(optional)],
        # }
        content = {'version': 2}

        # Summary (required)
        try:
            content['summary'] = self.parse_summary(page, word)
        except AttributeError:
            raise NotFoundError(word)

        # Explain (required)
        try:
            content['explain'] = self.parse_explain(page)
        except IndexError:
            raise NotFoundError(word)

        # Verbose (optional)
        content['verbose'] = self.parse_verbose(page)

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

コード例 #17

0

ファイルを表示

 def test_query_normal(self, Record):
     """A normal query stores the reshaped definitions list as JSON."""
     api_payload = '''
     {"en":[{"partOfSpeech":"part_of_speech",
     "definitions":[{"definition":"definition","examples":["example"]}]}]}
     '''
     self.dict._get_raw = Mock(return_value=api_payload)

     self.dict.query('mock')

     definitions = [{"definition": "definition", "examples": ["example"]}]
     stored = [{"part_of_speech": "part_of_speech",
                "definitions": definitions}]
     expected_kwargs = {
         'word': 'mock',
         'content': json.dumps(stored),
         'source': 'wiktionary',
     }
     Record.assert_called_with(**expected_kwargs)

コード例 #18

0

ファイルを表示

ファイル: test_yandex.py プロジェクト: zhangf911/zdict

    def test_show(self):
        """Smoke-test show() with a sample translation payload."""
        payload = '''
        {
            "code": 200,
            "lang": "ru-en",
            "text": ["house"]
        }
        '''
        record = Record(word='дом', content=payload, source=self.dict.provider)

        # No assertion: the test passes as long as show() does not raise.
        self.dict.show(record)

コード例 #19

0

ファイルを表示

    def query(self, word: str):
        """Return a Record with the raw response for *word*.

        Raises:
            NotFoundError: if the decoded response has a falsy 'data' value.
        """
        raw = self._get_raw(word)

        # Decode only to inspect 'data'; the Record keeps the raw text.
        if not json.loads(raw)['data']:
            raise NotFoundError(word)

        return Record(
            word=word,
            content=raw,
            source=self.provider,
        )

コード例 #20

0

ファイルを表示

    def query(self, word: str):
        """Return a Record listing every English definition of *word*.

        The stored content is a JSON list with one item per entry under
        'en': ``{"part_of_speech": ..., "definitions": [...]}``. Each
        definition holds the HTML-stripped 'definition' text and, when the
        API supplies them, the HTML-stripped 'examples'.

        Raises:
            NotFoundError: if ``_get_raw`` raises QueryError or the response
                has no 'en' key.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        payload = json.loads(raw)

        try:
            english_entries = payload['en']
        except KeyError:
            # API can return JSON that does not contain 'en' language.
            raise NotFoundError(word)

        def strip_html(markup):
            return BeautifulSoup(markup, "html.parser").text

        # One item per part of speech, in the order the API returned them.
        r_content = []
        for entry in english_entries:
            definitions = []
            r_content.append({
                'part_of_speech': entry['partOfSpeech'],
                'definitions': definitions,
            })

            for raw_definition in entry['definitions']:
                item = {'definition': strip_html(raw_definition['definition'])}

                # Examples are optional in the API response.
                if 'examples' in raw_definition:
                    item['examples'] = [
                        strip_html(example)
                        for example in raw_definition['examples']
                    ]

                definitions.append(item)

        return Record(
            word=word,
            content=json.dumps(r_content),
            source=self.provider,
        )

コード例 #21

0

ファイルを表示

ファイル: urban.py プロジェクト: ysj123688/zdict

    def query(self, word: str):
        """Return a Record with the raw response text for *word*.

        Raises:
            NotFoundError: if the response text contains "no_results".
        """
        raw = self._get_raw(word)

        # Plain substring check on the undecoded response.
        found = "no_results" not in raw
        if not found:
            raise NotFoundError(word)

        return Record(word=word, content=raw, source=self.provider)

コード例 #22

0

ファイルを表示

    def query(self, word: str):
        """Return a Record with the raw response for *word*.

        Raises:
            NotFoundError: if ``_get_raw`` raises QueryError.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        return Record(word=word, content=raw, source=self.provider)

コード例 #23

0

ファイルを表示

    def query(self, word: str):
        """Return a Record with the raw response for *word*.

        The response is decoded only to check that it carries results; the
        Record stores the original response text.

        Raises:
            NotFoundError: if the decoded response has no non-empty 'list'
                (empty, null, or missing).
        """
        content_str = self._get_raw(word)
        content_dict = json.loads(content_str)

        # Treat a missing or null 'list' like an empty one, so that case is
        # reported as "not found" instead of raising KeyError or caching a
        # record with no entries.
        if not content_dict.get('list'):
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=content_str,
            source=self.provider,
        )

        return record

コード例 #24

0

ファイルを表示

    def test_show(self):
        """Smoke-test show() with a one-entry sample payload."""
        payload = '''
        {
            "list": [
                {
                    "word": "mock",
                    "definition": "Mock",
                    "example": "..."
                }
            ]
        }
        '''
        record = Record(word='mock', content=payload, source='urban')

        # No assertion: the test passes as long as show() does not raise.
        self.dict.show(record)

コード例 #25

0

ファイルを表示

def get_pyjoke(pyjokes, word: str):
    """Return a Record with a random joke mentioning *word*, or None.

    A joke matches when *word* equals one of its whitespace-separated
    tokens after every non-alphanumeric character has been dropped from
    the token. Returns None when *pyjokes* is falsy or nothing matches.
    """
    if not pyjokes:
        return

    def mentions_word(joke):
        # very basic string searching in jokes
        tokens = (
            ''.join(char for char in token if char.isalnum())
            for token in joke.split()
        )
        return word in tokens

    try:
        candidates = [
            joke for joke in pyjokes.get_jokes() if mentions_word(joke)
        ]
        # random.choice raises IndexError on an empty list.
        chosen = random.choice(candidates)
    except IndexError:
        return

    return Record(word=word, content=chosen, source='pyjokes')

コード例 #26

0

ファイルを表示

    def test_show(self):
        """Smoke-test show() with a hand-written sample payload."""
        payload = '''
        {
            "h": [{
                "T": "bo̍k-ní",
                "_": "928",
                "d": [{
                    "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~ ...",
                    "type": "`名~"
                }]
            }],
            "t": "`木~`耳~"
        }
        '''
        record = Record(word='木耳', content=payload, source=self.dict.provider)

        # No assertion: the test passes as long as show() does not raise.
        self.dict.show(record)

コード例 #27

0

ファイルを表示

ファイル: oxford.py プロジェクト: zdict/zdict

    def query(self, word: str):
        """Fetch *word* with the app credentials and return a Record.

        Raises:
            NotFoundError: if a QueryError is raised while fetching; a red
                'Oxford: ...' message is printed first.
        """
        try:
            app_id, app_key = self._get_app_key()
            auth_headers = {'app_id': app_id, 'app_key': app_key}
            content = self._get_raw(word, headers=auth_headers)
        except QueryError as exception:
            # Map the status code to a message, with a generic fallback.
            reason = self.status_code.get(
                exception.status_code, 'Some bad thing happened')
            self.color.print('Oxford: ' + reason, 'red')
            raise NotFoundError(exception.word)

        return Record(word=word, content=content, source=self.provider)

コード例 #28

0

ファイルを表示

ファイル: naer.py プロジェクト: firstver/zdict

    def query(self, word: str):
        """Scrape the result table for *word* and return a Record.

        Stored content is ``{"title": word, "sources": {...}}`` where each
        source name maps to a list of (English, Chinese) term pairs taken
        from the rows with class "dash".

        Raises:
            NotFoundError: if no such row yields a source.
        """
        # NOTE(review): certificate verification is disabled and the
        # resulting urllib3 warnings are silenced.
        requests.packages.urllib3.disable_warnings()
        html = self._get_raw(word, verify=False)

        def cell(row, css_class):
            return row.find("td", attrs={"class": css_class})

        sources = defaultdict(list)
        rows = BeautifulSoup(html, "html.parser").find_all(
            "tr", {"class": "dash"})
        for row in rows:
            source_name = cell(row, "sourceW").find("a").text.strip()
            english = cell(row, "ennameW").text.strip()
            chinese = cell(row, "zhtwnameW").text.strip()
            sources[source_name].append((english, chinese))

        if not sources:
            raise NotFoundError(word)

        payload = {"title": word, "sources": sources}
        return Record(
            word=word, content=json.dumps(payload), source=self.provider
        )

コード例 #29

0

ファイルを表示

    def query(self, word: str):
        """Return a Record with the raw response for *word*.

        Raises:
            NotFoundError: if ``_get_raw`` raises QueryError, or the decoded
                response's 'code' is not 200 (a 'Yandex: ...' message is
                printed first).
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        status = json.loads(raw).get('code')
        if status == 200:
            return Record(word=word, content=raw, source=self.provider)

        # https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/#codes
        message = self.status_code.get(
            status, 'Some bad thing happened with Yandex')
        print('Yandex: ' + message)
        raise NotFoundError(word)

コード例 #30

0

ファイルを表示

ファイル: test_moe.py プロジェクト: zhengda/zdict

 def test_query_normal(self, Record):
     """query() must pass the raw response text straight to Record."""
     self.dict._get_raw = Mock(return_value='{}')

     self.dict.query('萌')

     expected_kwargs = {'word': '萌', 'content': '{}', 'source': 'moe'}
     Record.assert_called_with(**expected_kwargs)

コード例 #31

0

ファイルを表示

ファイル: test_moe.py プロジェクト: wdv4758h/zdict

 def test_query_normal(self, Record):
     """query() with a timeout still passes the raw text to Record."""
     self.dict._get_raw = Mock(return_value='{}')

     self.dict.query('萌', timeout=666)

     Record.assert_called_with(
         word='萌',
         content='{}',
         source='moe',
     )

コード例 #32

0

ファイルを表示

ファイル: test_yandex.py プロジェクト: zhangf911/zdict

 def test_query_normal(self, Record):
     """A 200 response must be stored verbatim in the Record."""
     raw_response = '{"code":200,"lang":"ru-en","text":["house"]}'
     self.dict._get_raw = Mock(return_value=raw_response)

     self.dict.query('дом')

     Record.assert_called_with(
         word='дом', content=raw_response, source='yandex',
     )

コード例 #33

0

ファイルを表示

ファイル: spanish.py プロジェクト: zdict/zdict

    def query(self, word: str):
        """Scrape the dictionary card for *word* and return a Record.

        The stored content is ``{"word": ..., "explains": [...]}``. Each
        item of ``explains`` is ``[speech_text, result]`` and each item of
        ``result`` is ``[category_text, explains]``, where the inner
        ``explains`` holds ``(index,)`` or ``(index, sentences)`` tuples.

        Raises:
            NotFoundError: if neither dictionary card is present, or the
                card has no headword element.
        """
        webpage = self._get_raw(word)
        soup = BeautifulSoup(webpage, "html.parser")
        content = {}

        en_css = "#dictionary-neodict-en"
        es_css = "#dictionary-neodict-es"
        # Try the English card first and fall back to the Spanish one.
        card = soup.select_one(en_css) or soup.select_one(es_css)
        if card is None:
            raise NotFoundError(word)

        word_css = "div > div:nth-child(1) > span"
        word_element = card.select_one(word_css)
        if word_element is None:
            raise NotFoundError(word)
        content['word'] = word_element.text
        '''
        COPULAR VERB  # speech
            # categories_card
            1. (used to express a permanent quality)  # category_text
                # explanation
                a. ser  # index
                # examples
                    # example
                    The ocean is blue.
                    El océano es azul.
            2. (used to express a temporary state)
                a. estar
                    I'm not in a good mood today.
                    Hoy no estoy de buen humor.

                    The sky is cloudy.
                    El cielo está nublado.
        ... (Another speech if it has.)
        '''
        speech_pattern = "div > div:nth-child(2)"
        # "#dictionary-neodict-en > div > div:nth-child(2)"

        # Start to grab
        content['explains'] = []
        speech = card.select_one(speech_pattern)
        # Walk the speech blocks through next_sibling until it is falsy.
        while speech:
            # `result` is appended to content['explains'] right away and is
            # filled in place by the category loop below.
            result = []
            # Unpacking requires each speech block to have exactly two
            # children.
            speech_text, categories_card = speech.children
            speech_text_element = speech_text.find(['a', 'span'])
            content['explains'].append([speech_text_element.text, result])

            for category in categories_card.children:
                # Likewise, each category must have exactly two children.
                category_text_element, explanations_card = category.children
                category_text = category_text_element.text

                explains = []
                for explanation in explanations_card.children:
                    # NOTE: `_` is a real loop variable here, and the name is
                    # reused inside the list comprehension below.
                    for _ in explanation.children:
                        # The last child holds the examples; everything
                        # before it forms the index text.
                        index_elements, examples = (_.contents[:-1],
                                                    _.contents[-1])
                        index = ' '.join([
                            _.text.strip() for _ in index_elements if _ != ' '
                        ])

                        # An index without examples is stored as a 1-tuple.
                        if (not examples) and index:
                            explains.append((index, ))
                            continue

                        sentences = []
                        for example in examples:
                            t = example.find_all()
                            # Should be only 3 elements
                            # [text, —,  text]
                            '''
                            When Spanish => English, it will show Spanish first
                            When English => Spanish, it will show English first
                            So, the variables below are not definitely
                            '''
                            # Keep the first and third elements, skipping
                            # the separator between them.
                            sentences.append((t[0].text, t[2].text))
                        explains.append((index, sentences))

                result.append([category_text, explains])
            speech = speech.next_sibling

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record

コード例 #34

0

ファイルを表示

ファイル: yahoo.py プロジェクト: hsukvn/zdict

    def query(self, word: str):
        """Scrape the dictionary page for *word* and return a Record.

        The stored content may contain:
          - 'word': text of the ``span#term`` element (required);
          - 'pronounce': list of (label, bracketed pronunciation) pairs;
          - 'sound': list of ('mp3' | 'ogg', url) pairs;
          - 'explain': main explanations, one node per part of speech;
          - 'verbose': variation and additional explanations, same shape.

        'explain' and 'verbose' are always present (possibly empty).

        Raises:
            NotFoundError: if the page has no ``span#term`` element.
        """
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce (the element may be absent from the page)
        pronu_tag = data.find('span', id='pronunciation_pos')
        pronu_value = pronu_tag.text if pronu_tag is not None else ''
        if pronu_value:
            content['pronounce'] = [
                match.group(1, 2)
                for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value)
            ]

        # handle sound
        pronu_sound = data.find(class_='proun_sound')
        if pronu_sound:
            content['sound'] = [
                (label,
                 pronu_sound.find(class_='source',
                                  attrs={'data-type': mime}).attrs['data-src'])
                for label, mime in (('mp3', 'audio/mpeg'),
                                    ('ogg', 'audio/ogg'))
            ]

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            main_pairs = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10'))
        else:
            main_pairs = ()
        content['explain'] = self._parse_explanations(main_pairs)

        # verbose info -- built once, independently of the main
        # explanations, so the key exists even when there are none.
        part_of_speech_list, meaning_list = [], []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        content['verbose'] = self._parse_explanations(
            itertools.zip_longest(part_of_speech_list, meaning_list))

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record

    @staticmethod
    def _parse_explanations(pairs):
        """Turn (part_of_speech, meaning) tag pairs into explanation nodes.

        Either element of a pair may be None (the pairs come from
        ``zip_longest``). Each node is ``[part_of_speech_text, pack, ...]``
        and each pack is ``[heading_text, example_sentence, ...]``.
        """
        nodes = []
        for part_of_speech, meaning in pairs:
            node = [part_of_speech.text] if part_of_speech else ['']

            if meaning:
                for item in meaning.find_all('li', class_='ov-a'):
                    pack = [item.find('h4').text]

                    for example in item.find_all('span'):
                        if 'line-height: 17px;' in example['style']:
                            continue

                        pieces = []
                        for w in example.contents:
                            if w.name == 'b':
                                # Mark bold text with asterisks.
                                pieces.append('*' + w.text + '*')
                            elif isinstance(w, str):
                                # Plain text nodes are kept; other tags,
                                # which cannot be concatenated to a string,
                                # are skipped.
                                pieces.append(w)

                        pack.append(''.join(pieces).strip())
                    node.append(pack)

            nodes.append(node)
        return nodes

コード例 #35

0

ファイルを表示

    def complete(self, text, state):
        """Return the next stored word starting with *text*, or None.

        A call with ``state == 0`` starts a new query; later calls continue
        iterating over the same result set. None is returned once the
        matches are exhausted, which is how a completer signals the end of
        its candidates, instead of letting StopIteration escape.
        """
        if state == 0:  # new query
            self.records = iter(
                Record.select().where(Record.word.startswith(text)))

        record = next(self.records, None)
        return None if record is None else record.word

コード例 #36

0

ファイルを表示

ファイル: completer.py プロジェクト: zdict/zdict

    def complete(self, text, state):
        """Return the next stored word that starts with *text*, or None.

        ``state == 0`` begins a fresh query; any other state keeps
        consuming the iterator created by the previous fresh query. When no
        match is left the method returns None rather than raising
        StopIteration.
        """
        if state == 0:  # new query
            self.records = iter(
                Record.select().where(Record.word.startswith(text)))

        match = next(self.records, None)
        if match is None:
            return None
        return match.word

コード例 #37

0

ファイルを表示

ファイル: test_urban.py プロジェクト: wdv4758h/zdict

 def test_query_normal(self, Record):
     """query() with a timeout must forward the raw text to Record."""
     self.dict._get_raw = Mock(return_value='{"mock": true}')

     self.dict.query("mock", timeout=666)

     Record.assert_called_with(
         word="mock",
         content='{"mock": true}',
         source="urban",
     )

コード例 #38

0

ファイルを表示

    def query(self, word: str):
        """Fetch *word* and return a Record of basic and related words.

        Stored content is ``{"basic_words": [...], "related_words": [...]}``.
        Every item has 'text' and 'pronounce', plus 'sentences' when
        ``self.args.verbose`` is set. ``basic_words`` is sorted by 'text'.

        Raises:
            NotFoundError: if the response has no non-empty "列表".
        """
        webpage = self._get_raw(word)
        response = json.loads(BeautifulSoup(webpage, "html.parser").text)

        # Not Found
        if not response.get("列表"):
            raise NotFoundError(word)

        # Show the Chinese word from iTaigi instead of user input if possible
        with suppress(KeyError, IndexError):
            word = response["列表"][0]["外語資料"]

        def describe(entry):
            """Collect text, pronounce and (verbose only) sentences."""
            text = self._get_word_text(entry)
            pronounce = self._get_word_pronounce(entry)
            info = {'text': text, 'pronounce': pronounce}
            if self.args.verbose:
                info['sentences'] = self._get_word_sentences(text, pronounce)
            return info

        content = {}

        # Fetch basic words with text, pronounce and sentence
        basic_entries = response["列表"][0]["新詞文本"]
        # Fix issue-452 for iTaigi testings:
        # iTaigi returns basic_words in random order. Since they are stored
        # in a list, sort them before saving into the database, or the unit
        # tests would fail.
        content['basic_words'] = sorted(
            [describe(entry) for entry in basic_entries],
            key=lambda item: item['text'],
        )

        # Fetch related words
        related_entries = response["其他建議"]
        content['related_words'] = [
            describe(entry) for entry in related_entries
        ]

        # Save content with word and provider.
        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

コード例 #39

0

ファイルを表示

ファイル: api.py プロジェクト: ilcic/zdict

def dump(pattern=r'^.*$'):
    """Return the ``word`` of every stored record that fully matches *pattern*.

    :param pattern: regular expression applied with ``re.fullmatch`` to each
        record's ``word``.
    :return: list of matching words, in the order ``Record.select()`` yields
        the records.
    """
    matched_words = []
    for record in Record.select():
        if re.fullmatch(pattern, record.word) is not None:
            matched_words.append(record.word)
    return matched_words

コード例 #40

0

ファイルを表示

ファイル: spanish.py プロジェクト: zhangf911/zdict

    def query(self, word: str):
        """Look up *word* and build a ``Record`` from the scraped page.

        The raw page returned by ``self._get_raw(word)`` is parsed with
        BeautifulSoup.  The headword and, for each part of speech, the
        context with its ordered translations (and example pairs when
        present) are collected into a dict that is stored JSON-encoded.

        :param word: the word to look up.
        :return: a ``Record`` with ``word``, JSON ``content`` and
            ``source=self.provider``.
        :raises NotFoundError: if the page has no card, no dictionary entry
            or no headword element.
        """
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")

        card = data.find('div', attrs={'class': 'card'})
        if card is None:
            # A page without the card <div> means there is no result;
            # report it like every other "missing element" case instead of
            # crashing with AttributeError on ``card.find`` below.
            raise NotFoundError(word)

        # just get the first one
        entry = card.find(attrs={'class': 'dictionary-entry'})
        if not entry:
            raise NotFoundError(word)

        # word can be existing in both English & Spanish
        word_element = (card.find(attrs={'id': 'headword-en'})
                        or card.find(attrs={'id': 'headword-es'}))
        if word_element is None:
            raise NotFoundError(word)

        # Key order ('explains' before 'word') is kept so the serialized
        # JSON stays identical to what was produced before.
        content = {'explains': [], 'word': word_element.text}

        pattern1 = {'class': 'dictionary-neodict-indent-1'}
        pattern2 = {'class': 'dictionary-neodict-indent-2'}
        pattern3 = {'class': 'dictionary-neodict-indent-3'}
        pattern_order = {'class': 'dictionary-neodict-translation'}
        pattern_example = {'class': 'dictionary-neodict-example'}
        pattern1_en = {'class': 'dictionary-neoharrap-indent-1'}
        pattern2_en = {'class': 'dictionary-neoharrap-indent-2'}
        pattern_order_en = {'class': 'dictionary-neoharrap-translation'}

        speeches = card.find_all(attrs={'class': 'part_of_speech'})
        # Fall back to the "neoharrap" class names when the "neodict" ones
        # match nothing.
        categories = (entry.find_all(attrs=pattern1)
                      or entry.find_all(attrs=pattern1_en))

        for speech, category in zip(speeches, categories):
            context = category.find(attrs={'class': 'context'}).text
            explains = []

            for explain in (category.find_all(attrs=pattern2)
                            or category.find_all(attrs=pattern2_en)):

                orders = (explain.find_all(attrs=pattern_order)
                          or explain.find_all(attrs=pattern_order_en))
                if not orders:
                    continue

                # e.g.
                #
                #   ('a. forgiveness', 'b. pardon (law)')
                #
                indices = tuple(
                    order.text.replace('\xa0', ' ').strip()
                    for order in orders)

                examples = explain.find_all(attrs=pattern3)

                for example, index in zip(examples, indices):
                    # Children 0 and 2 of the example element hold the two
                    # sentences; child 1 is skipped.
                    t = tuple(example.find(attrs=pattern_example))
                    (spanish, english) = (t[0].text, t[2].text)
                    explains.append((index, spanish, english))

                if not examples:
                    # No example sentences: keep the bare translations.
                    explains.extend((index, ) for index in indices)

            content['explains'].append([speech.text, [[context, explains]]])

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record

コード例 #41

0

ファイルを表示

    def query(self, word: str):
        """Look up *word* and build a ``Record`` from the scraped result page.

        The raw page returned by ``self._get_raw(word)`` is parsed with
        BeautifulSoup and the following keys are collected into the record
        content (stored JSON-encoded):

        * ``word``      -- text of the ``span#term`` element.
        * ``pronounce`` -- ``(name, "[...]")`` pairs, only when the
          pronunciation text is non-empty.
        * ``sound``     -- ``{sound_type: {accent: [url, ...]}}``, only when
          the hidden sound ``span`` is present.
        * ``explain``   -- one node per part of speech from the main
          explanation section.
        * ``verbose``   -- nodes built the same way from the variation and
          additional explanation sections.

        :param word: the word to look up.
        :return: a ``Record`` with ``word``, JSON ``content`` and
            ``source=self.provider``.
        :raises NotFoundError: if the page has no ``span#term`` element.
        """
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce
        # A missing pronunciation element is treated like an empty one
        # instead of crashing with AttributeError.
        pronu_element = data.find('span', id='pronunciation_pos')
        pronu_value = pronu_element.text if pronu_element is not None else ''
        if pronu_value:
            # Raw string: ``\w`` and ``\[`` are regex escapes, not string
            # escapes.
            content['pronounce'] = [
                match.group(1, 2)
                for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value)
            ]

        # handle sound
        proun_sound = data.find(
            'span',
            style="display: none;",
            id="iconStyle",
            class_="tri",
            title="http://product.dreye.com.tw/",
        )
        if proun_sound:
            content['sound'] = {}
            sound_info = json.loads(proun_sound.text)

            sound_types_and_urls = (sound_info.get('sound_url_1', []) +
                                    sound_info.get('sound_url_2', []))
            sound_accents = (sound_info.get('sound_type_1', []) +
                             sound_info.get('sound_type_2', []))

            for sound_type_and_url, sound_accent in zip(
                    sound_types_and_urls, sound_accents):
                if sound_type_and_url:
                    # Only the first (type, url) pair of each entry is used.
                    sound_type, sound_url = list(sound_type_and_url.items())[0]
                    content['sound'].setdefault(sound_type, {}).setdefault(
                        sound_accent, []).append(sound_url)

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            explanation_pairs = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10', ))
        else:
            explanation_pairs = ()

        content['explain'] = self._collect_explanation_nodes(
            explanation_pairs)

        # verbose info
        # Built once, independently of the main explanations, so that
        # ``content['verbose']`` is always present.
        part_of_speech_list, meaning_list = [], []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        content['verbose'] = self._collect_explanation_nodes(
            itertools.zip_longest(part_of_speech_list, meaning_list))

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record

    def _collect_explanation_nodes(self, pairs):
        """Turn ``(part_of_speech, meaning)`` element pairs into nodes.

        Each node is ``[part_of_speech_text, pack, pack, ...]`` where a pack
        is ``[h4_text, sentence, sentence, ...]`` for one ``li.ov-a`` item.
        Either element of a pair may be ``None`` (the pairs come from
        ``zip_longest``): a missing part of speech becomes ``''`` and a
        missing meaning contributes no packs.
        """
        nodes = []
        for part_of_speech, meaning in pairs:
            node = [part_of_speech.text] if part_of_speech else ['']

            if meaning is not None:
                for item in meaning.find_all('li', class_='ov-a'):
                    pack = [item.find('h4').text]
                    # Spans styled with 'line-height: 17px;' are skipped;
                    # spans without a style attribute are kept.
                    pack.extend(
                        self._join_example_sentence(tag)
                        for tag in item.find_all('span')
                        if 'line-height: 17px;' not in tag.get('style', ''))
                    node.append(pack)

            nodes.append(node)
        return nodes

    @staticmethod
    def _join_example_sentence(example):
        """Return the text of *example* with ``<b>`` children wrapped in ``*``.

        Plain-string children are appended as they are; any other child
        element is ignored.
        """
        parts = []
        for w in example.contents:
            if w.name == 'b':
                parts.append('*' + w.text + '*')
            elif isinstance(w, str):
                parts.append(w)
        return ''.join(parts).strip()