Beispiel #1
0
 def scrape_meaning(self, page):
     """
     Extract the meaning section of ``page`` as text.

     The markup delimited by ``TAG_MEANING`` is pulled out of the page,
     run through ``Utils.remove_tags`` and finally through
     ``Utils.remove_spaces``.
     """
     raw_meaning = Utils.text_between(page, *TAG_MEANING, force_html=True)
     return Utils.remove_spaces(Utils.remove_tags(raw_meaning))
Beispiel #2
0
 def first_synonym(self, html):
     """
     Extract the first synonym from ``html``.

     Returns a ``(Word, html)`` pair: the first synonym found between the
     ``TAG_SYNONYMS_DELIMITER`` markers, and a copy of ``html`` with the
     first occurrence of each of those two markers removed.
     """
     cleaned = Utils.remove_spaces(
         Utils.text_between(html, *TAG_SYNONYMS_DELIMITER, force_html=True)
     )
     # Drop only the first opening/closing marker so that the next synonym
     # becomes the first one in the returned markup.
     remaining = html.replace(TAG_SYNONYMS_DELIMITER[0], "", 1).replace(
         TAG_SYNONYMS_DELIMITER[1], "", 1
     )
     return Word(cleaned), remaining
Beispiel #3
0
 def scrape_examples(self, page):
     """
     Collect every example phrase found in ``page``.

     The page is scanned for the opening marker of
     ``TAG_PHRASE_DELIMITER``; for each hit the delimited fragment is
     extracted, passed through ``Utils.remove_tags`` and
     ``Utils.remove_spaces``, and appended to the result list.
     """
     opening = TAG_PHRASE_DELIMITER[0]
     found = []
     remaining = page
     while True:
         position = remaining.find(opening)
         if position < 0:
             return found
         fragment = Utils.text_between(
             remaining, *TAG_PHRASE_DELIMITER, force_html=True
         )
         found.append(Utils.remove_spaces(Utils.remove_tags(fragment)))
         # Continue scanning just past the opening marker that was handled.
         remaining = remaining[position + len(opening):]
Beispiel #4
0
 def synonyms(self, page):
     """
     Build the list of synonyms found in ``page``.

     Returns an empty list when the opening ``TAG_SYNONYMS`` marker is not
     present.  Otherwise each fragment delimited by
     ``TAG_SYNONYMS_DELIMITER`` inside the synonyms section is wrapped in
     a ``Word``.
     """
     found = []
     if page.find(TAG_SYNONYMS[0]) < 0:
         return found

     opening = TAG_SYNONYMS_DELIMITER[0]
     closing = TAG_SYNONYMS_DELIMITER[1]
     remaining = Utils.text_between(page, TAG_SYNONYMS[0], TAG_SYNONYMS[1], True)
     while remaining.find(opening) > -1:
         raw_synonym = Utils.text_between(remaining, opening, closing, True)
         found.append(Word(Utils.remove_spaces(raw_synonym)))
         # Consume the markers of the synonym just read so the next loop
         # iteration sees the following one.
         remaining = remaining.replace(opening, "", 1).replace(closing, "", 1)
     return found
Beispiel #5
0
 def __init__(self, word, meaning=None, synonyms=None, examples=None, extra=None):
     """
     Store the word and its scraped attributes.

     ``word`` is stripped and lower-cased; ``url`` is ``BASE_URL``
     formatted with the result of ``Utils.remove_accents`` on that
     normalized word.

     ``synonyms``, ``examples`` and ``extra`` default to a fresh empty
     list / list / dict per instance.  They are created here rather than
     in the signature because a mutable default value is evaluated once
     and would be shared by every instance that relies on it.
     """
     self.word = word.strip().lower()
     self.url = BASE_URL.format(Utils.remove_accents(self.word))
     self.meaning = meaning
     self.synonyms = [] if synonyms is None else synonyms
     self.extra = {} if extra is None else extra
     self.examples = [] if examples is None else examples
Beispiel #6
0
 def scrape_extra(self, page):
     """
     Return a dictionary of extra information.

     The region of ``page`` delimited by ``TAG_EXTRA`` is split into rows
     on ``TAG_EXTRA_SEP``.  After tag removal each row is expected to
     look like ``key: value``.  Only the first colon separates key from
     value, so a value may itself contain colons; rows without any colon
     are skipped instead of aborting the whole scrape.

     Scraping is best-effort: if one of the helpers raises, the entries
     collected so far are returned.
     """
     dict_extra = {}
     try:
         if page.find(TAG_EXTRA[0]) > -1:
             extra_html = Utils.text_between(page, *TAG_EXTRA, force_html=True)
             extra_rows = Utils.split_html_tag(Utils.remove_spaces(extra_html),
                                               TAG_EXTRA_SEP)
             for row in extra_rows:
                 key, separator, value = Utils.remove_tags(row).partition(":")
                 if not separator:
                     # Not a "key: value" row; ignore it and keep going.
                     continue
                 dict_extra[Utils.remove_spaces(key)] = Utils.remove_spaces(value)
     except Exception:
         # Deliberately tolerant of unexpected markup, but no longer a bare
         # ``except`` so KeyboardInterrupt/SystemExit still propagate.
         pass
     return dict_extra
Beispiel #7
0
 def scrape_synonyms(self, page):
     """
     Build the list of synonyms found in ``page``.

     Returns an empty list when the opening ``TAG_SYNONYMS`` marker is
     absent.  Otherwise ``self.first_synonym`` is called repeatedly on the
     synonyms section until no opening ``TAG_SYNONYMS_DELIMITER`` marker
     remains in it.
     """
     found = []
     if page.find(TAG_SYNONYMS[0]) < 0:
         return found

     remaining = Utils.text_between(page, *TAG_SYNONYMS, force_html=True)
     while remaining.find(TAG_SYNONYMS_DELIMITER[0]) != -1:
         # first_synonym hands back the markup to use for the next pass.
         word, remaining = self.first_synonym(remaining)
         found.append(word)
     return found
Beispiel #8
0
    def search(self, word):
        """
        Search for word.

        Returns ``None`` when ``word`` contains more than one
        whitespace-separated token, or when fetching/decoding the page
        fails.  Otherwise returns a ``Word`` whose text is taken from the
        page's ``<h1`` ... ``</h1>`` region and whose meaning, synonyms,
        examples and extra fields come from the ``scrape_*`` methods.
        """
        if len(word.split()) > 1:
            return None

        _word = Utils.remove_accents(word).strip().lower()
        try:
            with self.get(BASE_URL.format(_word)) as response:
                page = html.unescape(response.read().decode(CHARSET))
        except Exception:
            # Best-effort lookup: any fetch/decode failure means "not found".
            # ``Exception`` instead of a bare ``except`` lets
            # KeyboardInterrupt and SystemExit propagate to the caller.
            return None

        return Word(
            Utils.text_between(page, "<h1", "</h1>", force_html=True).lower(),
            meaning=self.scrape_meaning(page),
            synonyms=self.scrape_synonyms(page),
            examples=self.scrape_examples(page),
            extra=self.scrape_extra(page),
        )
Beispiel #9
0
    def scrape_meaning(self, page):
        """
        Extract the meanings and the etymology from ``page``.

        Returns a ``(meaning, etymology)`` tuple.  ``etymology`` is the
        cleaned text found between the ``TAG_ETYMOLOGY`` markers inside the
        ``TAG_MEANING`` section.  ``meaning`` is the list of cleaned
        ``br``-separated fragments of that section, excluding any fragment
        equal to the etymology text.
        """
        section = Utils.text_between(page, *TAG_MEANING, force_html=True)

        raw_etymology = Utils.text_between(
            section, *TAG_ETYMOLOGY, force_html=True
        )
        etymology = Utils.remove_spaces(Utils.remove_tags(raw_etymology))

        meaning = []
        for fragment in Utils.split_html_tag(section, 'br'):
            cleaned = Utils.remove_spaces(Utils.remove_tags(fragment))
            # The etymology is reported separately, so keep it out of the
            # list of meanings.
            if cleaned != etymology:
                meaning.append(cleaned)

        return meaning, etymology
Beispiel #10
0
    def search(self, word):
        """
        Search for word.

        Returns ``None`` when ``word`` contains more than one
        whitespace-separated token, or when fetching/decoding the page
        fails.  Otherwise returns a ``Word`` for ``word`` with its
        ``meaning``, ``synonyms`` and ``extra`` attributes filled in from
        the scraped page.
        """
        if len(word.split()) > 1:
            return None

        _word = Utils.remove_accents(word).strip().lower()
        try:
            with self.get(BASE_URL.format(_word)) as response:
                page = html.unescape(response.read().decode(CHARSET))
        except Exception:
            # Best-effort lookup: any fetch/decode failure means "not found".
            # ``Exception`` instead of a bare ``except`` lets
            # KeyboardInterrupt and SystemExit propagate to the caller.
            return None

        found = Word(word)

        found.meaning = self.scrape_meaning(page)
        found.synonyms = self.scrape_synonyms(page)
        found.extra = self.scrape_extra(page)

        return found
Beispiel #11
0
 def extra(self, page):
     """
     Return a dictionary of extra information.

     The region of ``page`` delimited by ``TAG_EXTRA`` is split into rows
     on ``TAG_EXTRA_SEP``.  After tag removal each row is expected to
     look like ``key: value``.  Only the first colon separates key from
     value, so a value may itself contain colons; rows without any colon
     are skipped instead of aborting the whole scrape.

     Scraping is best-effort: if one of the helpers raises, the entries
     collected so far are returned.
     """
     dic_extra = {}
     try:
         if page.find(TAG_EXTRA[0]) > -1:
             extra_html = Utils.text_between(page, TAG_EXTRA[0], TAG_EXTRA[1], True)
             extra_rows = Utils.split_html_tag(Utils.remove_spaces(extra_html), TAG_EXTRA_SEP)
             for row in extra_rows:
                 key, separator, value = Utils.remove_tags(row).partition(":")
                 if not separator:
                     # Not a "key: value" row; ignore it and keep going.
                     continue
                 dic_extra[Utils.remove_spaces(key)] = Utils.remove_spaces(value)
     except Exception:
         # Deliberately tolerant of unexpected markup, but no longer a bare
         # ``except`` so KeyboardInterrupt/SystemExit still propagate.
         pass
     return dic_extra
Beispiel #12
0
    def search(self, word):
        """
        Search for word.

        Returns ``None`` when ``word`` contains more than one
        whitespace-separated token, when opening the URL fails, or when the
        page contains the opening ``TAG_ENCHANT`` marker.  Otherwise
        returns a ``Word`` for ``word`` with its ``meaning``, ``synonyms``
        and ``extra`` attributes filled in from the page.

        Errors raised while reading or decoding the response are not
        caught here and propagate to the caller.
        """
        if len(word.split()) > 1:
            return None

        _word = Utils.remove_accents(word).strip().lower()
        try:
            response = request.urlopen(BASE_URL.format(_word))
        except Exception:
            # Any failure to open the URL means "not found".  ``Exception``
            # instead of a bare ``except`` lets KeyboardInterrupt and
            # SystemExit propagate to the caller.
            return None
        # Close the connection as soon as the body has been read; the
        # response object was previously left open.
        with response:
            page = html.unescape(response.read().decode(CHARSET))

        if page.find(TAG_ENCHANT[0]) > -1:
            return None

        found = Word(word)
        found.meaning = self.meaning(page)
        found.synonyms = self.synonyms(page)
        found.extra = self.extra(page)

        return found
Beispiel #13
0
 def meaning(self, page):
     """
     Extract the meaning section of ``page`` as text.

     The markup between the two ``TAG_MEANING`` markers is run through
     ``Utils.remove_tags`` and then ``Utils.remove_spaces``.
     """
     meaning_html = Utils.text_between(page, TAG_MEANING[0], TAG_MEANING[1], True)
     without_tags = Utils.remove_tags(meaning_html)
     return Utils.remove_spaces(without_tags)
Beispiel #14
0
 def __init__(self, word, meaning=None, synonyms=None, extra=None):
     """
     Store the word and its scraped attributes.

     ``word`` is stripped and lower-cased; ``url`` is ``BASE_URL``
     formatted with the stripped, lower-cased result of
     ``Utils.remove_accents`` on the original ``word``.

     ``synonyms`` and ``extra`` default to a fresh empty list / dict per
     instance.  They are created here rather than in the signature
     because a mutable default value is evaluated once and would be shared
     by every instance that relies on it.
     """
     self.word = word.strip().lower()
     self.url = BASE_URL.format(Utils.remove_accents(word).strip().lower())
     self.meaning = meaning
     self.synonyms = [] if synonyms is None else synonyms
     self.extra = {} if extra is None else extra