Esempio n. 1
0
def obtener_tipo_palabra(palabra):
    """
    Look the word up on Wiktionary (Spanish) and classify it.

    Only the part of speech of the first definition of the first entry
    is taken into account.

    Args:
        palabra (str): Word to validate.

    Returns:
        The module constant ADJETIVOS, SUSTANTIVOS or VERBOS when the
        word is an adjective, a noun or a verb respectively. An empty
        string is returned for any other part of speech and when no
        entry or definition is found.
    """
    try:
        lookup = WiktionaryParser()
        lookup.set_default_language('spanish')
        entries = lookup.fetch(palabra)
        part_of_speech = entries[0]['definitions'][0]['partOfSpeech']
    except IndexError:
        # No entry / no definition: the word is treated as nonexistent.
        return ''

    if part_of_speech == 'adjective':
        return ADJETIVOS
    if part_of_speech == 'noun':
        return SUSTANTIVOS
    if part_of_speech == 'verb':
        return VERBOS
    return ''
Esempio n. 2
0
def is_noun(word, country_code):
    """
    Tell whether ``word`` is a noun in the language selected by ``country_code``.

    Args:
        word: String, the word to check.
        country_code: String. 'EN' consults the NLTK WordNet corpus, 'PL'
            consults the Polish section of Wiktionary.

    Returns: Boolean. True when the word is recognised as a noun; False
        otherwise, including unsupported country codes and failed lookups.

    """
    if country_code == 'EN':
        # Every synset has to be inspected: the first one returned is not
        # necessarily the noun sense of the word.
        for synset in nltk.corpus.wordnet.synsets(word):
            name_parts = synset.name().split('.')
            if name_parts[0] == word and name_parts[1] == 'n':
                return True
        return False
    if country_code == 'PL':
        parser = WiktionaryParser()
        try:
            part_of_speech = parser.fetch(
                word, 'polish')[0]['definitions'][0]['partOfSpeech']
        except Exception:
            # Best effort: any lookup or parsing failure means "not a noun".
            return False
        return part_of_speech == "noun"
    # Unsupported country code.
    return False
Esempio n. 3
0
class WikiTagger:
  """Replaces placeholder part-of-speech tags with Wiktionary candidates.

  Lookups go through a WiktionaryParser whose default language is set to
  Portuguese.
  """

  def __init__(self):
    self.wp = WiktionaryParser()
    self.wp.set_default_language('portuguese')

  def fetch(self, tokens):
    """Resolve every token whose ``pos`` equals the placeholder 'TEMP'.

    For such a token, one PartOfSpeech is built per definition of every
    entry fetched for ``token.symbol``. The gender is 'MASC' when the
    first text line contains the standalone word 'm', 'FEMI' when it
    contains 'f', otherwise 'DESC'. When nothing is found the token gets
    a single 'proper noun' candidate. Any exception is printed and the
    token's ``pos`` is set to the string "ERROR".

    Returns the same ``tokens`` object; tokens are modified in place.
    """
    for token in tokens:
      if token.pos != 'TEMP':
        continue
      try:
        candidates = []
        for entry in self.wp.fetch(token.symbol):
          for definition in entry['definitions']:
            markers = definition['text'][0].split()
            if 'm' in markers:
              gender = 'MASC'
            elif 'f' in markers:
              gender = 'FEMI'
            else:
              gender = 'DESC'
            candidates.append(
                PartOfSpeech(definition['partOfSpeech'], gender, "TEMP"))
        if not candidates:
          # Nothing on Wiktionary: fall back to a proper-noun candidate.
          candidates = [PartOfSpeech('proper noun', 'DESC', 'TEMP')]
        token.pos = candidates
      except Exception as err:
        print(err)
        token.pos = "ERROR"
    return tokens
def get_info_from_wiktionary(one_word):
    """Print the definitions of the first German Wiktionary entry of ``one_word``.

    Nothing is returned. An empty lookup result raises IndexError because
    the first entry is indexed unconditionally.
    """
    entries = WiktionaryParser().fetch(one_word, 'german')
    print(entries[0]['definitions'])
Esempio n. 5
0
class WiktionarySkill(MycroftSkill):
    """Mycroft skill that speaks a word's definition taken from Wiktionary."""

    def __init__(self):
        MycroftSkill.__init__(self)

    def initialize(self):
        """Register the ``word`` entity file and create the Wiktionary parser."""
        self.register_entity_file("word.entity")
        self.parser = WiktionaryParser()

    @intent_file_handler('fallback.wiktionary.definition.intent')
    def handle_wiktionary_definition(self, message):
        """Look up the requested word and speak its first definition.

        The word is read from ``message.data['word']``. When the lookup
        fails or yields no usable definition, the 'error.wiktionary'
        dialog is spoken instead.
        """
        # Get word to define from utterance
        word = message.data.get('word')

        try:
            # The lookup is guarded too: a failed request or a missing
            # word must produce the error dialog, not an unhandled error.
            entries = self.parser.fetch(word)
            # Index 1 of the first definition's text is used; presumably
            # index 0 is the headword line -- TODO confirm.
            response = entries[0]['definitions'][0]['text'][1]
        except Exception:
            self.speak_dialog('error.wiktionary')
            return

        # Log the definition, then speak it back to the user
        LOG.info(response)
        self.speak_dialog('fallback.wiktionary', {
            'word': word,
            'definition': response
        })
Esempio n. 6
0
    def getDef(word):
        """Return noun definitions of ``word`` from the first source that has any.

        Sources are tried in this order: PyDictionary, the Wordnik
        ``WordApi``, ``vocabulary.meaning``, the Owlbot HTTP API and
        finally Wiktionary.

        Returns:
            A non-empty list of definitions, or the string ' ' when no
            source yields a noun definition.
        """
        # PyDictionary
        meanings = PyDictionary().meaning(word)
        if isinstance(meanings, dict) and 'Noun' in meanings:
            defs = meanings['Noun']
            if isinstance(defs, list) and defs:
                return defs

        # wordnik dictionary
        wordApi = WordApi.WordApi(client)
        definitions = wordApi.getDefinitions(word,
                                             partOfSpeech='noun',
                                             limit=3)
        if definitions:
            return [item.text.lower() for item in definitions]

        # vocabulary: a falsy result (False, None, '') means "no data" and
        # must not be handed to json.loads.
        meaningsList = vocabulary.meaning(word)
        if meaningsList:
            definitions = [
                # strip HTML-like tags from the definition text
                re.sub('<[^<]+?>', '', item['text']).lower()
                for item in json.loads(meaningsList)
                if item['text']
            ]
            if definitions:
                return definitions

        # owlbot api
        url = 'https://owlbot.info/api/v2/dictionary/' + word
        try:
            # The request itself is guarded and bounded by a timeout so a
            # dead endpoint falls through to the next source.
            result = requests.get(url, timeout=10).json()
            definitions = [
                item['definition'].lower()
                for item in result
                if item['type'] == 'noun' and item['definition']
            ]
            if definitions:
                return definitions
        except Exception:
            # Best effort: this source is optional, Wiktionary is tried next.
            pass

        # wiktionary
        try:
            result = WiktionaryParser().fetch(word)
            if result:
                senses = result[0]['definitions']
                if senses and senses[0].get('partOfSpeech') == 'noun':
                    text = senses[0]['text']
                    # 'text' may be a newline-joined string or a list of
                    # lines depending on the parser version; accept both.
                    if isinstance(text, str):
                        lines = text.lower().split('\n')
                    else:
                        lines = [line.lower() for line in text]
                    if lines:
                        return lines[:2]
        except Exception:
            return ' '
        return ' '
 async def run(self, message: discord.Message, trigger: str,
               client: discord.Client):
     """Look up the text following ``trigger`` on Wiktionary and print the result.

     Args:
         message: the Discord message that invoked the command.
         trigger: the command prefix; it is cut off the start of
             ``message.content`` to obtain the word.
         client: the Discord client (not used here).
     """
     word = message.content[len(trigger):]  # string
     parser = WiktionaryParser()
     # Pass the variable holding the user's text, not the literal "word".
     # NOTE(review): fetch() is a synchronous call inside a coroutine --
     # confirm that blocking here is acceptable.
     fetched = parser.fetch(
         word, language="english"
     )  # default could also be used as default is english
     print(fetched)
Esempio n. 8
0
def get_ipa(word):
    """Print the first pronunciation text of ``word`` from French Wiktionary.

    When an index lookup fails (no entry, or no pronunciation text) an
    error message is printed instead. Nothing is returned.
    """
    from wiktionaryparser import WiktionaryParser
    entries = WiktionaryParser().fetch(word, 'french')
    try:
        pronunciation = entries[0]['pronunciations']['text'][0]
    except IndexError:
        print("WIKI ERROR: THE INDEX IS OUT OF RANGE")
    else:
        print(pronunciation)
Esempio n. 9
0
def get_etymology_trees(word_str: str, language: str) -> List[Word]:
    """Fetch ``word_str`` in ``language`` and parse the etymology of each entry.

    Returns:
        The ``word_origins`` list.
        NOTE(review): nothing is ever appended to it, and both the Word
        instance and the result of ``parse`` are discarded, so the list is
        always empty -- confirm the intended wiring.
    """
    word_origins: List[Word] = []
    entries = WiktionaryParser().fetch(word_str, language)

    for entry in entries:
        # Constructed once per entry but currently unused (see note above).
        Word(word_str, language)
        parse(entry['etymology'])
    return word_origins
Esempio n. 10
0
def requestAPI(word="test"):
    """Fetch the Wiktionary data of a word through WiktionaryParser.

    Args:
        word: the word to look up; defaults to "test".
    Returns:
        A ``(word, result)`` tuple: the queried word and whatever
        ``WiktionaryParser.fetch`` returned for it.
    """
    return word, WiktionaryParser().fetch(word)
def get_wiktionary_entry(language, word):
    """Request the Wiktionary entry of a word in a given language.

    Arguments:
        language = language whose entry is wanted
        word = word whose entry is wanted
    Returns:
        the result of ``WiktionaryParser.fetch`` for the word
    """
    wiktionary = WiktionaryParser()
    wiktionary.set_default_language(language)
    entry = wiktionary.fetch(word)
    return entry
Esempio n. 12
0
 def _scrape_parser_info(cls, idiom_id: str) -> Optional[dict]:
     """Fetch the Wiktionary data of ``idiom_id``, including alternative forms.

     Returns:
         Whatever ``WiktionaryParser.fetch`` returns, or None when the
         fetch raises AttributeError (the error is logged as a warning).
     """
     log = logging.getLogger("_scrape_parser_1_info")
     wiktionary = WiktionaryParser()
     # also request alternative forms (e.g. beat around the bush = beat about the bush)
     wiktionary.include_relation('alternative forms')
     try:
         return wiktionary.fetch(idiom_id)
     except AttributeError as err:
         log.warning(str(err))
         return None
Esempio n. 13
0
def definition_english(buff):
    """Return the text of the first definition of ``buff`` as one string.

    Every line of the first definition of the first entry is followed by
    a newline character.

    Returns:
        The joined text, or None when the lookup yields no entry or no
        definition.
    """
    from wiktionaryparser import WiktionaryParser
    entries = WiktionaryParser().fetch(buff)
    try:
        lines = entries[0]["definitions"][0]["text"]
    except IndexError:
        return None
    # The fetched structure is used directly; serialising it to JSON and
    # parsing it back yields an equal structure and is not needed.
    return "".join(line + "\n" for line in lines)
def define_word(word):
    """Return the definitions of the first Yiddish Wiktionary entry of ``word``.

    Returns:
        The ``"definitions"`` value of the first entry, or False when the
        lookup result is empty or does not have the expected shape.
    """
    entries = WiktionaryParser().fetch(word, 'yiddish')
    try:
        return entries[0]["definitions"]
    except (IndexError, KeyError, TypeError):
        # error handling for when the parser returns an empty set/list
        return False
Esempio n. 15
0
def get_wiki_tags(entry):
    """Tag every word of ``entry[1][0]`` using Greek Wiktionary.

    For each word the definitions of the first fetched entry are scanned
    in order. The first part of speech present in the mapping below
    decides the tag; parts of speech that are not in the mapping are
    reported on stdout. Words without a usable definition, and words
    whose lookup raises AttributeError, get the tag 'X'.

    Returns:
        A list of tags, one per word, in word order.
    """
    # Wiktionary part of speech -> short upper-case tag
    convert = {
        'adjective': 'ADJ',
        'adposition': 'ADP',
        'preposition': 'ADP',
        'adverb': 'ADV',
        'auxiliary': 'AUX',
        'coordinating conjunction': 'CCONJ',
        'determiner': 'DET',
        'interjection': 'INTJ',
        'noun': 'NOUN',
        'numeral': 'NUM',
        'particle': 'PART',
        'pronoun': 'PRON',
        'proper noun': 'PROPN',
        'punctuation': 'PUNCT',
        'subordinating conjunction': 'SCONJ',
        'symbol': 'SYM',
        'verb': 'VERB',
        'other': 'X',
        'article': 'DET',
        'conjunction': 'PART',
    }

    parser = WiktionaryParser()
    tags = []

    for word in entry[1][0]:
        tag = 'X'
        try:
            results = parser.fetch(word, 'greek')
            if results and results[0]['definitions']:
                for definition in results[0]['definitions']:
                    wiki_pos = definition['partOfSpeech']
                    if wiki_pos in convert:
                        tag = convert[wiki_pos]
                        break
                    # Unknown part of speech: report it, try the next one.
                    print(f'** cant convert wiki_pos: {wiki_pos}')
                    print(word)
                    print('--------------------------------------')
        except AttributeError as error:
            print(f'Error: {error}')
        tags.append(tag)
    return tags
Esempio n. 16
0
class wiktionary(dictionaryAPIBaseClass):
    """Dictionary backend that looks words up through WiktionaryParser."""

    def __init__(self):
        self.parser = WiktionaryParser()

    def __del__(self):
        pass

    def word_definition(self, word):
        """Return one randomly chosen definition line of ``word``.

        The line is drawn from the text of the first definition of the
        first entry, never from index 0 of that text.

        Raises:
            SystemExit: when the lookup itself raises (the error is
                printed first).
            IndexError: when there is no entry or no definition.
            ValueError: when the text has fewer than two lines, because
                the random range is then empty.
        """
        try:
            retrieved_definitions = self.parser.fetch(word)
        except Exception as e:
            print(f'An exception was raised when calling wiki API: {e}')
            # Same effect as exit(1) without depending on the site module.
            raise SystemExit(1)

        definition_list = retrieved_definitions[0]['definitions'][0]['text']
        definition = definition_list[random.randrange(1, len(definition_list))]
        return definition

    def word_check(self, word):
        """Return True when the lookup of ``word`` yields at least one definition.

        An empty lookup result counts as "no definition" and gives False.
        """
        entries = self.parser.fetch(word)
        return bool(entries) and bool(entries[0]['definitions'])
Esempio n. 17
0
class Wiktionary:
	"""Discord command group that looks words up on Wiktionary.

	The entries of the last lookup, the last text output and the last
	embed are kept on the instance (``words``, ``output``, ``embed``).
	"""

	# Discord rejects empty message content / empty embed field values and
	# caps their length; these limits keep a lookup from failing on send.
	_FIELD_LIMIT = 1024
	_MESSAGE_LIMIT = 2000

	def __init__(self, bot):
		self.bot = bot

		self.parser = WiktionaryParser()
		self.words = {}
		self.output = None
		self.embed = None

		self.parser.set_default_language('english')

	def __fetch_word(self, word):
		"""Fetch ``word`` with the parser's current language into ``self.words``."""
		self.words = self.parser.fetch(word)

	def __first_definition_values(self, key):
		"""Return ``str(first_definition[key])`` for up to three fetched entries.

		Entries without any definition are skipped.
		"""
		return [
			str(entry['definitions'][0][key])
			for entry in self.words[:3]
			if entry['definitions']
		]

	async def __send_output(self, ctx):
		"""Send ``self.output`` as one message, or a notice when it is empty."""
		message = '\n'.join(self.output) or 'No results found.'
		await ctx.send(message[:self._MESSAGE_LIMIT])

	@commands.group(brief='Gives you a word\'s etymology, definition, examples etc.')
	async def word(self, ctx):
		pass

	@word.command(brief='Changes the language the command will use.')
	async def lang(self, ctx, lang):
		self.parser.set_default_language(lang)
		language_list = 'https://en.wiktionary.org/wiki/Wiktionary:List_of_languages'
		await ctx.send(f'Language changed to {lang}.\nThe list of languages can be found here: {language_list}')

	@word.command(brief='Gives you a word\'s etymologies.', aliases=['e', 'ety'])
	async def etymology(self, ctx, word):
		self.__fetch_word(word)
		title = word
		description = f'{len(self.words)} results found.'
		self.embed = discord.Embed(color=ctx.message.author.color, title=title, description=description)
		for i, entry in enumerate(self.words[:3], 1):
			# An entry may carry an empty etymology; use a placeholder so
			# the field value is never empty.
			value = (entry['etymology'] or 'N/A')[:self._FIELD_LIMIT]
			self.embed.add_field(name=i, value=value)
		await ctx.send(embed=self.embed)

	@word.command(brief='Gives you example usages for a word.', aliases=['ex'])
	async def example(self, ctx, word):
		self.__fetch_word(word)
		self.output = self.__first_definition_values('examples')
		print(self.output)
		await self.__send_output(ctx)

	@word.command(brief='Gives you a word\'s definition.', aliases=['d', 'def'])
	async def definition(self, ctx, word):
		self.__fetch_word(word)
		self.output = self.__first_definition_values('text')
		print(self.output)
		await self.__send_output(ctx)
def get_wiktionary_entry(language, word):
    """Request the Wiktionary entry of a word, tolerating lookup failures.

    Arguments:
        language = language whose entry is wanted
        word = word whose entry is wanted
    Returns:
        the result of ``WiktionaryParser.fetch`` for the word, or an
        empty list when the fetch raises (the problem is printed)
    """
    wiktionary = WiktionaryParser()
    wiktionary.set_default_language(language)
    try:
        entry = wiktionary.fetch(word)
    except Exception as err:
        print("problem with word {}, language {}".format(word, language))
        print(err)
        return []
    return entry
Esempio n. 19
0
def translate(word: str) -> str:
    """Return the Wiktionary definitions of ``word`` grouped by part of speech.

    The word is lower-cased before the lookup. Within one entry, the text
    lines of each part of speech are joined with LINE_DIVIDER and rendered
    as "<part of speech>: <text>"; a later definition with the same part
    of speech replaces an earlier one.

    NOTE(review): the blocks of successive entries are concatenated with
    no separator between them -- confirm that this is intended.
    """
    entries = WiktionaryParser().fetch(word.lower())
    blocks = []
    for entry in entries:
        translations = {
            d['partOfSpeech']: LINE_DIVIDER.join(d['text'])
            for d in entry['definitions']
        }
        blocks.append(
            LINE_DIVIDER.join(f"{k}: {v}" for k, v in translations.items()))
    return "".join(blocks)
Esempio n. 20
0
def fetch_word(word):
    """Fetch ``word`` from Wiktionary and store it with its translations and definitions.

    Only the first fetched entry is used, and nothing is stored when it
    has no definitions. A Word row is created first, then one Translation
    row per translated word and one WordDefinition row per definition;
    definitions receive a priority counting up from 0 in fetch order.

    Returns None.
    """
    parser = WiktionaryParser()
    each_word = word.lower()
    fetched = parser.fetch(each_word)
    if not (len(fetched) and len(fetched[0].get('definitions', []))):
        return

    details = fetched[0]
    word_definitions = details.pop('definitions')
    pronunciation_details = details.pop('pronunciations')

    audio_links = pronunciation_details.get('audio', [])
    pronounciations = pronunciation_details.get('text', [])
    new_word = Word.objects.create(word_english=each_word,
                                   pronounciations=pronounciations,
                                   audio_links=audio_links)

    for translation in details.pop('translations'):
        meaning = translation.get('meaning')
        available = translation.get('available_translations')
        for language_code in available:
            for local_word in available.get(language_code):
                Translation.objects.create(
                    english_word=new_word,
                    meaning=meaning,
                    local_word=local_word,
                    utf_encoded=local_word.encode('utf-8'),
                    language=language_code)

    for priority, definition in enumerate(word_definitions):
        definition_text = definition.get('text', [])
        part_of_speech = definition.get('partOfSpeech')
        examples = definition.get('examples')
        synonyms = []
        # When several 'synonyms' relations exist, the last one wins.
        for related in definition.get('relatedWords'):
            if related.get('relationshipType', '') == 'synonyms':
                synonyms = related.get('words', [])
        WordDefinition.objects.create(
            english_word=new_word,
            definitions=definition_text,
            priority=priority,
            part_of_speech=part_of_speech,
            examples=examples,
            synonyms=synonyms)
def parseAndReturn(word):
    """Return a bounded excerpt of the definitions of ``word``.

    Only the first fetched entry is used.

    Returns:
        A list with one inner list per definition (at most
        ``constants.MAX_NUM_DEFINITIONS``). Each inner list holds the part
        of speech followed by up to ``constants.MAX_DEPTH_PER_DEF`` text
        lines; index 0 of the text is skipped.

    Raises:
        AttributeError: when the lookup yields no entry or the first
            entry has no definitions.
    """
    entries = WiktionaryParser().fetch(word)
    # An unknown word may yield no entries at all; report it the same way
    # as an entry without definitions instead of leaking an IndexError.
    defList = entries[0]["definitions"] if entries else []
    if not defList:
        raise AttributeError("CANNOT FIND A DEFINITION")

    max_definitions = max(0, constants.MAX_NUM_DEFINITIONS)
    last_line = max(1, constants.MAX_DEPTH_PER_DEF + 1)

    defText = []
    for definition in defList[:max_definitions]:
        text = definition["text"]
        if debug:
            print(len(text))
        defText.append([definition["partOfSpeech"], *text[1:last_line]])

    return defText
Esempio n. 22
0
    class WikitionaryDictionary(VimDictionary):
        """Dictionary backend that renders WiktionaryParser results as text."""

        def __init__(self):
            self._parser = WiktionaryParser()

        def _lookup(self, entry):
            """Fetch ``entry`` and return its rendered definitions, stripped."""
            fetched = self._parser.fetch(entry)
            return self.parse_wikitionary_entry(entry, fetched).strip()

        @staticmethod
        def parse_wikitionary_entry(word, wikiresult):
            """Render a fetch result as newline-joined text.

            Every entry contributes the upper-cased word followed by a
            newline, then one numbered line per definition (numbering
            restarts at 1 for each entry). The ``text`` value of a
            definition is formatted as-is.
            """
            rendered = []
            for entry in wikiresult:
                rendered.append(word.upper() + '\n')
                numbered = enumerate(entry['definitions'], start=1)
                for number, definition in numbered:
                    rendered.append(
                        '{0}. {1}'.format(number, definition['text']))
            return '\n'.join(rendered)
Esempio n. 23
0
def wiktionary_retriever(word_list, lang):
    """Fetch every word of ``word_list`` in ``lang`` and summarise the first entry.

    The first text line of the first definition is stripped of the word
    itself and of non-breaking spaces, then split at its plural marker
    (the literal " (plural " for Italian, the regex ``.*plural `` for any
    other language).

    Returns:
        A dict mapping each word to
        ``(cleaned etymology, part of speech, gender, plural)`` where
        ``gender`` is the text before the plural marker and ``plural`` is
        the text between the marker and the next ")" when that text is
        longer than three characters (otherwise ""). Words without an
        entry or without definitions map to ``[]``. Words whose processing
        raised are reported on stdout and left out of the dict.
    """
    parser = WiktionaryParser()
    retrieved_words = {word: parser.fetch(word, lang) for word in word_list}

    wiktionary_dict = {}
    for word, entries in retrieved_words.items():
        try:
            if not entries or not entries[0]['definitions']:
                wiktionary_dict[word] = []
                continue

            entry = entries[0]
            first_definition = entry['definitions'][0]
            headline = first_definition['text'][0].replace(
                word, "").replace("\xa0", "")

            # Check the information in the text field
            if lang == "italian":
                parts = headline.split(" (plural ")
            else:
                parts = re.split(r'.*plural ', headline)

            # Both split flavours always return at least one element.
            gender, plural = parts[0], ""
            if len(parts) >= 2:
                candidate = parts[1].split(")")[0]
                if len(candidate) > 3:
                    plural = candidate

            wiktionary_dict[word] = (clean_text(entry['etymology']),
                                     first_definition['partOfSpeech'],
                                     gender, plural)
        except Exception:
            # Best effort per word; KeyboardInterrupt/SystemExit propagate.
            print("wiktionary err:", word)

    return wiktionary_dict
Esempio n. 24
0
class WiktionaryDefiner(Definer):
    """Definer that reads definitions from Wiktionary in a fixed language."""

    # Language passed to every WiktionaryParser.fetch call.
    language: str = attr.ib()

    def __attrs_post_init__(self):
        self.parser = WiktionaryParser()
        self.entry = None

    def lookup_word(self, word):
        """Fetch ``word`` in ``self.language`` and keep the result in ``self.entry``."""
        self.entry = self.parser.fetch(word, language=self.language)

    def get_definition(self,
                       word: str,
                       entry_n: int = 0,
                       definition_n: int = 0):
        """Return one definition line of ``word``, or None when there is none.

        Args:
            word: the word to look up.
            entry_n: index of the fetched entry to use.
            definition_n: index of the definition inside that entry.

        Returns:
            The element at index 1 of the selected definition's text, or
            None (after printing a notice) when the lookup is empty or
            any of the requested indices does not exist.
        """
        self.lookup_word(word=word)

        if not self.entry:
            print(f"No {self.language} definition found for: '{word}'")
            return None

        try:
            text = self.entry[entry_n]["definitions"][definition_n]["text"]
            # Index 1 is used; presumably index 0 is the headword line
            # rather than a definition -- TODO confirm.
            return text[1]
        except IndexError:
            print(f"No definition found for: '{word}'")
            return None

    def define(self, word):
        """Return the definition of ``word``, retrying with its lower-case form."""
        result = self.get_definition(word)
        if result is None:
            lower = word.lower()
            if lower != word:
                result = self.get_definition(lower)
        return result
def get_info(w, lang=None, parser=None, debug=False, postfix=None):
    """Collect part-of-speech and morphology info for ``w`` from Wiktionary.

    Args:
        w: the word to look up.
        lang: language passed to the parser; its first two characters are
            stored under 'src'.
        parser: parser to reuse; a new WiktionaryParser is created when
            None.
        debug: when true, the traceback of a failure is printed.
        postfix: when ``is_(postfix)`` is true, it is appended to every
            key of the result.

    Returns:
        A dict with the keys 'w', 'src', 'pos_wp' and 'morph' (optionally
        postfixed), built from the definitions of the first fetched
        entry. An empty dict is returned when anything fails.
    """
    try:
        if parser is None:
            parser = WiktionaryParser()
        definitions = parser.fetch(w, lang)[0]['definitions']
        pos_list = [d['partOfSpeech'] for d in definitions]
        morph_list = [wp_morph(d['text'], d['partOfSpeech'])
                      for d in definitions]
        result_dict = {
            'w': w,
            # NOTE(review): with the default lang=None this subscript
            # raises TypeError, so the function returns {} -- confirm that
            # callers always pass a language.
            'src': lang[:2],
            'pos_wp': pos_list,
            'morph': morph_list
        }
        if is_(postfix):
            return {k + postfix: v for k, v in result_dict.items()}
        return result_dict
    except Exception:
        # Best effort; KeyboardInterrupt/SystemExit are not swallowed.
        if debug:
            tb.print_exc()
        return {}
Esempio n. 26
0
def new_word_db_fetch(words, wik_word_index=0, wik_nonword_index=0):
    """Split ``words`` into those that have a Danish Wiktionary entry and those that do not.

    Both groups are appended to their CSV files through ``csv_append``.
    Words whose lookup raises AttributeError are reported on stdout and
    end up in neither group.

    Returns:
        A ``(new_words, new_nonwords)`` tuple of lists.
    """
    # setup Wiktionary Parser
    wik_parser = WiktionaryParser()
    wik_parser.set_default_language('danish')
    # Override the parser's relation and part-of-speech lists; presumably
    # this limits what the parser extracts -- TODO confirm.
    wik_parser.RELATIONS = []
    wik_parser.PARTS_OF_SPEECH = [
        "noun", "verb", "adjective", "adverb", "proper noun"
    ]

    found, missing = [], []
    for word in tqdm(words):
        try:
            entries = wik_parser.fetch(word)
        except AttributeError:
            print("something went wrong, with fidning a word on WikWord.")
            continue
        target = missing if len(entries) == 0 else found
        target.append(word)

    csv_append('word_datasets/wik_nonwords.csv', missing,
               wik_nonword_index)
    csv_append('word_datasets/wik_words.csv', found, wik_word_index)
    return found, missing
Esempio n. 27
0
#process_ending_words = [ noun for noun in oneword['word'].tolist() \
#                    if noun.endswith('tion') or noun.endswith('sion') or\
#                    noun.endswith('ing') or noun.endswith('age') or\
#                    (noun.endswith('y') and not noun.endswith('ty')) or \
#                    noun.endswith('ance') or\
#                    noun.endswith('al') or noun.endswith('sis')]

# Wiktionary part-of-speech analysis: fill the 'pos' column of `oneword`
# with the comma-separated parts of speech of each word.
section_types = []
attribute_list = []
named_system_list = []
process_verb_list = []
attribute_verb_list = []
oneword['pos'] = ''
for word in oneword['word'].tolist():
    w = parser.fetch(word)
    # Only the first etymology is used for now; later ones are ignored.
    if len(w) > 0:
        pos = [definition['partOfSpeech'] for definition in w[0]['definitions']]
    else:
        pos = []
    oneword.loc[oneword['word'] == word, 'pos'] = ', '.join(pos).rstrip(', ')

#    if len(a)==1 and a[0]=='Adjective':
#        attribute_list.append(word)
#    elif len(a)==1 and a[0]=='Proper noun':
#        named_system_list.append(word)
#    elif len(a)==1 and a[0]=='Verb':
#        process_verb_list.append(word)
Esempio n. 28
0
def fetch_word(ui):
    """Look up the text of ``ui.lineEdit`` on Hungarian Wiktionary.

    Returns whatever ``WiktionaryParser.fetch`` returns for that text.
    """
    wiktionary = WiktionaryParser()
    wiktionary.set_default_language('hungarian')
    query = ui.lineEdit.text()
    return wiktionary.fetch(query)
Esempio n. 29
0
from wiktionaryparser import WiktionaryParser
from pprint import pprint

# Demo: fetch the Wiktionary data for the word 'test' with a default
# parser and keep both the parser and the result as module-level names.
parser = WiktionaryParser()
word = parser.fetch('test')

# Pretty-print the fetched structure.
pprint(word)
Esempio n. 30
0
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']


parser = WiktionaryParser()
ws = input("Enter comma separated words: ").split(',')
# parser.set_default_language('German')

for w in ws:
    word = parser.fetch(w)
    audio_file = 'a'
    audio = []

    try:
        audio.append({
            "url": "https:" + word[0]['pronunciations']['audio'][0],
            "filename": w + "-en.mp3",  # en English suffix
            "fields": ["Audio"]
        })
    except IndexError:
        pass

    try:
        definition = word[0]['definitions'][0]
    except IndexError:
Esempio n. 31
0
def getDefinition(word):
    """Return whatever ``WiktionaryParser.fetch`` yields for ``word``."""
    return WiktionaryParser().fetch(word)