def main():
    """Look up the pronunciations for the word given on the command line.

    The parsed ``language`` and ``word`` arguments are passed to
    ``get_pronunciations`` and the result is handed to
    ``commandline.output_list``.

    Raises:
        RuntimeError: if the lookup result is empty/falsy.
    """
    # NOTE(review): the description text mentions phonemes although this
    # entry point looks up pronunciations of a word - confirm the wording.
    cli_args = commandline.LanguageAndWordInput.parse_arguments(
        "Get the phonemes from a language")
    found = get_pronunciations(cli_args.language, cli_args.word)
    if found:
        commandline.output_list(found)
        return
    raise RuntimeError(
        "No pronunciations found for word '{}' in language '{}'".format(
            cli_args.word, cli_args.language))
Beispiel #2
0
        self.all_data.append(row)

    def _get_language_code(self):
        """Return ``self.language`` mapped through ``language_codes.Phoibe.map``."""
        mapped_code = language_codes.Phoibe.map(self.language)
        return mapped_code

    def get_all_phonemes(self):
        """Return the distinct phonemes held in ``self.all_data``.

        The 'Phoneme' value of every row is decoded as UTF-8; duplicates
        collapse because the result is a set.
        """
        return {entry['Phoneme'].decode('utf8') for entry in self.all_data}


def get_phonemes(language):
    """Return the set of phonemes for the given language.

    This is the main entry point of the module: it builds a
    ``PhonemesCollector`` for the language, lets it parse
    ``resources.phoible_database`` and returns what it collected.

    Arguments:
        language=language for which to get the phonemes
    Returns:
        set of phonemes
    """
    # Read the data source first, then build the collector.
    source = resources.phoible_database
    collector = PhonemesCollector(language)
    collector.parse_source(source)
    return collector.get_all_phonemes()


if __name__ == '__main__':
    # Script entry point: read the language from the parsed command-line
    # arguments, collect its phonemes via get_phonemes() and pass the
    # resulting set to commandline.output_list.
    description = 'Get the phonemes from a language'
    args = commandline.LanguageInput.parse_arguments(description)
    phonemes = get_phonemes(args.language)
    commandline.output_list(phonemes)
    pronunciation_entries = (entry.get(key, []) for entry in wiktionary_entry)
    pronunciations = itertools.chain.from_iterable(pronunciation_entries)
    return pronunciations


def list_pronunciations(pronunciation_entries):
    """Extract every IPA pronunciation found in the given entries.

    An entry contributes each text span that sits between two slashes
    directly after ``IPA:`` (optionally followed by spaces).

    Arguments:
        pronunciation_entries = an iterable of entry strings to parse
    Returns:
        set of pronunciations : set(pron1, pron2, ...)
    """
    ipa_pattern = re.compile('IPA: */(.*?)/')
    collected = set()
    for text in pronunciation_entries:
        # findall yields the captured group of every match in this entry.
        collected.update(ipa_pattern.findall(text))
    return collected


if __name__ == '__main__':
    # Script entry point: look up the pronunciations of a word in a
    # language and pass them to commandline.output_list.
    # NOTE(review): the description text mentions phonemes although this
    # block looks up pronunciations - confirm the wording.
    description = 'Get the phonemes from a language'
    # 'local' is requested as an extra argument and forwarded unchanged
    # to get_pronunciations below.
    args = commandline.LanguageAndWordInput.parse_arguments(
        description, extra_arguments=['local'])
    pronunciations = get_pronunciations(args.language, args.word, args.local)
    # An empty/falsy result is reported as an error instead of being output.
    if not pronunciations:
        message = "No pronunciations found for word '{}' in language '{}'"
        raise RuntimeError(message.format(args.word, args.language))
    commandline.output_list(pronunciations)
    text = requests.get(page_path).text
    stream = StringIO.StringIO(text)
    return stream


def _get_frequency_list_from_file(file_pointer):
    """Open ``file_pointer`` and build the frequency list from its content.

    Despite its name, ``file_pointer`` is passed straight to ``open``.
    The opened stream is closed again before the list is returned.
    """
    with open(file_pointer) as source:
        words = _get_frequency_list_from_filestream(source)
    return words


def _get_frequency_list_from_filestream(instream):
    """Take a file stream and get the frequency list from it
    """
    freq_list = []
    for line in instream:
        if not line:
            break
        word, freq = line.split()
        freq_list.append(word)
    return freq_list


if __name__ == '__main__':
    # Script entry point: build the frequency list for the language given
    # in the parsed command-line arguments.
    description = 'Get the word frequencies for a language'
    args = commandline.LanguageInput.parse_arguments(description)
    frequency_list = get_frequency_list(args.language)
    # Only the first five entries of the list are output.
    commandline.output_list(frequency_list[:5])
        [resources.hermit_dave_github, language_code, page_name])
    text = requests.get(page_path).text
    stream = StringIO.StringIO(text)
    return stream


class FrequencySources(object):
    """Holder for the two callables that make up the frequency source.

    Both attributes are wrapped in ``staticmethod`` so that looking them
    up on the class or on an instance yields the plain callable without
    binding ``self`` as first argument.
    """
    # Language-code mapping, taken from language_codes.HermitDave.map.
    language_code = staticmethod(language_codes.HermitDave.map)
    # Refers to _get_hermitdave_page; presumably returns a stream of the
    # fetched frequency page - TODO confirm against its definition.
    frequency_filestream = staticmethod(_get_hermitdave_page)


def _frequency_list_from_filestream(filestream, extended_return_value=False):
    """Take a filestream and get the frequency list from it
    if extended_return -> list of (word, ranking, occurances)
    """
    freq_list = (line.strip().split() for line in filestream if line.strip())
    freq_list = [
        word if not extended_return_value else (word, i + 1, int(freq))
        for i, (word, freq) in enumerate(freq_list)
    ]
    if not freq_list:
        raise RuntimeError("No entries found for creating a frequency list")
    return freq_list


if __name__ == '__main__':
    # Script entry point: build the frequency list for the language given
    # in the parsed command-line arguments.
    description = 'Get the word frequencies for a language'
    args = commandline.LanguageInput.parse_arguments(description)
    frequency_list = get_frequency_list(args.language)
    # Only the first five entries of the list are output.
    commandline.output_list(frequency_list[:5])