Exemple #1
0
# with open('FullTrainData.data', 'rb') as filehandle:
#     # read the data as binary data stream
#     FullTrainCorpusList = pickle.load(filehandle)
#
# full_train_text = ' '.join(FullTrainCorpusList)

# Bring every name this script uses into scope; without these the script
# stops with NameError on the first pickle/downloader/Word reference.
import pickle

from polyglot.downloader import downloader
from polyglot.text import Word

# Load the pickled positive test corpus.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only use it on data files you created yourself.
with open('PosTestData.data', 'rb') as filehandle:
    # read the data as binary data stream
    PosTestCorpusList = pickle.load(filehandle)

# Flatten the first 100 corpus entries, and the whole corpus, into single
# space-separated strings (assumes the pickled list holds str items —
# TODO confirm against the script that wrote the file).
ShorterCorpusList = PosTestCorpusList[:100]
short_text = ' '.join(ShorterCorpusList)

pos_test_text = ' '.join(PosTestCorpusList)

# Print the table of languages for polyglot's "morph2" task.
print(downloader.supported_languages_table("morph2"))

# Print each sample English word, left-aligned in a 20-character column,
# followed by its `morphemes` attribute.
words = ["preprocessing", "processor", "invaluable", "thankful", "crossed"]
for w in words:
    w = Word(w, language="en")
    print("{:<20}{}".format(w, w.morphemes))

# train_data = list(pos_test_text)
#
# io = morfessor.MorfessorIO()
#
# #train_data = list(io.read_corpus_file('training_data'))
#
# model = morfessor.BaselineModel()
#
# #model.load_data(train_data, count_modifier=lambda x: 1)
# -*- coding: utf-8 -*-
"""
Created on Tue Dec  5 17:10:01 2017

@author: tom
"""

import polyglot
from polyglot.text import Text, Word
from polyglot.downloader import downloader
# Print the table of languages for polyglot's "pos2" (part-of-speech) task.
pos_language_table = downloader.supported_languages_table("pos2")
print(pos_language_table)

# Tag legend (abbreviation: meaning):
#ADJ: adjective
#ADP: adposition
#ADV: adverb
#AUX: auxiliary verb
#CONJ: coordinating conjunction
#DET: determiner
#INTJ: interjection
#NOUN: noun
#NUM: numeral
#PART: particle
#PRON: pronoun
#PROPN: proper noun
#PUNCT: punctuation
#SCONJ: subordinating conjunction
#SYM: symbol
#VERB: verb
#X: other
Exemple #3
0
from polyglot.downloader import downloader

# Print the table of languages for polyglot's "sentiment2" task; the second
# argument (3) is forwarded to polyglot unchanged.
print(downloader.supported_languages_table("sentiment2", 3))
from polyglot.text import Text
# text = Text('The movie was really good.')
# print("Language Detected: Code={}, Name={}\n".format(text.language.code, text.language.name))


def _print_word_polarities(blob):
    """Print every word of *blob* next to its `polarity` attribute, one per line."""
    for token in blob.words:
        print(f'{token}: {token.polarity}')


# Each sample is built and reported in turn: four Chinese sentences first,
# then one English sentence.
text = Text('这个电影真好看')
_print_word_polarities(text)

text1 = Text('我不喜欢太阳')
_print_word_polarities(text1)

text3 = Text('我赞同')
_print_word_polarities(text3)

text4 = Text('我反对')
_print_word_polarities(text4)

text2 = Text('I do not like sunshine')
_print_word_polarities(text2)

# print(text.words)

# text = Text(u"O primeiro uso de desobediência civil em massa ocorreu em setembro de 1906.")
#
Exemple #4
0
from polyglot.transliteration import Transliterator
from polyglot.downloader import downloader
# Print the table of languages for polyglot's "transliteration2" task.
transliteration_table = downloader.supported_languages_table("transliteration2")
print(transliteration_table)

from polyglot.text import Text
# Transliterate an English sentence with target code "ar" and print each
# resulting item on its own line.
blob = """We will meet at eight o'clock on Thursday morning."""
text = Text(blob)
arabic_forms = text.transliterate("ar")
for x in arabic_forms:
    print(x)