# 예제 #1 (Example #1)
# 0
import os

import nltk
import nltk.parse.stanford as SParse
import nltk.tag.stanford as STag
from nltk.corpus import stopwords
from nltk.tokenize.stanford_segmenter import StanfordSegmenter
from rake_nltk import Rake

nltk.download('stopwords')

# Export the locations of the unpacked Stanford tools through environment
# variables.  Every path is relative to the current working directory.
#
# STANFORD_MODELS lists two directories.  They are joined with the platform's
# path-list separator (';' on Windows, ':' elsewhere) instead of a hard-coded
# ';', so the value can be split back into two entries on any OS.
os.environ['STANFORD_MODELS'] = os.pathsep.join([
    'stanford-segmenter-2018-10-16/data/',
    'stanford-postagger-full-2018-10-16/models/',
])
os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2018-10-17'
os.environ['CLASSPATH'] = 'stanford-parser-full-2018-10-17'
# NOTE(review): Windows-style JDK location; adjust for the local machine.
os.environ['JAVAHOME'] = 'C:/Program Files/Java/jdk-11.0.1'

# Create the segmenter from its jar and select the 'ar' default configuration.
segmenter_jar = 'stanford-segmenter-2018-10-16/stanford-segmenter-3.9.2.jar'
segmenter = StanfordSegmenter(segmenter_jar)
segmenter.default_config('ar')

# Segment the input file and echo the result; `text` is reused further down.
text = segmenter.segment_file('sample.txt')
print(text)

# Build the POS tagger from the 'arabic.tagger' model and the tagger jar.
tagger = STag.StanfordPOSTagger(
    'arabic.tagger',
    'stanford-postagger-full-2018-10-16/stanford-postagger.jar',
)

# Tag the whitespace-separated tokens of the segmented text and print only
# the second element of each tagged item.
tokens = text.split()
for tagged_token in tagger.tag(tokens):
    print(tagged_token[1])

# Create the parser with the arabicFactored model.
arabic_model = 'edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz'
parser = SParse.StanfordParser(model_path=arabic_model)

# Each '.'-delimited fragment of the text is handed to the parser as one
# sentence.
# NOTE(review): split('.') yields an empty final fragment when the text ends
# with a period -- confirm the parser tolerates empty sentences.
fragments = text.split('.')
sentences = parser.raw_parse_sents(fragments)

# Print every parse of every sentence, then draw it.
for parses in sentences:
    for tree in parses:
        print(tree)
        tree.draw()
# Repeats the stopwords download and import already performed near the top of
# the file; `stopwords` is not referenced by any statement below.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Export absolute Windows locations of the Stanford tools through environment
# variables.  Raw strings keep every backslash literal: the previous JAVAHOME
# literal relied on the invalid escape sequences '\P', '\J' and '\j', which
# Python 3.12+ reports as a SyntaxWarning.  The runtime values are unchanged.
#
# NOTE(review): the two STANFORD_MODELS entries sit under different user
# profiles ('snmuj' and 'lenovo') -- confirm both directories exist.
os.environ['STANFORD_MODELS'] = ';'.join([
    r'C:\Users\snmuj\OneDrive\Documents\salm\stanford-segmenter-2018-10-16\data',
    r'C:\Users\lenovo\Documents\salm\stanford-postagger-full-2018-10-16\models',
])
os.environ['STANFORD_PARSER'] = (
    r'C:\Users\snmuj\OneDrive\Documents\salm\stanford-parser-full-2018-10-17'
)
# NOTE(review): unlike STANFORD_PARSER, this path has no 'salm' component --
# confirm which of the two locations is correct.
os.environ['CLASSPATH'] = (
    r'C:\Users\snmuj\OneDrive\Documents\stanford-parser-full-2018-10-17'
)
# NOTE(review): this value ends in '_windows-x64_bin.exe', which looks like a
# JDK installer file name, not an installed JDK -- verify.
os.environ['JAVAHOME'] = r'C:\Program Files\Java\jdk-14.0.2_windows-x64_bin.exe'

# Create the segmenter from its jar and select the 'ar' default configuration.
segmenter_jar = 'C:\\Users\\snmuj\\OneDrive\\Documents\\salm\\stanford-segmenter-2018-10-16\\stanford-segmenter-3.9.2.jar'
segmenter = StanfordSegmenter(segmenter_jar)
segmenter.default_config('ar')

# Segment the input file and echo the result; `text` is reused further down.
# NOTE(review): 'text file' looks like a placeholder for a real file path.
text = segmenter.segment_file('text file')
print(text)

# Build the POS tagger from the 'arabic.tagger' model and the tagger jar.
tagger_jar = 'C:\\Users\\snmuj\\OneDrive\\Documents\\stanford-postagger-full-2018-10-16\\stanford-postagger.jar'
tagger = STag.StanfordPOSTagger('arabic.tagger', tagger_jar)

# Tag the whitespace-separated tokens of the segmented text and print only
# the second element of each tagged item.
tokens = text.split()
for tagged_token in tagger.tag(tokens):
    print(tagged_token[1])

# Create the parser with the arabicFactored model.
arabic_model = 'edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz'
parser = SParse.StanfordParser(model_path=arabic_model)

# Each '.'-delimited fragment of the text is handed to the parser as one
# sentence.
# NOTE(review): split('.') yields an empty final fragment when the text ends
# with a period -- confirm the parser tolerates empty sentences.
fragments = text.split('.')
sentences = parser.raw_parse_sents(fragments)

# Print every parse of every sentence.
for parses in sentences:
    for tree in parses:
        print(tree)