# Arabic text pipeline using NLTK's Stanford wrappers: segment a text file,
# print the POS tag of every segmented token, then parse the text sentence by
# sentence and print/draw each resulting tree.
import os

from rake_nltk import Rake
import nltk
# These three names are used below but were never imported in the original
# script, which made it fail with NameError.
import nltk.parse.stanford as SParse
import nltk.tag.stanford as STag
from nltk.tokenize.stanford_segmenter import StanfordSegmenter

nltk.download('stopwords')
from nltk.corpus import stopwords

# Locations of the Stanford tools, given relative to the current working
# directory. They must be set before the wrappers below are constructed.
# The ';' separator in STANFORD_MODELS is the Windows path-list separator.
os.environ['STANFORD_MODELS'] = (
    'stanford-segmenter-2018-10-16/data/;'
    'stanford-postagger-full-2018-10-16/models/'
)
os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2018-10-17'
os.environ['CLASSPATH'] = 'stanford-parser-full-2018-10-17'
os.environ['JAVAHOME'] = 'C:/Program Files/Java/jdk-11.0.1'

# Step 1: segment the input file with the Arabic ('ar') configuration.
segmenter = StanfordSegmenter(
    'stanford-segmenter-2018-10-16/stanford-segmenter-3.9.2.jar')
segmenter.default_config('ar')
text = segmenter.segment_file('sample.txt')
print(text)

# Step 2: POS-tag the whitespace-separated tokens; only the tag (second
# element of each (token, tag) pair) is printed.
tagger = STag.StanfordPOSTagger(
    'arabic.tagger',
    'stanford-postagger-full-2018-10-16/stanford-postagger.jar')
for tag in tagger.tag(text.split()):
    print(tag[1])

# Step 3: parse the text, treating '.' as the sentence delimiter.
parser = SParse.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz')
sentences = parser.raw_parse_sents(text.split('.'))
for line in sentences:
    for sentence in line:
        print(sentence)
        # Opens a GUI window showing the parse tree.
        sentence.draw()
# Variant of the Arabic Stanford pipeline using absolute Windows paths:
# segment a text file, print the POS tag of every segmented token, then parse
# the text sentence by sentence and print each resulting tree.
import os

import nltk
# These three names are used below but are not imported anywhere in the
# visible script; without them the code fails with NameError.
import nltk.parse.stanford as SParse
import nltk.tag.stanford as STag
from nltk.tokenize.stanford_segmenter import StanfordSegmenter

nltk.download('stopwords')
from nltk.corpus import stopwords

# Locations of the Stanford tools. They must be set before the wrappers below
# are constructed.
# NOTE(review): the second STANFORD_MODELS entry lives under the 'lenovo' user
# profile while every other path uses 'snmuj' -- confirm this is intended.
os.environ['STANFORD_MODELS'] = (
    'C:\\Users\\snmuj\\OneDrive\\Documents\\salm\\stanford-segmenter-2018-10-16\\data;'
    'C:\\Users\\lenovo\\Documents\\salm\\stanford-postagger-full-2018-10-16\\models'
)
os.environ['STANFORD_PARSER'] = (
    'C:\\Users\\snmuj\\OneDrive\\Documents\\salm\\stanford-parser-full-2018-10-17'
)
# NOTE(review): unlike STANFORD_PARSER, this path has no 'salm' folder --
# confirm which of the two locations actually exists.
os.environ['CLASSPATH'] = (
    'C:\\Users\\snmuj\\OneDrive\\Documents\\stanford-parser-full-2018-10-17'
)
# Raw string: the original non-raw literal contained the invalid escape
# sequences '\P', '\J' and '\j'. The runtime value is unchanged.
# NOTE(review): this looks like the name of the JDK *installer* executable,
# not an installed JDK directory or java.exe -- confirm and point it at the
# real Java installation.
os.environ['JAVAHOME'] = r'C:\Program Files\Java\jdk-14.0.2_windows-x64_bin.exe'

# Step 1: segment the input file with the Arabic ('ar') configuration.
segmenter = StanfordSegmenter(
    'C:\\Users\\snmuj\\OneDrive\\Documents\\salm\\stanford-segmenter-2018-10-16\\stanford-segmenter-3.9.2.jar'
)
segmenter.default_config('ar')
# NOTE(review): 'text file' looks like a placeholder -- replace it with the
# path of the file to segment.
text = segmenter.segment_file('text file')
print(text)

# Step 2: POS-tag the whitespace-separated tokens; only the tag (second
# element of each (token, tag) pair) is printed.
# NOTE(review): this jar path also omits the 'salm' folder used elsewhere.
tagger = STag.StanfordPOSTagger(
    'arabic.tagger',
    'C:\\Users\\snmuj\\OneDrive\\Documents\\stanford-postagger-full-2018-10-16\\stanford-postagger.jar'
)
for tag in tagger.tag(text.split()):
    print(tag[1])

# Step 3: parse the text, treating '.' as the sentence delimiter.
parser = SParse.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz')
sentences = parser.raw_parse_sents(text.split('.'))
for line in sentences:
    for sentence in line:
        print(sentence)