def main():
    """Run the interactive analyzer prompt.

    Prints a banner, then repeatedly reads a line from the ``$ `` prompt,
    passes it to ``pyaramorph.Analyzer.analyze_text`` and prints every
    solution of every returned group on its own line.  An ``EOFError``
    (ctrl-d) ends the loop after printing a farewell message.
    """
    morph = pyaramorph.Analyzer()
    print("Unicode Arabic Morphological Analyzer (press ctrl-d to exit)")
    while True:
        try:
            line = input("$ ")
            groups = morph.analyze_text(line)
        except EOFError:
            # End of input: say goodbye and leave the prompt loop.
            print("Goodbye!")
            break
        else:
            for group in groups:
                for entry in group:
                    print(entry)
def __init__(self, raw_data=None):
    """Create the NLP helpers and, if text is given, process it immediately.

    Args:
        raw_data: Optional input.  When it is ``None`` only the helper
            objects are created.  Otherwise it is stored on both
            ``raw_data`` and ``org_data`` and the processing methods are
            invoked in sequence, ending with ``print_result``.
    """
    self.analyzer = pyaramorph.Analyzer()
    self.stemmer = nltk.ISRIStemmer()
    self.lemmatizer = nltk.WordNetLemmatizer()
    self.segmenter = nltk.data.load("tokenizers/punkt/english.pickle")

    # Nothing to process: leave the instance with just its helpers.
    if raw_data is None:
        return

    self.raw_data = self.org_data = raw_data

    # The steps below run strictly in this order.
    self.analyze_text()
    self.ambig()
    self.load_corpus('Tashkeela')
    self.select_cand()
    self.print_result()
# -*- coding: utf-8 -*-
"""
@author: Asma Baccouche
"""

import re
import pandas as pd
import Lexicons
import Util
import pyaramorph

analyzer = pyaramorph.Analyzer()

# Each bigram file is read into a list of token lists: one entry per line,
# split on single spaces.  ``with`` guarantees the handle is closed, and
# ``rstrip('\n')`` removes only the line terminator, so a final line that
# lacks a trailing newline keeps its last character.
with open('Util_Files/Arabic_Pos_Bigrams.txt', 'r', encoding='utf8') as file:
    Pos_ar = [line.rstrip('\n').split(' ') for line in file]

with open('Util_Files/Arabic_Neg_Bigrams.txt', 'r', encoding='utf8') as file:
    Neg_ar = [line.rstrip('\n').split(' ') for line in file]


def find_bigrams(input_list):
    """Return the consecutive pairs of *input_list* as a list of tuples.

    Args:
        input_list: A sliceable sequence (e.g. a list of tokens).

    Returns:
        ``[(x0, x1), (x1, x2), ...]``; an empty list when the sequence has
        fewer than two elements.
    """
    return list(zip(input_list, input_list[1:]))


P, N = Lexicons.lexicons('English')


def arabic_labeling(New, lang):
    # NOTE(review): only this first statement is visible here; the function
    # body appears to continue beyond this excerpt -- confirm before editing.
    Positive, Negative = Lexicons.lexicons(lang)
def analysText(self, text):
    """Echo *text*, then analyze each whitespace-separated word.

    A new ``pyaramorph.Analyzer`` is created on every call; each word's
    analysis is handed to ``printResult``.
    """
    print(text)
    morph = pyaramorph.Analyzer()
    for word in text.split():
        # NOTE(review): ``self`` is passed explicitly even though the call
        # goes through the instance; preserved as-is -- confirm against
        # printResult's signature.
        self.printResult(self, morph.analyze_text(word))
def __init__(self):
    """Instantiate the analyzer, stemmer, lemmatizer and sentence segmenter.

    The segmenter is loaded through ``nltk.data.load`` from the Punkt
    English pickle resource.
    """
    punkt_resource = "tokenizers/punkt/english.pickle"

    self.analyzer = pyaramorph.Analyzer()
    self.stemmer = nltk.ISRIStemmer()
    self.lemmatizer = nltk.WordNetLemmatizer()
    self.segmenter = nltk.data.load(punkt_resource)
def _GetGloss(_word):
    """Return the result of ``analyze_text`` on *_word* from a fresh ``par.Analyzer``."""
    return par.Analyzer().analyze_text(_word)