예제 #1
0
def main():
    """Run an interactive prompt that analyzes each line the user types.

    Every entered line is passed to ``pyaramorph.Analyzer.analyze_text``;
    the result is treated as an iterable of groups, and each item of each
    group is printed on its own line. The loop ends when input hits
    end-of-file (ctrl-d).
    """
    morph = pyaramorph.Analyzer()
    print("Unicode Arabic Morphological Analyzer (press ctrl-d to exit)")
    while True:
        try:
            line = input("$ ")
            grouped = morph.analyze_text(line)
        except EOFError:
            # End of input: say goodbye and leave the prompt loop.
            print("Goodbye!")
            return

        # Walk the nested result lazily, printing in the original order.
        for item in (entry for group in grouped for entry in group):
            print(item)
예제 #2
0
    def __init__(self, raw_data=None):
        """Create the language tools and immediately run the processing steps.

        Args:
            raw_data: Optional input to process. When it is not ``None`` it is
                stored as both ``self.raw_data`` and ``self.org_data``; when it
                is ``None`` neither attribute is assigned here.
        """
        # Tools kept on the instance for use by the other methods.
        self.analyzer = pyaramorph.Analyzer()
        self.stemmer = nltk.ISRIStemmer()
        self.lemmatizer = nltk.WordNetLemmatizer()
        # Loaded from the NLTK data path for the English punkt tokenizer.
        self.segmenter = nltk.data.load("tokenizers/punkt/english.pickle")

        if raw_data is not None:
            self.raw_data = raw_data
            self.org_data = raw_data

        # The steps below run unconditionally, in this fixed order.
        # NOTE(review): they presumably read self.raw_data; confirm that it
        # is defined elsewhere when raw_data is None.
        self.analyze_text()
        self.ambig()
        self.load_corpus('Tashkeela')
        self.select_cand()
        self.print_result()
예제 #3
0
# -*- coding: utf-8 -*-
"""
@author: Asma Baccouche
"""
import re
import pandas as pd
import Lexicons
import Util
import pyaramorph
# Shared analyzer instance, created once at import time.
analyzer = pyaramorph.Analyzer()

# Each bigram file holds one space-separated entry per line. The files are
# read inside ``with`` blocks so the handles are closed right after loading,
# and only the newline is stripped so a final line without a trailing
# newline keeps its last character.
with open('Util_Files/Arabic_Pos_Bigrams.txt', 'r', encoding='utf8') as file:
    Pos_ar = [line.rstrip('\n').split(' ') for line in file]

with open('Util_Files/Arabic_Neg_Bigrams.txt', 'r', encoding='utf8') as file:
    Neg_ar = [line.rstrip('\n').split(' ') for line in file]


def find_bigrams(input_list):
    """Return the list of adjacent pairs in *input_list*.

    Args:
        input_list: A sequence (list, tuple, string, ...) supporting slicing.

    Returns:
        A list of 2-tuples ``(input_list[i], input_list[i + 1])`` in order.
        Sequences with fewer than two items yield an empty list.
    """
    # Pair each element with its successor; zip stops at the shorter slice.
    return list(zip(input_list, input_list[1:]))


# Unpack the pair returned by Lexicons.lexicons for 'English' once at import.
# NOTE(review): presumably P and N are the positive and negative lexicons
# (arabic_labeling unpacks the same call as Positive, Negative) — confirm.
P, N = Lexicons.lexicons('English')


def arabic_labeling(New, lang):
    Positive, Negative = Lexicons.lexicons(lang)
예제 #4
0
 def analysText(self, text):
     """Echo *text*, then analyze each whitespace-separated word of it.

     A new ``pyaramorph.Analyzer`` is created on every call. Each word's
     analysis is handed to ``self.printResult``.
     """
     print(text)
     morph = pyaramorph.Analyzer()
     for word in text.split():
         # NOTE(review): self is passed explicitly in addition to the bound
         # call; confirm this matches printResult's signature.
         self.printResult(self, morph.analyze_text(word))
예제 #5
0
	def __init__(self):
		"""Create the language tools this object keeps as attributes."""
		# Each tool is constructed once here and stored on the instance.
		self.analyzer = pyaramorph.Analyzer()
		self.stemmer = nltk.ISRIStemmer()
		self.lemmatizer = nltk.WordNetLemmatizer()
		# Loaded from the NLTK data path for the English punkt tokenizer.
		self.segmenter = nltk.data.load("tokenizers/punkt/english.pickle")
예제 #6
0
def _GetGloss(_word):
    """Return what a freshly created ``par.Analyzer`` yields for *_word*.

    The value is whatever ``analyze_text`` returns, passed through unchanged.
    """
    return par.Analyzer().analyze_text(_word)