Ejemplo n.º 1
0
# Premature-tagging thresholds; both are numerically zero in this example
# (the negative one is written as -0.0).
prematureTaggingPositiveThreshold = 0.0
prematureTaggingNegativeThreshold = -0.0

# Overdue-tagging settings are left unset (None) here.  The "overdure"
# spelling is kept as-is because code outside this snippet may use these names.
overdureTaggingThreshold = overdureTaggingTopReservants = None
'''
The next few parameters are fixed; do not change them.
'''
# Resource files are located by plain string concatenation onto
# baseDirectoryOfQutufDB, so that value must already end with a path
# separator.

# Clitic transducers.
procliticsXmlFile = baseDirectoryOfQutufDB + 'MorphologyTransducers\\Proclitics.xml'
encliticsXmlFile = baseDirectoryOfQutufDB + 'MorphologyTransducers\\Enclitics.xml'

# Tagging rule repositories.
prematureTaggingRulesXmlFile = baseDirectoryOfQutufDB + 'TaggingRepository\\PrematureTaggingRules.xml'
overdueTaggingRulesXmlFile = baseDirectoryOfQutufDB + 'TaggingRepository\\OverdueTaggingRules.xml'

# Passed to LoadFromFiles together with baseDirectoryOfAlKhalilDB.
rootsFolder = 'roots2'

# Create the TextEncapsulator instance used by all later steps.
text = TextEncapsulator()

# Load data from files.  The arguments sit inside parentheses, so no
# backslash line continuations are needed.
text.LoadFromFiles(
    baseDirectoryOfAlKhalilDB,
    rootsFolder,
    procliticsXmlFile,
    encliticsXmlFile,
    prematureTaggingRulesXmlFile,
    overdueTaggingRulesXmlFile)

# Read the input text into Qutuf.
# The context manager closes the file even if read() raises (for example on
# a decoding error), which the previous open/read/close sequence did not.
with codecs.open(inputTextFile, 'r', 'utf-8') as f:
    string = f.read()
text.String = string

# Operate:
text.Tokenize()
Ejemplo n.º 2
0
from Controllers.TextEntities.Word import *;
from Controllers.Tokenization.Tokenizer import *;
from Controllers.Normalization.Normalizer import *;
from Controllers.Morphology.AffixParser import *;
from Controllers.Morphology.MorphologicalAnalyzer import *;

import codecs;
import io;



# Clitic transducer files, given as relative paths with Windows separators.
procliticsXmlFile = '..\\..\\Data\\MorphologyTransducers\\Proclitics.xml'
encliticsXmlFile = '..\\..\\Data\\MorphologyTransducers\\Enclitics.xml'


# Create the TextEncapsulator and load only the clitic transducers; every
# other resource argument of LoadFromFiles is passed as None.
text = TextEncapsulator()
text.LoadFromFiles(
    None,
    None,
    procliticsXmlFile,
    encliticsXmlFile,
    None,
    None)

# Read the test input.  The context manager closes the file even if read()
# raises, which the previous open/read/close sequence did not guarantee.
with codecs.open('..\\..\\Data\\Cliticalization_test.txt', 'r', 'utf-8') as f:
    string = f.read()

# Feed the text in, then tokenize and normalize it.
text.String = string
text.Tokenize()
text.Normalize(2)

Ejemplo n.º 3
0
import io;
import os;
from os.path import join, getsize;



# Resource files, given as relative paths.
# NOTE(review): compoundNounsXmlFile is set to the same file as
# procliticsXmlFile (Proclitics.xml) -- confirm this is intended.
compoundNounsXmlFile = '../../Data/MorphologyTransducers/Proclitics.xml'
procliticsXmlFile = '../../Data/MorphologyTransducers/Proclitics.xml'
encliticsXmlFile = '../../Data/MorphologyTransducers/Enclitics.xml'

# Tagging rule repositories.
prematureTaggingRulesXmlFile = '../../Data/TaggingRepository/PrematureTaggingRules.xml'
overdueTaggingRulesXmlFile = '../../Data/TaggingRepository/OverdueTaggingRules.xml'

# Machine-specific absolute location and the roots sub-folder name; both are
# handed to LoadFromFiles.
baseDirectoryOfAlKhalil = 'D:/temp/AlKhalil_1/db/'
rootsFolder = 'roots2'


# Create the TextEncapsulator and load all resources.  The arguments sit
# inside parentheses, so no backslash continuations are needed.
text = TextEncapsulator()
text.LoadFromFiles(
    baseDirectoryOfAlKhalil,
    rootsFolder,
    procliticsXmlFile,
    encliticsXmlFile,
    prematureTaggingRulesXmlFile,
    overdueTaggingRulesXmlFile)

# Top-level directory handed to os.walk further down.
base = 'D:/temp/Latifa2/'




for root, dirs, files in os.walk(base):
    for dir in dirs:
        print('Start parsing directory: ['+dir+']');
        for subroot, subdirs, subfiles in os.walk(root+dir):
            for file in subfiles:
Ejemplo n.º 4
0
from os.path import join, getsize

# Resource files, given as relative paths with Windows separators.
# NOTE(review): compoundNounsXmlFile is set to the same file as
# procliticsXmlFile (Proclitics.xml) -- confirm this is intended.
compoundNounsXmlFile = '..\\..\\Data\\MorphologyTransducers\\Proclitics.xml'
procliticsXmlFile = '..\\..\\Data\\MorphologyTransducers\\Proclitics.xml'
encliticsXmlFile = '..\\..\\Data\\MorphologyTransducers\\Enclitics.xml'

# Tagging rule repositories.
prematureTaggingRulesXmlFile = '..\\..\\Data\\TaggingRepository\\PrematureTaggingRules.xml'
overdueTaggingRulesXmlFile = '..\\..\\Data\\TaggingRepository\\OverdueTaggingRules.xml'

# Machine-specific absolute location and the roots sub-folder name; both are
# handed to LoadFromFiles.
baseDirectoryOfAlKhalil = 'D:\\temp\\AlKhalil_1\\db\\'
rootsFolder = 'roots2'

# All tagging thresholds are left unset (None) in this example.  The
# "overdure" spelling is kept because other code may use these names.
prematureTaggingPositiveThreshold = None
overdureTaggingThreshold = overdureTaggingTopReservants = None

# Create the TextEncapsulator and load all resources.  The arguments sit
# inside parentheses, so no backslash continuations are needed.
text = TextEncapsulator()
text.LoadFromFiles(
    baseDirectoryOfAlKhalil,
    rootsFolder,
    procliticsXmlFile,
    encliticsXmlFile,
    prematureTaggingRulesXmlFile,
    overdueTaggingRulesXmlFile)

# Top-level directory handed to os.walk further down.
base = 'D:\\temp\\Latifa2\\'

# Walk the tree under `base` and read every '.txt' file whose name contains
# no '-' character.
for root, dirs, files in os.walk(base):
    for file in files:
        if file.endswith('.txt') and '-' not in file:
            print('\tStart parsing file: [' + file + ']')

            # os.path.join uses the platform separator and does not double
            # it when `root` already ends with one (as `base` does).  The
            # context manager closes the file even if read() raises.
            with codecs.open(os.path.join(root, file), 'r', 'utf-8') as f:
                string = f.read()