def extractFullMIToThesaurus():
	"""Write the mutual-information thesaurus for every seed term as XML.

	For each seed, fetches the MI-ordered related nouns from the full
	statistical corpus and writes at most ``max_qty_terms`` of them (with
	their similarity scores) to ../Data/Output/T3/T3_Jaccard.xml.
	Exits the process if the output file cannot be opened.
	"""
	accents = Accents()
	parameters = Parameters()
	max_qty_terms = parameters.getMaxQtyTerms()
	seeds = Seeds()
	dic_seeds = seeds.getSeeds()
	mi_file = Statistic(stat_temp+'IMT_FullStatisticalCorpus.txt')

	try:
		thesaurus_file = codecs.open('../Data/Output/T3/T3_Jaccard.xml', 'w', 'utf-8')
	except IOError:
		print('ERROR: System cannot open the  file ../Data/Output/T3/T3_Jaccard.xml')
		sys.exit()

	# NOTE(review): the XML declaration says ISO-8859-1 but the stream codec
	# is utf-8 -- kept as-is to preserve output, but any non-ASCII character
	# will mislead strict XML parsers; confirm the intended encoding.
	try:
		thesaurus_file.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n<thesaurus>\n\t<ontology id="privacy">\n')
		for seed in dic_seeds:
			qty_terms = 0
			dic_related = mi_file.getOrderedNounMIForTerm(seed)
			if dic_related != False:
				thesaurus_file.write('\t\t<seed term_id="" term_name="'+accents.buildAccents(seed)+'" type="">\n')
				for mi_related in dic_related:
					# Stop as soon as the per-seed quota is reached instead of
					# scanning every remaining related term with a dead guard.
					if qty_terms >= max_qty_terms:
						break
					thesaurus_file.write('\t\t\t<term id="" display="ON" similarity="'+mi_related[0]+'">'+accents.buildAccents(mi_related[1])+'</term>\n')
					qty_terms += 1
				thesaurus_file.write('\t\t</seed>\n')
		thesaurus_file.write('\t</ontology>\n</thesaurus>')
	finally:
		# Always close so a partially-written thesaurus is still flushed.
		thesaurus_file.close()
# Example #2
class Seeds:
	"""Load the seed-term list from seeds.txt and expose simple queries.

	Seeds are read once at construction time, accent-normalised via
	``Accents.buildCodes`` and kept in a dict keyed (and valued) by the
	normalised term, which de-duplicates repeated seeds.
	"""

	def __init__(self):
		self.dic_seeds = {}
		self.accents = Accents()
		self.__buildDic__()

	def __buildDic__(self):
		# One seed per line; exit the process if the file is unreadable.
		try:
			file_seeds = codecs.open('seeds.txt', 'r', 'utf-8')
		except IOError:
			print('ERROR: System cannot open the seeds.txt file')
			sys.exit()

		for raw_line in file_seeds:
			if raw_line == '':
				continue
			seed_term = self.accents.buildCodes(raw_line.replace('\n', ''))
			self.dic_seeds[seed_term] = seed_term

		file_seeds.close()

	def getQtySeeds(self):
		# Number of distinct seed terms loaded.
		return len(self.dic_seeds)

	def getSeeds(self):
		# Seed terms in alphabetical order.
		return sorted(self.dic_seeds.keys())

	def printSeeds(self):
		print(self.dic_seeds)

	def printQtySeeds(self):
		print(len(self.dic_seeds))
	def __buildStatisticalCorpus__(self):
		"""Convert every raw ``*.xml`` corpus file into the statistical corpus.

		Each lemma that survives the POS/length filters is tagged ``__N``
		(noun), ``__V`` (verb) or ``__O`` (other) and accumulated into two
		strings -- the full text and a nouns+verbs-only text -- which are
		handed to ``self.__writeCorpusFile__`` per input file.
		Exits the process when the corpus folder cannot be listed.
		"""
		try:
			# next() raises StopIteration (or OSError) when the folder is
			# missing or empty; the broad except is kept so both cases exit.
			root, dirs, files = next(os.walk(self.corpus_folder))[:3]
		except:
			print('ERROR: It was not possible to open the ../Data/Corpus/Raw/ folder')
			sys.exit()

		accents = Accents()
		# Loop-invariant: read the configured minimum word size once.
		min_word_size = self.parameters.getMinWordSize()
		for corpus_file in files:
			if re.match('.*xml$', corpus_file):
				corpus_filename = corpus_file.split('.')[0]
				xmlfile = ParseXml(root+''+corpus_file)
				dic_terms = xmlfile.getDicTerms()
				dic_nouns = xmlfile.getNouns()
				dic_verbs = xmlfile.getVerbs()

				# Walk word ids 's<sentence>_<word>', advancing the word index
				# until a gap, then the sentence index until a missing sentence.
				id_sentence = 1
				id_word = 1
				id_t = 's'+str(id_sentence)+'_'+str(id_word)

				string_full = ''
				string_nouns = ''
				while id_t in dic_terms:
					while id_t in dic_terms:
						pos = dic_terms[id_t]['pos']
						raw_lemma = dic_terms[id_t]['lemma']
						# Skip punctuation/function-word POS tags, lemmas
						# containing '$', and lemmas below the minimum size.
						if not re.match('^(pu|num|conj|art|prp|spec)', pos) and (re.search('[$]', raw_lemma) is None) and (len(raw_lemma) >= min_word_size):
							# '-' -> '_' is applied to the lemma itself rather
							# than rewriting the whole accumulated string on
							# every word (same result: only lemmas can contain
							# '-', the appended tags never do).
							lemma = accents.buildCodes(raw_lemma).replace('-', '_')
							if id_t in dic_nouns:
								string_nouns += lemma+'__N '
								string_full += lemma+'__N '
							elif id_t in dic_verbs:
								string_nouns += lemma+'__V '
								string_full += lemma+'__V '
							else:
								string_full += lemma+'__O '
						id_word += 1
						id_t = 's'+str(id_sentence)+'_'+str(id_word)
					id_word = 1
					id_sentence += 1
					id_t = 's'+str(id_sentence)+'_'+str(id_word)
				self.__writeCorpusFile__(corpus_filename, string_full, string_nouns)
# Example #4
	def __init__(self):		
		"""Initialise the seed dictionary and build it from seeds.txt."""
		# Maps normalised seed term -> seed term (acts as an ordered-key set).
		self.dic_seeds = {}
		# Accent/encoding normaliser used while reading the seed file.
		self.accents = Accents()
		self.__buildDic__()