Python ParseXml.getNouns Examples

Programming Language: Python

Namespace/Package Name: ParseXml

Class/Type: ParseXml

Method/Function: getNouns

Examples at hotexamples.com: 1

Python ParseXml.getNouns - 1 example found. This is the top-rated real-world Python example of ParseXml.ParseXml.getNouns extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ParseXml(3)

getDicNTStructure(1)

getDicTerms(1)

getNouns(1)

getServersList(1)

getSessionDir(1)

getVerbs(1)

loadXml(1)

parseAlbum(1)

parseList(1)

Example #1

Show file

File: StatisticalCorpus.py Project: rogergranada/Portuguese-ATC

	def __buildStatisticalCorpus__(self):
		"""Write one tagged corpus per XML file found directly in ``self.corpus_folder``.

		Every file whose name ends in ``xml`` is parsed with ``ParseXml``. Its
		terms are visited in sentence/word order through ids of the form
		``s<sentence>_<word>`` (both counters start at 1); a sentence ends at
		the first missing word id and the file ends at the first sentence
		whose word 1 is missing.

		A term is kept when its ``pos`` does not start with one of
		``pu|num|conj|art|prp|spec``, its ``lemma`` contains no ``$`` and the
		lemma is at least ``self.parameters.getMinWordSize()`` long. Kept
		lemmas are passed through ``Accents.buildCodes``, have ``-`` replaced
		by ``_`` and are suffixed with ``__N`` (id present in the noun dict),
		``__V`` (id present in the verb dict) or ``__O`` (anything else).

		Two space-terminated token strings are handed to
		``self.__writeCorpusFile__`` together with the file name up to its
		first dot: the full string (N, V and O tokens) and the reduced string
		(N and V tokens only).

		Terminates the process through ``sys.exit()`` after printing an error
		when the corpus folder cannot be listed.
		"""
		try:
			# Only the top level of the folder is needed, so take the first
			# entry produced by os.walk(). For a folder that cannot be listed
			# os.walk() produces nothing, which shows up as StopIteration.
			root, _, files = next(os.walk(self.corpus_folder))
		except (StopIteration, OSError):
			# Report the folder that was actually walked instead of a fixed path.
			print('ERROR: It was not possible to open the %s folder' % self.corpus_folder)
			sys.exit()

		# Parts of speech that never make it into the corpus.
		skipped_pos = re.compile('^(pu|num|conj|art|prp|spec)')
		accents = Accents()
		for corpus_file in files:
			if not re.match('.*xml$', corpus_file):
				continue
			corpus_filename = corpus_file.split('.')[0]
			# os.path.join keeps the path valid whether or not the folder
			# was configured with a trailing separator.
			xmlfile = ParseXml(os.path.join(root, corpus_file))
			dic_terms = xmlfile.getDicTerms()
			dic_nouns = xmlfile.getNouns()
			dic_verbs = xmlfile.getVerbs()

			# Tokens are collected in lists and joined once at the end;
			# each token already carries its trailing space.
			full_tokens = []
			noun_tokens = []

			id_sentence = 1
			id_t = 's' + str(id_sentence) + '_1'
			while id_t in dic_terms:
				id_word = 1
				while id_t in dic_terms:
					term = dic_terms[id_t]
					if (not skipped_pos.match(term['pos'])
							and '$' not in term['lemma']
							and len(term['lemma']) >= self.parameters.getMinWordSize()):
						# The tag suffixes contain no '-', so normalising the
						# lemma alone gives the same text as normalising the
						# whole accumulated string.
						lemma = accents.buildCodes(term['lemma']).replace('-', '_')
						if id_t in dic_nouns:
							noun_tokens.append(lemma + '__N ')
							full_tokens.append(lemma + '__N ')
						elif id_t in dic_verbs:
							noun_tokens.append(lemma + '__V ')
							full_tokens.append(lemma + '__V ')
						else:
							full_tokens.append(lemma + '__O ')
					id_word += 1
					id_t = 's' + str(id_sentence) + '_' + str(id_word)
				# Current sentence exhausted: probe word 1 of the next one.
				id_sentence += 1
				id_t = 's' + str(id_sentence) + '_1'
			self.__writeCorpusFile__(corpus_filename, ''.join(full_tokens), ''.join(noun_tokens))