def getAlbumList(self):
    """Fetch the album list and return it in parsed form.

    Requests the ``product/list`` endpoint (the original note states that
    both GET and POST work), decodes the response body as GBK and passes
    the text to ``ParseXml.parseList``.

    :return: the value produced by ``ParseXml.parseList`` for the decoded body.
    """
    query = {"sitefrom": "cloudalbum_android", "sortType": 0}
    # The user segment of the URL is a literal placeholder
    # ("replace with your user name"); it is sent exactly as written.
    endpoint = "http://photo.163.com/papi/user/替换你的用户名/product/list"
    payload = Http().getUrl(endpoint, self.headers, query).decode('gbk')
    print("========获取到相册列表=========")
    return ParseXml().parseList(payload)
def __init__(self, filename):
    """Load the parsed XML for *filename* and prepare empty relation storage.

    :param filename: path of the corpus file WITHOUT the ``.xml`` extension;
        the extension is appended here.
    """
    self.filename_xml = filename + '.xml'
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(self.filename_xml)
    self.xml = ParseXml(self.filename_xml)
    self.parameters = Parameters()
    # Both dictionaries are taken as-is from the parser.
    self.dic_t_xml = self.xml.getDicTerms()
    self.dic_nts = self.xml.getDicNTStructure()
    # Only dic_an is active; dic_sv and dic_vo are commented out in the original.
    self.dic_an = {}
    # NOTE(review): presumably marks that the AN relations still have to be
    # built - confirm against the methods that read this flag.
    self.mountANRelations = True
def getAlbumDetail(self, albumId):
    """Fetch one album and return it in parsed form.

    Requests the ``albumid/<albumId>`` endpoint (the original note states
    that both GET and POST work), decodes the response body as GBK and
    passes the text to ``ParseXml.parseAlbum``.

    :param albumId: album identifier; concatenated into the URL and the
        progress message, so it must be a string.
    :return: the value produced by ``ParseXml.parseAlbum`` for the decoded body.
    """
    client = Http()
    query = {"sitefrom": "cloudalbum_android"}
    # The user segment of the URL is a literal placeholder
    # ("replace with your user name"); it is sent exactly as written.
    endpoint = "http://photo.163.com/papi/user/替换你的用户名/albumid/" + albumId
    body = client.getUrl(endpoint, self.headers, query)
    text = body.decode('gbk')
    print("\n")
    print("1、获取到相册:" + albumId + "=========")
    return ParseXml().parseAlbum(text)
def __buildStatisticalCorpus__(self):
    """Build and write the statistical corpus text for every XML corpus file.

    Lists the top level of ``self.corpus_folder``. For each file whose name
    ends in ``xml`` the term dictionary returned by ``ParseXml`` is walked
    with ids of the form ``s<sentence>_<word>`` (both counters start at 1;
    a sentence ends at the first missing word id, the file ends at the
    first missing sentence).

    A term is kept when its POS does not start with one of
    ``pu|num|conj|art|prp|spec``, its lemma contains no ``$`` and the lemma
    is at least ``self.parameters.getMinWordSize()`` characters long. Kept
    lemmas are passed through ``Accents.buildCodes`` and tagged:

    * ``__N`` when the id is in the noun dictionary (added to both strings),
    * ``__V`` when the id is in the verb dictionary (added to both strings),
    * ``__O`` otherwise (added to the full string only).

    Every ``-`` in the two strings is replaced by ``_`` before they are
    handed to ``self.__writeCorpusFile__`` together with the file name up
    to its first dot.

    Prints an error and calls ``sys.exit()`` when the folder cannot be listed.
    """
    try:
        # os.walk yields nothing for an unreadable/missing folder, so the
        # failure shows up as StopIteration here.
        root, _dirs, files = next(os.walk(self.corpus_folder))
    except (StopIteration, OSError):
        print('ERROR: It was not possible to open the ../Data/Corpus/Raw/ folder')
        sys.exit()

    accents = Accents()
    for corpus_file in files:
        if not re.match('.*xml$', corpus_file):
            continue

        corpus_filename = corpus_file.split('.')[0]
        # os.path.join gives the same path as plain concatenation when the
        # folder ends with a separator, and a valid one when it does not.
        xmlfile = ParseXml(os.path.join(root, corpus_file))
        dic_terms = xmlfile.getDicTerms()
        dic_nouns = xmlfile.getNouns()
        dic_verbs = xmlfile.getVerbs()
        min_size = self.parameters.getMinWordSize()

        # Collect the pieces and join once instead of growing (and
        # re-scanning) two strings on every word.
        full_parts = []
        noun_parts = []

        id_sentence = 1
        id_word = 1
        id_t = 's' + str(id_sentence) + '_' + str(id_word)
        while id_t in dic_terms:  # one iteration per sentence
            while id_t in dic_terms:  # one iteration per word
                term = dic_terms[id_t]
                if (not re.match('^(pu|num|conj|art|prp|spec)', term['pos'])
                        and re.search('[$]', term['lemma']) is None
                        and len(term['lemma']) >= min_size):
                    lemma = accents.buildCodes(term['lemma'])
                    if id_t in dic_nouns:
                        noun_parts.append(lemma + '__N ')
                        full_parts.append(lemma + '__N ')
                    elif id_t in dic_verbs:
                        # Verbs also go into the "nouns" string in the original.
                        noun_parts.append(lemma + '__V ')
                        full_parts.append(lemma + '__V ')
                    else:
                        full_parts.append(lemma + '__O ')
                id_word += 1
                id_t = 's' + str(id_sentence) + '_' + str(id_word)
            id_word = 1
            id_sentence += 1
            id_t = 's' + str(id_sentence) + '_' + str(id_word)

        # A single-character replacement distributes over concatenation, so
        # doing it once here yields the same text as replacing after each word.
        string_full = ''.join(full_parts).replace('-', '_')
        string_nouns = ''.join(noun_parts).replace('-', '_')
        self.__writeCorpusFile__(corpus_filename, string_full, string_nouns)
def __init__(self, filename):
    """Load the parsed XML for *filename* and prepare empty relation storage.

    :param filename: path of the corpus file WITHOUT the ``.xml`` extension;
        the extension is appended here.
    """
    self.filename_xml = filename + '.xml'
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(self.filename_xml)
    self.xml = ParseXml(self.filename_xml)
    self.parameters = Parameters()
    # Both dictionaries are taken as-is from the parser.
    self.dic_t_xml = self.xml.getDicTerms()
    self.dic_nts = self.xml.getDicNTStructure()
    # Only dic_an is active; dic_sv and dic_vo are commented out in the original.
    self.dic_an = {}
    # NOTE(review): presumably marks that the AN relations still have to be
    # built - confirm against the methods that read this flag.
    self.mountANRelations = True
class SyntacticContexts:
    """Count 'AN' relations between nouns and their neighbouring tokens.

    The term dictionary (``dic_t_xml``) maps ids of the form
    ``<sentence>_<word number>`` to dicts with at least ``'pos'`` and
    ``'lemma'``. For every token whose POS is exactly ``n`` or ``prop`` the
    up-to-three tokens after it and before it are compared against a fixed
    set of POS patterns; each match increments one or two keys of
    ``dic_an``. Keys look like ``<prefix><lemma>#<lemma>`` with prefix
    ``adj_``, ``prep_`` or ``nn_``.

    Relations are extracted lazily, the first time one of ``getDicAN``,
    ``printDicAN`` or ``writeDicAN`` is called.
    """

    # (offset of the other token relative to the noun,
    #  regex over the ':'-joined POS tags of the window in text order,
    #  relation prefix,
    #  whether the reversed key "<prefix><noun>#<other>" is counted as well)
    _AN_RULES = (
        (3, '^(n|prop):prp:(art|num|pron-indef|pron-poss|pu):(n|prop)$', 'prep_', True),
        (3, '^(n|prop):adj:adj:adj$', 'adj_', False),
        (2, '^(n|prop):prp:(n|prop)$', 'prep_', True),
        (2, '^(n|prop):adj:adj$', 'adj_', False),
        (1, '^(n|prop):adj$', 'adj_', False),
        (1, '^(n|prop):(n|prop)$', 'nn_', True),
        (-3, '^adj:adj:adj:(n|prop)$', 'adj_', False),
        (-2, '^adj:adj:(n|prop)$', 'adj_', False),
        (-1, '^adj:(n|prop)$', 'adj_', False),
    )

    def __init__(self, filename):
        """Parse ``filename + '.xml'`` and prepare empty relation storage.

        :param filename: corpus file path WITHOUT the ``.xml`` extension.
        """
        self.filename_xml = filename + '.xml'
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(self.filename_xml)
        self.xml = ParseXml(self.filename_xml)
        self.parameters = Parameters()
        self.dic_t_xml = self.xml.getDicTerms()
        self.dic_nts = self.xml.getDicNTStructure()
        # Relation key -> number of occurrences.
        self.dic_an = {}
        # True until the AN relations have been extracted once.
        self.mountANRelations = True

    def __extractRelations__(self, type_relation):
        """Fill ``dic_an`` from the term dictionary.

        :param type_relation: only ``'AN'`` is handled; any other value is
            a no-op.

        For each noun the rules in ``_AN_RULES`` are tried in order. A rule
        applies when the token at its offset exists, that token's lemma has
        at least ``getMinWordSize()`` characters, and the POS tags of the
        whole window match the rule's pattern. Windows with a missing
        intermediate token are skipped, since their POS sequence cannot be
        built.
        """
        if type_relation != 'AN':
            return

        terms = self.dic_t_xml
        min_size = self.parameters.getMinWordSize()
        for id_t in terms:
            noun = terms[id_t]
            if (re.match("^(n|prop)$", noun['pos']) is None
                    or len(noun['lemma']) < min_size):
                continue

            id_parts = id_t.split("_")
            id_sentence = id_parts[0]
            position = int(id_parts[1])

            for offset, pattern, prefix, symmetric in self._AN_RULES:
                id_other = id_sentence + '_' + str(position + offset)
                if id_other not in terms:
                    continue
                other_lemma = terms[id_other]['lemma']
                if len(other_lemma) < min_size:
                    continue

                # Token ids of the window in text order; the noun itself is
                # always referenced through its original id.
                if offset > 0:
                    window = [id_t] + [id_sentence + '_' + str(position + step)
                                       for step in range(1, offset + 1)]
                else:
                    window = [id_sentence + '_' + str(position + step)
                              for step in range(offset, 0)] + [id_t]
                if any(token not in terms for token in window):
                    continue

                tags = ':'.join([terms[token]['pos'] for token in window])
                if re.match(pattern, tags) is None:
                    continue

                self.__addElementDicAN__(prefix + other_lemma + '#' + noun['lemma'])
                if symmetric:
                    self.__addElementDicAN__(prefix + noun['lemma'] + '#' + other_lemma)

    def __addElementDicAN__(self, relation):
        """Increment the counter of *relation* in ``dic_an`` (starting at 1)."""
        self.dic_an[relation] = self.dic_an.get(relation, 0) + 1

    # NOTE: the disabled subject-verb (SV) and verb-object (VO) extraction
    # code that was kept here inside triple-quoted blocks has been dropped;
    # it read self.dic_t_cg, which this class never sets. Recover it from
    # version control if it is needed again.

    def _ensureANRelations(self):
        """Extract the AN relations on first use; later calls do nothing."""
        if self.mountANRelations:
            self.__extractRelations__('AN')
            self.mountANRelations = False

    def getDicAN(self):
        """Return the relation -> count dictionary (extracting it if needed)."""
        self._ensureANRelations()
        return self.dic_an

    def printDicAN(self):
        """Print every relation as ``<relation> = <count>``, one per line."""
        self._ensureANRelations()
        for id_an in self.dic_an:
            print(id_an + ' = ' + str(self.dic_an[id_an]))

    def writeDicAN(self, filename):
        """Write every relation as ``<relation>#<count>`` to ``filename + '.txt'``.

        :param filename: output path WITHOUT the ``.txt`` extension.
        """
        # Extract before opening so a failure cannot leave a truncated file.
        self._ensureANRelations()
        misc = Miscelaneous()
        output_an = misc.openFile(filename + '.txt', 'w')
        try:
            for id_an in self.dic_an:
                output_an.write(id_an + '#' + str(self.dic_an[id_an]) + '\n')
        finally:
            # Close the handle even when a write fails.
            output_an.close()

    # NOTE(review): the original text opened another triple-quoted block
    # after writeDicAN; its content is outside the visible source.
class SyntacticContexts:
    """Count 'AN' relations between nouns and their neighbouring tokens.

    The term dictionary (``dic_t_xml``) maps ids of the form
    ``<sentence>_<word number>`` to dicts with at least ``'pos'`` and
    ``'lemma'``. For every token whose POS is exactly ``n`` or ``prop`` the
    up-to-three tokens after it and before it are compared against a fixed
    set of POS patterns; each match increments one or two keys of
    ``dic_an``. Keys look like ``<prefix><lemma>#<lemma>`` with prefix
    ``adj_``, ``prep_`` or ``nn_``.

    Relations are extracted lazily, the first time one of ``getDicAN``,
    ``printDicAN`` or ``writeDicAN`` is called.
    """

    # (offset of the other token relative to the noun,
    #  regex over the ':'-joined POS tags of the window in text order,
    #  relation prefix,
    #  whether the reversed key "<prefix><noun>#<other>" is counted as well)
    _AN_RULES = (
        (3, '^(n|prop):prp:(art|num|pron-indef|pron-poss|pu):(n|prop)$', 'prep_', True),
        (3, '^(n|prop):adj:adj:adj$', 'adj_', False),
        (2, '^(n|prop):prp:(n|prop)$', 'prep_', True),
        (2, '^(n|prop):adj:adj$', 'adj_', False),
        (1, '^(n|prop):adj$', 'adj_', False),
        (1, '^(n|prop):(n|prop)$', 'nn_', True),
        (-3, '^adj:adj:adj:(n|prop)$', 'adj_', False),
        (-2, '^adj:adj:(n|prop)$', 'adj_', False),
        (-1, '^adj:(n|prop)$', 'adj_', False),
    )

    def __init__(self, filename):
        """Parse ``filename + '.xml'`` and prepare empty relation storage.

        :param filename: corpus file path WITHOUT the ``.xml`` extension.
        """
        self.filename_xml = filename + '.xml'
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(self.filename_xml)
        self.xml = ParseXml(self.filename_xml)
        self.parameters = Parameters()
        self.dic_t_xml = self.xml.getDicTerms()
        self.dic_nts = self.xml.getDicNTStructure()
        # Relation key -> number of occurrences.
        self.dic_an = {}
        # True until the AN relations have been extracted once.
        self.mountANRelations = True

    def __extractRelations__(self, type_relation):
        """Fill ``dic_an`` from the term dictionary.

        :param type_relation: only ``'AN'`` is handled; any other value is
            a no-op.

        For each noun the rules in ``_AN_RULES`` are tried in order. A rule
        applies when the token at its offset exists, that token's lemma has
        at least ``getMinWordSize()`` characters, and the POS tags of the
        whole window match the rule's pattern. Windows with a missing
        intermediate token are skipped, since their POS sequence cannot be
        built.
        """
        if type_relation != 'AN':
            return

        terms = self.dic_t_xml
        min_size = self.parameters.getMinWordSize()
        for id_t in terms:
            noun = terms[id_t]
            if (re.match("^(n|prop)$", noun['pos']) is None
                    or len(noun['lemma']) < min_size):
                continue

            id_parts = id_t.split("_")
            id_sentence = id_parts[0]
            position = int(id_parts[1])

            for offset, pattern, prefix, symmetric in self._AN_RULES:
                id_other = id_sentence + '_' + str(position + offset)
                if id_other not in terms:
                    continue
                other_lemma = terms[id_other]['lemma']
                if len(other_lemma) < min_size:
                    continue

                # Token ids of the window in text order; the noun itself is
                # always referenced through its original id.
                if offset > 0:
                    window = [id_t] + [id_sentence + '_' + str(position + step)
                                       for step in range(1, offset + 1)]
                else:
                    window = [id_sentence + '_' + str(position + step)
                              for step in range(offset, 0)] + [id_t]
                if any(token not in terms for token in window):
                    continue

                tags = ':'.join([terms[token]['pos'] for token in window])
                if re.match(pattern, tags) is None:
                    continue

                self.__addElementDicAN__(prefix + other_lemma + '#' + noun['lemma'])
                if symmetric:
                    self.__addElementDicAN__(prefix + noun['lemma'] + '#' + other_lemma)

    def __addElementDicAN__(self, relation):
        """Increment the counter of *relation* in ``dic_an`` (starting at 1)."""
        self.dic_an[relation] = self.dic_an.get(relation, 0) + 1

    # NOTE: the disabled subject-verb (SV) and verb-object (VO) extraction
    # code that was kept here inside triple-quoted blocks has been dropped;
    # it read self.dic_t_cg, which this class never sets. Recover it from
    # version control if it is needed again.

    def _ensureANRelations(self):
        """Extract the AN relations on first use; later calls do nothing."""
        if self.mountANRelations:
            self.__extractRelations__('AN')
            self.mountANRelations = False

    def getDicAN(self):
        """Return the relation -> count dictionary (extracting it if needed)."""
        self._ensureANRelations()
        return self.dic_an

    def printDicAN(self):
        """Print every relation as ``<relation> = <count>``, one per line."""
        self._ensureANRelations()
        for id_an in self.dic_an:
            print(id_an + ' = ' + str(self.dic_an[id_an]))

    def writeDicAN(self, filename):
        """Write every relation as ``<relation>#<count>`` to ``filename + '.txt'``.

        :param filename: output path WITHOUT the ``.txt`` extension.
        """
        # Extract before opening so a failure cannot leave a truncated file.
        self._ensureANRelations()
        misc = Miscelaneous()
        output_an = misc.openFile(filename + '.txt', 'w')
        try:
            for id_an in self.dic_an:
                output_an.write(id_an + '#' + str(self.dic_an[id_an]) + '\n')
        finally:
            # Close the handle even when a write fails.
            output_an.close()

    # NOTE(review): the original text opened another triple-quoted block
    # after writeDicAN; its content is outside the visible source.