def get_meter(self, meter=None):
    """
    Resolve *meter* to a usable meter object.

    meter : None, a meter name (string) looked up in self.config['meters'],
        or an already-instantiated meter object (returned unchanged).

    Resolution order when *meter* is falsy: self.meter if set; otherwise
    recurse on the meter key of the best parses recorded so far (if any);
    otherwise fall back to the package default from Meter.genDefault().
    """
    # Accept both str and (on Python 2) unicode meter names.  The original
    # evaluated the bare name `unicode`, which raises NameError on Python 3
    # as soon as a truthy meter argument is passed.
    try:
        string_types = (str, unicode)
    except NameError:  # Python 3: unicode is gone, str covers text
        string_types = (str,)
    if not meter:
        if self.meter:
            meter = self.meter
        elif hasattr(self, '_Text__bestparses') and self.__bestparses:
            # Reuse the meter of the best parse found so far.
            return self.get_meter(sorted(self.__bestparses.keys())[0])
        else:
            import Meter
            meter = Meter.genDefault()
            #print '>> no meter specified. defaulting to this meter:'
            #print meter
    elif isinstance(meter, string_types):
        # A meter name: look it up in the configuration.
        meter = self.config['meters'][meter]
    return meter
def get_meter(self, meter=None):
    """
    Return a concrete meter for this text.

    Accepts an explicit meter object, a configured meter name, or nothing
    at all, in which case the text's own meter, the meter of its best
    parses, or the package default is used (in that order).
    """
    if meter:
        # A named meter: resolve it through the configuration table.
        if type(meter) in [str, unicode]:
            return self.config['meters'][meter]
        # Anything else is assumed to already be a meter object.
        return meter
    # No meter given: prefer the one attached to this text...
    if self.meter:
        return self.meter
    # ...then the meter keyed by the best parses computed so far...
    if hasattr(self, '_Text__bestparses') and self.__bestparses:
        best_key = sorted(self.__bestparses.keys())[0]
        return self.get_meter(best_key)
    # ...and finally the library-wide default.
    import Meter
    return Meter.genDefault()
    #print '>> no meter specified. defaulting to this meter:'
    #print meter
def report(self,meter=None,include_bounded=False,reverse=True):
    """
    Print all parses and their violations in a structured format.

    meter : meter object to report against; defaults to Meter.genDefault().
    include_bounded : if True, bounded parses are included in the listing
        (passed through to self.allParses).
    reverse : passed through to meter.printParses to control ordering.

    Returns the accumulated report string; output is also echoed through
    self.om() as it is built.
    """
    ReportStr = ''
    if not meter:
        from Meter import Meter
        meter=Meter.genDefault()
    # Objects that can parse themselves (have allParses) report directly;
    # container objects delegate to their children in the else-branch.
    if (hasattr(self,'allParses')):
        self.om(unicode(self))  # NOTE(review): `unicode` is Python-2-only; breaks under Python 3
        allparses=self.allParses(meter=meter,include_bounded=include_bounded)
        numallparses=len(allparses)
        #allparses = reversed(allparses) if reverse else allparses
        for pi,parseList in enumerate(allparses):
            line=self.iparse2line(pi).txt
            #parseList.sort(key = lambda P: P.score())
            # Header/footer frame each line's parse listing; the body is
            # indented by one tab via the replace('\n','\n\t') below.
            hdr="\n\n"+'='*30+'\n[line #'+str(pi+1)+' of '+str(numallparses)+']: '+line+'\n\n\t'
            ftr='='*30+'\n'
            ReportStr+=self.om(hdr+meter.printParses(parseList,reverse=reverse).replace('\n','\n\t')[:-1]+ftr,conscious=False)
    else:
        # Recurse into children, skipping raw-list entries.
        for child in self.children:
            if type(child)==type([]):
                continue
            ReportStr+=child.report()
    return ReportStr
def assess(fn,meter=None,key_meterscheme=None, key_line='line',key_parse='parse'):
    """
    Evaluate PROSODIC's metrical parses against human annotations.

    NOTE(review): this is the Python-2 variant (print statements); an
    otherwise-identical Python-3 variant of `assess` appears later in this
    file and shadows this one at import time.

    fn : path to a data file readable by read_ld() (defined elsewhere in
        the package); each row must carry the line text (key_line) and a
        human scansion whose 'w'/'s' characters mark weak/strong syllables
        (key_parse).  An optional 'parse_human2' column gives a second
        annotator for inter-annotator comparison.
    meter : meter object to parse with; defaults to Meter.genDefault().
    key_meterscheme : optional column naming the line's meter scheme
        ('iambic', 'trochaic', ...) used to build a baseline "dummy" parse.

    Writes two output files next to *fn* (per-line evaluation rows via
    writegen(), and an OT-style constraint-violation tableau) and prints a
    summary of syllable- and line-level accuracy versus baseline controls.
    """
    #from prosodic import Text
    import prosodic as p
    Text=p.Text
    if not meter:
        import Meter
        meter=Meter.genDefault()
    p.config['print_to_screen']=0
    def parse2list(parse):
        # Group a scansion string into runs of identical marks
        # (currently unused -- see the commented-out calls below).
        l=[]
        for i,x in enumerate(parse):
            if not l or l[-1]!=x:
                l+=[x]
            else:
                l[-1]+=x
        return l
    def get_num_sylls_correct(parse_human,parse_comp):
        # Per-syllable agreement: pad the shorter parse with 'x' so length
        # mismatches count as errors, then compare position by position.
        # Returns a list of 0/1 flags, one per (padded) syllable.
        maxlen=max([len(parse_comp),len(parse_human)])
        #parse_human=parse2list(parse_human)
        #parse_comp=parse2list(parse_comp)
        #parse_comp_forzip = parse_comp + ['x' for x in range(maxlen-len(parse_comp))]
        #parse_human_forzip = parse_human + ['x' for x in range(maxlen-len(parse_human))]
        parse_comp_forzip = parse_comp + ''.join(['x' for x in range(maxlen-len(parse_comp))])
        parse_human_forzip = parse_human + ''.join(['x' for x in range(maxlen-len(parse_human))])
        ## sylls correct?
        _sylls_iscorrect=[]
        #print '\t'.join(parse_human_forzip)
        #print '\t'.join(parse_comp_forzip)
        for syll1,syll2 in zip(parse_human_forzip,parse_comp_forzip):
            syll_iscorrect = int(syll1==syll2)
            _sylls_iscorrect+=[syll_iscorrect]
        return _sylls_iscorrect
    import codecs
    ld=read_ld(fn)
    # Output filenames: insert 'evaluated' (+ 'ot' for the tableau file)
    # and the meter id just before the original extension.
    fn_split = fn.split('.')
    ofn_split=fn_split[:-1] + ['evaluated','meter='+meter.id] + [fn_split[-1]]
    ofn_split_ot=fn_split[:-1] + ['evaluated', 'ot','meter='+meter.id] + [fn_split[-1]]
    ofn='.'.join(ofn_split)
    ofn_ot='.'.join(ofn_split_ot)
    def _print(dx):
        # Echo one evaluation row, aligning human vs. computed scansions
        # and starring the positions where they disagree.
        print
        for k,v in sorted(dx.items()):
            print k,'\t',v
        print 'HUMAN :','\t'.join(dx['parse_human'])
        print 'PROSODIC:','\t'.join(dx['parse_comp'])
        print ' ','\t'.join(['*' if x!=y else ' ' for x,y in zip(dx['parse_human'],dx['parse_comp'])])
        print
    def _recapitalize(parse,code):
        # Re-case the words of *parse* so positions marked 's' in *code*
        # come out upper case and 'w' positions lower case.
        code=' '.join([x for x in code])
        parse=parse.replace('|',' ').replace('.',' ')
        newparse=[]
        for s,c in zip(parse.split(),code.split()):
            if c=='w':
                newparse+=[s.lower()]
            else:
                newparse+=[s.upper()]
        return ' '.join(newparse)
    def _writegen():
        # Generator over per-line evaluation dicts; as a side effect it
        # writes the OT tableau file and prints a summary when exhausted.
        lines_iscorrect=[]
        lines_iscorrect_control=[]
        lines_iscorrect_control2=[]
        lines_iscorrect_human2=[]
        sylls_iscorrect_control=[]
        sylls_iscorrect_control2=[]
        sylls_iscorrect_human2=[]
        sylls_iscorrect=[]
        lines_iscorrect_nonbounded=[]
        otf=open(ofn_ot,'w')  # NOTE(review): never closed explicitly
        otf_nl=0
        for di,d in enumerate(ld):
            line=d[key_line]
            # Human scansion reduced to its 's'/'w' marks only.
            parse_human=''.join([x for x in d[key_parse].lower() if x in ['s','w']])
            if not parse_human: continue
            t=Text(line)
            t.parse(meter=meter)
            #if not t.isParsed: continue
            parse_comp=t.parse_str(viols=False, text=False).replace('|','')
            #if len(parse_comp) != len(parse_human): continue
            parse_str=t.parse_str(viols=False, text=True)
            parses_comp = [x.replace('|','') for x in t.parse_strs(viols=False,text=False)]
            parse_human2=''.join([x for x in d.get('parse_human2','').lower() if x in ['s','w']])
            #parse_human,parse_human2=parse_human2,parse_human
            ### OT
            if not otf_nl:
                # First write: emit the tableau header of constraint names.
                header=['','','']
                for c in meter.constraints:
                    header+=['[*'+c.name+']']
                otf.write('\t'.join(header)+'\n')
            humans = [parse_human]
            if parse_human2: humans+=[parse_human2]
            for _i,_parses in enumerate(t.allParses()):
                if not _parses: continue
                # Parses matching a human scansion sort to the front.
                _parses.sort(key=lambda _P: (-humans.count(_P.str_meter()), _P.totalCount))
                if not humans.count(_parses[0].str_meter()):
                    # is the good parse in the bounded ones?
                    for _bndp in t.boundParses()[_i]:
                        if _bndp.str_meter() in humans:
                            _parses.insert(0,_bndp)
                for _pi,_parse in enumerate(_parses):
                    otf_nl+=1
                    code=_parse.str_meter()
                    # Tableau row: line text (first parse only), parse
                    # string, human-match count, then per-constraint counts.
                    row=[line.encode('utf-8',errors='ignore') if not _pi else '', str(_parse) + (' [*Bounded]' if _parse.isBounded else ''), str(humans.count(code)) if code in humans else '']
                    for c in meter.constraints:
                        row+=[str(_parse.constraintCounts[c]) if _parse.constraintCounts[c] else '']
                    otf.write('\t'.join(row)+'\n')
            # Baseline controls: a strictly alternating ws... parse
            # (dummy2) and, when a scheme column is given, that scheme's
            # ideal template truncated to the computed parse's length.
            parse_comp_dummy2 = ''.join(['w' if not i%2 else 's' for i in range(len(parse_comp))])
            if key_meterscheme:
                if d[key_meterscheme]=='iambic':
                    parse_comp_dummy = ('ws'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='trochaic':
                    parse_comp_dummy = ('sw'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='anapestic':
                    parse_comp_dummy = ('wws'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='dactylic':
                    parse_comp_dummy = ('sww'*100)[:len(parse_comp)]
                else:
                    parse_comp_dummy=parse_comp_dummy2
            else:
                parse_comp_dummy=parse_comp_dummy2
            ## sylls correct?
            this_sylls_correct = get_num_sylls_correct(parse_human, parse_comp)
            this_sylls_correct_dummy = get_num_sylls_correct(parse_human, parse_comp_dummy)
            this_sylls_correct_dummy2 = get_num_sylls_correct(parse_human, parse_comp_dummy2)
            if parse_human2: this_sylls_correct_human2 = get_num_sylls_correct(parse_human, parse_human2)
            num_sylls_correct=sum(this_sylls_correct)
            num_sylls_correct_dummy = sum(this_sylls_correct_dummy)
            num_sylls_correct_dummy2 = sum(this_sylls_correct_dummy2)
            if parse_human2: num_sylls_correct_human2 = sum(this_sylls_correct_human2)
            sylls_iscorrect+=this_sylls_correct
            sylls_iscorrect_control+=this_sylls_correct_dummy
            sylls_iscorrect_control2+=this_sylls_correct_dummy2
            if parse_human2: sylls_iscorrect_human2+=this_sylls_correct_human2
            # line correct?
            line_iscorrect=int(parse_comp == parse_human)
            lines_iscorrect+=[line_iscorrect]
            line_iscorrect_dummy = int(parse_comp_dummy == parse_human)
            line_iscorrect_dummy2 = int(parse_comp_dummy2 == parse_human)
            if parse_human2: line_iscorrect_human2 = int(parse_human2 == parse_human)
            lines_iscorrect_control+=[line_iscorrect_dummy]
            lines_iscorrect_control2+=[line_iscorrect_dummy2]
            if parse_human2: lines_iscorrect_human2+=[line_iscorrect_human2]
            # line at least in list of nonbounded parses?
            line_iscorrect_nonbounded=int(parse_human in parses_comp)
            lines_iscorrect_nonbounded+=[line_iscorrect_nonbounded]
            # Lexical-stress scansion: 'U' maps to weak, anything else strong.
            parse_stress = []
            for w in t.words():
                for x in w.stress:
                    parse_stress += ['w' if x=='U' else 's']
            parse_stress=''.join(parse_stress)
            odx=d  # NOTE(review): aliases (and so mutates) the input row dict
            odx['parse_human']=parse_human
            if parse_human2:
                odx['parse_human2']=parse_human2
            odx['parse_comp']=parse_comp
            odx['parses_comp_nonbounded']=' | '.join(parses_comp)
            odx['num_sylls']=len(parse_human)
            odx['num_sylls_correct']=num_sylls_correct
            odx['num_sylls_correct_control']=num_sylls_correct_dummy
            odx['num_sylls_correct_control_iambic']=num_sylls_correct_dummy2
            if parse_human2:
                odx['num_sylls_correct_human2']=num_sylls_correct_human2
                odx['perc_sylls_correct_human2']=num_sylls_correct_human2 / float(len(parse_human))
                odx['line_iscorrect_human2']=line_iscorrect_human2
            odx['perc_sylls_correct']=num_sylls_correct / float(len(parse_human))
            odx['perc_sylls_correct_control']=num_sylls_correct_dummy / float(len(parse_human))
            odx['perc_sylls_correct_control_iambic']=num_sylls_correct_dummy2 / float(len(parse_human))
            odx['line_iscorrect']=line_iscorrect
            odx['line_iscorrect_dummy']=line_iscorrect_dummy
            odx['line_iscorrect_dummy_iambic']=line_iscorrect_dummy2
            odx['line_is_in_nonbounded_parses']=line_iscorrect_nonbounded
            odx['parse_str_human']=_recapitalize(parse_str, parse_human)
            odx['parse_str_compu']=_recapitalize(parse_str, parse_comp)
            odx['parse_str_stress']=_recapitalize(parse_str, parse_stress)
            odx['prosody_ipa']=' '.join([w.str_ipasyllstress() for w in t.words()])
            odx['prosody_stress']=' '.join([w.stress for w in t.words()])
            odx['meter_info']=str(t.meter).replace('\n',' ').replace('\t',' ')
            sumconstr=0
            for k,v in t.constraintViolations(use_weights=False,normalize=False).items():
                odx['constraint_'+k]=v
                sumconstr+=v
            odx['constraint_SUM_VIOL']=sumconstr
            #if not line_iscorrect and line_iscorrect_dummy:
            #if len(parse_comp) != len(parse_human):
            #if len(parse_human)>len(parse_comp):
            _print(odx)
            yield odx
        # Accuracy summary over all evaluated lines.
        print
        print '##'*10
        print 'RESULTS SUMMARY'
        print '##'*10
        perc_sylls_correct = sum(sylls_iscorrect) / float(len(sylls_iscorrect)) * 100
        perc_lines_correct = sum(lines_iscorrect) / float(len(lines_iscorrect)) * 100
        perc_lines_correct_control = sum(lines_iscorrect_control) / float(len(lines_iscorrect_control)) * 100
        perc_sylls_correct_control = sum(sylls_iscorrect_control) / float(len(sylls_iscorrect_control)) * 100
        perc_lines_correct_nonbound = sum(lines_iscorrect_nonbounded) / float(len(lines_iscorrect_nonbounded)) * 100
        print 'PERCENT SYLLABLES CORRECT:',round(perc_sylls_correct,2),'% [vs.',round(perc_sylls_correct_control,2),'% for control]'
        print 'PERCENT LINES CORRECT:',round(perc_lines_correct,2),'% [vs.',round(perc_lines_correct_control,2),'% for control]'
        print 'PERCENT LINES IN AVAILABLE NONBOUNDED PARSES:',round(perc_lines_correct_nonbound,2),'%'
    writegen(ofn, _writegen)
def assess(fn,meter=None,key_meterscheme=None, key_line='line',key_parse='parse'):
    """
    Evaluate PROSODIC's metrical parses against human annotations.

    NOTE(review): this is the Python-3 variant (print functions) of the
    `assess` defined earlier in this file; being defined later, this one
    shadows the earlier definition at import time.

    fn : path to a data file readable by read_ld() (defined elsewhere in
        the package); each row must carry the line text (key_line) and a
        human scansion whose 'w'/'s' characters mark weak/strong syllables
        (key_parse).  An optional 'parse_human2' column gives a second
        annotator for inter-annotator comparison.
    meter : meter object to parse with; defaults to Meter.genDefault().
    key_meterscheme : optional column naming the line's meter scheme
        ('iambic', 'trochaic', ...) used to build a baseline "dummy" parse.

    Writes two output files next to *fn* (per-line evaluation rows via
    writegen(), and an OT-style constraint-violation tableau) and prints a
    summary of syllable- and line-level accuracy versus baseline controls.
    """
    #from prosodic import Text
    import prosodic as p
    Text=p.Text
    if not meter:
        import Meter
        meter=Meter.genDefault()
    p.config['print_to_screen']=0
    def parse2list(parse):
        # Group a scansion string into runs of identical marks
        # (currently unused -- see the commented-out calls below).
        l=[]
        for i,x in enumerate(parse):
            if not l or l[-1]!=x:
                l+=[x]
            else:
                l[-1]+=x
        return l
    def get_num_sylls_correct(parse_human,parse_comp):
        # Per-syllable agreement: pad the shorter parse with 'x' so length
        # mismatches count as errors, then compare position by position.
        # Returns a list of 0/1 flags, one per (padded) syllable.
        maxlen=max([len(parse_comp),len(parse_human)])
        #parse_human=parse2list(parse_human)
        #parse_comp=parse2list(parse_comp)
        #parse_comp_forzip = parse_comp + ['x' for x in range(maxlen-len(parse_comp))]
        #parse_human_forzip = parse_human + ['x' for x in range(maxlen-len(parse_human))]
        parse_comp_forzip = parse_comp + ''.join(['x' for x in range(maxlen-len(parse_comp))])
        parse_human_forzip = parse_human + ''.join(['x' for x in range(maxlen-len(parse_human))])
        ## sylls correct?
        _sylls_iscorrect=[]
        #print '\t'.join(parse_human_forzip)
        #print '\t'.join(parse_comp_forzip)
        for syll1,syll2 in zip(parse_human_forzip,parse_comp_forzip):
            syll_iscorrect = int(syll1==syll2)
            _sylls_iscorrect+=[syll_iscorrect]
        return _sylls_iscorrect
    import codecs
    ld=read_ld(fn)
    # Output filenames: insert 'evaluated' (+ 'ot' for the tableau file)
    # and the meter id just before the original extension.
    fn_split = fn.split('.')
    ofn_split=fn_split[:-1] + ['evaluated','meter='+meter.id] + [fn_split[-1]]
    ofn_split_ot=fn_split[:-1] + ['evaluated', 'ot','meter='+meter.id] + [fn_split[-1]]
    ofn='.'.join(ofn_split)
    ofn_ot='.'.join(ofn_split_ot)
    def _print(dx):
        # Echo one evaluation row, aligning human vs. computed scansions
        # and starring the positions where they disagree.
        print()
        for k,v in sorted(dx.items()):
            print(k,'\t',v)
        print('HUMAN :','\t'.join(dx['parse_human']))
        print('PROSODIC:','\t'.join(dx['parse_comp']))
        print(' ','\t'.join(['*' if x!=y else ' ' for x,y in zip(dx['parse_human'],dx['parse_comp'])]))
        print()
    def _recapitalize(parse,code):
        # Re-case the words of *parse* so positions marked 's' in *code*
        # come out upper case and 'w' positions lower case.
        code=' '.join([x for x in code])
        parse=parse.replace('|',' ').replace('.',' ')
        newparse=[]
        for s,c in zip(parse.split(),code.split()):
            if c=='w':
                newparse+=[s.lower()]
            else:
                newparse+=[s.upper()]
        return ' '.join(newparse)
    def _writegen():
        # Generator over per-line evaluation dicts; as a side effect it
        # writes the OT tableau file and prints a summary when exhausted.
        lines_iscorrect=[]
        lines_iscorrect_control=[]
        lines_iscorrect_control2=[]
        lines_iscorrect_human2=[]
        sylls_iscorrect_control=[]
        sylls_iscorrect_control2=[]
        sylls_iscorrect_human2=[]
        sylls_iscorrect=[]
        lines_iscorrect_nonbounded=[]
        otf=open(ofn_ot,'w')  # NOTE(review): never closed explicitly
        otf_nl=0
        for di,d in enumerate(ld):
            line=d[key_line]
            # Human scansion reduced to its 's'/'w' marks only.
            parse_human=''.join([x for x in d[key_parse].lower() if x in ['s','w']])
            if not parse_human: continue
            t=Text(line)
            t.parse(meter=meter)
            #if not t.isParsed: continue
            parse_comp=t.parse_str(viols=False, text=False).replace('|','')
            #if len(parse_comp) != len(parse_human): continue
            parse_str=t.parse_str(viols=False, text=True)
            parses_comp = [x.replace('|','') for x in t.parse_strs(viols=False,text=False)]
            parse_human2=''.join([x for x in d.get('parse_human2','').lower() if x in ['s','w']])
            #parse_human,parse_human2=parse_human2,parse_human
            ### OT
            if not otf_nl:
                # First write: emit the tableau header of constraint names.
                header=['','','']
                for c in meter.constraints:
                    header+=['[*'+c.name+']']
                otf.write('\t'.join(header)+'\n')
            humans = [parse_human]
            if parse_human2: humans+=[parse_human2]
            for _i,_parses in enumerate(t.allParses()):
                if not _parses: continue
                # Parses matching a human scansion sort to the front.
                _parses.sort(key=lambda _P: (-humans.count(_P.str_meter()), _P.totalCount))
                if not humans.count(_parses[0].str_meter()):
                    # is the good parse in the bounded ones?
                    for _bndp in t.boundParses()[_i]:
                        if _bndp.str_meter() in humans:
                            _parses.insert(0,_bndp)
                for _pi,_parse in enumerate(_parses):
                    otf_nl+=1
                    code=_parse.str_meter()
                    # Tableau row: line text (first parse only), parse
                    # string, human-match count, then per-constraint counts.
                    row=[line.encode('utf-8',errors='ignore') if not _pi else '', str(_parse) + (' [*Bounded]' if _parse.isBounded else ''), str(humans.count(code)) if code in humans else '']
                    for c in meter.constraints:
                        row+=[str(_parse.constraintCounts[c]) if _parse.constraintCounts[c] else '']
                    otf.write('\t'.join(row)+'\n')
            # Baseline controls: a strictly alternating ws... parse
            # (dummy2) and, when a scheme column is given, that scheme's
            # ideal template truncated to the computed parse's length.
            parse_comp_dummy2 = ''.join(['w' if not i%2 else 's' for i in range(len(parse_comp))])
            if key_meterscheme:
                if d[key_meterscheme]=='iambic':
                    parse_comp_dummy = ('ws'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='trochaic':
                    parse_comp_dummy = ('sw'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='anapestic':
                    parse_comp_dummy = ('wws'*100)[:len(parse_comp)]
                elif d[key_meterscheme]=='dactylic':
                    parse_comp_dummy = ('sww'*100)[:len(parse_comp)]
                else:
                    parse_comp_dummy=parse_comp_dummy2
            else:
                parse_comp_dummy=parse_comp_dummy2
            ## sylls correct?
            this_sylls_correct = get_num_sylls_correct(parse_human, parse_comp)
            this_sylls_correct_dummy = get_num_sylls_correct(parse_human, parse_comp_dummy)
            this_sylls_correct_dummy2 = get_num_sylls_correct(parse_human, parse_comp_dummy2)
            if parse_human2: this_sylls_correct_human2 = get_num_sylls_correct(parse_human, parse_human2)
            num_sylls_correct=sum(this_sylls_correct)
            num_sylls_correct_dummy = sum(this_sylls_correct_dummy)
            num_sylls_correct_dummy2 = sum(this_sylls_correct_dummy2)
            if parse_human2: num_sylls_correct_human2 = sum(this_sylls_correct_human2)
            sylls_iscorrect+=this_sylls_correct
            sylls_iscorrect_control+=this_sylls_correct_dummy
            sylls_iscorrect_control2+=this_sylls_correct_dummy2
            if parse_human2: sylls_iscorrect_human2+=this_sylls_correct_human2
            # line correct?
            line_iscorrect=int(parse_comp == parse_human)
            lines_iscorrect+=[line_iscorrect]
            line_iscorrect_dummy = int(parse_comp_dummy == parse_human)
            line_iscorrect_dummy2 = int(parse_comp_dummy2 == parse_human)
            if parse_human2: line_iscorrect_human2 = int(parse_human2 == parse_human)
            lines_iscorrect_control+=[line_iscorrect_dummy]
            lines_iscorrect_control2+=[line_iscorrect_dummy2]
            if parse_human2: lines_iscorrect_human2+=[line_iscorrect_human2]
            # line at least in list of nonbounded parses?
            line_iscorrect_nonbounded=int(parse_human in parses_comp)
            lines_iscorrect_nonbounded+=[line_iscorrect_nonbounded]
            # Lexical-stress scansion: 'U' maps to weak, anything else strong.
            parse_stress = []
            for w in t.words():
                for x in w.stress:
                    parse_stress += ['w' if x=='U' else 's']
            parse_stress=''.join(parse_stress)
            odx=d  # NOTE(review): aliases (and so mutates) the input row dict
            odx['parse_human']=parse_human
            if parse_human2:
                odx['parse_human2']=parse_human2
            odx['parse_comp']=parse_comp
            odx['parses_comp_nonbounded']=' | '.join(parses_comp)
            odx['num_sylls']=len(parse_human)
            odx['num_sylls_correct']=num_sylls_correct
            odx['num_sylls_correct_control']=num_sylls_correct_dummy
            odx['num_sylls_correct_control_iambic']=num_sylls_correct_dummy2
            if parse_human2:
                odx['num_sylls_correct_human2']=num_sylls_correct_human2
                odx['perc_sylls_correct_human2']=num_sylls_correct_human2 / float(len(parse_human))
                odx['line_iscorrect_human2']=line_iscorrect_human2
            odx['perc_sylls_correct']=num_sylls_correct / float(len(parse_human))
            odx['perc_sylls_correct_control']=num_sylls_correct_dummy / float(len(parse_human))
            odx['perc_sylls_correct_control_iambic']=num_sylls_correct_dummy2 / float(len(parse_human))
            odx['line_iscorrect']=line_iscorrect
            odx['line_iscorrect_dummy']=line_iscorrect_dummy
            odx['line_iscorrect_dummy_iambic']=line_iscorrect_dummy2
            odx['line_is_in_nonbounded_parses']=line_iscorrect_nonbounded
            odx['parse_str_human']=_recapitalize(parse_str, parse_human)
            odx['parse_str_compu']=_recapitalize(parse_str, parse_comp)
            odx['parse_str_stress']=_recapitalize(parse_str, parse_stress)
            odx['prosody_ipa']=' '.join([w.str_ipasyllstress() for w in t.words()])
            odx['prosody_stress']=' '.join([w.stress for w in t.words()])
            odx['meter_info']=str(t.meter).replace('\n',' ').replace('\t',' ')
            sumconstr=0
            for k,v in list(t.constraintViolations(use_weights=False,normalize=False).items()):
                odx['constraint_'+k]=v
                sumconstr+=v
            odx['constraint_SUM_VIOL']=sumconstr
            #if not line_iscorrect and line_iscorrect_dummy:
            #if len(parse_comp) != len(parse_human):
            #if len(parse_human)>len(parse_comp):
            _print(odx)
            yield odx
        # Accuracy summary over all evaluated lines.
        print()
        print('##'*10)
        print('RESULTS SUMMARY')
        print('##'*10)
        perc_sylls_correct = sum(sylls_iscorrect) / float(len(sylls_iscorrect)) * 100
        perc_lines_correct = sum(lines_iscorrect) / float(len(lines_iscorrect)) * 100
        perc_lines_correct_control = sum(lines_iscorrect_control) / float(len(lines_iscorrect_control)) * 100
        perc_sylls_correct_control = sum(sylls_iscorrect_control) / float(len(sylls_iscorrect_control)) * 100
        perc_lines_correct_nonbound = sum(lines_iscorrect_nonbounded) / float(len(lines_iscorrect_nonbounded)) * 100
        print('PERCENT SYLLABLES CORRECT:',round(perc_sylls_correct,2),'% [vs.',round(perc_sylls_correct_control,2),'% for control]')
        print('PERCENT LINES CORRECT:',round(perc_lines_correct,2),'% [vs.',round(perc_lines_correct_control,2),'% for control]')
        print('PERCENT LINES IN AVAILABLE NONBOUNDED PARSES:',round(perc_lines_correct_nonbound,2),'%')
    writegen(ofn, _writegen)