def prosodic(self):
    """Lazily parse each line of this text with the prosodic library.

    Returns a dict mapping line index -> parsed ``prosodic.Text`` (under
    ``self.meter``).  The result is cached on ``self._prosodic`` so the
    expensive parse runs at most once.
    """
    if not hasattr(self, '_prosodic'):
        import prosodic as p
        p.config['print_to_screen'] = 0  # silence prosodic's console output
        self._prosodic = parses = {}
        # Iterate lines in index order.  (The original wrapped this in
        # enumerate() and computed len(self.lined); both were unused.)
        for i, line in sorted(self.lined.items()):
            # Strip hyphens/apostrophes that confuse prosodic's tokenizer.
            line = line.replace('-', ' ').replace("'", "").strip()
            parses[i] = p.Text(line, meter=self.meter)
    return self._prosodic
def getSymEnglish():
    """Collect the set of phoneme symbols prosodic produces for english.txt.

    Parses every line of ``english.txt`` with the module-level prosodic
    alias ``p`` and returns the set of all phonemes encountered.  The
    accumulated list is printed before deduplication (original behavior).
    """
    PHO = []
    with open("english.txt", encoding="utf-8") as f:
        content = f.read().splitlines()
    for line in content:
        x = p.Text(line)
        x.parse()
        # The original also grabbed x.bestParses()[0] into an unused
        # variable, which could raise IndexError for nothing; removed.
        PHO.extend(x.phonemes())
    print(PHO)
    return set(PHO)
def meters(line):
    """Scan a single line of verse and return ``{'meter': <scansion>}``.

    Returns ``{'meter': '???'}`` when prosodic produces no usable best
    parse for the line.
    """
    p.config['print_to_screen'] = False
    mline = p.Text(line)
    mline.parse()
    parses = mline.bestParses()
    final_json = {}
    # Truthiness check replaces the non-idiomatic `!= []` comparison.
    if parses and parses[0] is not None:
        final_json['meter'] = parses[0].str_meter()
    else:
        final_json['meter'] = '???'
    return final_json
def parse_files(meter, path='*.txt'):
    """Parse every file matching *path* with prosodic under *meter* and
    write each scansion to ``parses/parse_<meter>_<filename>``.

    Fixes over the original:
    - ``filter()`` returns an iterator in Python 3, so its result must be
      joined back into a str before being handed to ``p.Text``;
    - the input file is closed deterministically (``with``);
    - ``sys.stdout`` is restored and the output file closed even when
      parsing raises, via ``try/finally``.
    """
    printable = set(string.printable)  # hoisted: loop-invariant
    for fname in glob.glob(path):
        terminal_out = sys.stdout
        out = open('parses/parse_{}_{}'.format(meter, fname), 'w')
        sys.stdout = out  # prosodic prints its scansion to stdout
        try:
            with open(fname, 'r') as fh:
                # Keep only the first tab-separated field of each line.
                file_text = '\n'.join(line.rstrip().split('\t')[0] for line in fh)
            # Drop non-printable characters; join the filtered chars back
            # into a string (py3 bug fix — filter() is lazy).
            file_text = ''.join(c for c in file_text if c in printable)
            t = p.Text(file_text, meter=meter)
            t.parse()
            t.scansion()
        finally:
            sys.stdout = terminal_out
            out.close()
def compute(year):
    """Syllabify every name in the Social Security yob<year> file and write
    a CSV with syllable count, IPA transcription, and binary stress pattern.

    Skips the work and returns False when the output CSV already exists;
    otherwise writes ``save_dest/yob<year>.csv`` (save_dest is a module-level
    path) and returns None.  Dead commented-out debug code removed.
    """
    f_names = os.path.join('..', 'data', 'social_security_info',
                           'yob%s.txt' % year)
    f_save = os.path.join(save_dest, 'yob%s.csv' % year)
    if os.path.exists(f_save):
        print("Already completed", year)
        return False
    df = pd.read_csv(f_names, header=None,
                     names=["name", "gender", "counts"], encoding='utf-8')
    data = []
    for _, row in tqdm(df.iterrows()):
        name = row["name"]
        sx = p.Text(name).syllables()
        item = {}
        item["name"] = name
        item["gender"] = row["gender"]
        item["counts"] = row["counts"]
        item["n_syllables"] = len(sx)
        item['IPA'] = ' '.join(map(str, sx))
        if not item["IPA"]:
            # prosodic produced no syllables for this name; skip it.
            continue
        # Stress markers ' and ` open a stressed syllable in prosodic's IPA.
        item['IPA_stress'] = ''.join(
            ['1' if x[0] in "'`" else '0' for x in item["IPA"].split()])
        data.append(item)
    df = pd.DataFrame(data).set_index(["name", "gender"])
    df = df.sort_values("counts", ascending=False)
    df.to_csv(f_save, encoding='utf-8')
    print(df)
    print(f_save)
def go_ans(event):
    """Tk event callback: parse the input widget's text with prosodic,
    build a syllable-gram from the best parses, and render the 'dino'
    output into the output widget."""
    output_text.delete(1.0, 'end')
    source = input_text.get(1.0, 'end-1c')
    parsed = p.Text(source, meter='default_english')
    parsed.parse()
    parse_strings = [str(parse) for parse in parsed.bestParses()]
    syllablegram = fsyllablegram(parse_strings)
    for row in dinoit(syllablegram):
        for token in row:
            output_text.insert('end', token + ' ')
        output_text.insert('end', '\n')
def normEng(eng, delemit):
    """Convert an English string into tone-tagged syllables.

    Each syllable is suffixed with ``'5`` when prosodic parses it as
    stressed (the parse token is all upper-case) and ``'1`` otherwise,
    joined with spaces.  When *delemit* is non-empty, every character of
    the result that appears in the module-level ``symbols`` set is
    prefixed with *delemit* and all other characters are dropped.

    Robustness fix: when prosodic's parse-token count and syllable count
    disagree, pairing now stops at the shorter sequence (zip) instead of
    raising IndexError as the original did; the warning is still printed.
    """
    x = p.Text(eng)
    x.parse()
    PAR = str(x.bestParses()[0]).split("|")
    SYL = x.syllables()
    if len(PAR) != len(SYL):
        print("check dif len: ", eng)
    parts = []
    for par, syl in zip(PAR, SYL):
        # Normalize the syllable text: drop stress marks and length marks,
        # map the IPA open back vowel to plain 'a'.
        syllable = str(syl).replace("'", "").replace("ː", "").replace("ɑ", "a")
        # An all-uppercase parse token marks a stressed position.
        tone = "'5" if par == par.upper() else "'1"
        parts.append(syllable + tone)
    result = " ".join(parts)
    if delemit != "":
        result = "".join(delemit + r for r in result if r in symbols)
    return result
def mark_line(content):
    """Mark lexical stress for every word in *content*.

    Each word is rendered by one of three strategies, tried in order:
      1. monosyllables: mark the single syllable as primary ('P');
      2. vowel-cluster alignment: when the number of detected vowel
         clusters matches prosodic's syllable count (possibly after
         revision), derive stress marks from the clusters;
      3. fall back to prosodic's own stress marking; if that changes
         nothing, mark the first vowel as a last resort.

    Returns the marked words joined by spaces as a unicode string.
    Relies on module-level helpers: mark_syllable, syl_text,
    find_vowel_clusters, revise_cluster_positions,
    mark_lexical_stress_from_vowel_clusters,
    mark_lexical_stress_by_prosodic.
    """
    t = p.Text(content)
    words = t.words()
    results = []
    for i, word in enumerate(words):
        if word.isMonoSyllab():
            # Single syllable: trivially carries primary stress.
            result = mark_syllable(syl_text(word.syllables()[0]), 'P')
            # print (i, word.token.decode('utf-8'), word.syllables()[0], result)
        else:
            vowel_cluster_positions = find_vowel_clusters(word)
            syl_count = len(word.syllables())
            # NOTE(review): word.token appears to be bytes (Python 2
            # heritage) — confirm against the prosodic version in use.
            token = word.token.decode('utf-8')
            ok = len(vowel_cluster_positions) == syl_count
            if not ok:
                # Try to reconcile the cluster count with the syllable
                # count before giving up on cluster-based marking.
                revise_cluster_positions(vowel_cluster_positions, syl_count, token)
                ok = len(vowel_cluster_positions) == syl_count
            if ok:
                result = mark_lexical_stress_from_vowel_clusters(
                    word, vowel_cluster_positions)
            else:
                result = mark_lexical_stress_by_prosodic(
                    word, vowel_cluster_positions)
                if result == token:
                    # As a last resort, just mark the first vowel in the word.
                    ok = False
                    result = mark_syllable(token, 'P')
            if ok:
                pass
                # print (i, vowel_clusters_ct, syl_count, vowel_cluster_positions, token, result)
            else:
                print('##############', i, syl_count, vowel_cluster_positions, token, result)
        results.append(result)
    return u' '.join(results)
#encoding=utf-8 import prosodic as p p.config['print_to_screen'] = True input_text = u"""Let me not to the marriage of true minds Admit impediments. Love is not love Which alters when it alteration finds, Or bends with the remover to remove. O no, it is an ever-fixèd mark That looks on tempests and is never shaken; It is the star to every wand'ring bark, Whose worth's unknown, although his height be taken. Love's not Time's fool, though rosy lips and cheeks Within his bending sickle's compass come; Love alters not with his brief hours and weeks, But bears it out even to the edge of doom. If this be error and upon me proved, I never writ, nor no man ever loved.""" # input_text is some string text = p.Text(input_text) text.parse()
import prosodic as p

# Load the sonnet from disk and parse it with a violable iambic
# prose-rhythm meter.
t = p.Text('sonnet1.txt')
t.parse(meter='prose_rhythm_iambic_violable')

# Print every parse prosodic produced, then its summary statistics.
for a in t.allParses():
    print(a)
for a in t.stats():
    print(a)
import prosodic as p #t = p.Text('have you reckoned a thousand acres much?') t = p.Text('corpora/corppoetry_fi/fi.koskenniemi.txt') t.more() print print ">> printing all heavy syllables..." print t.feature('+prom.weight', True) def is_ntV(word): phonemes = word.phonemes() if phonemes[-3:-1] != [p.Phoneme("n"), p.Phoneme("t")]: return False return phonemes[-1].feature("syll") print print ">> printing all -ntV words..." nta = [word for word in t.words() if is_ntV(word)] print nta