Ejemplo n.º 1
0
	def prosodic(self):
		"""Return the cached dict of prosodic ``Text`` objects for ``self.lined``.

		On first use, every ``(key, line)`` pair of ``self.lined`` is visited in
		sorted key order; hyphens in the line are replaced by spaces, apostrophes
		are removed, surrounding whitespace is stripped, and the result is wrapped
		in ``prosodic.Text`` with ``meter=self.meter``. The resulting dict (same
		keys as ``self.lined``) is stored on ``self._prosodic`` and returned as-is
		on every later call.
		"""
		if not hasattr(self,'_prosodic'):
			# Imported lazily so the dependency is only needed when this is used.
			import prosodic as p
			p.config['print_to_screen']=0
			# Fill a local dict and publish it only once complete, so that an
			# exception part-way through cannot leave a half-built cache that
			# later calls would mistake for the finished result.
			texts={}
			for key,line in sorted(self.lined.items()):
				cleaned=line.replace('-',' ').replace("'","").strip()
				texts[key]=p.Text(cleaned,meter=self.meter)
			self._prosodic=texts
		return self._prosodic
Ejemplo n.º 2
0
def getSymEnglish():
    """Return the set of phonemes found in the lines of ``english.txt``.

    The file is read as UTF-8 from the current working directory. Each
    non-blank line is wrapped in ``p.Text`` and parsed, and the items returned
    by its ``phonemes()`` method are collected. The full list, duplicates
    included, is printed before the de-duplicated set is returned.

    Returns:
        set: the distinct items returned by ``phonemes()`` across all lines.
    """
    PHO = []
    # Read everything first so the file is closed before the slow parsing.
    with open("english.txt", encoding="utf-8") as f:
        content = f.read().splitlines()

    for line in content:
        if not line.strip():
            # A blank line carries no text to analyse.
            continue
        x = p.Text(line)
        # Kept from the original flow.
        # NOTE(review): parse() may not be required for phonemes() - confirm.
        x.parse()
        # The original also evaluated x.bestParses()[0] into an unused local,
        # which raised IndexError for any line without a parse; dropped.
        PHO.extend(x.phonemes())

    print(PHO)
    return set(PHO)
Ejemplo n.º 3
0
def meters(line):
    """Return ``{'meter': ...}`` describing the first best parse of *line*.

    Sets prosodic's ``print_to_screen`` config entry to False, parses *line*
    as a ``p.Text`` and looks at the first element of ``bestParses()``.

    Args:
        line: the text handed to ``p.Text``.

    Returns:
        dict: ``{'meter': <first best parse>.str_meter()}`` when a first best
        parse exists, otherwise ``{'meter': '???'}``.
    """
    p.config['print_to_screen'] = False
    text = p.Text(line)
    text.parse()
    best = text.bestParses()

    # A truthiness test covers the empty list as before and also None or any
    # other empty sequence, which previously crashed on best[0].
    if best and best[0] is not None:
        return {'meter': best[0].str_meter()}
    return {'meter': '???'}
Ejemplo n.º 4
0
def parse_files(meter, path='*.txt'):
    """Parse every file matching *path* and capture prosodic's printed output.

    For each matching file, only the text before the first tab of every line
    is kept (right-stripped), characters outside ``string.printable`` are
    dropped, and the result is wrapped in ``p.Text`` with the given *meter*.
    While ``parse()`` and ``scansion()`` run, ``sys.stdout`` is redirected to
    ``parses/parse_<meter>_<file>``, so whatever they print lands in that file.

    Args:
        meter: meter argument forwarded to ``p.Text``; also part of the output
            file name.
        path: glob pattern selecting the input files.

    Returns:
        None.
    """
    printable = set(string.printable)

    for f in glob.glob(path):
        # Close the input file deterministically instead of leaking the handle.
        with open(f, 'r') as src:
            file_text = '\n'.join(
                line.rstrip().split('\t')[0] for line in src)
        # On Python 3, filter() returns an iterator rather than a string, so
        # rebuild the string explicitly (same result on Python 2).
        file_text = ''.join(ch for ch in file_text if ch in printable)

        terminal_out = sys.stdout
        out = open('parses/parse_{}_{}'.format(meter, f), 'w')
        sys.stdout = out
        try:
            t = p.Text(file_text, meter=meter)
            t.parse()
            t.scansion()
        finally:
            # Always restore the real stdout and release the output file, even
            # when parsing raises.
            sys.stdout = terminal_out
            out.close()
Ejemplo n.º 5
0
def compute(year):
    """Add syllable data to one year's name file and save it as CSV.

    Reads ``../data/social_security_info/yob<year>.txt`` as a header-less CSV
    with columns ``name``, ``gender`` and ``counts``. For each row the name is
    wrapped in ``p.Text`` and its ``syllables()`` are joined with spaces into
    an ``IPA`` string; rows with an empty ``IPA`` string are skipped.
    ``IPA_stress`` holds one digit per space-separated syllable: ``'1'`` when
    the syllable starts with ``'`` or a backtick, ``'0'`` otherwise. The table
    is indexed by (name, gender), sorted by ``counts`` descending, written to
    ``<save_dest>/yob<year>.csv`` and printed together with that path.

    Returns:
        False when the output file already exists (nothing is recomputed);
        otherwise None.
    """
    source_path = os.path.join('..', 'data', 'social_security_info',
                               'yob%s.txt' % year)
    target_path = os.path.join(save_dest, 'yob%s.csv' % year)

    # Skip years that already have an output file.
    if os.path.exists(target_path):
        print("Already completed", year)
        return False

    names = pd.read_csv(source_path,
                        header=None,
                        names=["name", "gender", "counts"],
                        encoding='utf-8')

    records = []
    for _, row in tqdm(names.iterrows()):
        name = row["name"]
        syllables = p.Text(name).syllables()
        ipa = ' '.join(map(str, syllables))

        # No syllables came back for this name, so leave it out.
        if not ipa:
            continue

        stress = ''.join(
            '1' if syl[0] in "'`" else '0' for syl in ipa.split())

        # Key order determines the column order of the resulting frame.
        records.append({
            "name": name,
            "gender": row["gender"],
            "counts": row["counts"],
            "n_syllables": len(syllables),
            'IPA': ipa,
            'IPA_stress': stress,
        })

    result = pd.DataFrame(records).set_index(["name", "gender"])
    result = result.sort_values("counts", ascending=False)
    result.to_csv(target_path, encoding='utf-8')
    print(result)
    print(target_path)
Ejemplo n.º 6
0
def go_ans(event):
	"""Event callback: parse the input widget's text and fill the output widget.

	Clears ``output_text``, reads the content of ``input_text`` (without the
	final character, via the ``'end-1c'`` index), parses it with prosodic using
	the ``'default_english'`` meter, and passes the string form of every best
	parse through ``fsyllablegram`` and then ``dinoit``. Each resulting line is
	written to ``output_text`` word by word, every word followed by a space
	and every line followed by a newline.

	Args:
		event: the triggering event object; not used.
	"""
	output_text.delete(1.0, 'end')
	source=input_text.get(1.0, 'end-1c')
	parsed=p.Text(source,meter='default_english')
	parsed.parse()

	parse_strings=[str(best) for best in parsed.bestParses()]
	rows=dinoit(fsyllablegram(parse_strings))

	for row in rows:
		for token in row:
			output_text.insert('end',token+' ')
		output_text.insert('end','\n')
Ejemplo n.º 7
0
def normEng(eng, delemit):
    """Convert English text into a string of tagged syllables.

    *eng* is parsed with prosodic. The string form of the first best parse is
    split on ``"|"`` and paired by position with the text's syllables; a
    message is printed when the two lengths differ. Each syllable string has
    ``'``, ``ː`` removed and ``ɑ`` replaced by ``a``, then gets the suffix
    ``'5`` when its parse segment is unchanged by ``.lower().upper()`` and
    ``'1`` otherwise. The tagged syllables are joined with single spaces.

    When *delemit* is not the empty string, the result is rebuilt from only
    those characters that occur in the module-level ``symbols``, each one
    preceded by *delemit*.

    Args:
        eng: text handed to ``p.Text``.
        delemit: delimiter placed before every kept character, or ``""`` to
            return the space-joined form unchanged.

    Returns:
        str: the normalised syllable string.
    """
    text = p.Text(eng)
    text.parse()
    parse_parts = str(text.bestParses()[0]).split("|")
    syllables = text.syllables()
    if len(syllables) != len(parse_parts):
        print("check dif len: ", eng)

    tagged = []
    for idx, syl in enumerate(syllables):
        cleaned = str(syl).replace("'", "").replace("ː", "").replace("ɑ", "a")
        part = parse_parts[idx]
        suffix = "'5" if part.lower().upper() == part else "'1"
        tagged.append(cleaned + suffix)
    result = " ".join(tagged)

    if delemit != "":
        result = "".join(delemit + ch for ch in result if ch in symbols)
    return result
Ejemplo n.º 8
0
def mark_line(content):
    """Return *content* with a stress mark applied to every word.

    The text is split into prosodic words. A monosyllabic word has its single
    syllable's text passed to ``mark_syllable(..., 'P')``. For any other word
    the vowel-cluster positions are compared with the syllable count; on a
    mismatch ``revise_cluster_positions`` gets one chance to adjust them.
    Matching counts are marked with ``mark_lexical_stress_from_vowel_clusters``
    and mismatches with ``mark_lexical_stress_by_prosodic``. If the marked
    result equals the unmarked token, ``mark_syllable(token, 'P')`` is used as
    a last resort. Every word that did not go through the matching-count path
    cleanly is reported on stdout with a ``##############`` prefix.

    Args:
        content: text handed to ``p.Text``.

    Returns:
        The marked words joined by single spaces.
    """
    marked = []
    for index, word in enumerate(p.Text(content).words()):
        if word.isMonoSyllab():
            marked.append(mark_syllable(syl_text(word.syllables()[0]), 'P'))
            continue

        clusters = find_vowel_clusters(word)
        n_syllables = len(word.syllables())
        token = word.token.decode('utf-8')

        counts_match = len(clusters) == n_syllables
        if not counts_match:
            # The call's return value is unused; clusters is re-measured after.
            revise_cluster_positions(clusters, n_syllables, token)
            counts_match = len(clusters) == n_syllables

        if counts_match:
            marker = mark_lexical_stress_from_vowel_clusters
        else:
            marker = mark_lexical_stress_by_prosodic
        result = marker(word, clusters)

        if result == token:
            # Nothing was marked: fall back to mark_syllable on the whole token.
            counts_match = False
            result = mark_syllable(token, 'P')

        if not counts_match:
            print('##############', index, n_syllables, clusters,
                  token, result)
        marked.append(result)
    return u' '.join(marked)
Ejemplo n.º 9
0
#encoding=utf-8

import prosodic as p
# Set prosodic's 'print_to_screen' config entry to True
# (presumably this lets prosodic write its results to the console - confirm
# against the prosodic configuration documentation).
p.config['print_to_screen'] = True

# Sample input: a unicode string literal holding fourteen lines of verse, one
# verse line per text line.
input_text = u"""Let me not to the marriage of true minds
Admit impediments. Love is not love
Which alters when it alteration finds,
Or bends with the remover to remove.
O no, it is an ever-fixèd mark
That looks on tempests and is never shaken;
It is the star to every wand'ring bark,
Whose worth's unknown, although his height be taken.
Love's not Time's fool, though rosy lips and cheeks
Within his bending sickle's compass come;
Love alters not with his brief hours and weeks,
But bears it out even to the edge of doom.
If this be error and upon me proved,
I never writ, nor no man ever loved."""

# Wrap the raw string in a prosodic Text object and call its parse() method.
# This script itself neither stores nor prints the outcome of parse().
text = p.Text(input_text)
text.parse()
Ejemplo n.º 10
0
import prosodic as p
# Build a prosodic Text from the argument 'sonnet1.txt'
# (presumably a path to a text file in the working directory - confirm how
# p.Text tells file names apart from literal text).
t = p.Text('sonnet1.txt')
# Parse with the meter argument 'prose_rhythm_iambic_violable'.
t.parse(meter='prose_rhythm_iambic_violable')

# Print every item returned by allParses(), one per line.
for a in t.allParses():
    print(a)

# Print every item returned by stats(), one per line.
for a in t.stats():
    print(a)
Ejemplo n.º 11
0
import prosodic as p
# Alternative input: pass the text inline instead of a corpus path, e.g.
#t = p.Text('have you reckoned a thousand acres much?')
t = p.Text('corpora/corppoetry_fi/fi.koskenniemi.txt')
# NOTE(review): the effect of more() is not visible here - presumably it
# prints additional detail about the text; confirm against prosodic's docs.
t.more()
# print("") emits the same blank line as the bare Python 2 `print` statement
# while staying valid syntax on Python 3.
print("")

print(">> printing all heavy syllables...")
print(t.feature('+prom.weight', True))


def is_ntV(word):
    """Tell whether *word* ends in the phonemes "n", "t" plus one more phoneme.

    Args:
        word: an object exposing ``phonemes()``, which returns a sequence.

    Returns:
        False when the two phonemes before the last one are not equal to
        ``p.Phoneme("n")`` followed by ``p.Phoneme("t")``; otherwise whatever
        the last phoneme's ``feature("syll")`` returns.
    """
    segments = word.phonemes()
    # Slicing never raises, so words with fewer than three phonemes simply
    # fail the comparison.
    before_last = segments[-3:-1]
    if before_last == [p.Phoneme("n"), p.Phoneme("t")]:
        return segments[-1].feature("syll")
    return False


# print(...) calls replace the Python 2 print statements: the output is the
# same on Python 2 and the code no longer fails to compile on Python 3.
print("")
print(">> printing all -ntV words...")
# Keep every word of the text for which is_ntV() returns a truthy value.
nta = [word for word in t.words() if is_ntV(word)]
print(nta)