Example #1
    def syllables_for_term(self, term):
        if is_special_punctuation(term):
            return 0

        # Handle a trailing '+' before stripping the term
        # (Disney+, Apple+, etc.)
        r = re.match(r'(.+)\+$', term)
        if r:
            return self.syllables_for_term(r.group(1)) + 1

        stripped_term = clean_term(term)
        try:
            if has_syllable_exception(stripped_term):
                return syllapy.count(stripped_term)

            r = re.match("(.+)'s$", stripped_term)
            if r:
                # Most possessives don't add a syllable
                return syllapy.count(r.group(1))

            r = re.match('([0-9]{4})s?$', stripped_term)
            if r:
                terms = num2words(r.group(1), to='year').split()
                return reduce(
                    operator.add,
                    [self.syllables_for_term(term) for term in terms])

            if re.match('[0-9,]+$', stripped_term):
                terms = num2words(int(stripped_term.replace(',', ''))).split()
                return reduce(
                    operator.add,
                    [self.syllables_for_term(term) for term in terms])

            r = re.match('([0-9]+)-([0-9]+)$', stripped_term)
            if r:
                s1 = self.syllables_for_term(r.group(1))
                s2 = self.syllables_for_term(r.group(2))
                if s1 and s2:
                    return s1 + s2 + 1
                else:
                    return 0

            r = re.match('([^-]+)[-/](.+)$', stripped_term)
            if r:
                s1 = self.syllables_for_term(r.group(1))
                s2 = self.syllables_for_term(r.group(2))

                if s1 and s2:
                    return s1 + s2
                else:
                    return 0

            c = syllapy.count(stripped_term)
            return c

        except RuntimeError as err:
            raise SyllableCountError("Unable to count syllables for term") from err
Example #2
def _get_num_syllables(doc: Doc, min_syllables: int = 1):
    """Return number of words in the document.
    Filters punctuation and words that start with apostrophe (aka contractions)
    """
    text = (word for word in doc if not word.is_punct and "'" not in word.text)
    syllables_per_word = tuple(syllapy.count(word.text) for word in text)
    return sum(c for c in syllables_per_word if c >= min_syllables)
Example #3
def validate_haiku(form, haiku):
    words = haiku.data.split()
    syllables = 0
    for word in words:
        word = word.lower()
        syllables += syllapy.count(word)
    if syllables != 17:
        raise ValidationError("Check the number of syllables")
Example #4
 def syllable_count(word):
     try:  # look in cmudict
         return [
             len(list(y for y in x if y[-1].isdigit()))
             for x in cmu_d[word.lower()]
         ][0]
     except KeyError:  # look in syllapy
         return syllapy.count(word)
Example #5
    def forcast(self, doc):
        num_words = self.get_num_words(doc)

        if num_words < 150:
            return 0
        mono_syllabic = 0
        for i in range(150):
            if syllapy.count(doc[i].text) == 1:
                mono_syllabic += 1
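        # FORCAST readability grade = 20 - (one-syllable words in the 150-word sample / 10)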
        return 20 - (mono_syllabic / 10)
Example #6
def countDifficult(lst):
    # Count unique words with more than two syllables ("difficult" words)
    difficult_words = set()
    for e in lst:
        if syllapy.count(e) > 2:
            difficult_words.add(e)
    return len(difficult_words)
Example #7
    def forcast(self, doc):
        """Returns the Forcast score for the document.
        """
        num_words = _get_num_words(doc)

        if num_words < 150:
            return 0

        mono_syllabic = 0
        for i in range(150):
            if syllapy.count(doc[i].text) == 1:
                mono_syllabic += 1
        return 20 - (mono_syllabic / 10)
Example #8
def haikuFormatter(haiku):
    syllables = 0
    words = haiku.split()
    haikuFormatted = ''
    for word in words:
        syllables += syllapy.count(word)
        # Break the line after the 5th and 12th syllables (5-7-5 structure)
        if syllables == 5 or syllables == 12:
            word = word + os.linesep
        haikuFormatted += ' ' + word
    haikuFormatted = haikuFormatted + os.linesep
    return haikuFormatted
Example #9
 def syllableCount(self, article):
     count = 0
     for word in article:
         count += syllapy.count(word)
     return count
Example #10
def main():
    #f=open('0418TestOnlineG6.csv', 'w')
    for e in IDlist:
        dataresult = {}
        '''
        with open('Z:/Special Projects/ISASP/2019/DIF/DIF EL/Transcription/Yi_AI_6WR/'+e, encoding="ISO-8859-1" ) as file:
            essay= file.read()
            
        '''
        with open(
                'Z:/Special Projects/ISASP/2019/DIF/DIF EL/Transcription/Yi_AI_6WR/AAAVLP13819000473707.txt',
                encoding="ISO-8859-1") as file:
            essay = file.read()

        # remove the footer in txt
        id_num = pulloid(essay)
        print(id_num)

        essay = noft_text(essay)
        essay = essay[30:]
        dataresult['id_num'] = id_num

        # Process the prompt: remove stopwords, numbers, and punctuation
        Promwdstxt = cleanPromt(prom6)
        validwdsinProm = getUniqWords(
            Promwdstxt
        )  # The list contains only the unique words in the prompt, excluding stopwords

        cleaness_text = cleanPromt(essay)
        cleaness_text = noft_text(cleaness_text)

        cleanlst_essay = getWordlist(cleaness_text)

        #Vocab in the essay
        #essay_Uniq=getVocabNum(cleaness_text)
        Vocabnum = getVocabNum(cleaness_text)
        #l=getVocab(cleaness_text)
        print(getVocab(cleaness_text))

        # count how many words in the essay are from the list
        Num_wdsFromProm = wdsAppinProm(validwdsinProm, cleanlst_essay)

        # Number of words in the essay that also appear in the prompt
        #es_wdsinpm=wdsAppinProm(cleanedess)

        #count how many grammar errors
        #No_gram=countGram(gramerror(essay))

        misspelled = spell.unknown(cleanlst_essay)
        for e in cleanlst_essay:
            if e in wrong.keys():
                misspelled.add(e)
        c = list()
        for e in misspelled:
            if e in ConTract.keys():
                c.append(e)
            elif e in compo.keys():
                c.append(e)
        for e in c:
            misspelled.remove(e)


        # Contractions and compound words are no longer counted as misspelled
        print(misspelled)
        Num_Mispell = len(misspelled)

        w = re.split(r"[^-\w]+", cleaness_text)
        w = [string for string in w if string != ""]
        print(w)

        wordcount = len(w)
        dataresult['wordcount'] = wordcount

        from nltk.tokenize import sent_tokenize, word_tokenize

        sentcount = len(sent_tokenize(essay))
        ASL = (wordcount / sentcount)

        dataresult['sentcount'] = sentcount
        dataresult['Avsenlg'] = ASL

        ASW = (syllapy.count(cleaness_text)) / wordcount
        ASL = (wordcount / sentcount)
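        # Flesch Reading Ease = 206.835 - 1.015 * ASL - 84.6 * ASW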
        Fre = 206.835 - (1.015 * ASL) - (84.6 * ASW)

        dataresult['Times using words from prompt'] = Num_wdsFromProm
        dataresult['Vocab in essay'] = Vocabnum

        dataresult[
            'Essay percentage using words from Prompt'] = "{:.2f}".format(
                (Num_wdsFromProm / wordcount) * 100)

        dataresult['No of mispelled words'] = Num_Mispell
        dataresult['Percentage of mispelled words'] = "{:.2f}".format(
            (Num_Mispell / wordcount) * 100)

        #dataresult['No of grammar errors']=countGram(gramerror(essay))

        dw = countDifficult(cleanlst_essay)
        dataresult['No of Difficult words'] = dw

        Estimatedlevel = textstat.text_standard(essay)
        dataresult['Estimatedlevel'] = Estimatedlevel

        ease = Fre
        dataresult['reading_ease'] = ease
        '''
        sim=Similarity(cleaness_text,Promwdstxt)
        dataresult['Cosine Similarity with Prompt']="{:.2f}".format(sim[0][1])
        
        posiness=Posscore(cleanlst_essay)
        dataresult['No of Positives in essay']=posiness
        
        PerPos="{:.2f}".format((posiness/wordcount)*100) 
        dataresult['percentage of Positives in essay']=PerPos
        
        negaess=Negscore(cleanlst_essay)
        dataresult['No of Negatives in essay']=negaess


        PerNeg="{:.2f}".format((negaess/wordcount)*100) 
        dataresult['percentage of Negatives in essay']=PerNeg
        
        L=wordDict.LLink()
        
        lk=0
        for word in cleanlst_essay:
            
            if word in L:
                lk+=1
                
                
        dataresult['No of Linking words in essay']=lk
        
        PerLink="{:.2f}".format((lk/wordcount)*100) 
        dataresult['percentage of Linking Words in essay']=PerLink
        '''
        import csv
        for v in dataresult:
            dataresult[v] = str(dataresult[v])

        with open('0418TestOnlineG6.csv', 'a+', newline='') as f:
            writer = csv.writer(f, quoting=csv.QUOTE_ALL)
            writer.writerow(list(dataresult.keys()))
            writer.writerow(list(dataresult.values()))
Example #11
def test_none():
    """Testing passing `None` type."""
    assert syllapy.count(None) == 0
Example #12
def test_bool():
    """Testing passing `bool` type."""
    assert syllapy.count(True) == 0
Example #13
 def get_num_syllables(self, doc, min_syllables=1):
     # filter out punctuation and words that contain an apostrophe (i.e. contractions)
     text = (word for word in doc
             if not word.is_punct and "'" not in word.text)
     syllables_per_word = tuple(syllapy.count(word.text) for word in text)
     return sum(c for c in syllables_per_word if c >= min_syllables)
Example #14
def test_space():
    """Testing passing space"""
    assert syllapy.count(" ") == 0
Example #15
import os
import csv
import syllapy

syllable_file_path = os.path.join(os.path.dirname(__file__), 'nyt_haiku', 'data', 'syllable_counts.csv')
with open(syllable_file_path, newline='') as file:
    reader = csv.reader(file)
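    # Print each word whose recorded syllable count disagrees with syllapy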
    for row in reader:
        if len(row) == 2:
            word = row[0].lower()
            count = int(row[1])
            if count != syllapy.count(word):
                print(f"{word},{count}")
Example #16
def test_int():
    """Testing passing `None` type."""
    assert syllapy.count(2) == 0
Example #17
def test_number_end_word():
    """Test number at end of word"""
    assert syllapy.count("dog123") == 0
Example #18
# Following demo: https://medium.com/better-programming/nlp-with-python-build-a-haiku-machine-in-50-lines-of-code-6c7b6de959e3

import spacy
import string
from spacy.matcher import Matcher
import syllapy

count = syllapy.count('additional')
import random
import re
import dominate
from dominate.tags import *
import pdfkit
from fpdf import FPDF
import os

title = 'Almost A Haiku - NaNoGenMo 2020'

nlp = spacy.load("en_core_web_sm")  #loading a language model
matcher2 = Matcher(nlp.vocab)  #https://spacy.io/api/matcher
matcher3 = Matcher(nlp.vocab)
matcher4 = Matcher(nlp.vocab)
matcher5 = Matcher(nlp.vocab)

# POS = Part of Speech
pattern = [{
    'POS': {
        "IN": ["NOUN", "ADP", "ADJ", "ADV"]
    }
}, {
    'POS': {
Example #19
def make_verse(incipit, syllables_length, should_rhyme_with=False):

    incipit = incipit[:1000]

    incipit_length = len(incipit)
    top_k = config_top_k
    errors = 0
    added_words = 0

    # We add one word at a time until we reach the minimum/maximum length

    for i in range(651):
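        # Ask GPT-2 for one more token (length=1) on top of the current incipit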
        full_output = keras_gpt_2.generate(text_model,
                                           bpe, [incipit],
                                           length=1,
                                           top_k=top_k)
        full_output = full_output[0]
        print('output', full_output)

        newOutput = full_output[len(incipit):]
        print('NEW output', newOutput)

        if (all(x.isalpha() or x.isspace() for x in newOutput)
                and all(x not in newOutput for x in config_forbidden)):
            incipit = full_output
            added_words += 1
            errors = 0
        else:
            errors += 1
            if added_words == 0 and errors > 10:
                incipit = incipit + 'and '
            if errors > 10:
                incipit = incipit + 'and '

        current_length = len(incipit) - incipit_length
        print('length', current_length)

        syllables_count = syllapy.count(full_output[incipit_length:])
        print('syllables', syllables_count)

        print('>>>>>>>>>>>>>>>>>>>>>>>>>>', syllables_count,
              ' in : ' + full_output[incipit_length:])
        # If we hit the target syllable count exactly, we stop the generation

        if syllables_count == syllables_length:
            print('Syllables length reached')
            break

        # If we overshot the target, drop the last two words and keep generating
        if syllables_count > syllables_length:
            print('TOO MANY SYLLABLES')
            spaces = [
                pos for pos, char in enumerate(full_output) if char == ' '
            ]
            # remove the last two words
            incipit = full_output[:spaces[-2]]

    result = full_output[incipit_length:]

    # we clean double spaces in the result
    for i in range(3):
        result = result.replace('  ', ' ')

    result = result.strip()

    if should_rhyme_with:
        rhymes = rhymer.get_perfect_rhymes(should_rhyme_with)
        rhyme = should_rhyme_with

        print('all rhymes ', rhymes)

        all_rhymes = []

        if 2 in rhymes and rhymes[2]:
            all_rhymes = rhymes[2]
        else:
            for r in rhymes:
                if rhymes[r]:
                    all_rhymes = rhymes[r]
                    break

        print('rhymes ', all_rhymes)

        random.shuffle(all_rhymes)

        for word in all_rhymes:
            print('>>> ', word)
            if (word != should_rhyme_with and len(word) > 2
                    and all(x.isalpha() or x.isspace() for x in word)):
                rhyme = word
                break

        print('chosen ', rhyme)

        # shorten the result until result + rhyme fits the syllable target

        while True:

            toTest = result + ' ' + rhyme
            syllables_count = syllapy.count(toTest)
            print('checking ', toTest)
            print('syllables ', syllables_count)

            if (syllables_count <= syllables_length):
                break
            else:
                spaces = [
                    pos for pos, char in enumerate(result) if char == ' '
                ]
                # remove only the last word
                result = result[:spaces[-1]]

        while True:
            spaces = [pos for pos, char in enumerate(result) if char == ' ']

            if len(spaces) > 2:
                result = result[:spaces[-1]]
            else:
                return False

            solutions = nlp(result + ' ' + nlp.tokenizer.mask_token + ' ' +
                            rhyme)
            print('solution', solutions)

            acceptable_solution = False

            for solution in solutions:
                solution = solution['sequence']
                solution = solution.replace('[CLS]', '')
                solution = solution.replace('[SEP]', '')
                solution = solution.strip()

                syllables_count = syllapy.count(solution)
                print(solution, syllables_count)

                if (syllables_count == syllables_length):
                    acceptable_solution = solution
                    break

            if acceptable_solution:
                result = acceptable_solution
                break

    result = result.encode('utf-8', errors='ignore').decode('utf-8')
    return result
Example #20
def test_in_dict():
    """Test words in known dataset"""
    assert syllapy.count("because") == 2
    assert syllapy.count("woman") == 2
    assert syllapy.count("international") == 5
Example #21
def test_not_in_dict():
    """Test word not in known dataset"""
    assert syllapy.count("ostentatious") == 4
Example #22
def test_punctuation_only():
    """Testing punctuation only"""
    for punct in punctuation:
        assert syllapy.count(punct) == 0
Example #23
def main():
    f = open('0418Test3G6.csv', 'w')
    for e in namelist:
        dataresult = {}
        with open(
                'Z:/Special Projects/ISASP/2019/DIF/DIF EL/Transcription/6WR/'
                + e,
                encoding="ISO-8859-1") as file:
            essay = file.read()
        # remove the footer in txt
        id_num = pullid(essay)
        #pull (id_num)
        dataresult['id_num'] = id_num
        print(id_num)
        essay = essay[22:]
        # Process the prompt: remove stopwords, numbers, and punctuation
        Promwdstxt = cleanPromt(prom6)
        validwdsinProm = getUniqWords(
            Promwdstxt
        )  # The list contains only the unique words in the prompt, excluding stopwords

        cleaness_text = cleanPromt(essay)
        cleanlst_essay = getWordlist(cleaness_text)

        #Vocab in the essay
        Vocabnum = getVocabNum(cleaness_text)
        allwords = getVocab(cleaness_text)
        #print(allwords)

        # count how many words in the essay are from the list
        Num_wdsFromProm = wdsAppinProm(validwdsinProm, cleanlst_essay)

        misspelled = spell.unknown(cleanlst_essay)
        for e in cleanlst_essay:
            if e in wrong.keys():
                misspelled.add(e)
        c = list()
        for e in misspelled:
            if e in ConTract.keys():
                c.append(e)
            elif e in compo.keys():
                c.append(e)
        for e in c:
            misspelled.remove(e)
        # Contractions and compound words are no longer counted as misspelled

        #print(misspelled)
        Num_Mispell = len(misspelled)

        w = re.split(r"[^-\w]+", cleaness_text)
        w = [string for string in w if string != ""]
        #print(w)

        wordcount = len(w)
        dataresult['wordcount'] = wordcount
        #es_wdsinpm=wdsAppinProm(cleanedess)
        PrtWdsFrProm = (Num_wdsFromProm / wordcount) * 100

        sentcount = len(sent_tokenize(essay))
        ASL = "{:.2f}".format(wordcount / sentcount)
        dataresult['sentcount'] = sentcount
        dataresult['Avsenlg'] = ASL

        ASW = (syllapy.count(cleaness_text)) / wordcount
        print(ASW)
        ASL = float(ASL)
        Fre = 206.835 - (1.015 * ASL) - (84.6 * ASW)

        dataresult['Times using words from prompt'] = Num_wdsFromProm
        dataresult['Vocab in essay'] = Vocabnum

        dataresult[
            'Essay percentage using words from Prompt'] = "{:.2f}".format(
                (Num_wdsFromProm / wordcount) * 100)

        dataresult['No of mispelled words'] = Num_Mispell
        dataresult['Percentage of mispelled words'] = "{:.2f}".format(
            (Num_Mispell / wordcount) * 100)

        #dataresult['No of grammar errors']=countGram(gramerror(essay))

        dw = countDifficult(cleanlst_essay)

        dataresult['No of Difficult words'] = dw

        Estimatedlevel = textstat.text_standard(essay)
        dataresult['Estimatedlevel'] = Estimatedlevel

        ease = Fre
        dataresult['reading_ease'] = ease
        '''
Example #24
def test_number_in_word():
    """Test number in word"""
    assert syllapy.count("d0g") == 0
Example #25
def test_case_insensitive():
    """Test words changing capitalization"""
    assert syllapy.count("Norway") == 2
    assert syllapy.count("norway") == 2
    assert syllapy.count("Ohio") == 3
    assert syllapy.count("ohio") == 3
Example #26
def test_empty():
    """Test empty string"""
    assert syllapy.count("") == 0
Example #27
def test_simple():
    """Simple Test."""
    assert syllapy.count("dog!!!!!") == 1
Example #28
def test_number_start_word():
    """Test number at start of word"""
    assert syllapy.count("4dog") == 0
Example #29
def syllable_counter(tokenized_list):
    return sum([syllapy.count(token) for token in tokenized_list])
Example #30
def verse_gen(verse_input, syllable_length):
    global verse_words
    global verse_string
    global verse_count
    global verse_syllable_count
    
    global verse_one_string

#Go word by word, counting syllables, until we reach "syllable_length" syllables.
#If we go over the required amount, restart the count further along the input.
#initialize counter
    y=0
    x=1
    verse_syllable_count=0

#Split to remove whitespace
    verse_words=verse_input.split(' ')

    while verse_syllable_count < syllable_length:
        print("Adding next word to the string")

#Join the current slice of words into the candidate string
        verse_string=' '.join(verse_words[y:x])

#Count the syllables
        verse_syllable_count = syllapy.count(verse_string)
    
#increment x
        x=x+1

#Get new input if the words don't make 5 syllables
#        if verse_syllable_count > syllable_length:
#            print("Need new input")
#            text_generator(state_dict)
#            verse_input = GPT2_output
#            verse_gen(verse_input, syllable_length)

#If the words make 5 syllables, check for a period or comma at the end.
#Use the string if so, get new input if not.
#        if verse_syllable_count == syllable_length:
#            if verse_string[-1] == "." or verse_string[-1] == ",":
#                print(verse_string)
#            else:
#                print("Need input ending with punctuation")
#                verse_gen(verse_input, syllable_length)
            
        
        
## New way: scan along the input looking for haiku-able phrases; if none are found, get new input

        if verse_syllable_count == syllable_length:
            print(verse_string)
            return verse_string
    
        if verse_syllable_count > syllable_length:
            # Reinitialize the string and keep going
            print("Moving up in string")
            print(verse_string)
            
            #reinitialize verse_string
            verse_string=""
            verse_syllable_count=0
            y=x-1