def getSylCMU(lowercase):
    '''
    Receives lowercase (a string). Returns two values, low and high:
    the minimum and maximum syllable counts over all pronunciations of
    the word in the CMU dictionary.

    Syllables are counted as phonemes ending in a stress digit
    (e.g. 'AH0', 'OW1'), per CMU convention.
    Raises KeyError when the word is not in CMU.
    '''
    # Count the stressed vowels once per pronunciation instead of
    # evaluating the same comprehension twice (once for min, once for max).
    counts = [len([y for y in x if isdigit(y[-1])]) for x in CMU[lowercase]]
    return min(counts), max(counts)
def getMaxMin(word): lowercase = word['word'] if lowercase not in cmu: print lowercase,' not in dictionary' lowercase = stripEndings(word) return word else: word['low'] = min([len([y for y in x if isdigit(y[-1])]) for x in cmu[lowercase]]) word['high'] = max([len([y for y in x if isdigit(y[-1])]) for x in cmu[lowercase]]) return word
def nsyl(word):
    # Syllable count of `word`: try a local CMU lookup, and on ANY failure
    # fall back to the RhymeBrain web API. (Python 2 code.)
    try:
        # NOTE(review): `d` is only bound in the except-branch below, so on
        # a fresh call this print raises NameError and always forces the
        # web lookup — confirm whether that is intended.
        print d[word.lower()]
        # Count phonemes ending in a stress digit (syllable nuclei);
        # first pronunciation only.
        out = [len(list(y for y in x if isdigit(y[-1]))) for x in cmudict[word.lower()]][0]
    except Exception as e:
        # Fetch the pronunciation from RhymeBrain and count stress digits.
        f = urllib.urlopen("http://rhymebrain.com/talk?function=getWordInfo&word="+word)
        a = json.loads(f.read())['pron'].split()
        d = []
        for word in a:  # NOTE(review): shadows the `word` parameter
            d.append(word.encode("utf-8"))
        out = [len(list(y for y in x if isdigit(y[-1]))) for x in [d]][0]
    return out
def __to_matrix(self, string):
    """
    Private method. Builds the main sudoku matrix from *string*.
    Return: the matrix, plus a list holding one flat snapshot of the
    initial cell values (used to mark which elements are constant).
    """
    matrix = []
    row = []
    is_const = []
    filled = 0
    for ch in string:
        if not isdigit(ch):
            continue  # ignore everything that is not a digit
        row.append(int(ch))
        filled += 1
        if filled == SUDOKU_SIZE:
            # One full row collected: commit it and start the next.
            matrix.append(row)
            filled = 0
            row = []
        if len(matrix) == SUDOKU_SIZE:
            break  # main matrix completed
    # Record every initial cell value in row-major order.
    snapshot = []
    for r in range(SUDOKU_SIZE):
        for c in range(SUDOKU_SIZE):
            snapshot.append(matrix[r][c])
    is_const.append(snapshot)
    return matrix, is_const
def nsyl(word):
    """Syllable count of *word* from the CMU dict; 3 when the word is unknown."""
    entries = cmu.get(word.lower())
    if not entries:
        # Out-of-vocabulary fallback.
        return 3
    first = entries[0]
    # Syllables == phonemes carrying a trailing stress digit.
    return sum(1 for ph in first if isdigit(ph[-1]))
def AO_iSylables(word):
    """Return the syllable count of *word* (first CMU pronunciation)."""
    first_pron = DICT[word.lower()][0]
    return len([ph for ph in first_pron if isdigit(ph[-1])])
def evaluate_infix_line(line: str) -> int:
    """Evaluate a single-digit infix expression containing '+', '*' and
    parentheses using the shunting-yard algorithm; operator precedence is
    supplied by op_precedence() and reductions by eval_operator().

    Whitespace is ignored; operands must be single digits.
    """
    operators: List[str] = []  # pending operators and open parens
    values: List[int] = []     # operand stack
    op: str
    for c in line.replace(" ", ""):
        if isdigit(c):
            values.append(int(c))
        elif c == "(":
            operators.append(c)
        elif c == ")":
            # Reduce until the matching '(' is popped.
            op = operators.pop()
            while op != "(":
                eval_operator(values, op)
                op = operators.pop()
        elif c in "+*":
            # Reduce every stacked operator of >= precedence before
            # pushing c (left-associative evaluation).
            while (operators and
                   (op_precedence(operators[-1]) >= op_precedence(c))):
                eval_operator(values, operators.pop())
            operators.append(c)
    # Flush the remaining operators.
    while operators:
        op = operators.pop()
        eval_operator(values, op)
    assert len(values) == 1
    return values.pop()
def check(opstring ,count , target , nlist ,idx , result,closFlag):
    # Recursive brute force: grow an arithmetic expression string from the
    # numbers of nlist (consumed in order), trying '+', '*' and one level
    # of parentheses; append `count` to `result` whenever the expression
    # evaluates exactly to `target`. closFlag records that ')' was used.
    # NOTE(review): structure reconstructed from flattened source; assumes
    # the initial opstring is non-empty (e.g. '(' or a digit), otherwise
    # opstring[-1] below would raise IndexError — confirm with the caller.
    print(opstring)
    rnum = 0
    if opstring == '()':
        return
    if opstring != "" :
        try :
            rnum = eval(opstring)  # partial strings may not parse yet
        except:
            rnum = rnum  # keep 0 until the expression becomes valid
    if rnum > target:
        return  # prune: '+'/'*' over positives only grow the value
    if rnum == target :
        result.append(count)
    if idx != len(nlist):
        if isdigit(opstring[-1]):
            # After a digit: extend with an operator, or close the paren.
            check(opstring + '+' , count , target , nlist , idx , result , closFlag)
            check(opstring + '*' , count , target , nlist , idx , result , closFlag)
            if closFlag == False :
                check(opstring + ')' , count , target , nlist , idx , result, True)
        else:
            if opstring[-1] == ')':
                # After ')': only an operator may follow.
                check(opstring + '+' , count , target , nlist , idx , result , closFlag)
                check(opstring + '*' , count , target , nlist , idx , result , closFlag)
        # Consume the next number.
        check(opstring + str(nlist[idx]) ,count + 1, target , nlist , idx +1 , result, closFlag)
    return result
def getAvgSentenceLengthAndFleschFogAndWord(filename):
    """Compute readability statistics for *filename*.

    Returns a 5-tuple:
      (avg chars per sentence, avg words per sentence,
       Flesch reading-ease score, Gunning fog index, avg word length).

    Uses the global CMU dictionary `d`; words missing from it count
    toward word totals but contribute no syllables.
    """
    sentences = getSentences(filename)
    totCharLength = 0
    totWords = 0
    syllableCt = 0
    totWordLength = 0
    complexWordCt = 0  # words with >= 3 syllables (fog-index numerator)
    for sentence in sentences:
        totCharLength += len(sentence)
        words = nltk.word_tokenize(sentence)
        global d
        for word in words:
            if word in d:
                totWordLength += len(word)
                # Syllables = phonemes ending in a stress digit;
                # first pronunciation only.
                wordSyl = [
                    len(list(y for y in x if isdigit(y[-1])))
                    for x in d[word.lower()]
                ][0]
                syllableCt += wordSyl
                if wordSyl >= 3:
                    complexWordCt += 1
        totWords += len(words)
    # Standard Flesch reading-ease formula.
    flesch = 206.835 - 1.105 * (totWords / len(sentences)) - 84.6 * (
        syllableCt / totWords)
    # Gunning fog index.
    fog = .4 * (totWords / len(sentences) + 100 * complexWordCt / totWords)
    return totCharLength / len(sentences), totWords / len(sentences), \
        flesch, fog, totWordLength / totWords
def nsyl(word, d):
    """Syllable counts of *word*, one per pronunciation.

    :param word: str
    :param d: cmudict-style mapping (lowercase word -> pronunciation lists)
    :return: list of ints
    """
    pronunciations = d[word.lower()]
    return [sum(1 for ph in pron if isdigit(ph[-1])) for pron in pronunciations]
def __to_matrix(self, string):
    """
    Private method. Creates the main sudoku matrix from *string*.
    Return: the matrix and a list holding a flat copy of the initial
    cell values (is current element const).
    """
    matrix = []
    tmp = []
    is_const = []
    j = 0
    # Iterate characters directly (no index arithmetic, no stray
    # semicolons as in the original).
    for ch in string:
        if isdigit(ch):
            tmp.append(int(ch))
            j += 1
            if j == SUDOKU_SIZE:
                # Row complete: add it to the main matrix.
                matrix.append(tmp)
                j = 0
                tmp = []
            if len(matrix) == SUDOKU_SIZE:
                break  # main matrix completed
    # Flatten the matrix row-major to record the initial values.
    tmp = [matrix[i][j] for i in range(SUDOKU_SIZE) for j in range(SUDOKU_SIZE)]
    is_const.append(tmp)
    return matrix, is_const
def nsyl(word):
    """Max syllable count over all pronunciations of *word*; 100 when unknown."""
    key = word.lower()
    if key not in d:
        # Sentinel for out-of-vocabulary words.
        return 100
    counts = [sum(1 for ph in pron if isdigit(ph[-1])) for pron in d[key]]
    ### Just one way to deal with the list of numbers that are returned back.
    return max(counts)
def AO_iSylables(AO_sWord):
    '''
    Return a count of the syllables in a word: the number of phonemes
    ending in a stress digit in the first CMU pronunciation.
    '''
    first = DICT[AO_sWord.lower()][0]
    return len([ph for ph in first if isdigit(ph[-1])])
def brailleify(rel):
    '''Turn 1.3.45 or AKPR54633-1PHI into UEB'''
    # FIXME: if we do this at all it should be in braille.py, and we
    # probably shouldn't be trying to do liblouis-level translation at
    # all.
    out = u''
    in_numeric_run = False
    for ch in rel:
        if ASCII.isdigit(ch):
            if not in_numeric_run:
                out += u'⠼'  # number sign before the first digit of a run
                in_numeric_run = True
            out += alpha_to_unicode(ueb_number_mapping[int(ch)])
        elif ch == '.':
            out += u'⠲'
        elif ASCII.isalpha(ch):
            if in_numeric_run:
                # UEB 'guidelines for technical material' suggests capital
                # letter marker, not letter sign
                out += u'⠠'
                in_numeric_run = False
            out += alpha_to_unicode(ch)
        else:
            # e.g. dash in serial
            in_numeric_run = False
            out += alpha_to_unicode(ch)
    return out
def count_syllables_primary(word, d):
    """Return a list of syllable counts, one per pronunciation of *word*
    in the cmudict-style mapping *d*; return 0 when the word is unknown.

    (The scalar 0 on a miss is kept for backward compatibility even
    though the hit path returns a list.)
    """
    try:
        return [
            len(list(y for y in x if isdigit(y[-1])))
            for x in d[word.lower()]
        ]
    except KeyError:
        # Only a missing dictionary entry is expected here; the old bare
        # except would also have hidden real bugs (e.g. word not a str).
        return 0
def nsyl(word):
    """Syllable counts per pronunciation of *word*; [0] when not in `d`."""
    try:
        prons = d[word.lower()]
    except KeyError:
        return [0]
    return [len([ph for ph in pron if isdigit(str(ph[-1]))]) for pron in prons]
def syllablecnt(word):
    """Returns the number of syllables in a word."""
    key = word.lower()
    if key in d:
        counts = [sum(1 for ph in pron if isdigit(ph[-1])) for pron in d[key]]
        if counts:
            # First pronunciation wins.
            return counts[0]
    # Fall back to the hyphenation-based counter.
    return len(h.syllables(word + ' '))
def countSyllablesInWord(cls,word):
    """Max syllable count over all CMU pronunciations of *word*;
    0 when the word is not in the dictionary."""
    lowercase = word.lower()
    cmud = cls.__getCMUDict()
    if lowercase not in cmud:
        return 0
    # Syllables == phonemes carrying a trailing stress digit.
    return max(len([ph for ph in pron if isdigit(ph[-1])])
               for pron in cmud[lowercase])
def num_syllables(word):
    """Syllable count of *word* (first pronunciation); 18 when unknown."""
    # if word in string.punctuation:
    #     return 0
    # Strip punctuation (Python-2 str.translate signature).
    word = word.translate(None, string.punctuation)
    try:
        first_pron = d[word.lower()][0]
    except KeyError:
        # Out-of-vocabulary sentinel.
        return 18
    return len([ph for ph in first_pron if isdigit(ph[-1])])
def countSyllables(word):
    """ Returns the amount of syllables in a word.

    Maximum count over all pronunciations in the global CMU dict `d`;
    None when the word has no dictionary entry.
    """
    try:
        return max([len([y for y in x if isdigit(y[-1])]) for x in d[word.lower()]])
    except KeyError:
        # Only a missing entry maps to None; the old bare except also
        # silently swallowed genuine bugs (e.g. a non-string argument).
        return None
def countsyl(word):
    """Syllable count of *word* via the CMU dict; unknown words defer to
    the manual counter."""
    word = word.lower()
    if word not in d:
        return countsylManual(word)
    # Use first pronunciation; syllables end in a stress digit.
    return sum(1 for ph in d[word][0] if isdigit(str(ph)[-1]))
def print_stress(word): forms = [list(y for y in x if isdigit(y[-1])) for x in d[word.lower()]] #print forms stress = [list(pick_stress(syll) for syll in sylls) for sylls in forms] #print stress print " ".join(stress[0]) print word
def nsyl(word):
    """Syllable counts per pronunciation of *word*; [0] when not found.

    The original returned from inside ``finally``, which silently
    discards any in-flight exception other than KeyError; a plain
    return after the try/except preserves the intended behaviour
    without masking errors.
    """
    try:
        out = [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]]
    except KeyError:
        out = [0]
    return out
def nsyl(word):
    ''' Return the number of syllables in word.

    First CMU pronunciation from the global dict `d`; unknown words are
    estimated with textstat instead.
    '''
    try:
        res = [
            len(list(y for y in x if isdigit(y[-1])))
            for x in d[word.lower()]
        ][0]
    except (KeyError, IndexError):
        # Missing entry (or empty pronunciation list): estimate instead.
        # The old bare except also swallowed unrelated errors.
        res = np.round(textstat.syllable_count(word))
    return res
def ShowAscii(char, code):
    # Classify the ascii code point `code` and print a description next
    # to its printable form `char`. (Python 2; `ascii` presumably is
    # curses.ascii — confirm against the file's imports.)
    if ascii.isalpha(code):
        print char, 'is an ascii alphabeta'
    elif ascii.isdigit(code):
        print char, 'is an ascii digital'
    elif ascii.ispunct(code):
        print char, 'is an ascii punctuation'
    else:
        print char, 'is an ascii code(not alphabeta, number or punctuation)'
def 分開音標(self, 連做伙音標):
    """Split a run of concatenated phonetic symbols: insert a space
    wherever a letter immediately follows a tone digit."""
    pieces = []
    previous = None
    for symbol in 連做伙音標:
        if previous is not None and isalpha(symbol) and isdigit(previous):
            pieces.append(' ')
        pieces.append(symbol)
        previous = symbol
    return ''.join(pieces)
def word(self, word):
    """Syllable count of *word* (first pronunciation from get_phoneme),
    or None when the lookup yields no pronunciations."""
    counts = [
        sum(1 for ph in pron if isdigit(ph[-1]))
        for pron in get_phoneme(word.lower())
    ]
    if not counts:
        return None
    return counts[0]
def num_syllables(word):
    """Syllable count of *word* (first CMU pronunciation).

    Hyphenated words use the joined form when it is in the dictionary,
    otherwise the parts are counted recursively and summed.
    """
    # cmudict.dict() is expensive; build it once and cache it on the
    # function instead of reloading it on every (possibly recursive) call.
    d = getattr(num_syllables, "_cmu", None)
    if d is None:
        d = num_syllables._cmu = cmudict.dict()
    if "-" in word:
        word2 = "".join(word.split("-"))
        if word2 in d:
            word = word2
        else:
            return sum([num_syllables(w) for w in word.split("-")])
    return list((len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]))[0]
def rhyme_from_phonemes(list1, list2):
    # Decide whether two phoneme lists rhyme: walk list1 backwards to its
    # last vowel (phoneme ending in a stress digit) and require list2 to
    # match it at the same position (ignoring the stress digit) and to
    # match everything after it exactly.
    # Returns None (falls off the end) when list1 contains no vowel.
    i = -1
    while i >= 0 - len(list1):
        if isdigit(list1[i][-1]):
            # i == -1 means the vowel is the final phoneme, so there is
            # no tail to compare.
            if i >= 0 - len(list2) and list1[i][:-1] == list2[i][:-1] and (i == -1 or list1[i + 1:] == list2[i + 1:]):
                return True
            else:
                return False
        i -= 1
def SyllableCount(AszWord): """return the max syllable count in the case of multiple pronunciations""" #http://groups.google.com/group/nltk-users/msg/81e70cb6704dc01e?pli=1 return [ len([y for y in x if isdigit(y[-1])]) for x in GzzCMUDict[AszWord.lower()] ]
def _parse_money(text):
    """Parse a money string like '€1.5M' / '€300K' / '250' into a float.
    Empty strings are returned unchanged."""
    if len(text) == 0:
        return text
    if isdigit(text[0]) == False:
        # Leading currency symbol (e.g. euro sign): drop it.
        text = text[1:]
    if "M" in text:
        return float(text[:-1]) * 1000000
    if "K" in text:
        return float(text[:-1]) * 1000
    return float(text)


def changePostions(fname):
    """Read the player CSV *fname*, evaluate '+'-style position sums and
    normalise the two money columns, then write the rows (sorted by the
    last column, descending, zero values skipped) to finalProcessed.csv.
    """
    # `with` fixes the original's leaked input handle (csvf was never
    # closed).
    with open(fname, 'r', encoding='utf8') as csvf:
        freader = csv.reader(csvf, delimiter=',', quotechar='"')
        header = next(freader)
        d = {}
        for row in freader:
            # Columns 28..53 may hold sums like "64+2": evaluate them.
            for i in range(28, 54):
                if "+" in row[i]:
                    row[i] = sum([int(x) for x in row[i].split("+")])
            # The two money columns shared duplicated parsing code.
            row[-3] = _parse_money(row[-3])   # release clause
            row[11] = _parse_money(row[11])   # market value
            d[row[1]] = row
    print(d['20801'])
    with open('finalProcessed.csv', 'w') as myfile:
        wr = csv.writer(myfile)
        header = ['place'] + header[1:] + ['FUT Price']
        wr.writerow(header)
        for k, v in sorted(d.items(), key=lambda x: x[1][-1], reverse=True):
            if v[-1] != 0:
                v = [strip_accents(str(x)) for x in v]
                wr.writerow(v)
        # (the redundant explicit close inside the with-block was removed)
    print('done')
def is_haiku(self, text):
    # Heuristic haiku detector (Python 2): tokenize `text`, accumulate
    # CMU syllable counts, and require the running total to hit exactly
    # 5 / 12 / 17 at word boundaries (the 5-7-5 pattern).
    # Texts containing digits are rejected outright.
    text_orig = text
    text = text.lower()
    if filter(str.isdigit, str(text)):
        # Python 2: filter returns a (truthy when non-empty) list.
        return False
    words = nltk.wordpunct_tokenize(re.sub('[^a-zA-Z_ ]', '', text))
    #print words
    syl_count = 0
    word_count = 0
    haiku_line_count = 0
    lines = []  # last word of each completed haiku line
    d = self.d
    for word in words:
        if word.lower() in d.keys():
            # First pronunciation's syllable count.
            syl_count += [
                len(list(y for y in x if isdigit(y[-1])))
                for x in d[word.lower()]
            ][0]
        if haiku_line_count == 0:
            if syl_count == 5:
                lines.append(word)
                haiku_line_count += 1
        elif haiku_line_count == 1:
            if syl_count == 12:
                lines.append(word)
                haiku_line_count += 1
        else:
            if syl_count == 17:
                lines.append(word)
                haiku_line_count += 1
    if syl_count == 17:
        try:
            # Rebuild the three lines of the original text using the
            # recorded boundary words.
            final_lines = []
            str_tmp = ""
            counter = 0
            for word in text_orig.split():
                str_tmp += str(word) + " "
                if lines[counter].lower() in str(word).lower():
                    final_lines.append(str_tmp.strip())
                    counter += 1
                    str_tmp = ""
            if len(str_tmp) > 0:
                final_lines.append(str_tmp.strip())
            return True
        except Exception as e:
            print e
            return False
    else:
        return False
    return True
def stripEndings(word):
    # Fallback for words missing from the cmu dict: drop the final letter
    # and retry the lookup, setting word['low'] / word['high'] in place;
    # hand off to dumbGuess() when that fails. (Python 2.)
    # NOTE(review): structure reconstructed from flattened source — the
    # else-branch is assumed to pair with the `tempCheck in cmu` test.
    temp = (word['word'])
    if (len(temp) > 1):
        tempCheck, tempLetter = temp[:-1], temp[-1]
        print 'temp check: ',tempCheck
        if (tempCheck in cmu):
            a = tempCheck
            '''
            for some reason, if I try to run tempCheck through the below,
            it gives an error... changing it to a diff variable, (set after
            entering this if statement, makes it okay. I don't understand
            why.)
            '''
            # NOTE(review): the comprehension variable shadows tempCheck/a,
            # but cmu[tempCheck] is evaluated with the outer value first,
            # so the lookup still uses the stripped word.
            word['low'] = min([len([y for y in tempCheck if isdigit(y[-1])]) for tempCheck in cmu[tempCheck]])
            print 'word["low"]: ',word['low']
            word['high'] = max([len([y for y in a if isdigit(y[-1])]) for a in cmu[a]])
            print 'word["high"]: ',word['high']
        else:
            word = dumbGuess(word)
    return word
def rhyme_from_phonemes(list1, list2):
    # Duplicate of the rhyme test above: scan list1 backwards for its last
    # vowel (phoneme ending in a stress digit); the lists rhyme when list2
    # has the same vowel at that position (stress ignored) and an
    # identical tail after it. Returns None when list1 has no vowel.
    i = -1
    while i >= 0 - len(list1):
        if isdigit(list1[i][-1]):
            if i >= 0 - len(list2) and list1[i][:-1] == list2[i][:-1] and (
                    i == -1 or list1[i + 1:] == list2[i + 1:]):
                return True
            else:
                return False
        i -= 1
def count_syllables(word, dictionary):
    """returns number of syllables in a given word using CMU's syllable
    dictionary; falls back to count_syllables_pseudo() when the word has
    no entry.

    Only the first pronunciation is considered.
    """
    phenom_list = dictionary.get(word)
    if phenom_list is None:  # was `== None`; identity is the correct test
        return count_syllables_pseudo(word)
    syllable_count = 0
    for phenom in phenom_list[0]:
        # cmu dictionary marks syllable nuclei with a trailing stress digit
        if isdigit(phenom[-1]):
            syllable_count += 1
    return syllable_count
def count_syllables(word, dictionary):
    """returns number of syllables in a given word using CMU's syllable
    dictionary (first pronunciation); defers to count_syllables_pseudo()
    on a miss."""
    phenom_list = dictionary.get(word)
    if phenom_list is None:  # `== None` replaced with the identity check
        return count_syllables_pseudo(word)
    # Stress digits on phoneme tails mark the syllable nuclei.
    return sum(1 for phenom in phenom_list[0] if isdigit(phenom[-1]))
def SyllableCalculator(text):
    """Total syllable count of *text*.

    Tokens present in the CMU dictionary contribute their maximum
    pronunciation syllable count; unknown tokens count as 1.
    """
    d = cmudict.dict()
    counter = 0.0
    # Split out ALLCAPS runs and CamelCase parts as well as plain words.
    tokens = re.findall("[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", text)
    for token in tokens:
        count = 1.0
        # Membership on the dict itself — `in d.keys()` built a fresh
        # key list for every token on Python 2.
        if token.lower() in d:
            count = max([len(list(y for y in x if isdigit(y[-1]))) for x in d[token.lower()]])
        counter = counter + count
    return counter
def num_syllables(self, word):
    """
    Returns the number of syllables in a word.  If there's more than
    one pronunciation, take the shorter one.  If there is no entry in
    the dictionary, return 1.
    """
    s = word.lower()
    if s not in self._pronunciations:
        return 1
    # Reuse the already-lowercased key instead of calling word.lower()
    # a second time, as the original did.
    return min([len([y for y in x if isdigit(y[-1])])
                for x in self._pronunciations[s]])
def is_haiku(self, text):
    # Duplicate of the haiku detector above (Python 2): accumulate CMU
    # syllable counts over the tokens and require the running total to
    # hit exactly 5 / 12 / 17 at word boundaries. Digits disqualify.
    text_orig = text
    text = text.lower()
    if filter(str.isdigit, str(text)):
        # Python 2: filter returns a list; non-empty means a digit exists.
        return False
    words = nltk.wordpunct_tokenize(re.sub('[^a-zA-Z_ ]', '',text))
    #print words
    syl_count = 0
    word_count = 0
    haiku_line_count = 0
    lines = []  # last word of each completed haiku line
    d = self.d
    for word in words:
        if word.lower() in d.keys():
            # First pronunciation's syllable count.
            syl_count += [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]][0]
        if haiku_line_count == 0:
            if syl_count == 5:
                lines.append(word)
                haiku_line_count += 1
        elif haiku_line_count == 1:
            if syl_count == 12:
                lines.append(word)
                haiku_line_count += 1
        else:
            if syl_count == 17:
                lines.append(word)
                haiku_line_count += 1
    if syl_count == 17:
        try:
            # Rebuild the three lines from the original text using the
            # recorded boundary words.
            final_lines = []
            str_tmp = ""
            counter = 0
            for word in text_orig.split():
                str_tmp += str(word) + " "
                if lines[counter].lower() in str(word).lower():
                    final_lines.append(str_tmp.strip())
                    counter += 1
                    str_tmp = ""
            if len(str_tmp) > 0:
                final_lines.append(str_tmp.strip())
            return True
        except Exception as e:
            print e
            return False
    else:
        return False
    return True
def rhymesyls_for_pronunciation(pronunciation):
    """Rhyme tail of *pronunciation*: the last vowel phoneme (stress
    digit stripped), plus the phoneme following it when one exists.
    Returns '' when the pronunciation contains no vowel."""
    tail = str()
    for pos in range(-1, -len(pronunciation) - 1, -1):
        phoneme = pronunciation[pos]
        if isdigit(str(phoneme[-1])):
            tail = phoneme[:-1]
            if pos != -1:
                # Append the phoneme immediately after the vowel.
                tail = tail + " " + pronunciation[pos + 1:][0]
            return tail
    return tail
def setSyls(self, d):
    """
    Calculate the number of syllables in self.name and store the result
    in self.syls (first pronunciation only).

    Inputs:
        d - A dictionary of words from the nltk.corpus library
    Output:
        None
    """
    if self.name not in d:
        # Unknown word: leave self.syls untouched.
        return
    first_pron = d[self.name][0]
    self.syls = len([ph for ph in first_pron if isdigit(str(ph[-1]))])
def operate(self, c):
    """Handle one keypress of numeric entry: digits are appended to the
    buffer, backspace/delete drop the last character. Always returns
    'CONTINUE'."""
    if ascii.isdigit(c):
        # Add a digit.
        self.tag_redraw()
        self._number += chr(c)
    elif c in {ascii.BS, ascii.DEL, curses.KEY_BACKSPACE}:
        # Delete a character.
        self.tag_redraw()
        self._number = self._number[:-1]
    return 'CONTINUE'
def nsyl(word):
    """Minimum syllable count of *word* over its pronunciations.

    Checks the exceptions map `exc` first, then the CMU dict `d`;
    retries without apostrophes. Returns None (preserved fall-through
    behaviour) when the word cannot be resolved at all.
    """
    key = word.lower()
    if key in exc:
        return exc[key]
    if key in d:
        # min() replaces the original's manual `mn = 999` search loop.
        return min(len(list(y for y in x if isdigit(y[-1]))) for x in d[key])
    if "'" in word:
        return nsyl(word.replace("'", ""))
def count_syllables(text):
    # count number of syllables of *text* using the global CMU dict `d`
    # (first pronunciation per word)
    wordlist = text.split()
    syllable_count = 0
    for word in wordlist:
        word = word.strip('.,!?-*();:\'\"[]{}\\')
        # Membership must test the lowercased form: the CMU dict is keyed
        # in lowercase, so the old `word in d` check silently skipped
        # every capitalised word even though the lookup used word.lower().
        if word.isalpha() and word.lower() in d:
            syllable_count += [
                len(list(y for y in x if isdigit(y[-1])))
                for x in d[word.lower()]
            ][0]
    return syllable_count
def nsyl(self, word):
    """
    From http://www.onebloke.com/2011/06/counting-syllables-accurately-in-python-on-google-app-engine/

    Looks *word* up in the CMU pronouncing dictionary held on
    self.cmu_dict and counts the stressed vowels of every pronunciation,
    e.g. the entry "SYLLABLE  S IH1 L AH0 B AH0 L" yields 3.

    Returns a list with one syllable count per pronunciation.
    """
    counts = []
    for pron in self.cmu_dict[word.lower()]:
        counts.append(sum(1 for phoneme in pron if isdigit(phoneme[-1])))
    return counts
def nsyl(word):
    # Syllable count of `word` (first CMU pronunciation). Hyphenated
    # words are summed part by part; unknown words defer to the custom
    # dictionary. (Python 2.)
    #This is needed for dealing with hypenation stuff below
    if word == "":
        return 0
    if "-" in word:
        print "Attempting hypenated word: " + str(word)
        return sum([nsyl(w) for w in word.split("-")])
    try:
        #returns the syllable length of a word - d actually returns a list of phonetics, so by default choose first length
        return [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]][0]
    except KeyError as e:
        return custom_dictionary.get_nsyl(word)
def SyllableCalculator(text):
    """Total syllable count of *text* (duplicate of the variant above).

    Dictionary tokens contribute their maximum pronunciation syllable
    count; unknown tokens count as 1.
    """
    d = cmudict.dict()
    counter = 0.0
    tokens = re.findall("[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", text)
    for token in tokens:
        count = 1.0
        # `in d` instead of `in d.keys()` — the latter built a new key
        # list per token on Python 2 (O(n) membership).
        if token.lower() in d:
            count = max([
                len(list(y for y in x if isdigit(y[-1])))
                for x in d[token.lower()]
            ])
        counter = counter + count
    return counter
def syllable_count(self, word):
    """
    Count syllables in a word.

    Uses the NLTK CMU dictionary for syllabication. The dictionary is
    loaded once and cached on self.d instead of being rebuilt on every
    call (cmudict.dict() is expensive).

    Args:
        word (string)
    Returns:
        int: minimum syllable count over all pronunciations
    """
    if getattr(self, "d", None) is None:
        self.d = cmudict.dict()
    return min([len([y for y in x if isdigit(y[-1])])
                for x in self.d[str(word).lower()]])
def suff(dict):
    """Append grapheme/rhyme-suffix pairs to suff_a.txt.

    For each word whose spelling matches the final-syllable grapheme
    pattern, take its shortest pronunciation and emit the phonemes from
    the last vowel (phoneme ending in a stress digit) onward.
    (Python 2: iteritems.)
    """
    # One pattern, searched once per word — the original ran re.search
    # twice with the same literal.
    suffix_re = "((?i)[BCDFGHJKLMNPQRSTVWXZ]{1,2}[AEIOUY]+[BCDFGHJKLMNPQRSTVWXZ]*(E|ED)?('[A-Z]{1,2})?)(?![a-zA-Z]+)"
    with open('suff_a.txt', 'a') as f:
        for word, vals in dict.iteritems():
            match = re.search(suffix_re, word)
            if match:
                graphemes = match.group()
                val = min(vals, key=len)
                # for val in vals:
                i = -1
                while i >= 0 - len(val):
                    if isdigit(val[i][-1]):
                        # `suffix` avoids shadowing the str builtin.
                        suffix = " ".join(val[i:])
                        f.write(graphemes + ' ' + suffix + '\n')
                        f.write(graphemes[1 - len(graphemes):] + ' ' + suffix + '\n')
                        break
                    i -= 1
def rhymesyls(word):
    """Rhyme tail of *word*: the last vowel phoneme (stress digit
    stripped) plus the phoneme following it, from the shortest CMU
    pronunciation. Returns 'NORHYME' for unknown words and '' when the
    pronunciation contains no vowel."""
    key = word.lower()
    if key not in cmu_dictionary:
        return "NORHYME"
    phones = min(cmu_dictionary[key], key=len)
    tail = str()
    for pos in range(-1, -len(phones) - 1, -1):
        if isdigit(str(phones[pos][-1])):
            tail = phones[pos][:-1]
            if pos != -1:
                tail = tail + ' ' + phones[pos + 1:][0]
            return tail
    return tail
def event(self, ev, c):
    # Dispatch one key event for this widget: a digit key stores the
    # typed character in self.batteries, the letter keys toggle the
    # matching option, and Enter invokes every registered callback in
    # self.notify_list with this widget as argument.
    if ascii.isdigit(ev):
        self.batteries = c
    elif c == 'p':
        self.parallel.toggle()
    elif c == 'v':
        self.vowels.toggle()
    elif c == 's':
        self.serial.toggle()
    elif c == 'f':
        self.frk.toggle()
    elif c == 'c':
        self.car.toggle()
    elif ev in [curses.KEY_ENTER, 10]:  # 10 == '\n' (plain Enter)
        for cb in self.notify_list:
            cb(self)
def suff(dict):
    """Append grapheme/rhyme-suffix pairs to suff_a.txt for every word in
    *dict* matching the module-level `pattern`.

    The shortest pronunciation is used; output starts at its last vowel
    (phoneme ending in a stress digit). (Python 2: iteritems.)
    """
    # `with` replaces the manual open/close and survives write errors.
    with open('suff_a.txt', 'a') as f:
        for word, vals in dict.iteritems():
            match = re.search(pattern, word)  # search once, not twice
            if match:
                graphemes = match.group()
                val = min(vals, key=len)
                # for val in vals:
                i = -1
                while i >= 0 - len(val):
                    if isdigit(val[i][-1]):
                        # renamed to avoid shadowing the str builtin
                        suffix = " ".join(val[i:])
                        f.write(graphemes + ' ' + suffix + '\n')
                        f.write(graphemes[1 - len(graphemes):] + ' ' + suffix + '\n')
                        break
                    i -= 1
def nsyl(self, word):
    """
    From http://www.onebloke.com/2011/06/counting-syllables-accurately-in-python-on-google-app-engine/

    Looks up the pronunciation(s) of *word* in the CMU pronouncing
    dictionary on self.cmu_dict and counts the stressed vowels of each —
    e.g. the raw entry "SYLLABLE  S IH1 L AH0 B AH0 L" gives 3.

    Returns a list with one count per pronunciation.
    """
    prons = self.cmu_dict[word.lower()]
    return [len([seg for seg in pron if isdigit(seg[-1])]) for pron in prons]
from nltk.corpus import cmudict
from operator import itemgetter
from collections import defaultdict


def getSyllables(word):
    """Syllable count of *word* (first CMU pronunciation); 0 for ''."""
    if word == "":
        return 0
    # Cache the CMU dict on the function: cmudict.dict() is expensive and
    # the original rebuilt it on every call (i.e. once per input line).
    d = getattr(getSyllables, "_cmu", None)
    if d is None:
        d = getSyllables._cmu = cmudict.dict()
    return [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]][0]


def updateInfoNewFile():
    """Read wordfreq.txt and return its contents with a syllable-count
    column appended to every 5-field line."""
    freq = defaultdict(int)
    output = ""
    with open('wordfreq.txt') as inputfile:
        for line in inputfile:
            items = line.strip().split(',')
            freq[items[1]] = float(items[4])
            # Output the line so far
            output = output.strip("\n") + line
            # If missing syllable info, generate it
            if len(items) == 5:
                syllables = getSyllables(items[1])
                output += "," + str(syllables)
            output = output + "\n"
    # NOTE(review): the flattened original may have returned inside the
    # loop (processing only the first line); returning after the loop is
    # assumed to be the intent. The unused `results` local was removed.
    return output


output = updateInfoNewFile()
# NOTE(review): the original opened workfile.txt for writing but never
# wrote `output` to it nor closed it; closing at least fixes the leak —
# confirm whether a write was intended.
f = open('workfile.txt', 'w')
f.close()
def suff(dict):
    """Append grapheme/rhyme-suffix pairs to suff_a.txt for every word in
    *dict* whose spelling matches the final-syllable grapheme pattern.

    The shortest pronunciation is used; output starts at its last vowel
    (phoneme ending in a stress digit). (Python 2: iteritems.)
    """
    # Searched once per word — the original ran re.search twice with the
    # same long literal.
    suffix_re = "((?i)[BCDFGHJKLMNPQRSTVWXZ]{1,2}[AEIOUY]+[BCDFGHJKLMNPQRSTVWXZ]*(E|ED)?('[A-Z]{1,2})?)(?![a-zA-Z]+)"
    # `with` replaces the manual open/close and survives write errors.
    with open('suff_a.txt', 'a') as f:
        for word, vals in dict.iteritems():
            match = re.search(suffix_re, word)
            if match:
                graphemes = match.group()
                val = min(vals, key=len)
                # for val in vals:
                i = -1
                while i >= 0 - len(val):
                    if isdigit(val[i][-1]):
                        # `suffix` avoids shadowing the str builtin.
                        suffix = " ".join(val[i:])
                        f.write(graphemes + ' ' + suffix + '\n')
                        f.write(graphemes[1 - len(graphemes):] + ' ' + suffix + '\n')
                        break
                    i -= 1