def test_filter_rules():
    """filter_rules keeps only the rules whose conditions some word satisfies."""
    nasalization_rule = {
        'applies': {'positive': ['nasal']},
        'conditions': {'negative': ['nasal'], 'positive': ['syllabic']},
        'name': 'nasalization'
    }
    valid_rule = {
        'applies': {'positive': ['tonal']},
        'conditions': {'positive': ['syllabic']},
        'name': 'valid'
    }
    first_word = Word([Segment(['consonantal'], ['tonal']),
                       Segment(['sonorant'], ['high'])])
    second_word = Word([Segment(['syllabic', 'low'], []),
                        Segment(['high'], ['sonorant'])])
    assert filter_rules([first_word, second_word],
                        [nasalization_rule, valid_rule]) == [valid_rule]
def insert(self, value):
    """
    ---------------------------------------------------------
    Inserts value into the hashset, allows only one copy of value.
    Calls _rehash when the load threshold (self._total) is exceeded.
    Use: inserted = hs.insert( value )
    -------------------------------------------------------
    Preconditions:
        value - a comparable data element (?)
    Postconditions:
        returns
        inserted - True if value is inserted, False otherwise.
    -------------------------------------------------------
    """
    hash_slot = self._find_slot(value)
    # _find_slot returns the slot for value; a non-empty slot means the
    # value is already stored there, so only an empty slot accepts it.
    # (The previous version carried a dead Word("no")/Word("yes") sentinel
    # loop that could never affect the outcome — removed.)
    if hash_slot.is_empty():
        hash_slot.insert(value)
        inserted = True
        self._count += 1
    else:
        inserted = False
    # Grow the table once the element count passes the capacity threshold.
    if self._count > self._total:
        self._rehash()
    return inserted
def analysis_one_segment(segment):
    """Scan one text segment word-by-word, greedily growing candidate phrases.

    Returns a tuple (detected_phrases, new_cut_words):
    detected_phrases - phrases recognized while scanning (via
                       add_detected_new_phrase);
    new_cut_words    - the re-segmented word list, each finished phrase
                       merged into a single token.
    """
    detected_phrases = []
    cut_words = cut(segment)
    # Seed the first candidate phrase with the segment's first token.
    first_seg_word, first_seg_pos = next(cut_words)
    logging.debug("{} {}".format(first_seg_word, first_seg_pos))
    new_phrase_segments = Segment(init=Word(first_seg_word, first_seg_pos))
    new_cut_words = []
    for word, pos in cut_words:
        logging.debug("{} {}".format(word, pos))
        # could_concatenate decides whether this word can extend the
        # current candidate phrase; `consistent` carries its evidence.
        _phrase, consistent = could_concatenate(new_phrase_segments, Word(word, pos))
        if _phrase:
            new_phrase_segments.append(Word(word, pos, consistent))
        else:
            # Phrase can't grow further: flush it as one merged token,
            # record it as detected, and start a new candidate phrase.
            new_cut_words.append(new_phrase_segments.merge())
            detected_phrases = add_detected_new_phrase(new_phrase_segments, detected_phrases)
            new_phrase_segments = Segment(init=Word(word, pos))
    # Flush the trailing candidate; it counts as a detected phrase only
    # when it actually spans more than one word.
    new_cut_words.append(new_phrase_segments.merge())
    if len(new_phrase_segments) > 1:
        detected_phrases = add_detected_new_phrase(new_phrase_segments, detected_phrases)
    return detected_phrases, new_cut_words
def load(self): """ Read the lesson's path, loading and creating words """ if self.path is None: return False if (os.path.exists(os.path.join(self.path, self.word_list_file))): #new loading method with a words.list ifile = open(os.path.join(self.path, self.word_list_file), "r") for line in ifile: s = line.split(";") word_name = s[1].strip().decode('utf-8') full_file_name = os.path.join(self.path, s[0].decode('utf-8')) print "Filename: " + full_file_name print "Word: " + word_name self.words.append(Word(word_name, full_file_name)) else: #legacy loading method, kept for backward compatibility for file in os.listdir(self.path): full_file_name = os.path.join(self.path, file) if (os.path.isfile(full_file_name) and os.path.splitext(file)[1] == ".wav"): word_name = os.path.splitext(file)[0] self.words.append(Word(word_name, full_file_name)) return True
def setUp(self):
    """Create a DataManager pre-populated with two sample Word entries."""
    manager = DataManager()
    self.d1 = manager
    self.wd1 = Word('software', '소프트웨어', (True, manager))
    self.wd2 = Word('project', '프로젝트', (True, manager))
    manager.words = [self.wd1, self.wd2]
def get_basic_latin_word(self):
    """Consume the next run of basic-latin characters and return it as a Word.

    Returns None when no basic-latin characters remain at the cursor.

    >>> a = Algorithm('abc def c')
    >>> a.get_basic_latin_word()
    abc
    >>> a.get_basic_latin_word()
    def
    >>> a.get_basic_latin_word()
    c
    >>> a = Algorithm('abc 我 c')
    >>> a.get_basic_latin_word()
    abc
    >>> a.get_basic_latin_word()
    """
    collected = []
    while self.pos < self.length and is_basic_latin(self.text[self.pos]):
        ch = self.text[self.pos]
        self.pos += 1
        # A space terminates the word once something has been collected.
        if ch.isspace() and collected:
            return Word(u''.join(collected), BASICLATIN_WORD)
        collected.append(ch)
    if collected:
        return Word(u''.join(collected), BASICLATIN_WORD)
    return None
def readAllWords(self):
    """Populate self.words — and self.focusedWords for focused entries —
    with the built-in sample vocabulary."""
    plain_entries = [
        ("word", "단어"),
        ("asd", "ㅁㄴㄹ"),
        ("qwer", "ㅂㅈㄷㄱ, ㅂㅈㄷㄱ"),
        ("zcxvzxcv", "ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ"),
        ("rtyu", "ㄱ쇼ㅕ, ㄱ쇼ㅕ"),
        ("vbcbv", "ㅠㅊ풏ㅊ"),
        ("vbcbvyy", "ㅠㅊ풏ㅊㅊㅊ"),
        ("realword", "진짜 단어"),
        ("bback", "빡빡이아저씨"),
        ("aaaa", "아아아아"),
    ]
    for en, ko in plain_entries:
        self.words.append(Word(en, ko, False))
    focused_entries = [
        ("focus", "집중하다"),
        ("wow", "놀라운"),
        ("amazing", "엄청난"),
        ("awesome", "개쩌는"),
        ("verylonglonglongandlongword", "매우 길고 길고 또 길고 그리고 긴 단어를 아주 길고 길게 적는중"),
    ]
    # Focused words go into both the main list and the focused list.
    for en, ko in focused_entries:
        entry = Word(en, ko, True)
        self.words.append(entry)
        self.focusedWords.append(entry)
    print("Data Loaded")
def count_words(self, is_canonical, list_test_pos, list_training_pos,
                list_test_neg, list_training_neg):
    """Count word occurrences over the positive and negative training corpora.

    :param is_canonical: when True use the tagged tokenizer, else untagged.
    :param list_test_pos: unused here (kept for interface compatibility).
    :param list_training_pos: file names of positive training documents.
    :param list_test_neg: unused here (kept for interface compatibility).
    :param list_training_neg: file names of negative training documents.
    :return: dict mapping word -> Word with per-class counts; also
             increments self.nbr_pos / self.nbr_neg per token seen.
    """
    if is_canonical:
        find_words = NaifBayes.find_words_tagged
    else:
        find_words = NaifBayes.find_words_untagged
    dict_words = dict()

    def _tally(file_names, positive):
        # Shared scan loop for both polarities (was duplicated verbatim).
        for file_name in file_names:
            with codecs.open(file_name, "r", "utf-8") as fh:
                for line in fh:  # stream line-by-line instead of readlines()
                    for word in find_words(line):
                        if positive:
                            self.nbr_pos += 1
                        else:
                            self.nbr_neg += 1
                        entry = dict_words.get(word)  # single O(1) lookup
                        if entry is None:
                            dict_words[word] = Word(word)
                        elif positive:
                            entry.incr_pos()
                        else:
                            entry.incr_neg()

    _tally(list_training_pos, True)
    _tally(list_training_neg, False)
    return dict_words
def test_equality():
    """Two Words built from identical segment lists compare equal."""
    def build():
        return Word([Segment(['nasal'], ['syllabic']),
                     Segment(['syllabic'], ['nasal'])])
    assert build() == build()
def convert_to_word(token: str) -> Word:
    """Build a Word for *token* with its stress pattern, folding secondary
    stress ("2") into primary ("1").

    When the pronunciation dictionary has no entry, fall back to a rough
    syllable count (runs of vowels) rendered as "?" per syllable.
    """
    phones = p.phones_for_word(token)
    patterns = [p.stresses(phones_entry).replace("2", "1")
                for phones_entry in phones]
    if not patterns:
        syllable_guess = len(re.findall(r"[aeiou]+", token))
        return Word(token, "?" * syllable_guess)
    # Several pronunciations may exist; the first is as good as any.
    return Word(token, patterns[0])
def main():
    """Entry point: read two lexical-tag files and a bilingual dictionary,
    build candidate dictionary entries from (hard-coded test) word pairs,
    let the user approve them, and rewrite the dictionary file."""
    #git input
    parser = optparse.OptionParser(description="generator")
    parser.add_option('-a', '--lexL', type='string', help='The name of a file \
storing the lexical tags for the left side language')
    parser.add_option('-b', '--lexR', type='string', help='The name of a file \
storing the lexical tags for the right side language')
    parser.add_option('-d', '--dictionary', type='string', help='The name of a\
 file containing an apertium bilingual dictioary')
    (opts, args) = parser.parse_args()
    # All three options are required; bail out with usage info if any is absent.
    mandatories = ["lexL", "lexR", "dictionary"]
    for m in mandatories:
        if not opts.__dict__[m]:
            print('mandatory option ' + m + ' is missing\n')
            parser.print_help()
            sys.exit()
    #read dictionary into list
    dictionary = read_dictionary(opts.dictionary)
    #create lists of lexical tags
    langL_lex_tags = parse_lex_tags(opts.lexL)
    langR_lex_tags = parse_lex_tags(opts.lexR)
    # a list that will contain all generated entries
    entries = []
    ######test input
    # a word class for the left side of dict entry
    langL_word = Word("be<vbser><past><p3><sg>", 13)
    langL_word1 = Word("girl<n><sg>", 13)
    # a word class for the right side of dict entry
    langR_word = Word("Ser<vbser><inf>", 9)
    langR_word1 = Word("chico<n><f><sg>", 9)
    langL_words = [langL_word, langL_word1]
    langR_words = [langR_word, langR_word1]
    #####end test input
    #loops through parallel lists of word objects and creates possible entries
    for langL_word, langR_word in zip(langL_words, langR_words):
        entry = build_entry(langL_word, langR_word, langL_lex_tags, langR_lex_tags)
        entries.append(entry)
    approve_entries(entries, dictionary)
    rewrite_dictionary(dictionary, opts.dictionary)
def filter_type():
    """Classify each analyzed word as noun/verb/adjective from its first
    morpheme, appending a Word to the matching module-level list."""
    buckets = {
        "Noun:Noun": (noun_list, 'noun'),
        "Verb:Verb": (verb_list, 'verb'),
        "Adjective:Adj": (adj_list, 'adj'),
    }
    for word in word_list:
        for result in morphology.analyze(word):
            print(result.getStems(), result.getMorphemes())
            first_morpheme = result.getMorphemes()[0].toString()
            if first_morpheme in buckets:
                target_list, tag = buckets[first_morpheme]
                target_list.append(Word(result.getStems()[0], tag))
                break
def gameMain():
    """Run one interactive hangman session until success or tries run out."""
    word_source = Word('words.txt')
    guess = Guess(word_source.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    finished = False
    while guess.numTries < maxTries:
        # Draw the gallows for the remaining lives, then the current state.
        print(hangman.get(maxTries - guess.numTries))
        guess.display()
        guessedChar = input('Select a letter: ')
        # Reject malformed or repeated guesses without spending a try.
        if len(guessedChar) != 1:
            print('One character at a time!')
            continue
        if guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
            continue
        finished = guess.guess(guessedChar)
        if finished == True:
            break
    if finished == True:
        print('Success')
    else:
        print(hangman.get(0))
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.currentWord + ']')
        print('Fail')
def load_data_from_web(self): service = build('sheets', 'v4', credentials=self.creds) # Call the Sheets API sheet = service.spreadsheets() sheet_metadata = sheet.get(spreadsheetId=self.SPREADSHEET_ID).execute() sheets = sheet_metadata.get("sheets", "") print("Reading", len(sheets), "sheets") for s in sheets: cat_name = s.get("properties", {}).get("title") self.categories.append(Category(cat_name)) # read this sheet result = sheet.values().get(spreadsheetId=self.SPREADSHEET_ID, range=cat_name + "!A:Z").execute() values = result.get('values', []) if not values: print('No data found.') else: for v in values: clue = v[0] for sol in v[1:]: self.categories[-1].add_word(Word(clue, sol))
def startElement(self, name, attrs):
    """SAX callback: record the image name, open a new line, or append a
    Word/Character built from the element's attributes to the current line."""
    if name == "Image":
        self.image_name = str(attrs['name'])
    elif name == "TextLine":
        self.cur_line = []
    elif name == "Word":
        word = Word()
        word.top = int(attrs['top'])
        word.bottom = int(attrs['bottom'])
        word.left = int(attrs['left'])
        word.right = int(attrs['right'])
        word.text = unicode(attrs['text'])
        word.shear = int(attrs['shear'])
        self.cur_line.append(word)
    elif name == "Character":
        parent_word = self.cur_line[-1]
        char = Character()
        # Characters may omit top/bottom/shear and inherit them from
        # the word they belong to.
        char.top = int(attrs['top']) if 'top' in attrs else parent_word.top
        char.bottom = int(attrs['bottom']) if 'bottom' in attrs else parent_word.bottom
        char.left = int(attrs['left'])
        char.right = int(attrs['right'])
        char.text = unicode(attrs['text'])
        char.shear = int(attrs['shear']) if 'shear' in attrs else parent_word.shear
        parent_word.characters.append(char)
def parse_words(strings, segments, diacritics):
    '''Given a list of word strings (in IPA), return a list of Word objects
    containing parsed segments. Use the given list of segment dictionaries
    and diacritic rules.
    '''
    # Flatten the available IPA symbols once, for the tokenizer.
    segment_ipa = [entry['IPA'] for entry in segments]
    diacritic_ipa = [entry['IPA'] for entry in diacritics]
    words = []
    for word in strings:
        try:
            tokens = tokenise(word, segment_ipa, diacritic_ipa)
        except ValueError as subword:
            error = ('Error parsing word: {0}. There was an unknown character '
                     'in the subword: {1}')
            raise ValueError(error.format(word, subword))
        words.append(Word([token_to_segment(token, segments, diacritics)
                           for token in tokens]))
    return words
def gameMain():
    """Run one interactive hangman session driven by remaining lives."""
    word_source = Word('words.txt')
    guess = Guess(word_source.randFromDB())
    hangman = Hangman()
    while hangman.remainingLives > 0:
        print(hangman.currentShape())
        print('Current: ' + guess.displayCurrent())
        print('Already Used: ' + guess.displayGuessed())
        guessedChar = input('Select a letter: ')
        # Reject malformed or repeated guesses without costing a life.
        if len(guessedChar) != 1:
            print('One character at a time!')
            continue
        if guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
            continue
        success = guess.guess(guessedChar)
        if success == False:
            hangman.decreaseLife()
        if guess.finished() == True:
            print('**** ' + guess.displayCurrent() + ' ****')
            print('Success')
            break
    else:
        # while/else: reached only when the lives ran out without a break.
        print(hangman.currentShape())
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.displayCurrent() + ']')
        print('Fail')
def spellMed(word):
    """Return the most frequent known medicine matching *word*, trying the
    exact spelling first, then one-typo and two-typo variants.

    :param word: the candidate medicine name (str).
    :return: the best match encoded as UTF-8 bytes, or -1 when nothing
             matched (kept for backward compatibility with callers).
    """
    w = Word(word)
    # Lazily widen the search: exact -> single typos -> double typos.
    # Callables defer the (potentially expensive) typo generation until
    # the previous stage has actually failed.
    for candidates_for in (lambda: [word], w.typos, w.double_typos):
        candidates = isMedicine(candidates_for())
        # NOTE: the old code compared lengths with `is`/`is not` (identity,
        # not equality) and ran no-op `x = x.encode('utf-8')` loops; both
        # removed here.
        if candidates:
            return max(candidates, key=MED_COUNTS.get).encode('utf-8')
    return -1
def __init__(self, data):
    """Keep the raw quiz data and start with fresh score/word state."""
    self._data = data
    self._word = Word()
    self._score = Score()
    # No category selected yet; these are filled in on first use.
    self._category = None
    self._iter_category = None
def load_words(filename, word_dict, charset):
    """Load a word(-frequency) file into word_dict.

    Each non-comment line is "<word>[ <frequency>]"; lines containing '#'
    are skipped, as are words longer than config.WORD_MAX_LENGTH.
    """
    # 'with' guarantees the handle is closed (the old file() was leaked).
    with open(filename) as ifile:
        for line in ifile:
            # convert into unicode
            line = line.strip().decode(charset)
            if '#' in line:
                continue  # comment line
            items = line.split(' ')
            if len(items[0]) > config.WORD_MAX_LENGTH:
                continue
            if len(items) > 1:
                try:
                    word_dict[items[0]] = Word(items[0], frequency=int(items[1]))
                except ValueError:
                    # Malformed frequency field: skip the entry rather than
                    # abort the load (was a bare `except: pass`).
                    pass
            else:
                word_dict[items[0]] = Word(items[0])
def make_optimal_paths(transducer_input):
    """Rebuild the transducer's arcs so that, for every alphabet segment and
    every state pair, only the optimal (cheapest) path survives.

    Works on a deep copy of *transducer_input* (via pickle round-trip) and
    returns that copy with its arcs replaced.
    """
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    alphabet = transducer.get_alphabet()
    new_arcs = list()
    for segment in alphabet:
        # Intersect the transducer with the single-segment word machine.
        word = Word(segment.get_symbol())
        word_transducer = word.get_transducer()
        #print(word_transducer.dot_representation())
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()
        # Consider every ordered pair of states as (source, target).
        for state1, state2 in itertools.product(states, states):
            # Product-machine states corresponding to the chosen pair.
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2
            # Fresh deep copy per pair: the machine is mutated below.
            temp_transducer = pickle.loads(pickle.dumps(intersected_machine, -1))
            temp_transducer.initial_state = initial_state
            temp_transducer.set_final_state(final_state)
            temp_transducer.clear_dead_states()
            if final_state in temp_transducer.get_final_states():  # otherwise no path.
                try:
                    temp_transducer = remove_suboptimal_paths(temp_transducer)
                    range = temp_transducer.get_range()
                    arc = Arc(state1, segment, range,
                              _get_path_cost(temp_transducer), state2)
                    new_arcs.append(arc)
                except KeyError:
                    # presumably raised when pruning leaves no usable path —
                    # TODO(review): confirm which call raises KeyError here.
                    pass
                #print("****")
                #print(temp_transducer.dot_representation())
    transducer.set_arcs(new_arcs)
    return transducer
def make_node(self, indexed_word):
    """Return the node named *indexed_word*, creating and registering a new
    Word node when none exists yet.

    Single pass over self.nodes (the old version scanned the list twice:
    once for membership, once to fetch the match).
    """
    for node in self.nodes:
        if node.name == indexed_word:
            return node
    node = Word(indexed_word, self.extracted_words, self.dic_df)
    self.nodes.append(node)
    return node
def main(argv): #Parse out the commandline arguments parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=DESCRIPTION, epilog=EPILOG ) parser.add_argument("-t", "--test", nargs=1, default=["something"]) parser.add_argument("-d", "--debug", action="store_true", help="Enable Debug Messages") args = parser.parse_args() print "Running Script: %s" % NAME if args.debug: print "test: %s" % str(args.test[0]) my_string = "hello" print "Orginal String: %s" % my_string w = Word(my_string) print "Output: %s" % w.reverse()
def test_phonetic_product():
    """phonetic_product: 2 for the fully featured word, 1 once 'front' is
    dropped from the negative features of the third segment."""
    full_word = Word([
        Segment(['consonantal'], ['tonal', 'long']),
        Segment(['nasal'], ['syllabic']),
        Segment(['syllabic', 'high', 'back'], ['nasal', 'front']),
        Segment(['sonorant'], ['high'])
    ])
    reduced_word = Word([
        Segment(['consonantal'], ['tonal', 'long']),
        Segment(['nasal'], ['syllabic']),
        Segment(['syllabic', 'high', 'back'], ['nasal']),
        Segment(['sonorant'], ['high'])
    ])
    assert phonetic_product(full_word) == 2
    assert phonetic_product(reduced_word) == 1
def wordAdd(self):
    """Validate the word-add form, build a Word from it, store it through
    the data manager, and refresh the list view."""
    en, ko = self.stripWordAddTextEdits()
    # Guard clauses: silently ignore a fully empty form, report a
    # half-empty one.
    if en == "" and ko == "":
        return
    if en == "":
        self.MessagingError("이름이 입력되지 않았습니다.")
        return
    if ko == "":
        self.MessagingError("뜻이 입력되지 않았습니다.")
        return
    wordStandard = self.wordStandardCB.currentText()
    isFocused = False
    if wordStandard == "모두":
        wordStandardJudge = self.wordStandardJudgeWhoWantAll
    elif wordStandard == "집중 단어만":
        wordStandardJudge = self.wordStandardJudgeWhoWantFocused
        isFocused = True
    elif wordStandard == "집중 단어 아닌것만":
        wordStandardJudge = self.wordStandardJudgeWhoWantNotFocused
    else:
        self.MessagingError("알수 없는 오류가 발생했습니다: wordStandard에 \'" + wordStandard + "\'이 없음")
        return
    new_word = Word(en, ko, (isFocused, self.windowsManager.dataManager))
    self.windowsManager.dataManager.wordAdd(new_word)
    self.wordAddTextClear()
    self.updateListedWords()
def __init__(self, word, guess_limit):
    """Start a new game for *word*, allowing *guess_limit* wrong guesses."""
    self.word = Word(word=word)
    self.guesses_remaining = guess_limit
    self.settings = settings  # module-level settings object
    self.previous_guesses = set()  # letters tried so far
    self.game_won = False
    self.game_in_progress = True
def __init__(self, key, line, start):
    """
    Sentence object.

    :param key: The key to which this sentence belongs.
    :param line: The line on which this sentence occurs.
    :param start: The start index of this line in characters.
    """
    self.key = key
    self.words = []
    self.start = start
    self.end = start + len(line)
    # Locate each word inside the original line so offsets stay correct
    # even with leading or repeated whitespace (the previous version
    # assumed exactly one space between consecutive words).
    cursor = 0
    for windex, w in enumerate(line.split()):
        cursor = line.find(w, cursor)
        w_start = start + cursor
        w_end = w_start + len(w)
        self.words.append(
            Word(key=windex, sentkey=self.key, form=w,
                 start=w_start, end=w_end))
        cursor += len(w)
def gameMain():
    """Run one interactive hangman session (no repeat-guess filtering)."""
    word_source = Word('words.txt')
    guess = Guess(word_source.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    finished = False
    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()
        guessedChar = input('Select a letter: ')
        finished = guess.guess(guessedChar)
        if finished:
            break
    if finished:
        print('Success')
    else:
        print(hangman.get(0))
        print('word [' + guess.word + ']')
        print("Guess:", end=" ")
        for ch in guess.current:
            print(ch, end=" ")
        print()
        print('Fail')
def add_word(message: types.Message, english: Text, spanish: Text) -> NoReturn:
    """Validate both comma-separated word lists, store them, and reply with
    either every validation error or the insert result."""
    regex: Text = r'^(([a-z]+),?)+$'
    # Check both inputs before bailing out so the user sees all errors at once.
    fail: bool = False
    for candidate in (english, spanish):
        if not re.search(regex, candidate, re.IGNORECASE):
            bot.reply_to(message, f'{candidate} does not satisfy the regex: {regex}\n',
                         reply_markup=get_markup_cmd())
            fail = True
    if fail:  # show all errors before exit
        return
    word: Word = Word(0, spanish.split(','), english.split(','))  # id=0 nor use in insert
    insert: Text = insert_word(word)
    if re.search('UNIQUE constraint failed', insert, re.IGNORECASE):
        bot.reply_to(message, f'The word {word.get_str_spanish()} is already stored',
                     reply_markup=get_markup_cmd())
    else:
        bot.reply_to(message, f'insert: {word}', reply_markup=get_markup_cmd())
    return
def __init__(self):
    """Pick a secret word from the database and reset per-game state."""
    self.word = Word('words.txt')
    self.secretWord = self.word.randFromDB()
    self.numTries = 0  # game over once this reaches 7
    self.hangmanList = hangmanList
    self.currentStatus = "_" * len(self.secretWord)  # one blank per letter
    self.guessedChars = ""  # letters the player has tried