コード例 #1
0
def test_filter_rules():
    """filter_rules drops rules that cannot apply to any word and keeps the rest."""
    nasalization_rule = {
        'name': 'nasalization',
        'applies': {'positive': ['nasal']},
        'conditions': {'positive': ['syllabic'], 'negative': ['nasal']},
    }

    valid_rule = {
        'name': 'valid',
        'applies': {'positive': ['tonal']},
        'conditions': {'positive': ['syllabic']},
    }

    words = [
        Word([Segment(['consonantal'], ['tonal']),
              Segment(['sonorant'], ['high'])]),
        Word([Segment(['syllabic', 'low'], []),
              Segment(['high'], ['sonorant'])]),
    ]

    # Only the 'valid' rule should survive filtering against these words.
    assert filter_rules(words, [nasalization_rule, valid_rule]) == [valid_rule]
コード例 #2
0
    def insert(self, value):
        """
        ---------------------------------------------------------
        Inserts value into the hashset, allows only one copy of value.
        Calls _rehash if the hashset _LOAD_FACTOR is exceeded.
        Use: inserted = hs.insert( value )
        -------------------------------------------------------
        Preconditions:
            value - a comparable data element (?)
        Postconditions:
            returns
            inserted - True if value is inserted, False otherwise.
        -------------------------------------------------------
        """
        hash_slot = self._find_slot(value)
        # Bug fix: the original scanned the slot comparing each item against a
        # Word("no") sentinel object instead of against `value`, and then only
        # inserted when the slot was *empty* — so a value hashing to an
        # occupied slot could never be added even when absent. A plain boolean
        # membership scan expresses the intended "only one copy" contract.
        found = False
        for item in hash_slot:
            if item == value:
                found = True
        if not found:
            hash_slot.insert(value)
            inserted = True
            self._count += 1
        else:
            inserted = False

        # Grow the table when the load factor is exceeded.
        if self._count > self._total:
            self._rehash()

        return inserted
コード例 #3
0
def analysis_one_segment(segment):
    """Scan one text segment, greedily concatenating adjacent words into
    phrase candidates.

    Returns (detected_phrases, re-segmented word list).
    """
    phrases = []
    merged_words = []

    tokens = cut(segment)
    head_word, head_pos = next(tokens)
    logging.debug("{} {}".format(head_word, head_pos))

    # Current run of words that may still form a single phrase.
    current = Segment(init=Word(head_word, head_pos))

    for token, tag in tokens:
        logging.debug("{} {}".format(token, tag))

        _phrase, consistent = could_concatenate(current, Word(token, tag))

        if _phrase:
            # The new word extends the current phrase candidate.
            current.append(Word(token, tag, consistent))
        else:
            # Phrase ended: flush the candidate and start a new run.
            merged_words.append(current.merge())
            phrases = add_detected_new_phrase(current, phrases)
            current = Segment(init=Word(token, tag))

    # Flush the trailing run; it only counts as a phrase if multi-word.
    merged_words.append(current.merge())
    if len(current) > 1:
        phrases = add_detected_new_phrase(current, phrases)

    return phrases, merged_words
コード例 #4
0
 def load(self):
     """
     Read the lesson's path, loading and creating words
     """
     if self.path is None:
         return False
     if (os.path.exists(os.path.join(self.path, self.word_list_file))):
         #new loading method with a words.list
         ifile = open(os.path.join(self.path, self.word_list_file), "r")
         for line in ifile:
             s = line.split(";")
             word_name = s[1].strip().decode('utf-8')
             full_file_name = os.path.join(self.path, s[0].decode('utf-8'))
             print "Filename: " + full_file_name
             print "Word: " + word_name
             self.words.append(Word(word_name, full_file_name))
     else:
         #legacy loading method, kept for backward compatibility
         for file in os.listdir(self.path):
             full_file_name = os.path.join(self.path, file)
             if (os.path.isfile(full_file_name)
                     and os.path.splitext(file)[1] == ".wav"):
                 word_name = os.path.splitext(file)[0]
                 self.words.append(Word(word_name, full_file_name))
     return True
コード例 #5
0
    def setUp(self):
        """Build a DataManager pre-populated with two sample Word entries."""
        manager = DataManager()
        word_a = Word('software', '소프트웨어', (True, manager))
        word_b = Word('project', '프로젝트', (True, manager))
        manager.words = [word_a, word_b]

        self.d1 = manager
        self.wd1 = word_a
        self.wd2 = word_b
コード例 #6
0
ファイル: algorithms.py プロジェクト: superxiaoqiang/pychseg
 def get_basic_latin_word(self):
     """
     >>> a = Algorithm('abc def c')
     >>> a.get_basic_latin_word()
     abc
     >>> a.get_basic_latin_word()
     def
     >>> a.get_basic_latin_word()
     c
     >>> a = Algorithm('abc 我 c')
     >>> a.get_basic_latin_word()
     abc
     >>> a.get_basic_latin_word()
     """
     chars = []
     while self.pos < self.length and is_basic_latin(self.text[self.pos]):
         ch = self.text[self.pos]
         self.pos += 1
         # Whitespace ends the word once at least one char is buffered;
         # otherwise it falls through and is buffered like any other char
         # (preserves the original's behavior for leading spaces).
         if ch.isspace() and chars:
             return Word(u''.join(chars), BASICLATIN_WORD)
         chars.append(ch)
     # End of the latin run (or end of text): flush whatever was buffered.
     return Word(u''.join(chars), BASICLATIN_WORD) if chars else None
コード例 #7
0
    def readAllWords(self):
        """Populate self.words (and self.focusedWords) with built-in sample data."""
        # Plain vocabulary entries: (english, korean) pairs.
        plain_entries = [
            ("word", "단어"),
            ("asd", "ㅁㄴㄹ"),
            ("qwer", "ㅂㅈㄷㄱ, ㅂㅈㄷㄱ"),
            ("zcxvzxcv",
             "ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ"
             ),
            ("rtyu", "ㄱ쇼ㅕ, ㄱ쇼ㅕ"),
            ("vbcbv", "ㅠㅊ풏ㅊ"),
            ("vbcbvyy", "ㅠㅊ풏ㅊㅊㅊ"),
            ("realword", "진짜 단어"),
            ("bback", "빡빡이아저씨"),
            ("aaaa", "아아아아"),
        ]
        for en, ko in plain_entries:
            self.words.append(Word(en, ko, False))

        # Focused entries go into both lists, sharing the same Word object.
        focused_entries = [
            ("focus", "집중하다"),
            ("wow", "놀라운"),
            ("amazing", "엄청난"),
            ("awesome", "개쩌는"),
            ("verylonglonglongandlongword",
             "매우 길고 길고 또 길고 그리고 긴 단어를 아주 길고 길게 적는중"),
        ]
        for en, ko in focused_entries:
            entry = Word(en, ko, True)
            self.words.append(entry)
            self.focusedWords.append(entry)

        print("Data Loaded")
コード例 #8
0
    def count_words(self, is_canonical, list_test_pos, list_training_pos,
                    list_test_neg, list_training_neg):
        """Count word occurrences in the positive/negative training corpora.

        :param is_canonical: selects the tagged vs untagged tokenizer.
        :param list_test_pos/list_test_neg: unused here; kept for interface
            compatibility with callers.
        :param list_training_pos/list_training_neg: file names to scan.
        :returns: dict mapping word -> Word with per-class counters.
        """
        if is_canonical:
            find_words = NaifBayes.find_words_tagged
        else:
            find_words = NaifBayes.find_words_untagged

        dict_words = dict()

        # Shared scan for one corpus half; `positive` selects which counters
        # are bumped (self.nbr_pos / incr_pos vs self.nbr_neg / incr_neg).
        # This replaces two near-identical copy-pasted loops.
        def _scan(file_names, positive):
            for file_name in file_names:
                with codecs.open(file_name, "r", "utf-8") as handle:
                    for line in handle:  # iterate lazily instead of readlines()
                        for word in find_words(line):
                            if positive:
                                self.nbr_pos += 1
                            else:
                                self.nbr_neg += 1
                            if word in dict_words:  # direct lookup, no .keys()
                                if positive:
                                    dict_words[word].incr_pos()
                                else:
                                    dict_words[word].incr_neg()
                            else:
                                # First occurrence: Word starts with default
                                # counters (matches the original behavior).
                                dict_words[word] = Word(word)

        _scan(list_training_pos, True)
        _scan(list_training_neg, False)
        return dict_words
コード例 #9
0
def test_equality():
    """Words built from identical segment lists must compare equal."""
    def build():
        return Word([Segment(['nasal'], ['syllabic']),
                     Segment(['syllabic'], ['nasal'])])

    assert build() == build()
コード例 #10
0
def convert_to_word(token: str) -> Word:
    """Build a Word for *token* with its stress pattern.

    Secondary stress ("2") is folded into primary ("1"). When no
    pronunciation is known, the syllable count is guessed from vowel
    groups and marked with "?" placeholders.
    """
    pronunciations = p.phones_for_word(token)
    stress_patterns = [
        p.stresses(pron).replace("2", "1") for pron in pronunciations
    ]
    if not stress_patterns:
        # No dictionary entry: estimate one syllable per vowel cluster.
        syllable_guess = len(re.findall(r"[aeiou]+", token))
        return Word(token, "?" * syllable_guess)
    # pick the first pronunciation arbitrarily
    return Word(token, stress_patterns[0])
コード例 #11
0
def main():
    """Parse CLI options, build test dictionary entries, and rewrite the
    bilingual dictionary with any approved entries."""
    # parse command-line input
    parser = optparse.OptionParser(description="generator")

    parser.add_option('-a', '--lexL', type='string', help='The name of a file \
    storing the lexical tags for the left side language')

    parser.add_option('-b', '--lexR', type='string', help='The name of a file \
    storing the lexical tags for the right side language')

    # Bug fix: the help text misspelled "dictionary" as "dictioary".
    parser.add_option('-d', '--dictionary', type='string', help='The name of a\
    file containing an apertium bilingual dictionary')

    (opts, args) = parser.parse_args()

    # all three options are required
    mandatories = ["lexL", "lexR", "dictionary"]
    for m in mandatories:
        if not opts.__dict__[m]:
            print('mandatory option ' + m + ' is missing\n')
            parser.print_help()
            sys.exit()

    # read dictionary into list
    dictionary = read_dictionary(opts.dictionary)
    # create lists of lexical tags
    langL_lex_tags = parse_lex_tags(opts.lexL)
    langR_lex_tags = parse_lex_tags(opts.lexR)

    # a list that will contain all generated entries
    entries = []

    ###### test input ######
    # word objects for the left side of a dict entry
    langL_word = Word("be<vbser><past><p3><sg>", 13)
    langL_word1 = Word("girl<n><sg>", 13)
    # word objects for the right side of a dict entry
    langR_word = Word("Ser<vbser><inf>", 9)
    langR_word1 = Word("chico<n><f><sg>", 9)

    langL_words = [langL_word, langL_word1]
    langR_words = [langR_word, langR_word1]
    ###### end test input ######

    # loop through parallel lists of word objects and create possible entries
    for langL_word, langR_word in zip(langL_words, langR_words):
        entry = build_entry(langL_word, langR_word, langL_lex_tags, langR_lex_tags)
        entries.append(entry)

    approve_entries(entries, dictionary)

    rewrite_dictionary(dictionary, opts.dictionary)
コード例 #12
0
ファイル: generator.py プロジェクト: gokseltokur/Turkish-NLP
def filter_type():
    """Classify each analyzed word as noun/verb/adjective by its first morpheme."""
    # first-morpheme string -> (target list, POS label); replaces the
    # original chain of three mutually-exclusive if/break checks.
    buckets = {
        "Noun:Noun": (noun_list, 'noun'),
        "Verb:Verb": (verb_list, 'verb'),
        "Adjective:Adj": (adj_list, 'adj'),
    }
    for word in word_list:
        for result in morphology.analyze(word):
            print(result.getStems(), result.getMorphemes())
            kind = result.getMorphemes()[0].toString()
            if kind in buckets:
                target, label = buckets[kind]
                target.append(Word(result.getStems()[0], label))
                break
コード例 #13
0
ファイル: game.py プロジェクト: kookminYeji/SW-Proj2-class1-3
def gameMain():
    """Run one hangman session: read guesses until the word is complete or
    the player runs out of tries."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    finished = False

    while guess.numTries < maxTries:
        # Show the hangman figure for the remaining lives, then progress.
        print(hangman.get(maxTries - guess.numTries))
        guess.display()

        letter = input('Select a letter: ')
        if len(letter) != 1:
            print('One character at a time!')
            continue
        if letter in guess.guessedChars:
            print('You already guessed "' + letter + '"')
            continue

        finished = guess.guess(letter)
        if finished == True:
            break

    if finished == True:
        print('Success')
    else:
        # Out of tries: reveal the answer and the final guess state.
        print(hangman.get(0))
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.currentWord + ']')
        print('Fail')
コード例 #14
0
ファイル: loader.py プロジェクト: maede97/Crossword-Generator
    def load_data_from_web(self):
        """Fetch every sheet of the configured spreadsheet and load its
        clue/solution rows into self.categories."""
        service = build('sheets', 'v4', credentials=self.creds)

        # Call the Sheets API; each sheet title becomes one category.
        sheet = service.spreadsheets()
        meta = sheet.get(spreadsheetId=self.SPREADSHEET_ID).execute()
        sheets = meta.get("sheets", "")
        print("Reading", len(sheets), "sheets")

        for entry in sheets:
            title = entry.get("properties", {}).get("title")
            self.categories.append(Category(title))

            # Read this sheet: column A is the clue, columns B.. solutions.
            response = sheet.values().get(spreadsheetId=self.SPREADSHEET_ID,
                                          range=title + "!A:Z").execute()
            rows = response.get('values', [])

            if not rows:
                print('No data found.')
                continue
            for row in rows:
                clue = row[0]
                for solution in row[1:]:
                    self.categories[-1].add_word(Word(clue, solution))
コード例 #15
0
    def startElement(self, name, attrs):
        """SAX callback: build Word/Character objects from element attributes.

        Characters inherit missing top/bottom/shear from the last Word on
        the current line. The element names are mutually exclusive, so an
        elif chain replaces the original independent ifs.
        """
        if name == "Image":
            self.image_name = str(attrs['name'])
        elif name == "TextLine":
            # start collecting words for a fresh line
            self.cur_line = []
        elif name == "Word":
            word = Word()
            word.top = int(attrs['top'])
            word.bottom = int(attrs['bottom'])
            word.left = int(attrs['left'])
            word.right = int(attrs['right'])
            word.text = unicode(attrs['text'])  # NOTE: Python 2 builtin
            word.shear = int(attrs['shear'])
            self.cur_line.append(word)
        elif name == "Character":
            char = Character()
            # Fall back to the enclosing word's geometry when absent.
            char.top = int(attrs['top']) if 'top' in attrs else self.cur_line[-1].top
            char.bottom = (int(attrs['bottom']) if 'bottom' in attrs
                           else self.cur_line[-1].bottom)
            char.left = int(attrs['left'])
            char.right = int(attrs['right'])
            char.text = unicode(attrs['text'])
            char.shear = (int(attrs['shear']) if 'shear' in attrs
                          else self.cur_line[-1].shear)
            self.cur_line[-1].characters.append(char)
コード例 #16
0
ファイル: parse.py プロジェクト: Exocamp/Onset
def parse_words(strings, segments, diacritics):
    '''Given a list of word strings (in IPA), return a list of Word objects
    containing parsed segments. Use the given list of segment dictionaries and
    diacritic rules.

    '''
    # Flat lists of the IPA strings the tokeniser knows about.
    known_segments = [segment['IPA'] for segment in segments]
    known_diacritics = [diacritic['IPA'] for diacritic in diacritics]

    parsed_words = []
    for word in strings:
        try:
            tokens = tokenise(word, known_segments, known_diacritics)
        except ValueError as subword:
            message = ('Error parsing word: {0}. There was an unknown character '
                       'in the subword: {1}')
            raise ValueError(message.format(word, subword))

        parsed_words.append(
            Word([token_to_segment(token, segments, diacritics)
                  for token in tokens]))

    return parsed_words
コード例 #17
0
def gameMain():
    """Hangman main loop: wrong guesses cost a life; the while/else prints
    the failure summary only when the loop ends without a break (win)."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()

    while hangman.remainingLives > 0:
        print(hangman.currentShape())
        print('Current: ' + guess.displayCurrent())
        print('Already Used: ' + guess.displayGuessed())

        letter = input('Select a letter: ')
        if len(letter) != 1:
            print('One character at a time!')
            continue
        if letter in guess.guessedChars:
            print('You already guessed "' + letter + '"')
            continue

        if guess.guess(letter) == False:
            hangman.decreaseLife()

        if guess.finished() == True:
            print('**** ' + guess.displayCurrent() + ' ****')
            print('Success')
            break
    else:
        # Loop exhausted without break: the player ran out of lives.
        print(hangman.currentShape())
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.displayCurrent() + ']')
        print('Fail')
コード例 #18
0
ファイル: __init__.py プロジェクト: rushyam/DigiCon-1
def spellMed(word):
    """Spell-correct *word* against the medicine lexicon.

    Tries the word itself, then single-typo and double-typo variants, in
    that order; returns the most frequent matching medicine (UTF-8
    encoded), or -1 when nothing matches.

    Fixes: the original compared lengths with `is not 0` / `is 0`
    (identity comparison on ints, a CPython small-int accident), contained
    no-op loops that merely rebound the loop variable
    (`x = x.encode(...)`), and triplicated the candidate-handling body.
    """
    w = Word(word)

    # Lazily evaluate each candidate source so typo generation only runs
    # when the cheaper sources found nothing (matches original behavior).
    candidate_sources = (
        lambda: isMedicine([word]),
        lambda: isMedicine(w.typos()),
        lambda: isMedicine(w.double_typos()),
    )

    for source in candidate_sources:
        candidates = source()
        if candidates:
            # Most frequent medicine wins.
            return max(candidates, key=MED_COUNTS.get).encode('utf-8')

    return -1
コード例 #19
0
    def __init__(self, data):
        """Hold the raw game data plus a fresh score and empty word state."""
        self._data = data
        self._score = Score()

        # No category chosen yet; its iterator is created on demand.
        self._category = None
        self._iter_category = None
        self._word = Word()
コード例 #20
0
def load_words(filename, word_dict, charset):
    """Load a word(-frequency) file into word_dict.

    Each line without a '#' is "word [frequency]"; words longer than
    config.WORD_MAX_LENGTH are skipped.
    """
    # Bug fix: use open() in a context manager instead of the Python-2-only
    # file() constructor, which also leaked the file handle.
    with open(filename) as handle:
        for line in handle:
            # convert into unicode
            line = line.strip().decode(charset)
            if line.find('#') >= 0:
                continue  # skip comment lines
            items = line.split(' ')
            if len(items[0]) > config.WORD_MAX_LENGTH:
                continue
            if len(items) > 1:
                try:
                    word_dict[items[0]] = Word(items[0],
                                               frequency=int(items[1]))
                except ValueError:
                    # Bug fix: the bare except hid all errors; only a
                    # malformed frequency field is expected here.
                    pass
            else:
                word_dict[items[0]] = Word(items[0])
def make_optimal_paths(transducer_input):
    """Return a copy of *transducer_input* whose arcs are replaced so that
    each (state1, segment, state2) triple is covered by one optimal arc.

    For every alphabet segment, the transducer of the single-segment word
    is intersected with the input transducer; for each ordered state pair
    the suboptimal paths are removed and the surviving output range plus
    path cost become a new Arc. The input transducer is not mutated.
    """
    # pickle round-trip: fast deep copy of the input transducer
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    alphabet = transducer.get_alphabet()
    new_arcs = list()
    for segment in alphabet:
        # transducer accepting exactly this one segment
        word = Word(segment.get_symbol())
        word_transducer = word.get_transducer()
        #print(word_transducer.dot_representation())
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()
        for state1, state2 in itertools.product(states, states):
            # '&' presumably builds the product state pairing the word
            # transducer's state with the original's — TODO confirm.
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2
            # private copy so pruning one state pair cannot affect others
            temp_transducer = pickle.loads(pickle.dumps(intersected_machine, -1))
            temp_transducer.initial_state = initial_state
            temp_transducer.set_final_state(final_state)
            temp_transducer.clear_dead_states()
            if final_state in temp_transducer.get_final_states():  # otherwise no path.
                try:
                    temp_transducer = remove_suboptimal_paths(temp_transducer)
                    # NOTE(review): 'range' shadows the builtin; kept as-is.
                    range = temp_transducer.get_range()
                    arc = Arc(state1, segment, range, _get_path_cost(temp_transducer), state2)
                    new_arcs.append(arc)
                except KeyError:
                    # NOTE(review): KeyError from remove_suboptimal_paths is
                    # treated as "no usable path" — presumably; confirm.
                    pass
                #print("****")
                #print(temp_transducer.dot_representation())

    transducer.set_arcs(new_arcs)
    return transducer
コード例 #22
0
ファイル: sentence.py プロジェクト: utipe/imf_fiscal
 def make_node(self, indexed_word):
     """Return the node named *indexed_word*, creating and registering a
     new Word node when none exists yet."""
     # Single pass: return the existing node as soon as it is found.
     for node in self.nodes:
         if node.name == indexed_word:
             return node
     node = Word(indexed_word, self.extracted_words, self.dic_df)
     self.nodes.append(node)
     return node
コード例 #23
0
ファイル: test.py プロジェクト: yasinbakhtiar/python
def main(argv):
    #Parse out the commandline arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION,
        epilog=EPILOG
    )

    parser.add_argument("-t", "--test",
                        nargs=1,
                        default=["something"])

    parser.add_argument("-d", "--debug",
                        action="store_true",
                        help="Enable Debug Messages")

    args = parser.parse_args()
    print "Running Script: %s" % NAME


    if args.debug:
        print "test: %s" % str(args.test[0])

    my_string = "hello"
    print "Orginal String: %s" % my_string
    w = Word(my_string)
    print "Output: %s" % w.reverse()
コード例 #24
0
def test_phonetic_product():
    """The extra 'front' feature on the third segment doubles the product."""
    def build(third_negatives):
        # The two test words differ only in the third segment's negatives.
        return Word([
            Segment(['consonantal'], ['tonal', 'long']),
            Segment(['nasal'], ['syllabic']),
            Segment(['syllabic', 'high', 'back'], third_negatives),
            Segment(['sonorant'], ['high'])
        ])

    assert phonetic_product(build(['nasal', 'front'])) == 2
    assert phonetic_product(build(['nasal'])) == 1
コード例 #25
0
    def wordAdd(self):
        """Validate the add-word form, build a Word, and store it.

        Shows an error dialog when a field is missing or the word-standard
        combo box holds an unknown value.
        """
        en, ko = self.stripWordAddTextEdits()

        # Field validation: both empty is silently ignored; one empty errors.
        if en == "" and ko == "":
            return
        elif en == "":
            self.MessagingError("이름이 입력되지 않았습니다.")
            return
        elif ko == "":
            self.MessagingError("뜻이 입력되지 않았습니다.")
            return

        wordStandard = self.wordStandardCB.currentText()
        # Fix: the original also assigned a wordStandardJudge callback in
        # every branch, but that local was never used afterwards; only the
        # isFocused flag and the validity check matter here.
        if wordStandard == "집중 단어만":
            isFocused = True
        elif wordStandard in ("모두", "집중 단어 아닌것만"):
            isFocused = False
        else:
            self.MessagingError("알수 없는 오류가 발생했습니다: wordStandard에 \'" +
                                wordStandard + "\'이 없음")
            return

        word = Word(en, ko, (isFocused, self.windowsManager.dataManager))
        self.windowsManager.dataManager.wordAdd(word)
        self.wordAddTextClear()
        self.updateListedWords()
コード例 #26
0
ファイル: game.py プロジェクト: TziTziFly/Hangman
 def __init__(self, word, guess_limit):
     """Start a game: wrap *word* in a Word and reset all round state."""
     self.word = Word(word=word)
     self.guesses_remaining = guess_limit
     self.previous_guesses = set()
     self.game_won = False
     self.game_in_progress = True
     # NOTE(review): `settings` comes from module scope, not a parameter —
     # presumably a global config object; confirm it exists at import time.
     self.settings = settings
コード例 #27
0
    def __init__(self, key, line, start):
        """
        Sentence object.

        :param key: The key to which this sentence belongs.
        :param line: The line on which this sentences occurs.
        :param start: The start index of this line in characters.
        """
        self.key = key
        self.words = []
        self.start = start
        self.end = start + len(line)

        # Walk the whitespace-separated tokens, tracking each word's
        # character offsets. (Fix: removed the original's dead no-op
        # `start = start` at the top of the loop body.)
        # NOTE(review): str.split() collapses whitespace runs, so the
        # offsets assume words are separated by single spaces — confirm
        # inputs are normalized.
        for windex, w in enumerate(line.split()):
            end = start + len(w)
            self.words.append(
                Word(key=windex,
                     sentkey=self.key,
                     form=w,
                     start=start,
                     end=end))
            # advance past this word and the single separating space
            start = end + 1
コード例 #28
0
def gameMain():
    """Minimal hangman loop (no input validation); prints the answer and the
    partial guess on failure."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    finished = False

    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()

        finished = guess.guess(input('Select a letter: '))
        if finished:
            break

    if finished:
        print('Success')
    else:
        # Out of tries: show the final figure, the answer, and the progress.
        print(hangman.get(0))
        print('word [' + guess.word + ']')
        print("Guess:", end=" ")
        for ch in guess.current:
            print(ch, end=" ")
        print()
        print('Fail')
コード例 #29
0
def add_word(message: types.Message, english: Text, spanish: Text) -> None:
    """Validate two comma-separated word lists and store them as a Word.

    Replies with one error per field that fails the regex (all errors are
    shown before returning); on a UNIQUE-constraint violation reports that
    the word is already stored.

    Fix: the return annotation was `NoReturn`, which means "never returns
    normally"; this handler always returns None.
    """
    regex: Text = r'^(([a-z]+),?)+$'
    fail: bool = False
    if not re.search(regex, english, re.IGNORECASE):
        bot.reply_to(message,
                     f'{english} does not satisfy the regex: {regex}\n',
                     reply_markup=get_markup_cmd())
        fail = True
    if not re.search(regex, spanish, re.IGNORECASE):
        bot.reply_to(message,
                     f'{spanish} does not satisfy the regex: {regex}\n',
                     reply_markup=get_markup_cmd())
        fail = True
    if fail:  # show all errors before exit
        return

    word: Word = Word(0, spanish.split(','),
                      english.split(','))  # id=0 not used on insert
    insert: Text = insert_word(word)
    if re.search('UNIQUE constraint failed', insert, re.IGNORECASE):
        bot.reply_to(message,
                     f'The word {word.get_str_spanish()} is already stored',
                     reply_markup=get_markup_cmd())
    else:
        bot.reply_to(message, f'insert: {word}', reply_markup=get_markup_cmd())
    return
コード例 #30
0
ファイル: model.py プロジェクト: ganghe74/swp2
 def __init__(self):
     """Load the word database and reset all game state."""
     self.word = Word('words.txt')
     self.secretWord = self.word.randFromDB()
     self.numTries = 0  # the game is over once this reaches 7
     self.hangmanList = hangmanList
     # One underscore per letter of the hidden word.
     self.currentStatus = "_" * len(self.secretWord)
     self.guessedChars = ""