Python tokenizeの例、parse.tokenize Pythonの例

コード例 #1

0

ファイルを表示

    def _tokenize_clean(self, token_type, cfg):
        """
        Tokenize ``self.msg``, record counts on the raw tokens and clean them.

        The results of ``p.count_all_caps``, ``p.count_mentions`` and
        ``p.count_hash`` are stored on the instance before any clean-up step
        runs. Each clean-up step is applied only when its ``pre_process_*``
        entry in ``cfg`` is truthy; a missing entry counts as enabled. The
        cleaned tokens, joined by single spaces, are stored in
        ``self.msg_cleaned``.

        :param token_type: passed through unchanged to ``p.tokenize``
        :param cfg: object with a ``get(key, default)`` method holding the
            ``pre_process_*`` switches
        :return: the cleaned tokens
        """
        raw_tokens = p.tokenize(self.msg, token_type)

        # the counters look at the tokens before anything is stripped
        self.all_caps_num = p.count_all_caps(raw_tokens)
        self.mentions_num = p.count_mentions(raw_tokens)
        self.hash_num = p.count_hash(raw_tokens)
        # POS counting is currently disabled:
        #self.pos_num_dic = p.count_pos(self.msg)

        # (config switch, name of the step in ``p``), in application order;
        # the step is looked up lazily so a disabled step is never touched
        steps = (
            ('pre_process_remove_stop', 'remove_stop'),
            ('pre_process_lower', 'normalize_lower'),
            ('pre_process_mentions', 'normalize_mentions'),
            ('pre_process_hash', 'normalize_hash'),
            ('pre_process_url', 'normalize_url'),
            ('pre_process_number', 'normalize_number'),
        )
        cleaned = raw_tokens
        for switch, step_name in steps:
            if cfg.get(switch, True):
                cleaned = getattr(p, step_name)(cleaned)

        # keep the normalized message for later use
        self.msg_cleaned = " ".join(cleaned)

        # TODO: create negated tokens

        return cleaned

コード例 #2

0

ファイルを表示

ファイル: emma.py プロジェクト: torakoneko/emma

def chat():
    input = raw_input(Fore.BLUE + 'You >> ').decode('utf-8')
    tokenizedMessage = parse.tokenize(input)
    intents, questionPackages = consume(tokenizedMessage)
    
    reply = sentencebuilder.generate_sentence(tokenizedMessage, get_mood(update=True, text=input, expressAsText=False), intents, questionPackages=questionPackages)
    if "%" not in reply: print Fore.BLUE + u"emma >> " + reply
    else: print Fore.RED + u"Reply generation failed."

コード例 #3

0

ファイルを表示

ファイル: test.py プロジェクト: dnewcome/pyrenthesis

	def test_nominal(self):
		"""The first two tokens produced for 'one two' are 'one' and 'two'."""
		# materialize whatever iterable the tokenizer hands back
		tokens = list(parse.tokenize('one two'))

		first, second = tokens[0], tokens[1]

		self.assertEqual(first, 'one')
		self.assertEqual(second, 'two')

コード例 #4

0

ファイルを表示

ファイル: emma.py プロジェクト: SirFelolis/emma

def chat():
    print Fore.YELLOW + "!!! Chat mode enabled in config file. Press Control-C to exit."
    while True:
        input = raw_input(Fore.BLUE + 'You >> ').decode('utf-8')
        tokenizedMessage = parse.tokenize(input)
        intents = []
        for sentence in tokenizedMessage: intents.append(consume(sentence))
        
        reply = sentencebuilder.generate_sentence(tokenizedMessage, update_mood(input), intents)
        if "%" not in reply: print Fore.BLUE + u"emma >> " + reply
        else: print Fore.RED + u"Reply generation failed."

コード例 #5

0

ファイルを表示

ファイル: interpret.py プロジェクト: Konti6793/compilers

def interpret( str ):
    """Parse and execute the program text ``str``; return what it wrote to STDOUT.

    Returns None when ``parse.program`` yields None or when tokens are left
    over after parsing. Otherwise the program is executed against a fresh
    environment and the environment's STDOUT entry is returned. The set
    filled by ``accumVariablesInProgram`` is printed as a side effect.
    """
    parsed = parse.program( parse.tokenize( str ))
    if parsed is None:
        return None

    program, leftover = parsed
    if len( leftover ) > 0:
        # the parser did not use up the whole input
        return None

    variables = set()
    accumVariablesInProgram( variables, program )
    print( "accum = ", variables )

    env = { STDOUT: "" }
    # only the mutation of env matters here; the call's result is not used
    execProgram( env, program )
    return env[ STDOUT ]

コード例 #6

0

ファイルを表示

ファイル: emma.py プロジェクト: SirFelolis/emma

def reply_to_asks(askList):
    if len(askList) > 0:
        print "Fetched %d new asks." % len(askList)
        for askCount, ask in enumerate(askList):
            print "Reading ask no. %d of %d..." % (askCount + 1, len(askList))
            print Fore.BLUE + u"@" + ask['asker'] + u" >> " + ask['message']

            friendsList = []
            with connection:
                cursor.execute("SELECT username FROM friends")
                for name in cursor.fetchall(): friendsList.append(name[0])
                if not ask['asker'] in friendsList: 
                    print Fore.BLUE + "Adding @%s to friends list..." % ask['asker']
                    cursor.execute("INSERT INTO friends(username) VALUES(\'%s\');" % ask['asker'])

            parsedAsk = parse.tokenize(ask['message'])

            understanding = u""
            intents = []
            for sentenceCount, sentence in enumerate(parsedAsk):
                if console['verboseLogging']: print "Reading sentence no. %d of ask no. %d..." % ((sentenceCount + 1), (askCount + 1))
                intents.append(consume(sentence, ask['asker']))
            
                for wordCount, word in enumerate(sentence):
                    if wordCount == 0 and sentenceCount != 0:
                        understanding += u" "
                    understanding += word[0]
                    if wordCount < len(sentence) - 2:
                        understanding += u" "
            understanding = u"Emma interpreted this message as: \'%s\' %s" % (understanding, str(intents))
            print Fore.BLUE + understanding

            reply = sentencebuilder.generate_sentence(parsedAsk, update_mood(ask['message']), intents, ask['asker'])

            if "%" not in reply:
                print Fore.BLUE + u"emma >> %s" % reply

                print "Posting reply..."
                body = "@%s >> %s\n(%s)\n\nemma >> %s" % (ask['asker'], ask['message'], understanding, reply)
                tumblrclient.post(body.encode('utf-8'), ["dialogue", ask['asker'].encode('utf-8'), "feeling " + express_mood(update_mood(reply)).encode('utf-8')])
            else:
                print Fore.YELLOW + "Sentence generation failed."

            tumblrclient.delete_ask(ask['id'])

            if debug['enableSleep']:
                print "Sleeping for 3 minutes..."
                time.sleep(180)
            else:
                print Fore.YELLOW + "!!! Sleep disabled in config file -- execution will continue normally in 2 seconds..."
                time.sleep(2)

コード例 #7

0

ファイルを表示

ファイル: emma.py プロジェクト: torakoneko/emma

def reply_to_ask(ask):
    print "Reading ask..."
    print Fore.BLUE + u"@" + ask['asker'] + u" >> " + ask['message']

    parsedAsk = parse.tokenize(ask['message'])
    intents, questionPackages = consume(parsedAsk, ask['asker'])
    understanding = utilities.pretty_print_understanding(parsedAsk, intents)

    reply = sentencebuilder.generate_sentence(parsedAsk, get_mood(update=True, text=ask['message'], expressAsText=False), intents, ask['asker'], questionPackages)

    if "%" not in reply:
        print Fore.BLUE + u"emma >> %s" % reply
        print "Posting reply..."
        if settings.option('tumblr', 'enablePostPreview'): print Fore.BLUE + "\n\nTUMBLR POST PREVIEW\n\n" + Fore.RESET + "@" + ask['asker'] + " >> " + ask['message'] + "\n\n" + "emma >> " + reply + "\n- - - - - - - - - - -\n" + get_mood(update=False, expressAsText=True) + "\n\n"
        body = "<a href=" + ask['asker'] + ".tumblr.com/>@" + ask['asker'] + "</a>" + cgi.escape(" >> ") + cgi.escape(ask['message']) + "\n\n" + cgi.escape("emma >> ") + cgi.escape(reply) + "\n<!-- more -->\n" + cgi.escape(understanding)
        tumblrclient.post(body.encode('utf-8'), ["dialogue", ask['asker'].encode('utf-8'), get_mood().encode('utf-8')])
    else: print Fore.RED + "Reply generation failed."

    tumblrclient.delete_ask(ask['id'])

コード例 #8

0

ファイルを表示

ファイル: test.py プロジェクト: dnewcome/pyrenthesis

	def test_def_func(self):
		"""After evaluating '(def foo ((x) (+ x 1)))', '(call foo 2)' gives 3."""
		definition = parse.tokenize('(def foo ((x) (+ x 1)))')
		parse._eval(parse.parse(definition))

		invocation = parse.tokenize('(call foo 2)')
		result = parse._eval(parse.parse(invocation))

		self.assertEqual(result, 3)

コード例 #9

0

ファイルを表示

ファイル: test.py プロジェクト: dnewcome/pyrenthesis

	def test_def(self):
		"""After evaluating '(def foo "bar")', evaluating '(foo)' gives 'bar'."""
		definition = parse.tokenize('(def foo "bar")')
		parse._eval(parse.parse(definition))

		lookup = parse.tokenize('(foo)')
		result = parse._eval(parse.parse(lookup))

		self.assertEqual(result, 'bar')

コード例 #10

0

ファイルを表示

ファイル: test.py プロジェクト: dnewcome/pyrenthesis

	def test_nominal_tokenizer(self):
		"""Tokenizing and parsing "(one 'two)" is compared with a flat list of five strings."""
		# NOTE(review): the expected value looks like a raw token list even
		# though parse.parse is applied to the tokens -- confirm this is intended.
		tokens = parse.tokenize("(one 'two)")
		result = parse.parse(tokens)
		self.assertEqual(result, ['(', 'one', "'", 'two', ')'])

コード例 #11

0

ファイルを表示

ファイル: test.py プロジェクト: dnewcome/pyrenthesis

	def test_mul_expr_eval(self):
		"""Evaluating '(+ 1(+ 2 2) (* 3 3 ) )', irregular spacing included, gives 14."""
		source = '(+ 1(+ 2 2) (* 3 3 ) )'
		tree = parse.parse(parse.tokenize(source))
		result = parse._eval(tree)
		self.assertEqual(result, 14)

コード例 #12

0

ファイルを表示

ファイル: chessboard.py プロジェクト: ejconlon/knowledge_games

def main(args):
    mode = args[1]
    if mode == "play":
        board = ChessBoard.empty()
        agents = [ChessPlayerAgent(color) for color in ChessConstants.COLORS]
        final_board, moves, winner = base.play(agents, board)
    elif mode == "random":
        f = None
        if len(args) > 2:
            fn = args[2]
            f = open(fn, "a")
        board = ChessBoard.empty()
        #agents = [ChessRandomAgent(color) for color in ChessConstants.COLORS]
        #agents = [base.HeuristicAgent(ChessConstants.WHITE, ChessHeuristic()), ChessRandomAgent(ChessConstants.BLACK)]
        #agents = [ChessMinMaxSearchAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2), ChessRandomAgent(ChessConstants.BLACK)]
        #agents = [ChessMinMaxSearchAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2),ChessMinMaxSearchAgent(ChessConstants.BLACK, ChessConstants.WHITE, heuristic=ChessHeuristic(), max_depth=2)]
        #agents = [ChessMinMaxAlphaBetaAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2),ChessRandomAgent(ChessConstants.BLACK)]
        agents = [ChessMinMaxSearchAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2),
                  ChessMinMaxSearchAgent(ChessConstants.BLACK, ChessConstants.WHITE, heuristic=AnotherChessHeuristic(), max_depth=2)]

        final_board, moves, winner = base.play(agents, board)
        pgn = parse.write_game("Chess", agents, moves, winner)
        print ""
        print pgn
        if f is not None:
            f.write(pgn)
            f.close()
    elif mode == "manyrandom":
        while True:
            try:
                main(["", "random"])
            except IndexError:
                continue
    elif mode == "read":
        filename = args[2]
        with open(filename, 'r') as f:
            chars = f.read()
        game_chars = parse.split_games(chars)
        for chars in game_chars:
            print "NEW GAME"
            tokens = parse.tokenize(chars)
            game_parser = parse.PGNGameParser(tokens, ChessPGNMoveParser)
            game = game_parser.game
            board = ChessBoard.empty()
            print game
            print board
            turn = 0
            for move_parser in game.move_parsers:
                print move_parser
                it = move_parser.parse_moves(board.grid)
                first = True
                while True:
                    print ""
                    try:
                        if first:
                            move = it.next()
                            first = False
                        else:
                            move = it.send(board.grid)
                        who = ChessConstants.COLORS[turn]
                        print who+"'s turn"
                        #print move
                        trans_moves = board.translate_move(who, move)
                        print trans_moves
                        assert len(trans_moves) > 0
                        for move in trans_moves:
                            board = board.result(who, move)
                            print board
                            #r = raw_input("ok?")
                        turn = (turn + 1) % 2
                    except StopIteration:
                        break

コード例 #13

0

ファイルを表示

ファイル: chessboard.py プロジェクト: ejconlon/knowledge_games

def main(args):
    mode = args[1]
    if mode == "play":
        board = ChessBoard.empty()
        agents = [ChessPlayerAgent(color) for color in ChessConstants.COLORS]
        final_board, moves, winner = base.play(agents, board)
    elif mode == "random":
        f = None
        if len(args) > 2:
            fn = args[2]
            f = open(fn, "a")
        board = ChessBoard.empty()
        #agents = [ChessRandomAgent(color) for color in ChessConstants.COLORS]
        #agents = [base.HeuristicAgent(ChessConstants.WHITE, ChessHeuristic()), ChessRandomAgent(ChessConstants.BLACK)]
        #agents = [ChessMinMaxSearchAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2), ChessRandomAgent(ChessConstants.BLACK)]
        #agents = [ChessMinMaxSearchAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2),ChessMinMaxSearchAgent(ChessConstants.BLACK, ChessConstants.WHITE, heuristic=ChessHeuristic(), max_depth=2)]
        #agents = [ChessMinMaxAlphaBetaAgent(ChessConstants.WHITE, ChessConstants.BLACK, heuristic=ChessHeuristic(), max_depth=2),ChessRandomAgent(ChessConstants.BLACK)]
        agents = [
            ChessMinMaxSearchAgent(ChessConstants.WHITE,
                                   ChessConstants.BLACK,
                                   heuristic=ChessHeuristic(),
                                   max_depth=2),
            ChessMinMaxSearchAgent(ChessConstants.BLACK,
                                   ChessConstants.WHITE,
                                   heuristic=AnotherChessHeuristic(),
                                   max_depth=2)
        ]

        final_board, moves, winner = base.play(agents, board)
        pgn = parse.write_game("Chess", agents, moves, winner)
        print ""
        print pgn
        if f is not None:
            f.write(pgn)
            f.close()
    elif mode == "manyrandom":
        while True:
            try:
                main(["", "random"])
            except IndexError:
                continue
    elif mode == "read":
        filename = args[2]
        with open(filename, 'r') as f:
            chars = f.read()
        game_chars = parse.split_games(chars)
        for chars in game_chars:
            print "NEW GAME"
            tokens = parse.tokenize(chars)
            game_parser = parse.PGNGameParser(tokens, ChessPGNMoveParser)
            game = game_parser.game
            board = ChessBoard.empty()
            print game
            print board
            turn = 0
            for move_parser in game.move_parsers:
                print move_parser
                it = move_parser.parse_moves(board.grid)
                first = True
                while True:
                    print ""
                    try:
                        if first:
                            move = it.next()
                            first = False
                        else:
                            move = it.send(board.grid)
                        who = ChessConstants.COLORS[turn]
                        print who + "'s turn"
                        #print move
                        trans_moves = board.translate_move(who, move)
                        print trans_moves
                        assert len(trans_moves) > 0
                        for move in trans_moves:
                            board = board.result(who, move)
                            print board
                            #r = raw_input("ok?")
                        turn = (turn + 1) % 2
                    except StopIteration:
                        break

コード例 #14

0

ファイルを表示

ファイル: interpret.py プロジェクト: Konti6793/compilers

def evalTermStr( str ):
    """Parse ``str`` as a term, print its pretty-printed form, then print its value.

    The term is evaluated with ``pi`` bound to 3.14 and ``a`` bound to 1.
    """
    term, _leftover = parse.term( parse.tokenize( str ))
    print( parse.pp( term ))

    env = { "pi": 3.14, "a": 1 }
    value = evalTerm( env, term )
    print( "Term : " + str + " yields : ", value )

コード例 #15

0

ファイルを表示

ファイル: interpret.py プロジェクト: Konti6793/compilers

def testTerm( str ):
    """Print the result of ``parse.term`` and then ``parse.expression`` for ``str``.

    The input is tokenized again for each of the two parsers.
    """
    # each parser is looked up only when its turn comes, in this order
    for parser_name in ( "term", "expression" ):
        outcome = getattr( parse, parser_name )( parse.tokenize( str ) )
        print( parser_name + "( " + str + " ) ----> " , outcome )

コード例 #16

0

ファイルを表示

ファイル: interpret.py プロジェクト: Konti6793/compilers

def testProgram( str ):
    """Print the result of ``parse.program`` applied to the tokens of ``str``."""
    tokens = parse.tokenize( str )
    outcome = parse.program( tokens )
    print( "program( " + str + " ) ----> " , outcome )

コード例 #17

0

ファイルを表示

def interpret(tokens):
    """Tokenize, parse and execute ``tokens``; return item 1 of the execution result.

    ``tokens`` is passed to ``parse.tokenize`` together with the list of
    terminal strings below (presumably it is the raw program text despite
    its name -- verify against callers). Item 0 of the ``parse.program``
    result is executed in an empty environment.
    """
    terminals = [
        'print', 'assign', ':=', 'if', 'true', 'false', 'while', 'not',
        'xor', '+', '*', ';', '{', '}', '(', ')', ' ',
    ]
    lexed = parse.tokenize(terminals, tokens)
    parsed = parse.program(lexed)
    return execProgram({}, parsed[0])[1]

コード例 #18

0

ファイルを表示

ファイル: interpret.py プロジェクト: Konti6793/compilers

def evalFormulaStr( str ):
    """Tokenize ``str``, run it through ``evalFormula`` and ``evalTerm``, print the result.

    The evaluation environment binds ``pi`` to 3.14.
    """
    # NOTE(review): evalFormula is fed the raw tokens and item 0 of its result
    # is then handed to evalTerm -- confirm this is not meant to be a parse step.
    formula, _leftover = evalFormula( parse.tokenize( str ))
    env = { "pi": 3.14 }
    value = evalTerm( env, formula )
    print( "Formula : " + str + " yields : ", value )