Python filter_tokenの例

プログラミング言語: Python

名前空間/パッケージ名: main.filter_text

メソッド/関数: filter_token

hotexamples.comのコード掲載数: 3

Python filter_token - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのmain.filter_text.filter_tokenの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: token_dictionary.py プロジェクト: hvdthong/Transportation_NEC

def road_stop_token(list_line, command, stop_en):
    """Build a token list from raw text lines and print it.

    Every line is handled according to ``command``:

    * ``'abbr'``    -- print the first tab-separated field of the line;
      nothing is collected.
    * ``'road'``    -- split the line on ``';'`` and then on whitespace, pass
      each word through ``filter_token`` and collect the result if it has not
      been collected yet, is not in ``stop_en`` and ``is_int`` returns
      ``False`` for it.
    * ``'busstop'`` -- skip the first line; for the others tokenize the second
      tab-separated field, pass each word through ``filter_token`` and collect
      non-empty, not yet collected results.
    * ``'bussvc'``  -- skip the first line; for the others collect the
      stripped first tab-separated field (duplicates are kept).

    Any other command collects nothing. Finally the collected tokens are
    printed in lower case, one per line and in insertion order, followed by a
    summary line with the number of tokens.

    Args:
        list_line: Iterable of text lines to process.
        command: Selects the parsing mode, see above.
        stop_en: Container of stop words; only used for membership tests.

    Returns:
        None. All results are written to standard output.

    Raises:
        IndexError: In ``'busstop'`` mode, if a line after the first one has
            no second tab-separated field.
    """
    list_token = []
    # Mirrors the tokens collected by the 'road'/'busstop' modes so that the
    # duplicate check is a set lookup instead of a scan of list_token.
    seen = set()

    for index, line in enumerate(list_line):
        # 'busstop' and 'bussvc' ignore the first line (presumably a header
        # row -- confirm against the input files).
        is_first_line = index == 0

        if command == 'abbr':
            print(line.split('\t')[0])

        elif command == 'road':
            for element in line.split(';'):
                for each in element.split():
                    each = filter_token(each)
                    if each in seen or each in stop_en:
                        continue
                    if is_int(each) is False:
                        list_token.append(each)
                        seen.add(each)

        elif command == 'busstop':
            if is_first_line:
                continue
            for each in line.split('\t')[1].split():
                filter_each = filter_token(each.strip())
                # NOTE(review): unlike the 'road' mode, the stop-word and
                # integer checks below look at the raw word, not at the
                # filtered token. Kept as-is; confirm which one is intended.
                if (filter_each in seen or len(filter_each) == 0
                        or each in stop_en):
                    continue
                if is_int(each) is False:
                    token = filter_each.strip()
                    list_token.append(token)
                    seen.add(token)

        elif command == 'bussvc':
            if not is_first_line:
                list_token.append(line.split('\t')[0].strip())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for value in list_token:
        print(value.lower())
    print('Total length of list: %i' % len(list_token))

コード例 #2

ファイルを表示

ファイル: twitterFunction.py プロジェクト: hvdthong/Transportation_NEC

def filtering_tweetText(path, name_write):
    """Filter all stored tweet texts and write them out via ``write_file``.

    Selects ``tweetID`` and ``tweetText`` from the ``twitter_posts_distinct``
    table of the local ``twitter_bus`` MySQL database, passes every text
    through ``filter_token``, prints the tab-separated ``tweetID`` / filtered
    text pair and collects it. After the connection has been closed, the
    collected lines are handed to ``write_file(path, name_write, lines)``.

    Args:
        path: Forwarded unchanged to ``write_file``.
        name_write: Forwarded unchanged to ``write_file``.
    """
    # NOTE(review): connection credentials are hard-coded in the source;
    # consider loading them from configuration or the environment.
    db = MySQLdb.connect(
        host="localhost",  # your host, usually localhost
        user="******",  # your username
        passwd="ducthong",  # your password
        db="twitter_bus",  # name of the data base
    )
    list_write = []
    try:
        cur = db.cursor()
        cur.execute('select tweetID, tweetText from twitter_posts_distinct')
        for row in cur.fetchall():
            tweetID = row[0]
            tweetText = filter_token(row[1])
            line = tweetID + '\t' + tweetText
            print(line)
            list_write.append(line)
    finally:
        # Close the connection even when the query or the filtering fails.
        db.close()
    write_file(path, name_write, list_write)

コード例 #3

ファイルを表示

ファイル: facebookFunc.py プロジェクト: hvdthong/Transportation_NEC

def filtering_facebookBusNews(path, name_write):
    """Filter all stored Facebook posts and write them out via ``write_file``.

    Selects ``facebookID`` and ``post`` from the ``facebook_busnews_ver2``
    table of the local ``2015_allschemas`` MySQL database, passes every post
    through ``filter_token``, prints the tab-separated ``facebookID`` /
    filtered text pair and collects it. After the connection has been closed,
    the collected lines are handed to ``write_file(path, name_write, lines)``.

    Args:
        path: Forwarded unchanged to ``write_file``.
        name_write: Forwarded unchanged to ``write_file``.
    """
    # NOTE(review): connection credentials are hard-coded in the source;
    # consider loading them from configuration or the environment.
    db = MySQLdb.connect(
        host="localhost",  # your host, usually localhost
        user="******",  # your username
        passwd="ducthong",  # your password
        db="2015_allschemas",  # name of the data base
    )
    list_write = []
    try:
        cur = db.cursor()
        cur.execute('select facebookID, post from facebook_busnews_ver2')
        for row in cur.fetchall():
            post_id = row[0]
            post_text = filter_token(row[1])
            line = post_id + '\t' + post_text
            print(line)
            list_write.append(line)
    finally:
        # Close the connection even when the query or the filtering fails.
        db.close()
    write_file(path, name_write, list_write)