예제 #1
0
def checkin_txt_file_read(filename):
    """Bulk-load a tab-separated check-in file through ``dbutil``.

    The file is consumed in batches using ``readlines(readlines_number)``
    (``readlines_number`` is a module-level size hint defined elsewhere).
    Every batch is converted to a list of 7-tuples and handed to
    ``dbutil.bulk_insert`` together with ``dbutil.conn`` and
    ``dbutil.sql_insert_words_checkin_list``.

    Each line is split on tabs and the first seven fields are read, in
    file order, as: u_id, tweet_id, latitude, longitude, createdat, text,
    place_id.  All seven are decoded from UTF-8; a trailing newline is
    stripped from place_id.  The tuple passed on puts tweet_id first:
    ``(tweet_id, u_id, latitude, longitude, createdat, text, place_id)``.

    Lines whose field count is not exactly seven are reported on stdout
    but still processed.

    Args:
        filename: Path of the file to read.

    Raises:
        IndexError: If a line has fewer than seven fields.
        UnicodeDecodeError: If a field is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line or the database layer raises part-way through.
    with open(filename) as f:
        while True:
            lines = f.readlines(readlines_number)
            if not lines:
                break
            words_list = []
            for line in lines:
                words = line.split("\t")
                if len(words) != 7:
                    # Diagnostic only; same output as
                    # ``print len(words), words``.
                    print("%d %s" % (len(words), words))
                u_id = unicode(words[0], encoding='UTF-8')
                tweet_id = unicode(words[1], encoding='UTF-8')
                latitude = unicode(words[2], encoding='UTF-8')
                longitude = unicode(words[3], encoding='UTF-8')
                createdat = unicode(words[4], encoding='UTF-8')
                text = unicode(words[5], encoding='UTF-8')
                place_id = unicode(words[6], encoding='UTF-8').strip('\n')
                # tweet_id comes first in the inserted row, ahead of u_id.
                words_list.append(
                    (tweet_id, u_id, latitude, longitude, createdat, text,
                     place_id)
                )
            dbutil.bulk_insert(words_list, dbutil.conn,
                               dbutil.sql_insert_words_checkin_list)
예제 #2
0
def checkin_txt_file_read_one(filename):
    """Bulk-load a tab-separated check-in file via ``dbutil.conn_one``.

    The file is consumed in batches using ``readlines(readlines_number)``
    (``readlines_number`` is a module-level size hint defined elsewhere).
    A progress line ``index: N`` is printed for every read, including the
    final empty read that ends the loop.  Every non-empty batch is
    converted to a list of 7-tuples and handed to ``dbutil.bulk_insert``
    together with ``dbutil.conn_one`` and
    ``dbutil.sql_insert_words_checkin_list_one``.

    Each line is split on tabs and the first seven fields are read, in
    file order, as: u_id, tweet_id, latitude, longitude, createdat, text,
    place_id.  Only text and place_id are decoded from UTF-8 (place_id
    also has a trailing newline stripped); the other fields are passed on
    as the raw strings read from the file.  The tuple passed on is
    ``(tweet_id, u_id, latitude, longitude, createdat, text, place_id)``.

    Args:
        filename: Path of the file to read.

    Raises:
        IndexError: If a line has fewer than seven fields.
        UnicodeDecodeError: If text or place_id is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line or the database layer raises part-way through.
    with open(filename) as f:
        index = 0
        while True:
            lines = f.readlines(readlines_number)
            # Progress is reported before the emptiness check, so the
            # terminating read is counted as well.
            print("index: %d" % index)
            index += 1
            if not lines:
                break
            words_list = []
            for line in lines:
                words = line.split("\t")
                u_id = words[0]
                tweet_id = words[1]
                latitude = words[2]
                longitude = words[3]
                createdat = words[4]
                text = unicode(words[5], encoding='utf-8')
                place_id = unicode(words[6], encoding='utf-8').strip('\n')
                # tweet_id comes first in the inserted row, ahead of u_id.
                words_list.append(
                    (tweet_id, u_id, latitude, longitude, createdat, text,
                     place_id)
                )
            dbutil.bulk_insert(words_list, dbutil.conn_one,
                               dbutil.sql_insert_words_checkin_list_one)
예제 #3
0
def users_data_txt_file_read(filename):
    """Bulk-load a tab-separated user-statistics file through ``dbutil``.

    The file is consumed in batches using ``readlines(readlines_number)``
    (``readlines_number`` is a module-level size hint defined elsewhere).
    Every batch is converted to a list of 4-tuples and handed to
    ``dbutil.bulk_insert`` together with ``dbutil.conn`` and
    ``dbutil.sql_insert_words_users_data_list``.

    Each line is split on tabs and the first four fields are read, in file
    order, as: u_id, status_count, followers_count, friends_count.  Only
    friends_count is decoded from UTF-8 and has a trailing newline
    stripped; the other fields are passed on as raw strings.

    Lines whose field count is not exactly four are reported on stdout
    but still processed.

    Args:
        filename: Path of the file to read.

    Raises:
        IndexError: If a line has fewer than four fields.
        UnicodeDecodeError: If friends_count is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line or the database layer raises part-way through.
    with open(filename) as f:
        while True:
            lines = f.readlines(readlines_number)
            if not lines:
                break
            words_list = []
            for line in lines:
                words = line.split("\t")
                if len(words) != 4:
                    # Diagnostic only; same output as
                    # ``print len(words), words``.
                    print("%d %s" % (len(words), words))
                u_id = words[0]
                status_count = words[1]
                followers_count = words[2]
                friends_count = unicode(words[3], encoding='UTF-8').strip('\n')
                words_list.append(
                    (u_id, status_count, followers_count, friends_count)
                )
            dbutil.bulk_insert(words_list, dbutil.conn,
                               dbutil.sql_insert_words_users_data_list)
예제 #4
0
def sentiment_uk_txt_file_read(filename):
    """Bulk-load a tab-separated sentiment file through ``dbutil``.

    The file is consumed in batches using ``readlines(readlines_number)``
    (``readlines_number`` is a module-level size hint defined elsewhere).
    Every batch is converted to a list of 4-tuples and handed to
    ``dbutil.bulk_insert`` together with ``dbutil.conn`` and
    ``dbutil.sql_insert_words_uk_list``.

    Each line is split on tabs and the first four fields are read, in file
    order, as: sentiment, twitter_stm, twitter_id, language.  twitter_stm
    and language are decoded from UTF-8 (language also has a trailing
    newline stripped); sentiment and twitter_id are passed on as raw
    strings.  The tuple passed on puts the id first:
    ``(twitter_id, sentiment, twitter_stm, language)``.

    Lines whose field count is not exactly four are reported on stdout
    but still processed.

    Args:
        filename: Path of the file to read.

    Raises:
        IndexError: If a line has fewer than four fields.
        UnicodeDecodeError: If twitter_stm or language is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line or the database layer raises part-way through.
    with open(filename) as f:
        while True:
            lines = f.readlines(readlines_number)
            if not lines:
                break
            words_list = []
            for line in lines:
                words = line.split("\t")
                if len(words) != 4:
                    # Diagnostic only; same output as
                    # ``print len(words), words``.
                    print("%d %s" % (len(words), words))
                sentiment = words[0]
                twitter_stm = unicode(words[1], encoding='UTF-8')
                twitter_id = words[2]
                language = unicode(words[3], encoding='UTF-8').strip('\n')
                # The id is moved to the front of the inserted row.
                words_list.append(
                    (twitter_id, sentiment, twitter_stm, language)
                )
            dbutil.bulk_insert(words_list, dbutil.conn,
                               dbutil.sql_insert_words_uk_list)
예제 #5
0
def sentiment_world_txt_file_read_one(filename):
    """Feed a tab-separated sentiment file to ``dbutil`` in batches.

    The file is consumed in batches using ``readlines(readlines_number)``
    (``readlines_number`` is a module-level size hint defined elsewhere).
    Every batch is converted to a list of 3-tuples and handed to
    ``dbutil.bulk_insert`` together with ``dbutil.conn_one`` and
    ``dbutil.sql_insert_words_world_list_one_update``.

    Each line is split on tabs.  Field 0 is read as sentiment, field 2 as
    twitter_id and field 3 as language; field 1 is ignored.  Only language
    is decoded from UTF-8 and has a trailing newline stripped.  The tuple
    passed on is ``(twitter_id, sentiment, language)``.

    Lines whose field count is not exactly four are reported on stdout
    but still processed.

    Args:
        filename: Path of the file to read.

    Raises:
        IndexError: If a line has fewer than four fields.
        UnicodeDecodeError: If language is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line or the database layer raises part-way through.
    with open(filename) as f:
        while True:
            lines = f.readlines(readlines_number)
            if not lines:
                break
            words_list = []
            for line in lines:
                words = line.split("\t")
                if len(words) != 4:
                    # Diagnostic only; same output as
                    # ``print len(words), words``.
                    print("%d %s" % (len(words), words))
                sentiment = words[0]
                twitter_id = words[2]
                language = unicode(words[3], encoding='utf-8').strip('\n')
                # The tweet text (field 1) is deliberately left out.
                words_list.append((twitter_id, sentiment, language))
            dbutil.bulk_insert(words_list, dbutil.conn_one,
                               dbutil.sql_insert_words_world_list_one_update)