Python csvToPandasDF 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: cleanText

메소드/함수: csvToPandasDF

hotexamples.com에서의 예제들: 4

Python csvToPandasDF - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python cleanText.csvToPandasDF의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 개선에 도움이 됩니다.

예제 #1

파일 보기

파일: mainTesting.py 프로젝트: alexjacobs08/capStoneTwitterMining

"""
file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret,consumer_key,access_token_secret,access_token
import accessStream
import cleanText
#from main import keyword_list

# Read the exported tweets CSV through the project's cleanText helper.
df = cleanText.csvToPandasDF("tweets_out.csv")

# Put a running counter (0 .. df.shape[0] - 1) in front as the 'uID' column.
df.insert(0, 'uID', range(df.shape[0]))

# The search terms live in a single comma-separated string inside a list.
keyword_list = ['bernie, sanders, hillary, clinton']

# Drop every comma, then split on whitespace to get the individual terms.
keywords = ''.join(keyword_list[0].split(',')).split()

예제 #2

파일 보기

파일: main.py 프로젝트: alexjacobs08/capStoneTwitterMining

# Pipeline excerpt: capture tweets, convert them to CSV, load the CSV, then
# tag each tweet with a subject. The names auth, tweet_limit, start_time,
# time_limit, inputFile, outputFile and keyword_list come from earlier in the
# file and are not visible in this excerpt.
print("starting capturing stream")
# Earlier form of the call below, without the tweet_limit argument:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
twitterStream = Stream(auth, accessStream.Listener(tweet_limit, start_time, time_limit, inputFile))  # NOTE(review): the original author flagged this call as not working; the cause is not visible in this excerpt

twitterStream.filter(track=keyword_list)  #call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")


# Presumably converts the captured JSON in inputFile into a CSV at outputFile
# (inferred from the helper's name and the status message below) - confirm.
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")


# Load the CSV back through the project helper; by its name this returns a
# pandas DataFrame - confirm against cleanText.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")



print("identifying subject of the tweet")

# All rows of the 'text' column. Assuming cleanDF is a pandas DataFrame, the
# list selector ['text'] keeps the result a one-column frame, which is why the
# loop below still indexes it with textList['text'].
textList = cleanDF.loc[:, ['text']]

# Accumulators filled per tweet; clean_text is not appended to in this excerpt.
subject_array = []
clean_text = []
# Author's note on the disabled line below: the keyword splitting "can only be
# used in one place or only first word comes through".
#keywords = keyword_list[0].replace(',', '').split() this can only be used in one place or only first word comes through.

# xrange is Python 2 only.
# NOTE(review): textList['text'][i] looks like a lookup by index label, so this
# presumably relies on a default 0..n-1 index - confirm.
for i in xrange(len(textList)):

    subject_array.append(cleanText.identifySubject(textList['text'][i], keyword_list))

예제 #3

파일 보기

"""
file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret, consumer_key, access_token_secret, access_token
import accessStream
import cleanText
#from main import keyword_list

# Load the exported tweets CSV via the project's cleanText helper.
df = cleanText.csvToPandasDF("tweets_out.csv")

# Insert a 'uID' column at position 0 holding the counter 0 .. df.shape[0] - 1.
df.insert(0, 'uID', range(df.shape[0]))

# Search terms are kept as one comma-separated string wrapped in a list.
keyword_list = ['bernie, sanders, hillary, clinton']

# Remove the commas first, then split on whitespace into separate terms.
keywords = ''.join(keyword_list[0].split(',')).split()

예제 #4

파일 보기

파일: main.py 프로젝트: alexjacobs08/capStoneTwitterMining

# Pipeline excerpt: capture tweets, convert them to CSV, load the CSV, then
# prepare to tag each tweet with a subject. The names auth, tweet_limit,
# start_time, time_limit, inputFile, outputFile and keyword_list come from
# earlier in the file and are not visible in this excerpt.
print("starting capturing stream")
# Earlier form of the call below, without the tweet_limit argument:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
twitterStream = Stream(
    auth, accessStream.Listener(tweet_limit, start_time, time_limit,
                                inputFile))  # NOTE(review): the original author flagged this call as not working; the cause is not visible in this excerpt

twitterStream.filter(
    track=keyword_list)  #call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")

# Presumably converts the captured JSON in inputFile into a CSV at outputFile
# (inferred from the helper's name and the status message below) - confirm.
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

# Load the CSV back through the project helper; by its name this returns a
# pandas DataFrame - confirm against cleanText.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

print("identifying subject of the tweet")

# All rows of the 'text' column. Assuming cleanDF is a pandas DataFrame, the
# list selector ['text'] keeps the result a one-column frame, so later code
# still has to index it with textList['text'].
textList = cleanDF.loc[:, ['text']]

# Accumulators filled per tweet by the loop that follows this block.
subject_array = []
clean_text = []
# Author's note on the disabled line below: the keyword splitting "can only be
# used in one place or only first word comes through".
#keywords = keyword_list[0].replace(',', '').split() this can only be used in one place or only first word comes through.

for i in xrange(len(textList)):

    subject_array.append(
        cleanText.identifySubject(textList['text'][i], keyword_list))
    clean_text.append(