file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret,consumer_key,access_token_secret,access_token
import accessStream
import cleanText
#from main import keyword_list

# Load the existing tweets CSV into a data frame via the project helper.
df = cleanText.csvToPandasDF("tweets_out.csv")

# Prepend a 'uID' column that numbers the rows 0 .. (row count - 1).
df.insert(0, 'uID', range(len(df.index)))

# The search terms live in a single comma-separated string inside a list.
keyword_list = ['bernie, sanders, hillary, clinton']

# Strip the commas from that string, then split on whitespace so each
# search term becomes its own list entry.
first_entry = keyword_list[0]
keywords = first_entry.replace(',', '').split()
예제 #2
0
print("starting capturing stream")

# Attach the project's listener to a tweepy stream.
# NOTE(review): the original author flagged this four-argument Listener call
# as not working; an earlier variant passed only
# (start_time, time_limit, inputFile). Confirm the expected constructor
# arguments against accessStream.Listener.
twitterStream = Stream(auth, accessStream.Listener(tweet_limit, start_time, time_limit, inputFile))

# Calling filter() is what runs the stream listener on the tracked keywords.
twitterStream.filter(track=keyword_list)

print("done capturing stream")
print("cleaning tweets")

cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

print("identifying subject of the tweet")

# Keep only the 'text' column (still a data frame, not a series).
textList = cleanDF.loc[:, ['text']]

clean_text = []

# NOTE (from the original author): splitting keyword_list into separate words
# with keyword_list[0].replace(',', '').split() "can only be used in one place
# or only first word comes through", so the unsplit keyword_list is passed.
#
# Iterate over the column values directly instead of indexing by position:
# this avoids the Python-2-only xrange and does not depend on the frame
# having a default 0..n-1 index.
subject_array = [
    cleanText.identifySubject(tweet_text, keyword_list)
    for tweet_text in textList['text']
]
예제 #3
0
file for testing things before adding them to pipeline
"""

import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import io
import os
import json
from keys import consumer_secret, consumer_key, access_token_secret, access_token
import accessStream
import cleanText
#from main import keyword_list

# Load the existing tweets CSV into a data frame via the project helper.
df = cleanText.csvToPandasDF("tweets_out.csv")

# Prepend a 'uID' column that numbers the rows 0 .. (row count - 1).
df.insert(0, 'uID', range(len(df.index)))

# The search terms live in a single comma-separated string inside a list.
keyword_list = ['bernie, sanders, hillary, clinton']

# Strip the commas from that string, then split on whitespace so each
# search term becomes its own list entry.
first_entry = keyword_list[0]
keywords = first_entry.replace(',', '').split()
예제 #4
0
print("starting capturing stream")

# Build the project's listener first, then hand it to the tweepy stream.
# NOTE(review): the original author flagged this four-argument Listener call
# as not working; an earlier variant passed only
# (start_time, time_limit, inputFile). Confirm the expected constructor
# arguments against accessStream.Listener.
listener = accessStream.Listener(tweet_limit, start_time, time_limit, inputFile)
twitterStream = Stream(auth, listener)

# Calling filter() is what runs the stream listener on the tracked keywords.
twitterStream.filter(track=keyword_list)

print("done capturing stream")
print("cleaning tweets")

cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

print("identifying subject of the tweet")

# Keep only the 'text' column (still a data frame, not a series).
textList = cleanDF.loc[:, ['text']]

# Accumulators appended to by the per-tweet loop below.
subject_array, clean_text = [], []
# NOTE (from the original author): splitting keyword_list into separate words
# with keyword_list[0].replace(',', '').split() "can only be used in one place
# or only first word comes through".

for i in xrange(len(textList)):

    subject_array.append(
        cleanText.identifySubject(textList['text'][i], keyword_list))
    clean_text.append(