# Build one capture process and one clean-up process per candidate, run all
# captures in parallel, wait for them to finish, then launch the clean-up
# processes.
# NOTE(review): `processes` is appended to below but is not created in this
# excerpt; it is assumed to be an (empty) list defined just above.
processes2 = []
for can in canList:

    name = can.getName()
    keywords = can.getKey_words()
    start_time = time.time()
    # Per-candidate file names derived from the candidate's name.
    jsonFile = str(name) + "_raw_tweets.json"
    csv_outFile = str(name) + "_tweets_out.csv"

    processname = str(name) + "_stream"  # not used in the visible code

    # Hand Process the callable and its arguments separately.  Writing
    # Process(target=startCapture(...)) would call startCapture right here in
    # the parent and pass its *return value* as the target, so nothing would
    # run in the child process.
    processes.append(
        Process(target=startCapture,
                args=(name, tweet_limit, start_time, time_limit, jsonFile,
                      csv_outFile)))
    processes2.append(
        Process(target=cleanText.jsonUTF8toCsv,
                args=(jsonFile, csv_outFile)))

    print(name)
    print(keywords)
    print(jsonFile)
    print(csv_outFile)

# A list has no start(); each process has to be started individually.
for proc in processes:
    proc.start()

# Block until every capture process has exited.  The previous loop compared a
# one-off list of is_alive() results against False, which is never equal, so
# it spun forever and the clean-up stage was never reached.
for proc in processes:
    proc.join()
print('done')

# Captures are finished, so the raw JSON files can now be converted.
for proc in processes2:
    proc.start()
예제 #2
0

# Capture-and-clean pipeline: authenticate, stream tweets filtered on
# keyword_list, convert the captured file to CSV, then load the CSV for
# further processing.
# NOTE(review): what each helper does is inferred from its name and from the
# progress messages printed here; the helpers themselves are not visible in
# this excerpt -- confirm against their definitions.
auth = OAuthHandler(consumer_key, consumer_secret) # OAuth object built from the consumer credentials
auth.set_access_token(access_token, access_token_secret)

print("starting capturing stream")
# Earlier form of the call below, without the tweet_limit argument:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
# NOTE(review): the original author flagged the following call as not working;
# the cause cannot be determined from this excerpt.
twitterStream = Stream(auth, accessStream.Listener(tweet_limit, start_time, time_limit, inputFile))  # listener receives tweet_limit, start_time, time_limit and inputFile

twitterStream.filter(track=keyword_list)  # call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")


# Convert inputFile to outputFile (JSON -> CSV, going by the helper's name).
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")


# Load the CSV written above; presumably a pandas DataFrame -- TODO confirm.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")



print("identifying subject of the tweet")

# All rows, 'text' column only.  The column is requested as a list, so (if
# cleanDF is a pandas DataFrame) the result stays a one-column frame.
textList = cleanDF.loc[:, ['text']]

# Empty accumulators and a zeroed counter; none of them is filled or read in
# the visible code.
subject_array = []
clean_text = []
# Disabled alternative, with the original author's caveat:
#keywords = keyword_list[0].replace(',', '').split() this can only be used in one place or only first word comes through.
canCount = 0

# Build one capture process and one clean-up process per candidate, run all
# captures in parallel and wait for them to finish.
processes = []
processes2 = []
for can in canList:

    name = can.getName()
    keywords = can.getKey_words()
    start_time = time.time()
    # Per-candidate file names derived from the candidate's name.
    jsonFile = str(name) + "_raw_tweets.json"
    csv_outFile = str(name) + "_tweets_out.csv"

    processname = str(name) + "_stream"  # not used in the visible code

    # Hand Process the callable and its arguments separately.  Writing
    # Process(target=startCapture(...)) would call startCapture right here in
    # the parent and pass its *return value* as the target, so nothing would
    # run in the child process.
    processes.append(
        Process(target=startCapture,
                args=(name, tweet_limit, start_time, time_limit, jsonFile,
                      csv_outFile)))
    # Prepared but not started in this excerpt.
    processes2.append(
        Process(target=cleanText.jsonUTF8toCsv,
                args=(jsonFile, csv_outFile)))

    print(name)
    print(keywords)
    print(jsonFile)
    print(csv_outFile)

# A list has no start(); each process has to be started individually.
for proc in processes:
    proc.start()

# Block until every capture process has exited.  The previous loop compared a
# one-off list of is_alive() results against False, which is never equal, so
# it spun forever printing 'done'.
for proc in processes:
    proc.join()
print('done')
예제 #4
0
# Capture-and-clean pipeline: authenticate, stream tweets filtered on
# keyword_list, convert the captured file to CSV, then load the CSV for
# further processing.
# NOTE(review): what each helper does is inferred from its name and from the
# progress messages printed here; the helpers themselves are not visible in
# this excerpt -- confirm against their definitions.
auth = OAuthHandler(consumer_key, consumer_secret)  # OAuth object built from the consumer credentials
auth.set_access_token(access_token, access_token_secret)

print("starting capturing stream")
# Earlier form of the call below, without the tweet_limit argument:
#twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile))
# NOTE(review): the original author flagged the following call as not working;
# the cause cannot be determined from this excerpt.
twitterStream = Stream(
    auth, accessStream.Listener(tweet_limit, start_time, time_limit,
                                inputFile))  # listener receives tweet_limit, start_time, time_limit and inputFile

twitterStream.filter(
    track=keyword_list)  # call the filter method to run the Stream Listener

print("done capturing stream")
print("cleaning tweets")

# Convert inputFile to outputFile (JSON -> CSV, going by the helper's name).
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

# Load the CSV written above; presumably a pandas DataFrame -- TODO confirm.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

print("identifying subject of the tweet")

# All rows, 'text' column only.  The column is requested as a list, so (if
# cleanDF is a pandas DataFrame) the result stays a one-column frame.
textList = cleanDF.loc[:, ['text']]

# Empty accumulators; nothing in the visible code fills them.
subject_array = []
clean_text = []
# Disabled alternative, with the original author's caveat:
#keywords = keyword_list[0].replace(',', '').split() this can only be used in one place or only first word comes through.

for i in xrange(len(textList)):