# Build one capture process and one JSON -> CSV cleaning process per candidate,
# run every capture to completion, then start the cleaning jobs.
#
# NOTE(review): `processes` is appended to below but not initialised in this
# section -- it is assumed to be a list created earlier; confirm.
processes2 = []
for can in canList:
    name = can.getName()
    keywords = can.getKey_words()
    start_time = time.time()
    jsonFile = str(name) + "_raw_tweets.json"
    csv_outFile = str(name) + "_tweets_out.csv"
    processname = str(name) + "_stream"

    # Pass the callable and its arguments separately.  Writing
    # `target=startCapture(...)` runs the capture right here in the parent
    # and hands its return value to Process, so nothing runs in the child.
    processes.append(
        Process(target=startCapture,
                name=processname,
                args=(name, tweet_limit, start_time, time_limit,
                      jsonFile, csv_outFile)))
    processes2.append(
        Process(target=cleanText.jsonUTF8toCsv,
                args=(jsonFile, csv_outFile)))

    print(name)
    print(keywords)
    print(jsonFile)
    print(csv_outFile)

# A list has no start(); each Process has to be started individually.
for capture in processes:
    capture.start()

# join() blocks until the process exits.  The previous
# `while living_processes != False` compared a list (computed once) with
# False, which is always true, so it spun forever.
for capture in processes:
    capture.join()
print('done')

# The raw JSON files are complete only after the captures have finished, so
# the cleaning jobs are started last.
for cleaner in processes2:
    cleaner.start()
# Authenticate with the application's OAuth credentials.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Capture: run the stream listener, filtered on keyword_list.
# NOTE(review): the original author flagged the Stream construction below as
# "not working"; the cause is not visible from this section.
print("starting capturing stream")
stream_listener = accessStream.Listener(tweet_limit, start_time, time_limit,
                                        inputFile)
twitterStream = Stream(auth, stream_listener)
twitterStream.filter(track=keyword_list)
print("done capturing stream")

# Clean: convert the captured JSON (inputFile) into a CSV (outputFile).
print("cleaning tweets")
cleanText.jsonUTF8toCsv(inputFile, outputFile)
print("tweets cleaned to CSV. CSV created")

# Load the cleaned CSV through the project's csvToPandasDF helper.
cleanDF = cleanText.csvToPandasDF(outputFile)
print("data frame created.")

# Select the 'text' column and prepare the accumulators used for subject
# identification.
print("identifying subject of the tweet")
textList = cleanDF.loc[:, ['text']]
subject_array, clean_text = [], []
# Author's note: deriving keywords via
# keyword_list[0].replace(',', '').split() can only be used in one place,
# otherwise only the first word comes through.
# Build one capture process and one JSON -> CSV cleaning process per candidate,
# then run every capture to completion.
canCount = 0
processes = []
processes2 = []
for can in canList:
    name = can.getName()
    keywords = can.getKey_words()
    start_time = time.time()
    jsonFile = str(name) + "_raw_tweets.json"
    csv_outFile = str(name) + "_tweets_out.csv"
    processname = str(name) + "_stream"

    # Pass the callable and its arguments separately.  Writing
    # `target=startCapture(...)` runs the capture right here in the parent
    # and hands its return value to Process, so nothing runs in the child.
    processes.append(
        Process(target=startCapture,
                name=processname,
                args=(name, tweet_limit, start_time, time_limit,
                      jsonFile, csv_outFile)))
    # NOTE(review): these cleaning processes are only constructed in this
    # section, never started -- confirm they are started elsewhere if the
    # separate cleaning pass is still wanted.
    processes2.append(
        Process(target=cleanText.jsonUTF8toCsv,
                args=(jsonFile, csv_outFile)))

    print(name)
    print(keywords)
    print(jsonFile)
    print(csv_outFile)

# A list has no start(); each Process has to be started individually.
for capture in processes:
    capture.start()

# join() blocks until the process exits.  The previous
# `while living_processes != False` compared a list (computed once) with
# False, which is always true, so it spun forever.
for capture in processes:
    capture.join()
print('done')
auth = OAuthHandler(consumer_key, consumer_secret) #OAuth object auth.set_access_token(access_token, access_token_secret) print("starting capturing stream") #twitterStream = Stream(auth, accessStream.Listener(start_time, time_limit,inputFile)) twitterStream = Stream( auth, accessStream.Listener(tweet_limit, start_time, time_limit, inputFile)) #WHY THE F**K DOESN'T THIS WORK twitterStream.filter( track=keyword_list) #call the filter method to run the Stream Listener print("done capturing stream") print("cleaning tweets") cleanText.jsonUTF8toCsv(inputFile, outputFile) print("tweets cleaned to CSV. CSV created") cleanDF = cleanText.csvToPandasDF(outputFile) print("data frame created.") print("identifying subject of the tweet") textList = cleanDF.loc[:, ['text']] subject_array = [] clean_text = [] #keywords = keyword_list[0].replace(',', '').split() this can only be used in one place or only first word comes through. for i in xrange(len(textList)):