def on_status(self, status):
    """Store one streamed status in MongoDB until the collection deadline passes.

    The deadline is ``self.date2`` (a ``'%Y-%m-%d %H:%M:%S'`` string); it is
    compared against ``self.datenow``, which is refreshed to the current
    time (whole seconds) after every stored status.

    Args:
        status: Status object delivered by the stream. Its ``text`` and
            ``created_at`` attributes are the fields that get persisted.

    Returns:
        True when the status was stored (deadline still in the future),
        False once the deadline has been reached, and None when an error
        was caught and logged.
    """
    from datetime import datetime

    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        # Parse the deadline once; the same value drives the branch and the
        # progress message.
        seconds_left = (datetime.strptime(self.date2, fmt) - self.datenow).total_seconds()

        if seconds_left <= 0:
            print(seconds_left)
            Logging.log("you have finished collecting")
            print("you have finished collecting")
            return False

        # This record holds the subset of tweet fields that is stored.
        record = {'Text': status.text, 'Created At': status.created_at}
        print(record)
        print(str(seconds_left) + " seconds left to cut the connection")
        # Truncate to whole seconds so the value matches what fmt can express.
        self.datenow = datetime.now().replace(microsecond=0)

        # Reuse one client for the lifetime of the listener instead of
        # opening a new connection pool for every incoming status.
        client = getattr(self, "_mongo_client", None)
        if client is None:
            client = pymongo.MongoClient()
            self._mongo_client = client
        collection = client[self.mongodbName][self.mongodbCollectionName]
        # insert_one replaces the deprecated insert(); self.db keeps holding
        # the inserted document id, as before.
        self.db = collection.insert_one(record).inserted_id

        Logging.log(self.mongodbName + " database has been created. and insertion get started")
        Logging.log("Tweets which are collecting from Streaming API added to "
                    + ConfigParser.streamingTxtFile + " filepath")
        Logging.log("Tweets which are collecting from Streaming API added to "
                    + self.mongodbCollectionName + " inside " + self.mongodbName
                    + "mongodb database")
        return True
    except Exception as e:
        # Best effort: report the failure and fall through (returns None)
        # without letting one bad status propagate an exception.
        print("Error: %s" % e)
        Logging.log(str(e))
def on_sapi(self, stwets):
    """Start a filtered stream for *stwets* while the call budget allows it.

    Each call increments ``self.num_tweets``; the stream is only started
    while that counter is below ``self.numOfStreamTweet``.

    Args:
        stwets: Passed as ``track`` to the stream filter and afterwards
            inserted into the configured MongoDB collection.

    Returns:
        True when the stream was started and the insert was issued, False
        once the counter has reached ``self.numOfStreamTweet``.
    """
    self.num_tweets += 1
    if self.num_tweets >= self.numOfStreamTweet:
        return False

    # This message used to sit after both return statements and was never
    # emitted; log it before the stream is started.
    Logging.log("Getting Tweets json from Streaming API")

    listener = CustomStreamListener(
        AllVariableClass.api,
        self.mongodbName,
        self.mongodbCollectionName,
        self.fileName,
        self.numOfStreamTweet,
    )
    sapi = tweepy.streaming.Stream(AllVariableClass.auth, listener)
    sapi.filter(track=stwets)

    collection = pymongo.MongoClient()[self.mongodbName][self.mongodbCollectionName]
    # NOTE(review): insert() is kept because the type of stwets is not visible
    # here (a list would not be accepted by insert_one) - confirm and migrate.
    self.db = collection.insert(stwets)
    return True
def on_sapi(self, stwets):
    """Start a filtered stream for *stwets* while the call budget allows it.

    Each call increments ``self.num_tweets``; the stream is only started
    while that counter is below ``self.numOfStreamTweet``.

    Args:
        stwets: Passed as ``track`` to the stream filter and afterwards
            inserted into the configured MongoDB collection.

    Returns:
        True when the stream was started and the insert was issued, False
        once the counter has reached ``self.numOfStreamTweet``.
    """
    self.num_tweets += 1
    if self.num_tweets >= self.numOfStreamTweet:
        return False

    # This message used to sit after both return statements and was never
    # emitted; log it before the stream is started.
    Logging.log("Getting Tweets json from Streaming API")

    listener = CustomStreamListener(
        AllVariableClass.api,
        self.mongodbName,
        self.mongodbCollectionName,
        self.fileName,
        self.numOfStreamTweet,
    )
    sapi = tweepy.streaming.Stream(AllVariableClass.auth, listener)
    sapi.filter(track=stwets)

    collection = pymongo.MongoClient()[self.mongodbName][self.mongodbCollectionName]
    # NOTE(review): insert() is kept because the type of stwets is not visible
    # here (a list would not be accepted by insert_one) - confirm and migrate.
    self.db = collection.insert(stwets)
    return True
def on_status(self, status):
    """Store one streamed status in MongoDB until the collection deadline passes.

    The deadline is ``self.date2`` (a ``'%Y-%m-%d %H:%M:%S'`` string); it is
    compared against ``self.datenow``, which is refreshed to the current
    time (whole seconds) after every stored status.

    Args:
        status: Status object delivered by the stream. Its ``text`` and
            ``created_at`` attributes are the fields that get persisted.

    Returns:
        True when the status was stored (deadline still in the future),
        False once the deadline has been reached, and None when an error
        was caught and logged.
    """
    from datetime import datetime

    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        # Parse the deadline once; the same value drives the branch and the
        # progress message.
        seconds_left = (datetime.strptime(self.date2, fmt) - self.datenow).total_seconds()

        if seconds_left <= 0:
            print(seconds_left)
            Logging.log("you have finished collecting")
            print("you have finished collecting")
            return False

        # This record holds the subset of tweet fields that is stored.
        record = {'Text': status.text, 'Created At': status.created_at}
        print(record)
        print(str(seconds_left) + " seconds left to cut the connection")
        # Truncate to whole seconds so the value matches what fmt can express.
        self.datenow = datetime.now().replace(microsecond=0)

        # Reuse one client for the lifetime of the listener instead of
        # opening a new connection pool for every incoming status.
        client = getattr(self, "_mongo_client", None)
        if client is None:
            client = pymongo.MongoClient()
            self._mongo_client = client
        collection = client[self.mongodbName][self.mongodbCollectionName]
        # insert_one replaces the deprecated insert(); self.db keeps holding
        # the inserted document id, as before.
        self.db = collection.insert_one(record).inserted_id

        Logging.log(self.mongodbName + " database has been created. and insertion get started")
        Logging.log("Tweets which are collecting from Streaming API added to "
                    + ConfigParser.streamingTxtFile + " filepath")
        Logging.log("Tweets which are collecting from Streaming API added to "
                    + self.mongodbCollectionName + " inside " + self.mongodbName
                    + "mongodb database")
        return True
    except Exception as e:
        # Best effort: report the failure and fall through (returns None)
        # without letting one bad status propagate an exception.
        print("Error: %s" % e)
        Logging.log(str(e))
class AllVariableClass:
    """Class-level holder for the authenticated Tweepy handles.

    The class body runs once, when the class statement is executed. It
    builds ``auth`` (an OAuth handler fed from ``conf``) and ``api`` (a
    Tweepy API client bound to that handler) as class attributes.
    """

    # Authorize against Twitter with the credentials from conf.
    auth = tweepy.OAuthHandler(conf.consumer_key, conf.consumer_secret)
    auth.set_access_token(conf.access_key, conf.access_secret)
    # API client shared through the class attribute.
    api = tweepy.API(auth)
    # Emitted once, at the moment the class body is executed.
    Logging.log('Variable has been called from AllClassVariable')
def writetoMongo(self, filepath, mongodbName, mongodbCollectionName):
    """Load line-delimited JSON files into a MongoDB collection.

    Every non-blank line of every file matched by *filepath* is parsed as
    one JSON document and inserted individually.

    Args:
        filepath: Glob pattern selecting the files to import.
        mongodbName: Name of the target database.
        mongodbCollectionName: Name of the target collection.

    Raises:
        ValueError: If a non-blank line is not valid JSON (raised by
            ``json.loads``).
    """
    import glob
    import json
    from DM import Logging
    from pymongo import MongoClient

    client = MongoClient()
    try:
        collection = client[mongodbName][mongodbCollectionName]
        for filename in glob.glob(filepath):
            with open(filename) as f:
                for line in f:
                    # Blank lines (e.g. a trailing newline) carry no document
                    # and would make json.loads fail.
                    if not line.strip():
                        continue
                    collection.insert_one(json.loads(line))
            # Report the collection that was actually written to, not a
            # hard-coded name.
            Logging.log("Filename " + filename
                        + " was added to mongodb collection name is "
                        + mongodbCollectionName)
    finally:
        # Release the connection pool even when a file fails to import.
        client.close()
def writetoMongo(self, filepath, mongodbName, mongodbCollectionName):
    """Load line-delimited JSON files into a MongoDB collection.

    Every non-blank line of every file matched by *filepath* is parsed as
    one JSON document and inserted individually.

    Args:
        filepath: Glob pattern selecting the files to import.
        mongodbName: Name of the target database.
        mongodbCollectionName: Name of the target collection.

    Raises:
        ValueError: If a non-blank line is not valid JSON (raised by
            ``json.loads``).
    """
    import glob
    import json
    from DM import Logging
    from pymongo import MongoClient

    client = MongoClient()
    try:
        collection = client[mongodbName][mongodbCollectionName]
        for filename in glob.glob(filepath):
            with open(filename) as f:
                for line in f:
                    # Blank lines (e.g. a trailing newline) carry no document
                    # and would make json.loads fail.
                    if not line.strip():
                        continue
                    collection.insert_one(json.loads(line))
            # Report the collection that was actually written to, not a
            # hard-coded name.
            Logging.log("Filename " + filename
                        + " was added to mongodb collection name is "
                        + mongodbCollectionName)
    finally:
        # Release the connection pool even when a file fails to import.
        client.close()
def write_on_file(self, screenname, mongodbName="Followers"):
    """Save the followers of *screenname* to a text file and to MongoDB.

    Each follower's screen name is appended to ``<screenname>.txt`` under
    ``ConfigParser.filePathForFollowersOutputs`` and inserted as a document
    into the ``screenname`` collection of database *mongodbName*.

    Args:
        screenname: Account whose followers are collected; also used as the
            output file name and the collection name.
        mongodbName: Name of the target database.

    Failures are printed and logged rather than raised: a failure for a
    single follower skips that follower, any other failure ends the run.
    """
    from DM.AllVariableClass import AllVariableClass
    from DM import Logging
    from DM import ConfigParser

    try:
        import pymongo

        api = AllVariableClass.api
        follower_ids = api.followers_ids(screenname)
        output_path = ConfigParser.filePathForFollowersOutputs + screenname + ".txt"
        # One client and one file handle for the whole run instead of
        # re-creating both for every follower.
        collection = pymongo.MongoClient()[mongodbName][screenname]
        with open(output_path, "a") as f:
            for follower in follower_ids:
                try:
                    # Look the user up once; every lookup is a separate API
                    # request.
                    follower_screen_name = api.get_user(follower).screen_name
                    f.write(follower_screen_name + ' \n')
                    follower_name = {'Follower of ' + screenname: follower_screen_name}
                    # insert_one replaces the deprecated insert(); self.db
                    # keeps holding the inserted document id.
                    self.db = collection.insert_one(follower_name).inserted_id
                except Exception as e:
                    # Skip this follower but keep collecting the others.
                    print("Error: %s" % e)
                    Logging.log(str(e))
        Logging.log("FollowerCollector collected followers of " + screenname
                    + " and added to " + screenname + ".txt")
    except Exception as e:
        # Log the exception itself (not just its type) so the cause is visible.
        print("Error: %s" % e)
        Logging.log("Screen name : " + screenname + " with error " + str(e))
def get_all_tweets(self, screen_name, input, mongodbName):
    """Download a user's timeline and save it to a text file and to MongoDB.

    Pages backwards through the timeline of *screen_name* until at least
    *input* tweets have been collected or the timeline has no more tweets.
    Everything collected is then appended as JSON lines to
    ``<screen_name>.txt`` under ``ConfigParser.filePathForTimelineOutputs``
    and inserted into the ``screen_name`` collection of *mongodbName*.

    Args:
        screen_name: Account whose timeline is downloaded.
        input: Number of tweets to aim for (integer).
        mongodbName: Name of the target database.

    Failures are printed and logged rather than raised.
    """
    import json
    from DM import Logging
    from DM import ConfigParser
    from DM.AllVariableClass import AllVariableClass

    try:
        import pymongo

        allclassvar = AllVariableClass()
        # count must be an integer ("/" yields a float on Python 3) and a
        # single request serves at most 200 tweets.
        page_size = max(1, min(200, input // 2))

        alltweets = []
        # Initial request for the most recent tweets.
        new_tweets = allclassvar.api.user_timeline(screen_name=screen_name, count=page_size)
        while new_tweets:
            alltweets.extend(new_tweets)
            print("...%s tweets downloaded so far" % (len(alltweets)))
            if len(alltweets) >= input:
                break
            # Subsequent requests use max_id (oldest id less one) to prevent
            # duplicates.
            oldest = alltweets[-1].id - 1
            print("getting tweets before %s" % (oldest))
            new_tweets = allclassvar.api.user_timeline(
                screen_name=screen_name, count=page_size, max_id=oldest)

        if not alltweets:
            print("No tweets found for " + str(screen_name))
            Logging.log("No tweets found for " + str(screen_name))
            return

        # Save whatever was collected, even when the timeline ran out before
        # the requested amount was reached.
        Logging.log(str(len(alltweets)) + " tweets have been collected from "
                    + str(screen_name) + " timeline")
        collection = pymongo.MongoClient()[mongodbName][screen_name]
        with open(ConfigParser.filePathForTimelineOutputs + screen_name + ".txt", "a") as f:
            for tweet in alltweets:
                # Serialize before inserting: the insert adds an "_id" value
                # to the dict that json.dumps cannot encode.
                f.write(json.dumps(tweet._json) + "\n")
                self.db = collection.insert_one(tweet._json).inserted_id
        print(screen_name + "'s tweets added to file")
        Logging.log("Tweets have been added to " + screen_name + ".txt")
    except Exception as e:
        print("Error: %s" % e)
        Logging.log(str(screen_name) + " " + str(e))
def on_data(self, tweet):
    """Append a raw stream payload to the output file and store it in MongoDB.

    Args:
        tweet: Raw JSON text received from the stream; written to the file
            unchanged and parsed with ``json.loads`` for the database.

    Returns:
        True, always.
    """
    import json

    with open(ConfigParser.streamingTxtFile + self.fileName, 'a') as tf:
        tf.write(tweet)

    # Reuse one client for the lifetime of the listener instead of opening a
    # new connection pool for every incoming message.
    client = getattr(self, "_mongo_client", None)
    if client is None:
        client = pymongo.MongoClient()
        self._mongo_client = client
    self.db = client[self.mongodbName][self.mongodbCollectionName]
    Logging.log(self.mongodbName + " database has been created.")
    # insert_one replaces the deprecated insert().
    self.db.insert_one(json.loads(tweet))
    Logging.log("Tweets which are collecting from Streaming API added to "
                + ConfigParser.streamingTxtFile + " filepath")
    Logging.log("Tweets which are collecting from Streaming API added to "
                + self.mongodbCollectionName + " inside " + self.mongodbName
                + "mongodb database")
    return True
def on_data(self, tweet):
    """Append a raw stream payload to the output file and store it in MongoDB.

    Args:
        tweet: Raw JSON text received from the stream; written to the file
            unchanged and parsed with ``json.loads`` for the database.

    Returns:
        True, always.
    """
    import json

    with open(ConfigParser.streamingTxtFile + self.fileName, 'a') as tf:
        tf.write(tweet)

    # Reuse one client for the lifetime of the listener instead of opening a
    # new connection pool for every incoming message.
    client = getattr(self, "_mongo_client", None)
    if client is None:
        client = pymongo.MongoClient()
        self._mongo_client = client
    self.db = client[self.mongodbName][self.mongodbCollectionName]
    Logging.log(self.mongodbName + " database has been created.")
    # insert_one replaces the deprecated insert().
    self.db.insert_one(json.loads(tweet))
    Logging.log("Tweets which are collecting from Streaming API added to "
                + ConfigParser.streamingTxtFile + " filepath")
    Logging.log("Tweets which are collecting from Streaming API added to "
                + self.mongodbCollectionName + " inside " + self.mongodbName
                + "mongodb database")
    return True
def on_timeout(self, status):
    """Announce a stream timeout on stdout and in the log.

    Args:
        status: Accepted for the callback signature; not used.
    """
    notice = 'Stream disconnected; continuing...'
    print(notice)
    Logging.log(notice)
def on_limit(self, status):
    """Report a limit notice on stdout and in the log.

    Args:
        status: Limit information handed to the callback; rendered with
            ``str()`` for the log entry.
    """
    print('Limit threshold exceeded', status)
    # The other handlers pass Logging.log one pre-built string; do the same
    # here instead of handing it a second positional argument.
    Logging.log('Limit threshold exceeded ' + str(status))
def on_error(self, status):
    """Report a stream error on stdout and in the log.

    Args:
        status: Error status handed to the callback; rendered with ``str()``
            so non-string values (e.g. an integer code) can be logged.
    """
    print('Error on status', status)
    # str() keeps the concatenation from raising TypeError when status is
    # not already a string.
    Logging.log('Error on status ' + str(status))
def on_timeout(self):
    """Log the timeout and signal that the stream should stay alive.

    Returns:
        True, always.
    """
    message = "Don't kill the stream on timeouts"
    Logging.log(message)
    # True means: do not kill the stream.
    return True
def on_timeout(self, status):
    """Announce a stream timeout on stdout and in the log.

    Args:
        status: Accepted for the callback signature; not used.
    """
    notice = 'Stream disconnected; continuing...'
    print(notice)
    Logging.log(notice)
def on_error(self, status):
    """Report a stream error on stdout and in the log.

    Args:
        status: Error status handed to the callback; rendered with ``str()``
            so non-string values (e.g. an integer code) can be logged.
    """
    print('Error on status', status)
    # str() keeps the concatenation from raising TypeError when status is
    # not already a string.
    Logging.log('Error on status ' + str(status))
tweetcollect.get_all_tweets(word, numberOfTweet,mongodbName) cont = input("\nType any key to cont \n ") elif cont == "no": username=input("Please enter username : "******"How many tweets do you want to collect? : "), 10) tweetcollect.get_all_tweets(username, numberOfTweet,mongodbName) else: loop2=False print("You have exited from tweet collector succesfully...") except: import sys e = sys.exc_info()[1] print("Error: %s" % e) Logging.log(str(e)) continue ## You can add your code or functions here elif choice == 2: try: loop3=True while loop3: # your code print("Type any key to exit this option") cont = input("Do you want to give a username list with file path? yes/no > ") from DM.FollowerCollector import FollowerCollector from DM import ConfigParser followercollect = FollowerCollector() if cont == "yes":
def on_timeout(self):
    """Log the timeout and signal that the follower collector keeps running.

    Returns:
        True, always.
    """
    from DM import Logging

    message = "Don't kill the followerCollector on timeouts"
    Logging.log(message)
    # True means: do not kill the followerCollector.
    return True
def on_error(self, status_code):
    """Log an error status and signal that the follower collector keeps running.

    Args:
        status_code: Error code handed to the callback; rendered with
            ``str()`` so integer codes can be logged.

    Returns:
        True, always.
    """
    from DM import Logging

    # str() keeps the concatenation from raising TypeError for integer codes.
    Logging.log("Don't kill the followerCollector. error status code : " + str(status_code))
    return True  # Don't kill the followerCollector
def on_timeout(self):
    """Log the timeout and signal that the collector keeps running.

    Returns:
        True, always.
    """
    from DM import Logging

    message = "Don't kill the collector on timeouts."
    Logging.log(message)
    # True means: do not kill the collector.
    return True
def on_error(self, status_code):
    """Log an error status and signal that the collector keeps running.

    Args:
        status_code: Error code handed to the callback; rendered with
            ``str()`` so integer codes can be logged.

    Returns:
        True, always.
    """
    from DM import Logging

    # str() keeps the concatenation from raising TypeError for integer codes.
    Logging.log("Don't kill the collector here status code is : " + str(status_code))
    return True  # Don't kill the collector
def on_limit(self, status):
    """Report a limit notice on stdout and in the log.

    Args:
        status: Limit information handed to the callback; rendered with
            ``str()`` for the log entry.
    """
    print('Limit threshold exceeded', status)
    # The other handlers pass Logging.log one pre-built string; do the same
    # here instead of handing it a second positional argument.
    Logging.log('Limit threshold exceeded ' + str(status))
def on_error(self, status_code):
    """Report an error status and signal that the stream stays alive.

    Args:
        status_code: Error code handed to the callback; rendered with
            ``str()`` so integer codes can be logged.

    Returns:
        True, always.
    """
    print("we have error")
    # str() keeps the concatenation from raising TypeError for integer codes.
    Logging.log("Don't kill the stream here status code : " + str(status_code))
    return True  # Don't kill the stream