def insertOrUpdateProfileBatch(screenNames):
    """
    Get Twitter profile data from the Twitter API and store in the database.

    Profile records are created, or updated if they already exist.

    :param screenNames: list of user screen names as strings, to be fetched
        from the Twitter API.

    :return successScreenNames: list of user screen names as strings, for the
        Profiles which were successfully fetched then inserted/updated in
        the db.
    :return failedScreenNames: list of user screen names as strings, for the
        Profiles which could not be fetched from the Twitter API and
        inserted/updated in the db.
    """
    APIConn = authentication.getAPIConnection()

    successScreenNames = []
    failedScreenNames = []

    for s in screenNames:
        try:
            fetchedProf = _getProfile(APIConn, screenName=s)
        except TweepError as e:
            # The profile could be missing or suspended, so we log it
            # and then skip inserting or updating (since we have no data).
            print("Could not fetch user: @{name}. {error}. {msg}".format(
                name=s, error=type(e).__name__, msg=str(e)))
            failedScreenNames.append(s)
        else:
            try:
                localProf = insertOrUpdateProfile(fetchedProf)
                # Represent log of followers count visually as repeated stars,
                # sidestepping error for log of zero.
                logFollowers = (int(math.log10(localProf.followersCount))
                                if localProf.followersCount else 0)
                stars = "*" * logFollowers
                print("Inserted/updated user: {name:20} {stars}".format(
                    name=u"@" + localProf.screenName, stars=stars))
                successScreenNames.append(s)
            except Exception as e:
                print("Could not insert/update user: @{name}. {error}. {msg}"
                      .format(name=s, error=type(e).__name__, msg=str(e)))
                failedScreenNames.append(s)

    return successScreenNames, failedScreenNames
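
# Usage sketch for insertOrUpdateProfileBatch. This helper is illustrative
# only and is not called anywhere; the screen names are placeholders and the
# split of results into success/failure lists follows the docstring above.
def _exampleProfileBatchUsage():
    """
    Sketch: fetch and store a couple of profiles, then report the outcome.
    """
    ok, failed = insertOrUpdateProfileBatch(["exampleUserA", "exampleUserB"])
    print("Succeeded: {0}. Failed: {1}.".format(len(ok), len(failed)))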
def test_getAPIConnection(self):
    """
    Test that App Access token can be used to connect to Twitter API.
    """
    api = authentication.getAPIConnection(userFlow=False)
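
# A stricter check is sketched below as an assumption, not taken from the
# source test: it presumes `tweepy` is imported in this test module and that
# getAPIConnection returns a tweepy.API instance, as its usage elsewhere in
# this repo suggests.
def test_getAPIConnection_returnsTweepyAPI(self):
    """
    Test that the app-only connection is a tweepy API instance.
    """
    api = authentication.getAPIConnection(userFlow=False)
    self.assertIsInstance(api, tweepy.API)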
def insertTrendsForWoeid(woeid, userApi=None, delete=False, verbose=True):
    """
    Retrieve Trend data from the Twitter API for a place and insert into the
    database.

    Expects a WOEID value for a Place, gets up to 50 trend records for the
    Place as limited by the API and stores each of the values in the Trend
    table.

    From the API request response, we ignore the location field (which we
    know already) and the time field (since we just use current time as
    close enough).

    For printing of the added trend, it works normally to print the string
    as '...{}'.format, even if the value is 'Jonathan Garc\xeda'. This was
    tested in the bash console of PythonAnywhere. However, when running as a
    cron job and outputting to a log file, it appears to be converted to
    ASCII and throws an error. Therefore encoding to ASCII and replacing the
    character is done, even though it is less readable.

    :param woeid: Integer for WOEID value of a Place.
    :param userApi: tweepy API connection object. Set this with a
        user-authorised connection to skip the default behaviour of
        generating and using an app-authorised connection.
    :param delete: Boolean, default False. If set to True, delete each item
        after it is inserted into the db. This is useful for testing.
    :param verbose: Print details for each trend added.

    :return: Count of trend records processed for the Place.
    """
    global appApi

    now = datetime.datetime.now()
    print(f"{now.strftime('%x %X')} Inserting trend data for WOEID {woeid}")

    assert isinstance(
        woeid, int
    ), f"Expected WOEID as type `int` but got type `{type(woeid).__name__}`."

    if userApi:
        # Use user token.
        api = userApi
    else:
        # Use app token.
        if not appApi:
            # Set it if necessary and then reuse it next time.
            appApi = authentication.getAPIConnection()
        api = appApi

    response = api.trends_place(woeid)[0]
    trends = response["trends"]

    for x in trends:
        topic = x["name"]
        volume = x["tweet_volume"]
        t = db.Trend(topic=topic, volume=volume).setPlace(woeid)

        if verbose:
            print("Added trend: {tweetID:4d} | {topic:25} - {volume:7,d} K |"
                  " {woeid:10} - {place}.".format(
                      tweetID=t.id,
                      topic=t.topic,
                      volume=(t.volume // 1000 if t.volume else 0),
                      woeid=t.place.woeid,
                      place=t.place.name,
                  ))

        if delete:
            db.Trend.delete(t.id)
            if verbose:
                print(" - removed from db.")

    return len(trends)
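
# Usage sketch for insertTrendsForWoeid, not called anywhere. WOEID 1 is the
# well-known value for worldwide trends. Passing delete=True removes each
# record after insert, as the docstring describes, so this acts as a dry run.
def _exampleTrendInsert():
    """
    Sketch: fetch worldwide trends, insert then remove them, and report the
    count.
    """
    count = insertTrendsForWoeid(1, delete=True, verbose=True)
    print(f"Processed {count} trends for the worldwide WOEID.")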
Get my own tweets.

Do bar graph.
Do tag cloud.
Store tweets in DB with mix of columns I want and JSON column for full
object.
"""
import json

import tweepy

from lib.twitter_api import authentication as twitterAuth


api = twitterAuth.getAPIConnection()


def _writeJSON(data, filename):
    print("Write")
    with open(filename, "w") as writer:
        json.dump(data, writer, indent=4)

    return True


def _readJSON(filename):
    print("Read")
    with open(filename, "r") as reader:
        data = json.load(reader)

    return data
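
# Sketch of how the helpers above might be combined. This is illustrative
# only: the filename and the use of `api.user_timeline()` (which needs a
# user-authorised connection) are assumptions, not taken from this script.
def _exampleFetchAndCache(filename="my_tweets.json"):
    """
    Fetch the authenticated user's recent tweets, cache the raw JSON to a
    file, then read it back.
    """
    timeline = api.user_timeline()
    # Each tweepy Status object keeps the raw API payload on `_json`.
    _writeJSON([status._json for status in timeline], filename)

    return _readJSON(filename)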
def insertOrUpdateTweetBatch(
        profileRecs,
        tweetsPerProfile=200,
        verbose=False,
        writeToDB=True,
        campaignRec=None,
        onlyUpdateEngagements=True,
):
    """
    Get Twitter tweet data from the Twitter API for a batch of profiles and
    store their tweets in the database.

    The verbose and writeToDB flags can be used together to print tweet data
    which would be inserted into the database without actually inserting it.
    This can be used to preview tweet data without increasing storage or
    spending time on inserts and updates.

    :param profileRecs: list of Profile objects to create or update tweets
        for. This might be a list from the Profile table which has been
        filtered based on a job schedule, or Profiles which match criteria
        such as a high follower count.
    :param tweetsPerProfile: Default 200. Count of tweets to get for each
        profile, as an integer.

        If this is 200 or less, then the page limit is left at 1 and the
        items per page count is reduced. If this is more than 200, then the
        items per page count is left at 200 and the page limit is adjusted
        to get a number of tweets as the next multiple of 200. e.g. 550
        tweets needs 2 pages to get the first 400 tweets, plus a 3rd page to
        get the additional 150 tweets. We simplify to get 200 * 3 = 600
        tweets, to keep the count consistent on each query. Note that even
        if 200 tweets are requested, the API sometimes returns only 199, and
        the user may have posted fewer than the requested number of tweets.

        The limit for a single request to the API is 200, therefore any
        number up to 200 has the same rate limit cost. It may be useful to
        set a number here as 200 or less if we want to get through all the
        users quickly, as this takes fewer API queries and fewer db inserts
        or updates. Also, consider that a very low number may lead to dead
        time, where the script takes a fixed time to get 200 or 1 tweets
        and, once it has processed the 1 requested and the window limit is
        hit, it has no Tweet processing to do while waiting for the next
        rate-limited window. Though a low value will mean less storage space
        is required.
    :param verbose: Default False. If True, print the data used to create a
        local Tweet record. This data can be printed regardless of whether
        the data is written to the db record or not.
    :param writeToDB: Default True. If True, write the fetched tweets to the
        local database, otherwise print and discard them. This is useful
        when used in combination with the verbose flag, which prints the
        data.
    :param campaignRec: Campaign record to assign to the local Tweet
        records. Default None to not assign any Campaign.
    :param onlyUpdateEngagements: Default True to only update the favorite
        and retweet count of the tweet in the local db. If False, update
        other fields too. Those are expected to be static on the Twitter
        API, but if rules change on this repo then it is useful to apply
        them historically on existing Tweet records. This flag only affects
        existing records.

    :return: None
    """
    APIConn = authentication.getAPIConnection()

    if tweetsPerProfile <= 200:
        tweetsPerPage = tweetsPerProfile
        pageLimit = 1
    else:
        tweetsPerPage = 200
        # Round up to get the last page, which might have fewer items.
        pageLimit = math.ceil(tweetsPerProfile / tweetsPerPage)

    for p in profileRecs:
        try:
            fetchedTweets = _getTweets(
                APIConn,
                userID=p.guid,
                tweetsPerPage=tweetsPerPage,
                pageLimit=pageLimit,
            )
        except TweepError as e:
            print("Could not fetch tweets for user: @{screenName}."
                  " {type}. {msg}".format(
                      screenName=p.screenName,
                      type=type(e).__name__,
                      msg=str(e)))
        else:
            print("User: {0}".format(p.screenName))

            if writeToDB:
                print("Inserting/updating tweets in db...")
            else:
                print("Displaying tweets but not inserting/updating...")

            added = errors = 0

            for f in fetchedTweets:
                try:
                    data, tweetRec = insertOrUpdateTweet(
                        tweet=f,
                        profileID=p.id,
                        writeToDB=writeToDB,
                        onlyUpdateEngagements=onlyUpdateEngagements,
                    )
                    if tweetRec and campaignRec:
                        try:
                            campaignRec.addTweet(tweetRec)
                        except DuplicateEntryError:
                            # Ignore error if Tweet was already assigned.
                            pass
                    if verbose:
                        if tweetRec:
                            tweetRec.prettyPrint()
                        else:
                            # No record was created, so use the data dict.
                            m = data["message"]
                            created = data["createdAt"]
                            data["message"] = lib.text_handling.flattenText(m)
                            data["createdAt"] = str(lib.set_tz(created))
                            # TODO: Check if this will raise an error
                            # on unicode symbols in the message.
                            print(json.dumps(data, indent=4))
                    added += 1
                except Exception as e:
                    print("Could not insert/update tweet `{id}` for user"
                          " @{screenName}. {type}. {msg}".format(
                              id=f.id,
                              screenName=p.screenName,
                              type=type(e).__name__,
                              msg=str(e),
                          ))
                    errors += 1

                total = added + errors
                # Print stats on every 10 processed and on the last item.
                if total % 10 == 0 or f == fetchedTweets[-1]:
                    print("Total: {total:2,d}. Added: {added:2,d}. "
                          "Errors: {errors:2,d}.".format(
                              total=total, added=added, errors=errors))