def test_getCommentReplies(self):
    """Exercise the getCommentReplies API with a fixed comment ID.

    The fetched response, once serialized back to JSON text, is expected to
    contain the keyword ``youtube#comment``.
    """
    parentCommentID = "z134f5zqbovpdhky304cgfcwhr2xylrobtc0k"

    # The third slot of the URL template is left empty here; elsewhere in this
    # file it carries the nextPageToken when paginating.
    requestURL = youtube_api.getCommentReplies % (
        parentCommentID,
        router_settings.youtube_API_key,
        "",
    )

    payload = yield http_client.fetch_coroutine(requestURL)
    serialized = json.dumps(payload)
    self.assertIn("youtube#comment", serialized)
def test_getCommentThread(self):
    """Exercise the getCommentThread API with a fixed video ID.

    The fetched response, once serialized back to JSON text, is expected to
    contain the keyword ``youtube#commentThreadListResponse``.
    """
    targetVideoID = "sEQf5lcnj_o"

    # The third slot of the URL template is left empty here; elsewhere in this
    # file it carries the nextPageToken when paginating.
    requestURL = youtube_api.getCommentThread % (
        targetVideoID,
        router_settings.youtube_API_key,
        "",
    )

    payload = yield http_client.fetch_coroutine(requestURL)
    serialized = json.dumps(payload)
    self.assertIn("youtube#commentThreadListResponse", serialized)
def test_getChannels(self):
    """Exercise the getChannels API with a fixed channel name.

    The fetched response, once serialized back to JSON text, is expected to
    contain the keyword ``youtube#channel``.
    """
    targetChannelName = "Animenzzz"

    # The third slot of the URL template is left empty for this single request.
    requestURL = youtube_api.getChannels % (
        targetChannelName,
        router_settings.youtube_API_key,
        "",
    )

    payload = yield http_client.fetch_coroutine(requestURL)
    serialized = json.dumps(payload)
    self.assertIn("youtube#channel", serialized)
def test_getVideos(self):
    """Exercise the getVideos API with a fixed channel ID.

    The fetched response, once serialized back to JSON text, is expected to
    contain the keyword ``youtube#video``.
    """
    targetChannelID = "UC27HFwJdWZFhwllMtwdES-A"

    # The third slot of the URL template is left empty here; elsewhere in this
    # file it carries the nextPageToken when paginating.
    requestURL = youtube_api.getVideos % (
        targetChannelID,
        router_settings.youtube_API_key,
        "",
    )

    payload = yield http_client.fetch_coroutine(requestURL)
    serialized = json.dumps(payload)
    self.assertIn("youtube#video", serialized)
def getChannelData(self, channelName, channelID, loopFlags):
    """Crawl a channel's videos, top-level comments and replies into MongoDB.

    Usage:
        Only one of channelName / channelID is expected to be populated;
        self.getChannelAPI builds the channels request from whichever is
        given. Every channel item in the response is then walked
        channel -> videos -> comment threads -> comment replies, following
        "nextPageToken" at each level, and each comment is stored through
        mongo.insert_user_video_comments.

        This is a generator: every ``yield`` hands the fetch / insert call to
        whatever drives it (presumably a Tornado coroutine runner; the
        decorator is not visible in this block, so confirm).

    Arguments:
        channelName: name of a channel
        channelID  : ID of a channel
        loopFlags  : dictionary of flags, mutated in place, telling each
                     pagination loop whether another page must be fetched.
                     Keys used: "channelNextPageToken",
                     "videosNextPageToken", "commentThreadNextPageToken",
                     "commentRepliesNextPageToken".

    Return:
        None
    """
    # The channels request resolves channelName into channel IDs; videos are
    # associated with a channel ID, not with a channel name.
    getChannelAPI = self.getChannelAPI(channelName, channelID)

    # Runs on the first pass and again for as long as more channel pages exist.
    while loopFlags["channelNextPageToken"]:
        channelNameJson = yield http_client.fetch_coroutine(getChannelAPI)
        logger.logger.info("channelName:%s, channelID:%s, getChannelAPI:%s, channelNameJson:%s"
                           % (channelName, channelID, getChannelAPI, channelNameJson))

        # Sometimes a fetch can fail and return None. Stop paginating instead
        # of refetching the same URL forever (or indexing into None).
        if not channelNameJson:
            loopFlags["channelNextPageToken"] = False
            break

        # The loop variable is deliberately NOT called channelID: reusing that
        # name would clobber the parameter that the log line above and the
        # getChannelAPI rebuild below still need.
        for channel in channelNameJson["items"]:
            getVideosAPI = youtube_api.getVideos % (channel["id"], router_settings.youtube_API_key, "")

            # Must be re-armed for every channel, otherwise the videos of the
            # next channel would never be fetched.
            loopFlags["videosNextPageToken"] = True

            while loopFlags["videosNextPageToken"]:
                videosJson = yield http_client.fetch_coroutine(getVideosAPI)
                logger.logger.info("getVideosAPI:%s, videosJson:%s" % (getVideosAPI, videosJson))

                # Failed fetch: give up on this channel's remaining videos.
                if not videosJson:
                    loopFlags["videosNextPageToken"] = False
                    break

                for video in videosJson["items"]:
                    # Items without a videoId are not video entries (a
                    # subscribed channel can also show up in this listing).
                    if "videoId" not in video["id"]:
                        continue

                    videoID = video["id"]["videoId"]
                    getCommentThreadAPI = youtube_api.getCommentThread % (videoID, router_settings.youtube_API_key, "")

                    # Must be re-armed for every video, otherwise the comments
                    # of the next video would never be fetched.
                    loopFlags["commentThreadNextPageToken"] = True

                    while loopFlags["commentThreadNextPageToken"]:
                        commentThreadJson = yield http_client.fetch_coroutine(getCommentThreadAPI)
                        logger.logger.info("getCommentThreadAPI:%s, commentThreadJson:%s"
                                           % (getCommentThreadAPI, commentThreadJson))

                        # Failed fetch: give up on this video's remaining comments.
                        if not commentThreadJson:
                            loopFlags["commentThreadNextPageToken"] = False
                            break

                        for topComment in commentThreadJson["items"]:
                            topSnippet = topComment["snippet"]["topLevelComment"]["snippet"]

                            # Built once so the insert and its log line cannot
                            # drift apart. Order matches the positional
                            # parameters of mongo.insert_user_video_comments.
                            topRecord = (topComment["id"],
                                         channelName,
                                         topSnippet["authorDisplayName"],
                                         topSnippet["textDisplay"],
                                         topSnippet["updatedAt"],
                                         channel["id"],
                                         videoID,
                                         video["snippet"]["title"],
                                         video["snippet"]["description"],
                                         video["snippet"]["publishedAt"])

                            # Described by the original author as idempotent:
                            # repeating the insert gives the same result.
                            result = yield mongo.insert_user_video_comments(self.db, *topRecord)

                            # Labels follow the argument order (author before
                            # text); they were previously swapped.
                            logger.logger.info("mongo.insert_user_video_comments, "
                                               "topComment[id]:%s, "
                                               "channelName:%s, "
                                               "topComment[snippet][topLevelComment][snippet][authorDisplayName]:%s, "
                                               "topComment[snippet][topLevelComment][snippet][textDisplay]:%s, "
                                               "topComment[snippet][topLevelComment][snippet][updatedAt]:%s, "
                                               "channelID[id]:%s, "
                                               "video[id][videoId]:%s, "
                                               "video[snippet][title]:%s, "
                                               "video[snippet][description]:%s, "
                                               "video[snippet][publishedAt]:%s, "
                                               "result:%s" % (topRecord + (result,)))

                            # Only comments that actually have replies need
                            # the replies request.
                            if topComment["snippet"]["totalReplyCount"] > 0:
                                getCommentRepliesAPI = youtube_api.getCommentReplies % (topComment["id"], router_settings.youtube_API_key, "")

                                # Must be re-armed for every top-level comment.
                                loopFlags["commentRepliesNextPageToken"] = True

                                while loopFlags["commentRepliesNextPageToken"]:
                                    commentRepliesJson = yield http_client.fetch_coroutine(getCommentRepliesAPI)
                                    logger.logger.info("getCommentRepliesAPI:%s, commentRepliesJson:%s"
                                                       % (getCommentRepliesAPI, commentRepliesJson))

                                    # Failed fetch: give up on this comment's
                                    # remaining replies.
                                    if not commentRepliesJson:
                                        loopFlags["commentRepliesNextPageToken"] = False
                                        break

                                    for replies in commentRepliesJson["items"]:
                                        # Same insert as for top-level
                                        # comments, fed with the reply's data.
                                        replyRecord = (replies["id"],
                                                       channelName,
                                                       replies["snippet"]["authorDisplayName"],
                                                       replies["snippet"]["textDisplay"],
                                                       replies["snippet"]["updatedAt"],
                                                       channel["id"],
                                                       videoID,
                                                       video["snippet"]["title"],
                                                       video["snippet"]["description"],
                                                       video["snippet"]["publishedAt"])

                                        result = yield mongo.insert_user_video_comments(self.db, *replyRecord)
                                        logger.logger.info("mongo.insert_user_video_comments, "
                                                           "replies[id]:%s, "
                                                           "channelName:%s, "
                                                           "replies[snippet][authorDisplayName]:%s, "
                                                           "replies[snippet][textDisplay]:%s, "
                                                           "replies[snippet][updatedAt]:%s, "
                                                           "channelID[id]:%s, "
                                                           "video[id][videoId]:%s, "
                                                           "video[snippet][title]:%s, "
                                                           "video[snippet][description]:%s, "
                                                           "video[snippet][publishedAt]:%s, "
                                                           "result:%s" % (replyRecord + (result,)))

                                    # No nextPageToken means no more replies
                                    # for this comment.
                                    if "nextPageToken" not in commentRepliesJson:
                                        loopFlags["commentRepliesNextPageToken"] = False
                                    else:
                                        getCommentRepliesAPI = youtube_api.getCommentReplies % (topComment["id"], router_settings.youtube_API_key, commentRepliesJson["nextPageToken"])

                        # No nextPageToken means no more top-level comments
                        # for this video.
                        if "nextPageToken" not in commentThreadJson:
                            loopFlags["commentThreadNextPageToken"] = False
                        else:
                            getCommentThreadAPI = youtube_api.getCommentThread % (videoID, router_settings.youtube_API_key, commentThreadJson["nextPageToken"])

                # No nextPageToken means no more videos in this channel.
                if "nextPageToken" not in videosJson:
                    loopFlags["videosNextPageToken"] = False
                else:
                    getVideosAPI = youtube_api.getVideos % (channel["id"], router_settings.youtube_API_key, videosJson["nextPageToken"])

        # No nextPageToken means no more channels are left.
        if "nextPageToken" not in channelNameJson:
            loopFlags["channelNextPageToken"] = False
        else:
            # NOTE(review): channelNameJson["nextPageToken"] is not forwarded
            # here, so this looks like it rebuilds the first-page request and
            # would refetch the same page. self.getChannelAPI is defined
            # outside this block; confirm whether it can accept a page token.
            getChannelAPI = self.getChannelAPI(channelName, channelID)