# Example #1
0
def _clean_comment_text(raw_text):
	"""Return *raw_text* with HTML entities decoded and BOM/newline characters removed."""
	# Local import keeps this block self-contained; html.unescape replaces
	# HTMLParser.unescape, which no longer exists on Python 3.9+.
	import html

	text = html.unescape(raw_text)
	for symbol in ('\ufeff', '\n'):
		text = text.replace(symbol, '')
	return text


def _fetch_all_replies(youtube, parent_id):
	"""Return every reply item under comment *parent_id*, following all result pages."""
	reply_items = []
	page_token = None
	while True:
		response = youtube.comments().list(
			part="snippet",
			parentId=parent_id,
			textFormat="plainText",
			pageToken=page_token
			).execute()
		reply_items.extend(response['items'])
		page_token = response.get('nextPageToken')
		if not page_token:
			return reply_items


def get_comments(video_id, youtube, pageToken=None):
	"""Fetch all comments and replies for a YouTube video and store them.

	Walks every page of the video's comment threads. Each top-level comment
	and each of its replies is cleaned (HTML entities decoded, BOM and
	newline characters stripped), saved as a ``Comment`` row through
	``db.session`` and collected in the returned list.

	Args:
		video_id: ID of the YouTube video whose comments are fetched.
		youtube: API client exposing ``commentThreads().list(...)`` and
			``comments().list(...)``, whose ``execute()`` returns the
			response as a dict.
		pageToken: Optional token of the comment-thread page to start from;
			``None`` starts at the first page.

	Returns:
		A list of the cleaned comment texts in processing order: each
		top-level comment followed by its replies.
	"""
	all_comments = []
	page_token = pageToken
	# Running index of top-level comments; replies share their parent's index.
	comment_thread_id = 0
	page_count = 0
	while True:
		threads = youtube.commentThreads().list(
			part="snippet",
			videoId=video_id,
			textFormat="plainText",
			pageToken=page_token
			).execute()
		# Progress output: number of pages fetched before this one.
		print(page_count)
		page_count += 1
		for thread in threads['items']:
			top_level = thread['snippet']['topLevelComment']['snippet']
			comment_text = _clean_comment_text(top_level['textDisplay'])
			all_comments.append(comment_text)
			new_comment = Comment(place_in_thread=0, comment_thread_id=comment_thread_id, video_id=video_id, username=top_level['authorDisplayName'], url="www.example.com", comment_text=comment_text, not_sure=0, is_a_troll=0, not_a_troll=0)
			if thread['snippet']['totalReplyCount'] > 0:
				reply_items = _fetch_all_replies(youtube, thread['id'])
				# Reply data starts with the last comment in the thread, so
				# positions count down from the number of replies to 1.
				thread_location = len(reply_items)
				for reply in reply_items:
					reply_text = _clean_comment_text(reply['snippet']['textDisplay'])
					all_comments.append(reply_text)
					reply_comment = Comment(place_in_thread=thread_location, comment_thread_id=comment_thread_id, video_id=video_id, not_sure=0, username=reply['snippet']['authorDisplayName'], url="www.example.com", comment_text=reply_text, is_a_troll=0, not_a_troll=0)
					db.session.add(reply_comment)
					db.session.commit()
					thread_location -= 1
			# The top-level comment is stored after its replies, as before.
			db.session.add(new_comment)
			db.session.commit()
			comment_thread_id += 1

		# A missing (or empty) token means the last page has been processed.
		page_token = threads.get('nextPageToken')
		if not page_token:
			break
	return all_comments