def createMatrix():
    """
    Builds the users x videos matrix from the database.

    Rows are labelled with every id from the users table and columns with
    every v_id from the video table. Every cell starts at 0 and is then
    overwritten with the rating stored in myvideos for that (user, video)
    pair.

    :returns: float DataFrame of ratings, or None when an exception was
              caught (the exception is printed, not re-raised)
    """
    cursor, connection = connect()
    try:
        # row labels: one per user
        cursor.execute("SELECT id from users;")
        user_ids = [row[0] for row in cursor.fetchall()]

        # column labels: one per video
        cursor.execute("SELECT v_id from video;")
        video_ids = [row[0] for row in cursor.fetchall()]

        # zero-filled users x videos matrix
        ratings = pd.DataFrame(
            0, index=user_ids, columns=video_ids, dtype=float)

        # overwrite the cells for which a rating exists
        cursor.execute("SELECT u_id, v_id, rating from myvideos;")
        for user_id, video_id, rating in cursor.fetchall():
            ratings.at[user_id, video_id] = rating

        return ratings
    except Exception as e:
        print("Exception in createMatrix: ", e)
    finally:
        closeConnection(connection, cursor)
def clean_video_description(search_q):
    """
    Cleans the description of every video stored for a search query and
    stores the cleaned text in the database under clean_descr.

    Each video is committed individually, so videos processed before a
    failure keep their cleaned description. Exceptions raised while
    processing are printed, not re-raised.

    :param search_q: value of the "searchQ" column selecting the videos
    :type search_q: string
    :return: void
    """
    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT descr,v_id FROM video WHERE "searchQ" = %s;""",
            (search_q, ))
        results = cursor.fetchall()

        # clean the text for each description retrieved
        for descr, v_id in results:
            clean_text = cleanText(descr)
            # an empty / whitespace-only / None result is stored as a
            # single space rather than as an empty value
            if not (clean_text and clean_text.strip()):
                clean_text = ' '

            # update database with clean description
            cursor.execute(
                """UPDATE video SET clean_descr = %s WHERE v_id = %s""",
                (clean_text, v_id))
            connection.commit()
            print("Success ", v_id)
    except Exception as e:
        print("Error in cleanVideoDescription:", e)
    finally:
        closeConnection(connection, cursor)
def retrieveVideos():
    """
    Requests data for a fixed list of YouTube video ids, inserts each video
    into the video table under the search query 'control', then stores the
    comment thread of each video.

    :returns: void
    """
    youtube = credentialVerification()
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id="yDKWmNpw7gE, 3uk6rKXbG1M, p-aVhSEO8Ro, t_CbWtSSHMw, z7Tadx4XGjA"
    )
    response = request.execute()

    # connect to database
    cursor, connection = connect()
    try:
        # collect data on search results
        for video in response['items']:
            VIEWS = video['statistics']['viewCount']
            LIKES = video['statistics']['likeCount']
            DISLIKES = video['statistics']['dislikeCount']
            FAV = video['statistics']['favoriteCount']
            TITLE = video['snippet']['title']
            DESCRP = video['snippet']['description']
            CHAN_ID = video['snippet']['channelId']
            CHAN_TITLE = video['snippet']['channelTitle']
            videoID = video['id']
            URL = "https://youtube.com/watch?v=" + videoID
            searchQ = 'control'
            print(VIEWS, LIKES, DISLIKES, FAV, TITLE, DESCRP, CHAN_ID,
                  CHAN_TITLE, videoID, URL)

            cursor.execute(
                """INSERT INTO video (title, descr, views, likes, dislikes, """
                """v_id, channel_id, channel_name, "searchQ", url) """
                """VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
                (TITLE, DESCRP, VIEWS, LIKES, DISLIKES, videoID, CHAN_ID,
                 CHAN_TITLE, searchQ, URL))
            connection.commit()
            print("Inserted into Video Table")

            # makeRequestCommentThread takes (video_id, youtube) and opens
            # and closes its own connection, so no cursor/connection is
            # passed and no extra connection is opened here.
            makeRequestCommentThread(videoID, youtube)
    finally:
        # always release the connection, even when a statistic is missing
        # from the response or the insert fails
        closeConnection(connection, cursor)
def collect_inputs(search_q):
    """
    Retrieves, for every video stored for a search query, the video's
    statistics and text fields together with the average comment
    probabilities (positive, negative, neutral, each multiplied by 100).

    Videos are joined to their comments with a right join, so videos
    without comments are still returned. Rows are ordered by views,
    highest first.

    :param search_q: value of the "searchQ" column selecting the videos
    :type search_q: string
    :returns: list of rows (likes, dislikes, views, title, descr,
              clean_descr, channel_name, duration, avg prob_pos * 100,
              avg prob_neg * 100, avg prob_neutral * 100, v_id, url), or
              None when an exception was caught (it is printed, not
              re-raised)
    """
    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT video.likes, video.dislikes, views, title,descr, """
            """clean_descr, channel_name, video.duration, """
            """AVG(comments.prob_pos)*100, AVG(comments.prob_neg)*100, """
            """AVG(comments.prob_neutral)*100, v_id, url """
            """FROM comments right join video """
            """ON comments.video_id = video.v_id WHERE "searchQ"=%s """
            """GROUP BY v_id, video.likes, video.dislikes, views, title """
            """ORDER BY views desc;""",
            (search_q, ))
        return cursor.fetchall()
    except Exception as e:
        print("Exception in collectInputs:", e)
    finally:
        closeConnection(connection, cursor)
def detect_language(search_q):
    """
    Uses detectlanguage API to determine language of video based on video
    title and cleaned description.
    The first detection returned by the API is read for its 'language',
    'isReliable' and 'confidence' entries.
    Updates video table in database with detectlanguage data. Videos for
    which the API returns no detection are left unchanged.

    Exceptions raised while processing are printed, not re-raised.

    :param search_q: value of the "searchQ" column selecting the videos
    :type search_q: string
    :returns: void
    """
    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT title, clean_descr, v_id FROM video """
            """WHERE "searchQ" = %s;""",
            (search_q, ))
        results = cursor.fetchall()

        # configure language detection API
        # NOTE(review): the API key is hard-coded in source; consider moving
        # it to configuration and rotating it.
        detectlanguage.configuration.api_key = "eea8968a48d7b6af0a3de993f7f401e0"

        # for each result from the database, detect language from title and
        # clean description
        for result in results:
            # a NULL title or description would make the concatenation raise
            # and abort the remaining videos, so treat it as empty text
            text = (result[0] or "") + " " + (result[1] or "")
            v_id = result[2]

            detections = detectlanguage.detect(text)
            if not detections:
                # nothing detected for this text: skip it instead of failing
                # on [0] and abandoning the remaining videos
                continue
            detection = detections[0]

            # update database with language data (language, isReliable,
            # confidence level)
            cursor.execute(
                """UPDATE video SET language = %s, is_reliable = %s, """
                """confidence = %s WHERE v_id = %s""",
                (detection['language'], detection['isReliable'],
                 detection['confidence'], v_id))
            connection.commit()
    except Exception as e:
        print("Exception in detect_language:", e)
    finally:
        closeConnection(connection, cursor)
def rankVideos():
    """
    Computes a ranking for every video in the video table.

    video ranking = [(likes - dislikes) / views] * log(views / avg_views)
    where avg_views is the mean view count over all videos. A video with
    0 views gets rank 0.

    :returns: dict mapping v_id to its rank; an empty dict when the video
              table has no rows; None when an exception was caught (it is
              printed, not re-raised)
    """
    cursor, connection = connect()
    try:
        # retrieve video stats
        sql = "SELECT v_id, likes, dislikes, views FROM video;"
        df = sqlio.read_sql_query(sql, connection)

        # rows as plain (v_id, likes, dislikes, views) tuples
        rows = list(df.itertuples(index=False, name=None))
        if not rows:
            # no videos: nothing to rank, and the average is undefined
            return {}

        # compute average views
        avg_views = sum(row[3] for row in rows) / len(rows)

        video_rankings = {}
        for v_id, likes, dislikes, views in rows:
            if views == 0:
                # avoids dividing by zero and taking log(0)
                rank = 0
            else:
                rank = (
                    (likes - dislikes) / views) * math.log(views / avg_views)
            video_rankings[v_id] = rank

        return video_rankings
    except Exception as e:
        print("Exception in rank videos: ", e)
    finally:
        # the connection is kept bound (not reset to None) so that it is
        # actually handed to closeConnection
        closeConnection(connection, cursor)
def recommend(u, v, df):
    """
    Stores the value at position (u, v) of df in the recommendations table,
    unless a row for the same user and video already exists.

    Exceptions raised by the database calls are printed, not re-raised.

    :param u: positional row index into df (its index label is the user id)
    :type u: int
    :param v: positional column index into df (its column label is the
              video id)
    :type v: int
    :param df: matrix whose cell (u, v) is stored in the rank column
    :type df: pandas.DataFrame
    :returns: void
    """
    # translate positions into the database keys
    user_id = int(df.index.values[u])
    video_id = str(df.columns.values[v])

    cursor, connection = connect()
    try:
        # look for an existing recommendation for this (user, video) pair
        cursor.execute(
            "SELECT v_id FROM recommendations WHERE u_id = %s and v_id = %s;",
            (user_id, video_id))
        existing = cursor.fetchone()

        if existing is None:
            params = (user_id, video_id, df.iat[u, v])
            cursor.execute(
                "INSERT INTO recommendations (u_id, v_id, rank) VALUES (%s, %s, %s);",
                params)
            connection.commit()
    except Exception as e:
        print("Exception while inserting: ", e)
    finally:
        closeConnection(connection, cursor)
def retrieve_comments_for_cleaning(search_q):
    """
    retrieves comments from the database and preps them for cleaning
    calls cleanText from util
    checks comments after cleaning and inserts clean text into database

    A comment whose cleaned text is empty is stored with sentiment 'null'
    and -1 for all three probabilities; otherwise the label and
    probabilities returned by getSentiment are stored. Each comment is
    committed individually, and a failure on one comment is printed and
    does not stop the remaining comments.

    :param search_q: value of the "searchQ" column selecting the videos
                     whose comments are processed
    :type search_q: string
    :return: void
    """
    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT text, c_id FROM comments JOIN video """
            """ON video.v_id = comments.video_id """
            """WHERE text IS NOT NULL AND "searchQ" = %s""",
            (search_q, ))
        results = cursor.fetchall()

        # get sentiment for each comment retrieved
        for result in results:
            try:
                text = result[0]
                # clean the text
                clean_text = cleanText(text)

                if not (clean_text and clean_text.strip()):
                    # nothing left after cleaning: store placeholder values.
                    # Chained assignment is required here; unpacking the
                    # single int -1 into three names raises TypeError.
                    sentiment = 'null'
                    prob_neg = prob_pos = prob_neutral = -1
                else:
                    # clean_text has content: analyze the sentiment
                    sent_analysis = getSentiment(clean_text)
                    sentiment = sent_analysis['label']
                    probability = sent_analysis['probability']
                    prob_neg = probability['neg']
                    prob_pos = probability['pos']
                    prob_neutral = probability['neutral']

                # update the database
                cursor.execute(
                    """UPDATE comments SET clean_text = %s, sentiment = %s, """
                    """prob_pos = %s, prob_neg = %s, prob_neutral = %s """
                    """WHERE c_id = %s""",
                    (clean_text, sentiment, prob_pos, prob_neg, prob_neutral,
                     result[1]))
                connection.commit()
            except Exception as e:
                print("Exception in loop in retrieve comments: ", e)
    except IndexError as e:
        print("Index Error in retrieveComments:", e)
    except Exception as e:
        print("Error in retrieveComments:", e)
    finally:
        closeConnection(connection, cursor)
def makeRequestCommentThread(video_id, youtube):
    """
    Uses YouTube API to make a request for the comment thread for a video.
    Stores data in database: a top-level comment already present (matched
    on c_id) gets its text, likes and updated_at refreshed; otherwise the
    comment is inserted.

    Errors (missing keys, HTTP errors from the API, database errors) are
    printed, not re-raised.

    :param video_id: video id of YouTube video
    :type video_id: string
    :param youtube: youtube API service instance
    :type youtube: googleapiclient.discovery built instance
    :returns: void
    """
    # make request for comment thread of video
    request = youtube.commentThreads().list(
        part="id,snippet",
        videoId=video_id,
    )

    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        response = request.execute()

        for comment in response['items']:
            top_level = comment['snippet']['topLevelComment']
            details = top_level['snippet']

            c_id = top_level['id']
            author = details['authorDisplayName']
            text = details['textOriginal']
            likes = details['likeCount']
            published_at = details['publishedAt']
            updated_at = details['updatedAt']

            # store comment thread information in the database
            # make a search to see if comment is already stored in database
            cursor.execute("""SELECT * FROM comments WHERE c_id = %s;""",
                           (c_id, ))

            # if comment is returned, update comment information
            if cursor.fetchone() is not None:
                cursor.execute(
                    """UPDATE comments SET text = %s, likes = %s, """
                    """updated_at = %s WHERE c_id = %s""",
                    (text, likes, updated_at, c_id))
                connection.commit()
            # otherwise, insert comment into database
            else:
                cursor.execute(
                    """INSERT INTO comments VALUES(%s,%s,%s,%s,%s,%s,%s);""",
                    (c_id, text, likes, author, video_id, published_at,
                     updated_at))
                connection.commit()
                print("Inserted Comment into database")
    except KeyError as e:
        print("key error:", e)
    except IndexError as e:
        print("Index error in MakeRequestCommentThreads:", e)
    except TypeError as e:
        print("type error:", e)
    except googleapiclient.errors.HttpError as e:
        print("Comments disabled:", e)
    except Exception as e:
        print("Exception in makeRequestCommentThreads", e)
    finally:
        closeConnection(connection, cursor)
def makeRequestVideos(search_subject, video_id, youtube, dur):
    """
    Uses YouTube API to make a request for video data based on video ID
    parameter. Stores data in database: a video already present (matched
    on v_id) gets its statistics, channel, search subject, duration and
    url refreshed; otherwise the video is inserted.

    Errors (statistics missing from the response, database errors) are
    printed, not re-raised.

    :param search_subject: search subject of video (for database entry)
    :type search_subject: string
    :param video_id: video id of YouTube video
    :type video_id: string
    :param youtube: youtube API service instance
    :type youtube: googleapiclient.discovery built instance
    :param dur: duration of video (for database entry)
    :type dur: string
    :returns: void
    """
    # Form request parameter
    request = youtube.videos().list(part="statistics, snippet", id=video_id)

    # Connect before entering the try block: if connect() fails there is
    # nothing to close, and the finally clause must not touch unbound names
    # (which would replace the real error with an UnboundLocalError).
    cursor, connection = connect()
    try:
        response = request.execute()

        # collect data on video
        item = response['items'][0]
        statistics = item['statistics']
        snippet = item['snippet']

        VIEWS = statistics['viewCount']
        LIKES = statistics['likeCount']
        DISLIKES = statistics['dislikeCount']
        FAV = statistics['favoriteCount']
        COMMENTS = statistics['commentCount']
        TITLE = snippet['title']
        DESCRP = snippet['description']
        CHAN_ID = snippet['channelId']
        CHAN_TITLE = snippet['channelTitle']
        URL = "https://youtube.com/watch?v=" + video_id

        # query to see if video is already in db.
        # If yes, update stats, if no, insert
        cursor.execute("""SELECT * FROM video WHERE v_id = %s;""",
                       (video_id, ))
        if cursor.fetchone() is not None:
            # views is refreshed together with the other statistics so that
            # stored view counts do not go stale
            cursor.execute(
                """UPDATE video SET views = %s, likes = %s, dislikes = %s, """
                """fav_count = %s, com_count = %s, channel_id = %s, """
                """channel_name = %s, "searchQ" = %s, duration = %s, """
                """url = %s WHERE v_id = %s""",
                (VIEWS, LIKES, DISLIKES, FAV, COMMENTS, CHAN_ID, CHAN_TITLE,
                 search_subject, dur, URL, video_id))
            connection.commit()
        else:
            cursor.execute(
                """INSERT INTO video VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
                (TITLE, DESCRP, VIEWS, LIKES, DISLIKES, FAV, COMMENTS,
                 video_id, CHAN_ID, CHAN_TITLE, search_subject, dur, URL))
            connection.commit()
            print("Inserted into Video Table")
    except KeyError as e:
        print("key error:", e)
    except Exception as e:
        print("Exception in MakeVideoRequest:", e)
    finally:
        closeConnection(connection, cursor)