Beispiel #1
0
def createMatrix():
    """
    Build a users x videos DataFrame filled with the stored ratings.

    Rows are labelled with every ``id`` from ``users`` and columns with every
    ``v_id`` from ``video``. Cells start at 0 and are overwritten with the
    ``rating`` of each row found in ``myvideos``.

    :returns: the filled DataFrame, or None if an exception was caught
    """
    cursor, connection = connect()
    try:
        # row labels: one per user
        cursor.execute("SELECT id from users;")
        user_ids = [row[0] for row in cursor.fetchall()]

        # column labels: one per video
        cursor.execute("SELECT v_id from video;")
        video_ids = [row[0] for row in cursor.fetchall()]

        # zero-filled users x videos matrix
        matrix = pd.DataFrame(
            0, index=user_ids, columns=video_ids, dtype=float)

        # drop every known rating into its (user, video) cell
        cursor.execute("SELECT u_id, v_id, rating from myvideos;")
        for rating_row in cursor.fetchall():
            matrix.at[rating_row[0], rating_row[1]] = rating_row[2]

        return matrix

    except Exception as e:
        print("Exception in createMatrix: ", e)

    finally:
        closeConnection(connection, cursor)
Beispiel #2
0
def clean_video_description(search_q):
    """
    Clean the stored description of every video found for a search query.

    Reads ``descr`` for each row of ``video`` whose "searchQ" equals
    ``search_q``, runs it through ``cleanText`` and stores the result in
    ``clean_descr``. A description that is empty or whitespace-only after
    cleaning is stored as a single space.

    :param search_q: search query whose videos should be cleaned
    :return: void
    """
    # Connect before the try block: if connect() fails there is nothing to
    # close, and the finally clause must not touch unbound names.
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT descr,v_id FROM video WHERE "searchQ" = %s;""",
            (search_q, ))
        results = cursor.fetchall()

        # clean the text for each description retrieved
        for descr, v_id in results:
            clean_text = cleanText(descr)
            if not (clean_text and clean_text.strip()):
                clean_text = ' '

            # update database with clean description
            cursor.execute(
                """UPDATE video SET clean_descr = %s
                WHERE v_id = %s""", (clean_text, v_id))
            connection.commit()
            print("Success ", v_id)

    except Exception as e:
        print("Error in cleanVideoDescription:", e)
    finally:
        closeConnection(connection, cursor)
Beispiel #3
0
def retrieveVideos():
    """
    Fetch a fixed list of videos from the YouTube API and store them.

    Each returned video is inserted into the ``video`` table with the search
    query 'control'; afterwards its comment thread is requested through
    ``makeRequestCommentThread``.

    :returns: void
    """
    youtube = credentialVerification()
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id="yDKWmNpw7gE, 3uk6rKXbG1M, p-aVhSEO8Ro, t_CbWtSSHMw, z7Tadx4XGjA"
    )
    response = request.execute()

    # connect to database; the finally clause guarantees the connection is
    # released even when a video is malformed or the result list is empty
    cursor, connection = connect()
    try:
        # collect data on search results
        for video in response['items']:
            # NOTE(review): a statistics key the API omits (possibly
            # 'dislikeCount' or 'likeCount') raises KeyError here and stops
            # the loop - confirm whether such videos should be skipped.
            stats = video['statistics']
            snippet = video['snippet']

            views = stats['viewCount']
            likes = stats['likeCount']
            dislikes = stats['dislikeCount']
            fav = stats['favoriteCount']
            title = snippet['title']
            descr = snippet['description']
            chan_id = snippet['channelId']
            chan_title = snippet['channelTitle']
            videoID = video['id']
            url = "https://youtube.com/watch?v=" + videoID
            searchQ = 'control'

            print(views, likes, dislikes, fav, title, descr, chan_id,
                  chan_title, videoID, url)

            cursor.execute(
                """INSERT INTO video (title, descr, views, likes, dislikes, v_id, channel_id, channel_name, "searchQ", url)
                VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
                (title, descr, views, likes, dislikes,
                 videoID, chan_id, chan_title,
                 searchQ, url))
            connection.commit()
            print("Inserted into Video Table")

            # makeRequestCommentThread takes (video_id, youtube) and manages
            # its own database connection, so none is handed over here.
            makeRequestCommentThread(videoID, youtube)
    finally:
        closeConnection(connection, cursor)
def collect_inputs(search_q):
    """
    Collect per-video statistics and averaged comment sentiment for a query.

    Joins ``video`` with ``comments`` (keeping videos without comments) for
    all videos whose "searchQ" equals ``search_q`` and returns one row per
    video, ordered by views descending. Each row holds: likes, dislikes,
    views, title, descr, clean_descr, channel_name, duration, the averages of
    prob_pos, prob_neg and prob_neutral (each multiplied by 100), v_id, url.

    :param search_q: search query the videos were stored under
    :returns: list of result rows, or None if an exception was caught
    """
    # Connect before the try block so the finally clause never refers to
    # unbound names when connect() itself fails.
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT video.likes, video.dislikes, views, title,descr,
                        clean_descr, channel_name, video.duration, AVG(comments.prob_pos)*100,
                        AVG(comments.prob_neg)*100,
                        AVG(comments.prob_neutral)*100, v_id, url
                        FROM comments right join video
                        ON comments.video_id = video.v_id
                        WHERE "searchQ"=%s
                        GROUP BY v_id, video.likes, video.dislikes, views, title
                        ORDER BY views desc;""", (search_q, ))
        return cursor.fetchall()

    except Exception as e:
        print("Exception in collectInputs:", e)
    finally:
        closeConnection(connection, cursor)
Beispiel #5
0
def detect_language(search_q):
    """
        Uses detectlanguage API to determine language of video based on video title and cleaned description
        Updates video table in database with the first detection returned
        (language, isReliable, confidence)

        The API key is read from the DETECTLANGUAGE_API_KEY environment
        variable when set, otherwise the built-in key is used.

       :param search_q: search query whose videos should be analysed
       :returns: void
    """
    import os

    # Connect before the try block so the finally clause never refers to
    # unbound names when connect() itself fails.
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT title, clean_descr, v_id
                         FROM video
                         WHERE "searchQ" = %s;""", (search_q, ))
        results = cursor.fetchall()

        # configure language detection API
        # NOTE(review): this default key is a secret committed to source;
        # prefer supplying it through the environment and rotating it.
        detectlanguage.configuration.api_key = os.environ.get(
            "DETECTLANGUAGE_API_KEY", "eea8968a48d7b6af0a3de993f7f401e0")

        # for each result from the database, detect language from title and clean description
        for title, clean_descr, v_id in results:
            # either column may be NULL (e.g. description not cleaned yet);
            # treat it as empty text instead of failing on None + str
            text = (title or "") + " " + (clean_descr or "")

            detections = detectlanguage.detect(text)
            if not detections:
                # nothing detected for this video; leave its row untouched
                continue
            best = detections[0]

            # update database with language data (language, isReliable, confidence level)
            cursor.execute(
                """UPDATE video SET language = %s, is_reliable = %s,
                confidence = %s WHERE v_id = %s""",
                (best['language'], best['isReliable'], best['confidence'],
                 v_id))
            connection.commit()

    except Exception as e:
        print("Exception in detect_language:", e)
    finally:
        closeConnection(connection, cursor)
Beispiel #6
0
def rankVideos():
    """
    Compute a ranking score for every stored video.

    score = [(likes - dislikes) / views] * log(views / average views),
    and 0 for a video without views.

    :returns: dict mapping v_id to its score ({} when the video table is
              empty), or None if an exception was caught
    """
    cursor, connection = connect()
    try:
        # retrieve video stats
        sql = "SELECT v_id, likes, dislikes, views FROM video;"
        df = sqlio.read_sql_query(sql, connection)

        # no videos: nothing to rank, and the average below would divide by 0
        if df.empty:
            return {}

        # rows as plain (v_id, likes, dislikes, views) tuples, in query order
        rows = list(df.itertuples(index=False, name=None))

        # compute average views
        avg_views = sum(row[3] for row in rows) / len(rows)

        # video ranking = [(likes-dislikes) / views]*log(views/avg_views)
        video_rankings = {}
        for v_id, likes, dislikes, views in rows:
            if views == 0:
                rank = 0
            else:
                rank = (
                    (likes - dislikes) / views) * math.log(views / avg_views)

            video_rankings[v_id] = rank

        return video_rankings

    except Exception as e:
        print("Exception in rank videos: ", e)

    finally:
        # hand over the real connection (it was previously replaced by None,
        # so it was never passed on for closing)
        closeConnection(connection, cursor)
Beispiel #7
0
def recommend(u, v, df):
    """
    Store one recommendation unless it is already in the database.

    :param u: positional row index into df; the row label is the user id
    :param v: positional column index into df; the column label is the video id
    :param df: DataFrame whose cell at (u, v) holds the rank to store
    :returns: void
    """
    user_id = int(df.index.values[u])
    video_id = str(df.columns.values[v])

    cursor, connection = connect()
    try:
        cursor.execute(
            "SELECT v_id FROM recommendations WHERE u_id = %s and v_id = %s;",
            (user_id, video_id))
        if cursor.fetchone() is None:
            # Pass a plain Python float, like the ids above: a numpy scalar
            # taken from the DataFrame may not be rendered correctly by the
            # database driver.
            rank = float(df.iat[u, v])
            cursor.execute(
                "INSERT INTO recommendations (u_id, v_id, rank) VALUES (%s, %s, %s);",
                (user_id, video_id, rank))
            connection.commit()

    except Exception as e:
        print("Exception while inserting: ", e)

    finally:
        closeConnection(connection, cursor)
Beispiel #8
0
def retrieve_comments_for_cleaning(search_q):
    """
        retrieves comments from the database and preps them for cleaning
        calls cleanText from util
        checks comments after cleaning and inserts clean text into database
        together with the sentiment label and probabilities from getSentiment;
        a comment that is empty after cleaning gets sentiment 'null' and
        probabilities of -1

        :param search_q: search query whose video comments should be processed
        :return: void
        """
    # Connect before the try block so the finally clause never refers to
    # unbound names when connect() itself fails.
    cursor, connection = connect()
    try:
        cursor.execute(
            """SELECT text, c_id FROM comments
            JOIN video ON video.v_id = comments.video_id
            WHERE text IS NOT NULL AND "searchQ" = %s""", (search_q, ))
        results = cursor.fetchall()

        # get sentiment for each comment retrieved
        for result in results:
            # a failure on one comment must not stop the remaining ones
            try:
                text = result[0]
                c_id = result[1]

                # clean the text
                clean_text = cleanText(text)

                # if clean text returns null, update database with null and -1 values
                if not (clean_text and clean_text.strip()):
                    sentiment = 'null'
                    # chained assignment: "a, b, c = -1" would try to unpack
                    # an int and raise TypeError
                    prob_neg = prob_pos = prob_neutral = -1

                # if clean_text returns something, analyze the sentiment
                else:
                    sent_analysis = getSentiment(clean_text)
                    sentiment = sent_analysis['label']
                    probability = sent_analysis['probability']
                    prob_neg = probability['neg']
                    prob_pos = probability['pos']
                    prob_neutral = probability['neutral']

                # update the database
                cursor.execute(
                    """UPDATE comments SET clean_text = %s,
                    sentiment = %s,
                    prob_pos = %s, prob_neg = %s, prob_neutral = %s
                    WHERE c_id = %s""",
                    (clean_text, sentiment, prob_pos, prob_neg,
                     prob_neutral, c_id))
                connection.commit()

            except Exception as e:
                print("Exception in loop in retrieve comments: ", e)

    except IndexError as e:
        print("Index Error in retrieveComments:", e)
    except Exception as e:
        print("Error in retrieveComments:", e)
    finally:
        closeConnection(connection, cursor)
Beispiel #9
0
def makeRequestCommentThread(video_id, youtube):
    """
       Uses YouTube API to make a request for the comment thread for a video.
       Stores data in database: a top-level comment that is already stored
       (matched by its id) gets its text, likes and updated_at refreshed,
       any other comment is inserted.

       :param video_id: video id of YouTube video
       :type video_id: string

       :param youtube: youtube API service instance
       :type youtube: googleapiclient.discovery built instance

       :returns: None. Errors are printed, not raised.
    """

    # make request for comment thread of video
    request = youtube.commentThreads().list(
        part="id,snippet",
        videoId=video_id,
    )

    # Connect before the try block so the finally clause never refers to
    # unbound names when connect() itself fails.
    cursor, connection = connect()
    try:
        response = request.execute()
        for comment in response['items']:
            top_level = comment['snippet']['topLevelComment']
            details = top_level['snippet']

            c_id = top_level['id']
            author = details['authorDisplayName']
            text = details['textOriginal']
            likes = details['likeCount']
            published_at = details['publishedAt']
            updated_at = details['updatedAt']

            # store comment thread information in the database
            # make a search to see if comment is already stored in database
            cursor.execute("""SELECT * FROM comments WHERE c_id = %s;""",
                           (c_id, ))

            # if comment is returned, update comment information
            if cursor.fetchone() is not None:
                cursor.execute(
                    """UPDATE comments SET text = %s, likes = %s,
                    updated_at = %s WHERE c_id = %s""",
                    (text, likes, updated_at, c_id))
                connection.commit()

            # otherwise, insert comment into database
            else:
                cursor.execute(
                    """INSERT INTO comments
                        VALUES(%s,%s,%s,%s,%s,%s,%s);""",
                    (c_id, text, likes, author, video_id, published_at,
                     updated_at))
                connection.commit()
                print("Inserted Comment into database")

    except KeyError as e:
        print("key error:", e)
    except IndexError as e:
        print("Index error in MakeRequestCommentThreads:", e)
    except TypeError as e:
        print("type error:", e)
    except googleapiclient.errors.HttpError as e:
        print("Comments disabled:", e)
    except Exception as e:
        print("Exception in makeRequestCommentThreads", e)
    finally:
        closeConnection(connection, cursor)
Beispiel #10
0
def makeRequestVideos(search_subject, video_id, youtube, dur):
    """
        Uses YouTube API to make a request for video data based on video ID parameter.
        Stores data in database: a video that is already stored is updated,
        any other video is inserted. Errors are printed, not raised.

        :param search_subject: search subject of video (for database entry)
        :type search_subject: string

        :param video_id: video id of YouTube video
        :type video_id: string

        :param youtube: youtube API service instance
        :type youtube: googleapiclient.discovery built instance

        :param dur: duration of video (for database entry)
        :type dur: string


        :returns: void
    """

    # Form request parameter
    request = youtube.videos().list(part="statistics, snippet", id=video_id)

    # Connect before the try block so the finally clause never refers to
    # unbound names when connect() itself fails.
    cursor, connection = connect()
    try:
        response = request.execute()

        # collect data on video; a key missing from the response raises
        # KeyError, which is reported below and leaves the database untouched
        item = response['items'][0]
        stats = item['statistics']
        snippet = item['snippet']

        views = stats['viewCount']
        likes = stats['likeCount']
        dislikes = stats['dislikeCount']
        fav = stats['favoriteCount']
        comments = stats['commentCount']
        title = snippet['title']
        descr = snippet['description']
        chan_id = snippet['channelId']
        chan_title = snippet['channelTitle']
        url = "https://youtube.com/watch?v=" + video_id

        # query to see if video is already in db. If yes, update stats, if no, insert
        cursor.execute("""SELECT * FROM video WHERE v_id = %s;""",
                       (video_id, ))
        if cursor.fetchone() is not None:
            # NOTE(review): views, title and descr are not refreshed here -
            # confirm that is intended.
            cursor.execute(
                """UPDATE video SET likes = %s, dislikes = %s,
                fav_count = %s, com_count = %s, channel_id = %s,
                channel_name = %s, "searchQ" = %s, duration = %s,
                url = %s WHERE v_id = %s""",
                (likes, dislikes, fav, comments, chan_id, chan_title,
                 search_subject, dur, url, video_id))
            connection.commit()

        else:
            cursor.execute(
                """INSERT INTO video
                    VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
                (title, descr, views, likes, dislikes, fav, comments,
                 video_id, chan_id, chan_title, search_subject, dur, url))
            connection.commit()
            print("Inserted into Video Table")

    except KeyError as e:
        print("key error:", e)
    except Exception as e:
        print("Exception in MakeVideoRequest:", e)
    finally:
        closeConnection(connection, cursor)