def update_db():
    """Scrape search results for every query and insert one scored row per video.

    For each entry in the module-level ``queries`` and each of search pages
    1 to 3, the search page is downloaded and parsed into a list of videos.
    Every video page is then downloaded and parsed; pages whose parsed data
    lacks the ``'watch-time-text'`` key are skipped. The remaining videos are
    scored with ``compute_prediction`` and written to the ``videos`` table
    together with the current ``time.time_ns()`` timestamp. Each insert is
    committed immediately.

    Returns:
        bool: always ``True`` once all queries and pages have been processed.
    """
    # Values are bound through placeholders instead of being formatted into
    # the statement: titles and ids come from scraped pages and must never be
    # interpreted as SQL. This also removes the need to strip apostrophes
    # from titles, so they are stored unmodified.
    # NOTE(review): "?" is the sqlite3 placeholder style -- assumes `sql` is
    # the sqlite3 module; confirm against the file's imports.
    insert_statement = "INSERT INTO videos VALUES (?, ?, ?, ?)"

    with sql.connect(db_name) as conn:
        cursor = conn.cursor()
        for query in queries:
            for page in range(1, 4):
                print(query, page)
                search_page = download_search_page(query, page)
                video_list = parse_search_page(search_page)

                for video in video_list:
                    video_page = download_video_page(video['link'])
                    video_json_data = parse_video_page(video_page)

                    if 'watch-time-text' not in video_json_data:
                        continue

                    p = compute_prediction(video_json_data)

                    video_id = video_json_data.get('og:video:url', '')
                    data_front = {
                        "title": video_json_data['watch-title'],
                        "score": float(p),
                        "video_id": video_id,
                        "update_time": time.time_ns(),
                    }

                    print(video_id, json.dumps(data_front))
                    # Column order matches the original statement:
                    # title, video_id, score, update_time.
                    cursor.execute(
                        insert_statement,
                        (
                            data_front["title"],
                            data_front["video_id"],
                            data_front["score"],
                            data_front["update_time"],
                        ),
                    )
                    conn.commit()
    return True
Ejemplo n.º 2
0
def update_db():
    """Scrape search results for every keyword and insert one scored row per video.

    For each entry in the module-level ``keywords`` and each of search pages
    1 to 3, the search page is downloaded and parsed via ``get_data``. Every
    listed video page is then downloaded and parsed; pages whose parsed data
    lacks the ``'watch-time-text'`` key are skipped. The remaining videos are
    scored with ``compute_prediction`` and written to the ``videos`` table as
    ``(title, video_id, score)``. Each insert is committed immediately.

    Returns:
        bool: always ``True`` once all keywords and pages have been processed.
    """
    # Values are bound through placeholders instead of being formatted into
    # the statement: titles and ids come from scraped pages and must never be
    # interpreted as SQL. The driver handles quoting, so the title no longer
    # needs its apostrophes doubled by hand.
    # NOTE(review): "?" is the sqlite3 placeholder style -- assumes `sql` is
    # the sqlite3 module; confirm against the file's imports.
    insert_statement = "INSERT INTO videos VALUES (?, ?, ?)"

    with sql.connect(db_name) as conn:
        cursor = conn.cursor()
        for keyword in keywords:
            for page in range(1, 4):
                search_page = get_data.download_search_page(keyword, page)
                videos_list = get_data.parse_search_page(search_page)

                for video in videos_list:
                    video_page = get_data.download_video_page(video['link'])
                    video_json_data = get_data.parse_video_page(video_page)

                    # Discard pages whose parsed data has no
                    # 'watch-time-text' entry.
                    if 'watch-time-text' not in video_json_data:
                        continue

                    p = compute_prediction(video_json_data)

                    video_id = video_json_data.get('og:video:url', '')
                    data_front = {
                        "title": video_json_data['watch-title'],
                        "score": float(p),
                        "video_id": video_id,
                    }

                    print(video_id, json.dumps(data_front))
                    # Column order matches the original statement:
                    # title, video_id, score.
                    cursor.execute(
                        insert_statement,
                        (
                            data_front["title"],
                            data_front["video_id"],
                            data_front["score"],
                        ),
                    )
                    conn.commit()

    return True
Ejemplo n.º 3
0
def predict_api():
    """Score the video named by the ``yt_video_id`` request argument.

    Reads ``yt_video_id`` from ``request.args`` (empty string when absent),
    downloads and parses the corresponding ``/watch?v=...`` page via
    ``get_data``, and scores it with ``ml_utils.compute_prediction``.

    Returns:
        str: the literal ``"not found"`` when the parsed page data has no
        ``'watch-time-text'`` key; otherwise a JSON object string with the
        keys ``"title"`` and ``"score"``.
    """
    from urllib.parse import quote

    yt_video_id = request.args.get("yt_video_id", default='')
    # The id is caller-controlled: percent-encode it so characters such as
    # '&' or '#' cannot inject extra query parameters or truncate the URL.
    # Ordinary ids (letters, digits, '-', '_') pass through unchanged.
    video_page = get_data.download_video_page(
        "/watch?v={}".format(quote(yt_video_id, safe='')))
    video_json_data = get_data.parse_video_page(video_page)

    if 'watch-time-text' not in video_json_data:
        return "not found"

    p = ml_utils.compute_prediction(video_json_data)
    # float() guards against numeric types json.dumps cannot serialize.
    # NOTE(review): assumes the prediction is a scalar -- confirm against
    # ml_utils.compute_prediction.
    output = {"title": video_json_data['watch-title'], "score": float(p)}

    return json.dumps(output)