def get_spotify_data(keywords, num_playlists):
    """Master function get retrieve data from Spotify."""
    # Create instance of Spotify class
    SpotifyMaster = Spotify(CLIENT_ID, CLIENT_SECRET)

    # Keep retrieving playlists until the table is within 10 of num_playlists
    playlist_table_size = return_table_len("Playlists")
    if playlist_table_size < num_playlists - 10:
        # Pull playlist data for the current keyword
        print("Getting Spotify playlists")
        cache_dict = json_helper.read_cache()
        keyword_index = cache_dict["keyword_index"]
        keyword = keywords[keyword_index]
        print("Keyword: " + keyword)

        # Get playlists
        json_result = SpotifyMaster.search(keyword, "playlist")
        playlists = json_result["playlists"]["items"]

        # Write playlists to database
        write_playlists_to_database(SpotifyMaster, playlists)
        playlist_table_size = return_table_len("Playlists")
        print("Playlist table size: " + str(playlist_table_size))

        return

    # Otherwise, start getting tracks until we reach the limit
    tracks_table_size = return_table_len("Tracks")
    track_features_table_size = return_table_len("TrackFeatures")

    # Finish once both tables exceed 120 rows
    if tracks_table_size > 120 and track_features_table_size > 120:
        print("Gathered sufficient data for the database.")
        return

    # The target is 10 tracks per playlist (num_playlists * 10 rows)
    if tracks_table_size != num_playlists * 10:
        print("Getting Spotify Tracks")

        # Get the correct playlist href and increment the index counter
        cache_dict = json_helper.read_cache()
        cache_dict["playlist_href_index"] = cache_dict.get(
            "playlist_href_index", -1) + 1
        playlist_href_index = cache_dict["playlist_href_index"]
        json_helper.write_cache(cache_dict)
        playlist_href = cache_dict["playlist_hrefs"][playlist_href_index]

        # Get track ids from the playlist and write to database
        track_ids = SpotifyMaster.get_tracks_from_playlist(playlist_href)
        write_tracks_and_features_to_database(SpotifyMaster, track_ids,
                                              playlist_href,
                                              playlist_href_index + 1)
        print("Tracks table size: " + str(tracks_table_size))
        print("Track Features table size: " + str(track_features_table_size))

        return

    # Done getting data; time to JOIN the tables.
    print("Done retrieving Spotify playlists and track data.")
def main():
    # Get the cached top keywords (defaults are used on the first run)
    keywords = get_news_keywords()
    num_playlists = len(keywords) * 20
    print("Total number of expected playlists: " + str(num_playlists))

    # Cache keyword index
    cache_dict = json_helper.read_cache()
    cache_dict["keyword_index"] = cache_dict.get("keyword_index", -1) + 1
    json_helper.write_cache(cache_dict)

    # Create the JSON cache file if needed
    #
    # The JSON file is used to make sure only 20 items
    # are pulled from an API into the respective table in
    # the database per code execution
    dir_path = os.path.dirname(os.path.realpath(__file__))
    cache_path = os.path.join(dir_path, "helpers.json")
    open(cache_path, "a").close()  # portable replacement for shelling out to `touch`

    # Create Databases
    create_databases()

    # Get Spotify data
    get_spotify_data(keywords, num_playlists)
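
# json_helper is imported elsewhere and not shown here. A minimal sketch of the
# two calls this script relies on, assuming the cache lives in the helpers.json
# file that main() creates (file name taken from the code above):
import json
import os

def read_cache():
    """Load the cache dict from helpers.json; return {} if missing or empty."""
    cache_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "helpers.json")
    try:
        with open(cache_path) as cache_file:
            return json.load(cache_file)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}

def write_cache(cache_dict):
    """Overwrite helpers.json with the given cache dict."""
    cache_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "helpers.json")
    with open(cache_path, "w") as cache_file:
        json.dump(cache_dict, cache_file)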
def get_news_keywords():
    """Retrieve keywords from table created from data from Google News API"""
    # Get list of top keywords from JSON
    cache_dict = json_helper.read_cache()
    category_top_keywords = cache_dict.get(
        "category_top_keywords", ["Coronavirus", "China", "Trump", "Economy"])

    return category_top_keywords
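
# Note: main() increments "keyword_index" on every run, and get_spotify_data()
# indexes `keywords` with it directly, so enough runs would eventually raise an
# IndexError. A defensive variant (an assumption, not part of the original
# code) would wrap the index around the list:
#
#     keyword = keywords[keyword_index % len(keywords)]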
def main():
    # Delete tables
    delete_table("Playlists")
    delete_table("Tracks")
    delete_table("TrackFeatures")
    delete_table("ArticleData")

    # Clear the JSON cache
    json_helper.write_cache({})
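
# delete_table() is not defined in this snippet. A minimal sketch, assuming it
# drops the named table from Databases.db so the pipeline can start fresh:
import sqlite3

def delete_table(table_name):
    """Drop `table_name` from Databases.db if it exists (assumed helper)."""
    connection = sqlite3.connect("Databases.db")
    cur = connection.cursor()
    cur.execute("DROP TABLE IF EXISTS " + table_name)
    connection.commit()
    connection.close()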
Example #5
def main():
    # Cache news category index
    cache_dict = json_helper.read_cache()
    cache_dict["news_category_index"] = cache_dict.get("news_category_index",
                                                       -1) + 1
    json_helper.write_cache(cache_dict)

    if cache_dict["news_category_index"] >= len(CATEGORIES):
        print("No more categories to pull articles from.")
        return

    create_database()
    get_google_news_data()
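
# CATEGORIES, URL, and API_KEY are module-level constants not shown in this
# snippet. A sketch of plausible values, assuming the NewsAPI top-headlines
# endpoint (its documented category names are listed below; the exact values
# used by the original script are an assumption):
API_KEY = "YOUR_NEWS_API_KEY"  # placeholder; supply a real key
URL = "https://newsapi.org/v2/top-headlines"
CATEGORIES = ["business", "entertainment", "general", "health",
              "science", "sports", "technology"]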
def write_playlists_to_database(SpotifyMaster, playlists):
    """Write returned data to table"""
    print("Number of playlists: " + str(len(playlists)))
    # Connecting to the database
    connection = sqlite3.connect("Databases.db")
    cur = connection.cursor()

    # Cache playlist href to get tracks later
    cache_dict = json_helper.read_cache()
    playlist_hrefs = cache_dict.get("playlist_hrefs", [])

    # Write each playlist into the database
    for playlist in playlists:
        # Get necessary playlist metadata
        name = playlist["name"]
        owner = playlist["owner"]["id"]
        desc = playlist["description"]
        href = playlist["href"]

        # Get total number of tracks in the playlist
        id = playlist["id"]
        playlist_data = SpotifyMaster.get_data("playlist", id)
        total_tracks = playlist_data["tracks"]["total"]
        # print("Playlist Size:\t" + str(total_tracks))

        playlist_row = [name, owner, desc, href, total_tracks]

        # Insert playlist into database
        sql_command = """
                        INSERT OR IGNORE INTO Playlists 
                            (playlist_name, 
                             playlist_owner, 
                             playlist_desc,
                             playlist_href,
                             playlist_size) 
                        values (?,?,?,?, ?)
                    """
        cur.execute(sql_command, playlist_data)

        # Append playlist href to list of hrefs
        playlist_hrefs.append(href)

    cache_dict["playlist_hrefs"] = playlist_hrefs
    json_helper.write_cache(cache_dict)

    connection.commit()
    connection.close()
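
# create_databases() is not shown. A minimal sketch of the Playlists table it
# must create, with column names inferred from the INSERT statement above;
# the types and the UNIQUE constraint (implied by INSERT OR IGNORE) are
# assumptions, and the real helper also creates the Tracks, TrackFeatures,
# and ArticleData tables:
import sqlite3

def create_databases():
    connection = sqlite3.connect("Databases.db")
    cur = connection.cursor()
    cur.execute("""
        CREATE TABLE IF NOT EXISTS Playlists (
            playlist_name TEXT,
            playlist_owner TEXT,
            playlist_desc TEXT,
            playlist_href TEXT UNIQUE,
            playlist_size INTEGER
        )
    """)
    connection.commit()
    connection.close()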
Example #7
def get_google_news_data():
    """Pull top articles for the current category and store them in the database."""
    # Look up the current category via the cached news_category_index
    print("Getting Google News Top Articles")
    cache_dict = json_helper.read_cache()
    news_category_index = cache_dict["news_category_index"]
    category = CATEGORIES[news_category_index]
    print("Category: " + category)

    # Get up to 20 top articles for the category
    params = {
        'apiKey': API_KEY,
        'country': 'us',
        'category': category,
        # 'totalResults' is a response field; 'pageSize' caps the request
        'pageSize': 20
    }
    response = requests.get(URL, params=params)
    json_response = response.json()

    articles = json_response['articles']

    # Only get data for 20 articles
    connection = sqlite3.connect("Databases.db")
    cur = connection.cursor()
    top_keyword = ""
    count = 0
    for article in articles:
        if count == 20:
            break
        article_data = get_article_data(article, category)
        if article_data is None:
            continue
        print(article_data)

        top_keyword = article_data[2]

        # Insert article data into database
        sql_command = """
                        INSERT OR IGNORE INTO ArticleData 
                            (article_title, 
                             article_category, 
                             top_keyword_one,
                             top_keyword_two) 
                        values (?,?,?,?)
                    """
        cur.execute(sql_command, article_data)

        count += 1

    # Cache the top keyword for this category; if it is already cached,
    # fall back to a default keyword so each category still adds one entry
    cache_dict = json_helper.read_cache()
    category_top_keywords = cache_dict.get("category_top_keywords", [])
    print("top keyword: " + top_keyword)
    if top_keyword not in category_top_keywords:
        category_top_keywords.append(top_keyword)
    else:
        category_top_keywords.append("virus")
    cache_dict["category_top_keywords"] = category_top_keywords
    json_helper.write_cache(cache_dict)

    connection.commit()
    connection.close()
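
# get_article_data() is not defined in this snippet. The INSERT statement and
# `article_data[2]` above pin down its contract: return a 4-item sequence of
# (title, category, top keyword one, top keyword two), or None to skip the
# article. A minimal sketch; the longest-words keyword heuristic is purely an
# assumption:
def get_article_data(article, category):
    title = article.get("title")
    if not title:
        return None
    # Toy keyword extraction: take the two longest words in the title.
    words = sorted(title.split(), key=len, reverse=True)
    if len(words) < 2:
        return None
    return [title, category, words[0], words[1]]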