def main():
    """Entry point for the Spotify data pipeline.

    Caches the next keyword index, makes sure the JSON cache file and the
    database tables exist, then pulls one incremental batch of Spotify data.
    """
    # If first time running, then cache top keywords
    keywords = get_news_keywords()
    # Each keyword is expected to yield 20 playlists.
    num_playlists = len(keywords) * 20
    print("Total number of expected playlists: " + str(num_playlists))

    # Cache keyword index: advance (or initialize to 0) the persistent
    # index so each run works on the next keyword.
    cache_dict = json_helper.read_cache()
    cache_dict["keyword_index"] = cache_dict.get("keyword_index", -1) + 1
    json_helper.write_cache(cache_dict)

    # Create JSON file if needed.
    #
    # JSON file will be used to make sure only 20 items are pulled from an
    # API into the respective table in the database per code execution.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # os.path.join instead of manual '/' concatenation (portable).
    cache_path = os.path.join(dir_path, "helpers.json")
    # Create the file in-process instead of shelling out via
    # os.system("touch ...") — no shell, works on all platforms.
    with open(cache_path, "a"):
        pass

    # Create Databases
    create_databases()

    # Get Spotify
    get_spotify_data(keywords, num_playlists)
def get_spotify_data(keywords, num_playlists): """Master function get retrieve data from Spotify.""" # Create instance of Spotify class SpotifyMaster = Spotify(CLIENT_ID, CLIENT_SECRET) # Only retrieve playlists if not at num_playlists playlist_table_size = return_table_len("Playlists") if playlist_table_size < num_playlists - 10: # Pull playlist data a keyword print("Getting Spotify playlists") cache_dict = json_helper.read_cache() keyword_index = cache_dict["keyword_index"] keyword = keywords[keyword_index] print("Keyword: " + keyword) # Get playlists json_result = SpotifyMaster.search(keyword, "playlist") playlists = json_result["playlists"]["items"] # Write playlists to database write_playlists_to_database(SpotifyMaster, playlists) playlist_table_size = return_table_len("Playlists") print("Playlist table size: " + str(playlist_table_size)) return # Otherwise, start getting tracks until reach limit tracks_table_size = return_table_len("Tracks") track_features_table_size = return_table_len("TrackFeatures") # Finish if over 100 rows for either if tracks_table_size > 120 and track_features_table_size > 120: print("Gathered sufficient data for the database.") return if tracks_table_size != num_playlists * 10: print("Getting Spotify Tracks") # Get the correct playlist href and increment the index counter cache_dict = json_helper.read_cache() cache_dict["playlist_href_index"] = cache_dict.get( "playlist_href_index", -1) + 1 playlist_href_index = cache_dict["playlist_href_index"] json_helper.write_cache(cache_dict) playlist_href = cache_dict["playlist_hrefs"][playlist_href_index] # Get track ids from the playlist and write to database track_ids = SpotifyMaster.get_tracks_from_playlist(playlist_href) write_tracks_and_features_to_database(SpotifyMaster, track_ids, playlist_href, playlist_href_index + 1) print("Tracks table size: " + str(tracks_table_size)) print("Track Features table size: " + str(track_features_table_size)) return # Done getting data, JOIN time. 
print("Done retrieving Spotify playlists and track data.")
def main():
    """Reset all persisted state: drop every table and empty the JSON cache."""
    # Delete tables
    delete_table("Playlists")
    delete_table("Tracks")
    delete_table("TrackFeatures")
    delete_table("ArticleData")

    # Clear JSON data.  The previous version read the cache and immediately
    # discarded the result; writing an empty dict directly is equivalent.
    json_helper.write_cache({})
def main():
    """Advance the cached news-category index and pull articles for it."""
    # Bump the persisted category index (first run starts at 0) and save it
    # back before doing anything else.
    state = json_helper.read_cache()
    next_index = state.get("news_category_index", -1) + 1
    state["news_category_index"] = next_index
    json_helper.write_cache(state)

    # Nothing left to do once every category has been processed.
    if next_index >= len(CATEGORIES):
        print("No more categories to pull articles from.")
        return

    create_database()
    get_google_news_data()
def write_playlists_to_database(SpotifyMaster, playlists):
    """Write returned data to table.

    Inserts each playlist's metadata into the Playlists table and appends
    its href to the cached "playlist_hrefs" list so later runs can fetch
    the playlist's tracks.

    Args:
        SpotifyMaster: Spotify API client (provides get_data()).
        playlists: list of playlist dicts from the Spotify search endpoint.
    """
    print("Number of playlists: " + str(len(playlists)))

    # Cache playlist href to get tracks later
    cache_dict = json_helper.read_cache()
    playlist_hrefs = cache_dict.get("playlist_hrefs", [])

    # Connecting to the database; try/finally guarantees the connection is
    # closed even when an insert or API call raises.
    connection = sqlite3.connect("Databases.db")
    try:
        cur = connection.cursor()
        # Hoisted out of the loop — the statement text never changes.
        sql_command = """
        INSERT OR IGNORE INTO Playlists
        (playlist_name, playlist_owner,
        playlist_desc, playlist_href, playlist_size)
        values (?,?,?,?, ?)
        """
        # Write each playlist into the database
        for playlist in playlists:
            # Get necessary playlist metadata
            name = playlist["name"]
            owner = playlist["owner"]["id"]
            desc = playlist["description"]
            href = playlist["href"]
            # Fetch the full playlist record to learn its track count.
            # ("playlist_id" rather than "id" — avoids shadowing the builtin.)
            playlist_id = playlist["id"]
            full_playlist = SpotifyMaster.get_data("playlist", playlist_id)
            total_tracks = full_playlist["tracks"]["total"]
            # Insert playlist into database
            cur.execute(sql_command,
                        [name, owner, desc, href, total_tracks])
            # Append playlist href to list of hrefs
            playlist_hrefs.append(href)

        cache_dict["playlist_hrefs"] = playlist_hrefs
        json_helper.write_cache(cache_dict)
        connection.commit()
    finally:
        connection.close()
def get_google_news_data():
    """Fetch top articles for the cached category and store them.

    Uses the persisted "news_category_index" to pick a category, requests
    the top US headlines for it, writes up to 20 parsed articles into the
    ArticleData table, and caches one top keyword for the category.
    """
    # Using news_category_index
    print("Getting Google News Top Articles")
    cache_dict = json_helper.read_cache()
    news_category_index = cache_dict["news_category_index"]
    category = CATEGORIES[news_category_index]
    print("Category: " + category)

    # Get 10 top articles per category
    # NOTE(review): for NewsAPI-style endpoints "totalResults" is a field of
    # the RESPONSE, not a request parameter ("pageSize" limits results) —
    # confirm against the actual API.  The count is capped at 20 in the loop
    # below regardless, so behavior is preserved here.
    params = {
        'apiKey': API_KEY,
        'country': 'us',
        'category': category,
        'totalResults': 20
    }
    response = requests.get(URL, params)
    json_response = response.json()
    articles = json_response['articles']

    # Only get data for 20 articles; try/finally guarantees the connection
    # is closed even if an insert raises.
    connection = sqlite3.connect("Databases.db")
    try:
        # Cursor and SQL text hoisted out of the loop — they never change.
        cur = connection.cursor()
        sql_command = """
        INSERT OR IGNORE INTO ArticleData
        (article_title, article_category,
        top_keyword_one, top_keyword_two)
        values (?,?,?,?)
        """
        top_keyword = ""
        count = 0
        for article in articles:
            if count == 20:
                break
            article_data = get_article_data(article, category)
            # "is None" rather than "== None" (identity check for None).
            if article_data is None:
                continue
            print(article_data)
            top_keyword = article_data[2]
            # Insert article data into database
            cur.execute(sql_command, article_data)
            count += 1

        # Cache the top keyword for each category; fall back to "virus"
        # when this keyword was already recorded.
        cache_dict = json_helper.read_cache()
        category_top_keywords = cache_dict.get("category_top_keywords", [])
        print("top keyword: " + top_keyword)
        if top_keyword not in category_top_keywords:
            category_top_keywords.append(top_keyword)
        else:
            category_top_keywords.append("virus")
        cache_dict["category_top_keywords"] = category_top_keywords
        json_helper.write_cache(cache_dict)
        connection.commit()
    finally:
        connection.close()