def main(): """ ~ auralFix - Music discovery tool using PRAW AND youtube-dl ~ - Extract user information from csv (aural_users.csv) - Can edit top_from time period(wk=1, mon=2, yr=3, all_time=4) GLOBAL/user subs - Can edit max_songs [GLOBAL for all subs per user] - Pass each user's information to 'reddit_fix' - Reddit_fix scrapes reddit.com/r/*user_subreddits* for music links and creates csv - Ensures folder structure exists - Downloads music to appropriate folder - Moves csv to archives folder for reference and buying/liking music from webpages - Sends an email to the user once all the user's music is available: subs and total songs scraped - Active participation via csv - Show a popup notif to Root user - Root folder is linked to cloud - File structure provided in aural_tree.txt - Script is set up in cron to run every 2 weeks """ # Creating a list containing the dictionary of all the users details user_details = csv2dict("aural_users.csv") # cleaup threshold in days data_threshold = CLEANUP_THRESHOLD # Folder hooked to cloud os.chdir(AURAL_DIRECTORY) if not os.path.exists(r"auralCloud"): os.makedirs("auralCloud") os.chdir("auralCloud") user_prompt = raw_input( "Do you want to delete the previous (data older than " + str(data_threshold) + ")\nEnter 'Y'/blank to proceed or 'N' to cancel" ) if user_prompt in ["y", "Y", ""]: # prev data cleanup aural_path = os.getcwd() print aural_path remove_old_files(dir_to_search=aural_path, threshold_in_days=data_threshold) for i, enum in enumerate(user_details): # for each user in the csv user_name = user_details[i]["Name"] user_email = user_details[i]["Email"] user_subs_str = user_details[i]["Subreddits"] user_period_pref = user_details[i]["top_from"] user_max_songs = user_details[i]["max_songs"] user_subs = user_details[i]["Subreddits"].split(",") subs_2_scrape = [user_subs[i].strip() for i, enum in enumerate(user_subs)] user_songs_count = 0 print "\nName >> " + user_name + "\nEmail >> " + user_email + "\n" # creating folder 
name for User if not present if not os.path.exists(user_name): os.makedirs(user_name) os.chdir(user_name) # scraping each of the user's subreddits for music for i, enum in enumerate(subs_2_scrape): # for each subreddit of the particular user selected print "\t~ Scraping subreddit " + subs_2_scrape[ i ] + " for user > " + user_name + "\n\tTime period pref is > " + user_period_pref + "\n\tMax songs per subreddit > " + user_max_songs curr_sub = subs_2_scrape[i] userSub_song_count = 0 auralWorker = Reddit_fix() userSub_song_count = auralWorker.sub2muzak( sub_name=curr_sub, num_posts=user_max_songs, period=user_period_pref ) # print 'userSub_song_count', userSub_song_count user_songs_count += int(userSub_song_count) move_csv(curr_sub) # checking user's active participation and scraping user csv links userLinks = csv2Music() userLinks.muzak() os.chdir("..") # up to all users folder # sending root user popup notifs """ notif_msg = ">> Finished up scraping " + str(len(subs_2_scrape)) +\ " subs for user ~ " + user_name auralNotifs(msg=notif_msg, count=str(user_songs_count)) """ # emailer """
def sub2muzak(self, sub_name='listentothis', num_posts=20, period=2):
    """Scrape a subreddit's top posts for music links and download them.

    Requests the top ``num_posts`` submissions of ``sub_name`` for the
    chosen time window, keeps the ones whose URL contains a known music
    host, writes the (title, url) pairs to ``<sub_name>_<today>_.csv`` in
    the current working directory and passes that file name to
    ``csv2Music().redditMuzak``.

    Args:
        sub_name: Name of the subreddit to scrape.
        num_posts: Maximum number of top submissions to request. Converted
            with ``int()``, so numeric strings are accepted.
        period: Time window selector, converted with ``int()``:
            1 = week, 2 = month, 3 = year, 4 = all time. Any other value
            requests nothing.

    Returns:
        The value returned by ``redditMuzak`` for the written CSV
        (presumably the number of songs downloaded -- confirm against
        ``csv2Music``), or 0 when no music links were found.

    Raises:
        ValueError: if ``num_posts`` or ``period`` cannot be converted to
            an integer.
    """
    music_hosts = ("youtu.be", "soundcloud", "bandcamp", "youtube.com")
    # Name of the listing method on the subreddit object for each period.
    # Looked up lazily so an unknown period never touches the reddit agent.
    listing_by_period = {
        1: 'get_top_from_week',
        2: 'get_top_from_month',
        3: 'get_top_from_year',
        4: 'get_top_from_all',
    }

    num_posts = int(num_posts)
    period = int(period)

    titles = []
    links = []
    listing_name = listing_by_period.get(period)
    if listing_name is not None:
        # Only the weekly listing echoed its matches in the original code;
        # that console output is kept as-is.
        echo_matches = (period == 1)
        subreddit = self.agent.get_subreddit(sub_name)
        fetch_top = getattr(subreddit, listing_name)
        for submission in fetch_top(limit=num_posts):
            url = submission.url
            if not any(host in url for host in music_hosts):
                continue
            if echo_matches:
                print(url)
            titles.append(submission.title.encode('utf-8', errors='ignore'))
            links.append(url.encode('utf-8', errors='ignore'))
        if echo_matches:
            pprint.pprint(links)

    # TODO: figure out overlapping music conundrum.
    # Keyed by title, so submissions sharing a title collapse to one row
    # (the later one wins).
    source = dict(zip(titles, links))
    if not source:
        return 0

    csv_name = sub_name + '_' + str(datetime.date.today()) + '_.csv'
    # Write the file exactly once and close it before the downloader reads
    # it back; an open handle may still hold unflushed rows.
    with open(csv_name, 'wb') as csv_file:
        csv.writer(csv_file).writerows(source.items())

    print('\t\tDownloading links!')
    downloader = csv2Music()
    return downloader.redditMuzak(csv_name)