def list(self):
    """Return every saved search, newest first, annotated for display.

    Two extra attributes are set on each ``SearchData`` instance (this
    method does not call ``put()``, so nothing is written back here):

    * ``count`` -- number of ``VideoSearchIndex`` entities whose
      ``searchTerms`` contains the search.
    * ``urlSafeQueryText`` -- the query text with spaces replaced by ``+``.

    Returns:
        A list of the annotated ``SearchData`` entities, ordered by
        ``created`` descending.
    """
    from models import SearchData, VideoSearchIndex

    resultsList = []
    # Go through each search in the database, newest first.
    for search in SearchData.all().order('-created'):
        # Filter the index entities by search; one count query per search.
        videosBySearch = VideoSearchIndex.all().filter('searchTerms = ', search)
        search.count = videosBySearch.count()
        # Call replace() on the text itself instead of wrapping it in str():
        # under Python 2, str() raises UnicodeEncodeError as soon as a query
        # contains a non-ASCII character, which broke the whole listing.
        # NOTE(review): only spaces are escaped; other reserved URL
        # characters pass through unchanged, exactly as before.
        search.urlSafeQueryText = search.queryText.replace(' ', '+')
        resultsList.append(search)
    return resultsList
def searchThis(self, search):
    """Run ``search.queryText`` on YouTube and store every scraped result.

    Steps:

    1. Stamp ``search.lastQuery`` with the current time (this method does
       not call ``put()`` on ``search``).
    2. Open youtube.com with a browser-like user agent, submit the query
       through the page's form at index 1 and follow a fixed link of the
       result page.
    3. For each ``div`` with class ``"result-item *sr "`` store a
       ``VideoViewsData`` sample, a ``VideoSearchIndex`` entry linking the
       video to ``search``, and the ``VideoData`` entity itself.

    Args:
        search: the saved search entity; must expose ``queryText`` and
            ``key()`` and accept a ``lastQuery`` attribute.

    Raises:
        IndexError: if the result page exposes fewer than 17 links (see
            the HACK note below).
    """
    from models import VideoData, VideoViewsData, VideoSearchIndex

    search.lastQuery = datetime.datetime.now()

    br = gaemechanize.Browser()
    # Browser options
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    # User-Agent (this is cheating, ok?)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    # The site we will navigate into, handling its session
    br.open('http://www.youtube.com')
    # Select the form at index 1 and execute the query with the given words
    br.select_form(nr=1)
    br.form['search_query'] = search.queryText
    br.submit()

    # HACK: "sort by upload rate" is picked by its position among the page
    # links; this needs to become context independent.
    search_links = list(br.links())
    br.follow_link(search_links[16])

    soup = BeautifulSoup(br.response().read())
    # Creates the video list for the results
    search_results = soup.findAll('div', attrs={'class': "result-item *sr "})

    # Store in DB
    for result in search_results:
        # Scrape once per result and reuse it for both the token and the
        # stored JSON.
        videoInfo = self.scrapeVideoInfo(result)
        # Strip the token from the youtube url (fixed character offsets).
        vidtoken = videoInfo['url'][31:42]

        # Create a VideoData object keyed by the video token.
        # TODO: an already stored VideoData with this key is overwritten.
        new_video = VideoData(key_name=vidtoken)
        new_video.token = vidtoken
        new_video.json = simplejson.dumps(videoInfo)

        viewsDate, views = self.scrapeVideoViews(result)
        views_object = VideoViewsData(dateTime=viewsDate, views=views, video=new_video)
        views_object.put()

        # Reuse the existing index entity when there is one: writing a
        # fresh VideoSearchIndex under the same key would drop the search
        # terms that earlier searches recorded for this video.
        new_video_searchlist = VideoSearchIndex.get_by_key_name(vidtoken, parent=new_video)
        if new_video_searchlist is None:
            new_video_searchlist = VideoSearchIndex(key_name=vidtoken, parent=new_video)
        search_key = search.key()
        if search_key not in new_video_searchlist.searchTerms:
            new_video_searchlist.searchTerms.append(search_key)
        new_video_searchlist.put()

        new_video.alertLevel = "initial"
        new_video.checkMeFlag = False
        new_video.put()
def dictionary(self, search_term):
    """Build the display data for every saved search matching ``search_term``.

    Args:
        search_term: text compared for equality against
            ``SearchData.queryText``.

    Returns:
        A dict mapping each matching search's ``queryText`` to a list with
        one entry per video found by that search::

            {"info": <decoded video.json>,
             "data": [{"datetime": ..., "views": ...,
                       "speed": ..., "acceleration": ...}, ...]}

        ``data`` is ordered by ``dateTime``; the first sample of a video
        always has speed and acceleration ``0.`` because there is nothing
        to compare it with. The dict is empty when no saved search matches.
    """
    from models import SearchData, VideoSearchIndex
    import logging
    from main import DATE_STRING_FORMAT

    displayDictionary = {}

    # Query to find all the saved searches that match the string.
    searchesQuery = SearchData.all().filter('queryText = ', search_term)
    # order by priority TODO
    logging.info('number of searches for %s: %i (should only ever be one)', search_term, searchesQuery.count())

    # Iterating an empty query is a no-op, so no separate existence check
    # (and no extra datastore round trip) is needed.
    for search in searchesQuery:
        # Query to find all the videos that were found using this search.
        videosBySearch = VideoSearchIndex.all().filter('searchTerms = ', search)
        logging.info('number of videos for this search: %i', videosBySearch.count())

        videoList = []
        for videoSearchIndex in videosBySearch:
            video = videoSearchIndex.parent()

            # Date-stamped views records for this video, oldest first.
            dataList = []
            previousRecord = None
            previousSpeed = 0.
            for record in video.views.order('dateTime'):
                # Declared as floats up front; they stay 0. for the first
                # record, where speed/acceleration cannot be calculated.
                viewsSpeed = 0.
                viewsAcceleration = 0.
                if previousRecord is not None:
                    viewsSpeed, viewsAcceleration = CalculateViewData().viewData(record, previousRecord, previousSpeed)
                # Remember this record for the next time around.
                previousRecord = record
                previousSpeed = viewsSpeed
                dataList.append({
                    "datetime": record.dateTime.strftime(DATE_STRING_FORMAT),
                    "views": record.views,
                    "speed": viewsSpeed,
                    "acceleration": viewsAcceleration,
                })

            # SECURITY: video.json holds text scraped from a third-party
            # page and serialised with simplejson.dumps. It used to be
            # decoded with eval(), which executes that text as Python and
            # also fails on the JSON literals true/false/null. Decode it
            # with the matching JSON parser instead.
            videoInfo = simplejson.loads(video.json)
            videoList.append({"info": videoInfo, "data": dataList})

        displayDictionary[search.queryText] = videoList

    return displayDictionary