Beispiel #1
0
    def list(self):
        """Return every saved search, newest first, annotated for display.

        Two extra attributes are set on each SearchData entity before it is
        added to the result:

        * ``count`` -- the value returned by ``count()`` on the
          VideoSearchIndex query filtered on ``searchTerms`` for that search.
        * ``urlSafeQueryText`` -- the query text with every space replaced
          by ``+``.

        Returns:
            A list of the annotated SearchData entities, in descending
            ``created`` order.
        """
        from models import SearchData, VideoSearchIndex

        annotated = []

        for saved in SearchData.all().order('-created'):
            # One extra count query is issued per saved search.
            matching = VideoSearchIndex.all().filter('searchTerms = ', saved)
            saved.count = matching.count()

            # NOTE(review): str() on a unicode value containing non-ASCII
            # characters raises UnicodeEncodeError under Python 2 -- confirm
            # that queryText is always plain ASCII.
            plainText = str(saved.queryText)
            saved.urlSafeQueryText = plainText.replace(' ', '+')

            annotated.append(saved)

        return annotated
Beispiel #2
0
    def searchThis(self, search):
        """Scrape YouTube for ``search.queryText`` and store each result found.

        The method stamps ``search.lastQuery`` with the current time, submits
        the query through the form at index 1 of the YouTube front page,
        follows one link of the results page, and then, for every result
        ``div`` on the page that comes back, writes a VideoData entity, a
        VideoViewsData record and a VideoSearchIndex entry.

        Args:
            search: the saved search to run.  Its ``queryText`` is submitted,
                its ``key()`` is recorded in the search index, and its
                ``lastQuery`` attribute is overwritten.  ``search`` itself is
                not put() here.

        Returns:
            None.
        """
        from models import VideoData, VideoViewsData, VideoSearchIndex

        search.lastQuery = datetime.datetime.now()
        br = gaemechanize.Browser()

        # Browser options
        br.set_handle_equiv(True)
        br.set_handle_gzip(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)

        # User-Agent (this is cheating, ok?)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

        # The site we will navigate into, handling its session
        br.open('http://www.youtube.com')

        # Fill in and submit the search form (form index 1 on the front page)
        br.select_form(nr=1)
        br.form['search_query'] = search.queryText
        br.submit()

        # Collect every link on the results page
        search_links = list(br.links())

        # HACK: link 16 is meant to select ordering by upload rate; the index
        # is position-dependent and raises IndexError when the page has fewer
        # links.  Needs to be made context independent.
        br.follow_link(search_links[16])

        html = br.response().read()
        soup = BeautifulSoup(html)

        # One div per video in the result listing
        search_results = soup.findAll('div', attrs={'class': "result-item *sr "})

        for result in search_results:

            # Scrape the result once and reuse it for both the token and the
            # stored JSON.
            video_info = self.scrapeVideoInfo(result)

            # Characters 31..41 of the url are used as the video token
            # (presumably the 11-character YouTube id -- depends on the exact
            # url prefix returned by scrapeVideoInfo; confirm).
            vidtoken = video_info['url'][31:42]

            # Keyed by token, so storing the same video again reuses its key
            new_video = VideoData(key_name=vidtoken)

            # If it doesn't exist already. TODO
            #if VideoData.get(new_video.key()) is None:
            new_video.token = vidtoken
            new_video.json = simplejson.dumps(video_info)

            viewsDate, views = self.scrapeVideoViews(result)
            views_object = VideoViewsData(dateTime=viewsDate, views=views, video=new_video)
            views_object.put()

            # NOTE(review): a fresh index entity with the same key_name and
            # parent is put() on every run; this presumably replaces an index
            # stored for this video by an earlier search and drops its
            # searchTerms -- confirm and merge if so.
            new_video_searchlist = VideoSearchIndex(key_name=new_video.token, parent=new_video)
            new_video_searchlist.searchTerms.append(search.key())
            new_video_searchlist.put()

            new_video.alertLevel = "initial"
            new_video.checkMeFlag = False
            new_video.put()
Beispiel #3
0
    def dictionary(self, search_term):
        """Build the display data for every video found by ``search_term``.

        Args:
            search_term: query text used to look up saved SearchData entities
                (exact match on ``queryText``).

        Returns:
            A dict mapping each matching search's ``queryText`` to a list with
            one ``{"info": ..., "data": [...]}`` dict per video.  ``"info"``
            is the video's stored JSON decoded into Python objects, and
            ``"data"`` holds one dict per views record, in ``dateTime`` order,
            with the keys ``"datetime"``, ``"views"``, ``"speed"`` and
            ``"acceleration"``.  The first record of each video has speed and
            acceleration 0.0.  Returns ``None`` when no saved search matches.
        """
        from models import VideoData, VideoSearchIndex, SearchData, VideoViewsData
        import json
        import logging
        from main import DATE_STRING_FORMAT

        displayDictionary = {}

        # query to find all the saved searches that match the string
        searchesQuery = SearchData.all().filter('queryText = ', search_term) # order by priority TODO
        logging.info('number of searches for %s: %i (should only ever be one)', search_term, searchesQuery.count())

        # nothing saved for this term: callers get None, not an empty dict
        if not searchesQuery.get():
            return None

        for search in searchesQuery:

            # query to find all the videos that were found using this search term
            videosBySearch = VideoSearchIndex.all().filter('searchTerms = ', search)
            logging.info('number of videos for this search: %i', videosBySearch.count())

            videoList = []

            for videoSearchIndex in videosBySearch:
                # the index entity's parent is the video it belongs to
                video = videoSearchIndex.parent()

                dataList = []
                previousRecord = None
                previousSpeed = 0.

                # date-stamped views records for this video, oldest first
                for record in video.views.order('dateTime'):

                    # floats by default; stay 0.0 for the first record
                    viewsSpeed = 0.
                    viewsAcceleration = 0.

                    # speed/acceleration need an earlier record to compare with
                    if previousRecord is not None:
                        viewsSpeed, viewsAcceleration = CalculateViewData().viewData(record, previousRecord, previousSpeed)

                    # remember this record for the next time around
                    previousRecord = record
                    previousSpeed = viewsSpeed

                    dataList.append({"datetime": record.dateTime.strftime(DATE_STRING_FORMAT), "views": record.views, "speed": viewsSpeed, "acceleration": viewsAcceleration})

                # Decode with json.loads rather than eval: the stored text is
                # JSON built from scraped pages, so eval would execute
                # untrusted input and cannot parse true/false/null.
                videoInfo = json.loads(video.json)

                videoList.append({"info": videoInfo, "data": dataList})

            displayDictionary[search.queryText] = videoList

        return displayDictionary