Esempio n. 1
0
 def get(self):
     """Record a new view-count sample for every flagged video.

     Queries ``VideoData`` for entities whose ``checkMeFlag`` is True. For
     each one, stores a ``VideoViewsData`` entity holding the value returned
     by ``self.getEntryData(video.token)`` together with the time this
     handler started, then clears the video's ``checkMeFlag`` and saves it.
     """
     # Kept as a function-scope import, as in the original code.
     from models import VideoData, VideoViewsData

     # One timestamp for the whole run, so every sample written by this
     # invocation carries the same dateTime.
     now = datetime.datetime.now()

     # Query db for videos which have been flagged.
     # NOTE(review): an older comment here said to change the filter back to
     # True when deploying; it already is True, so that note looks stale.
     videos_to_check = VideoData.gql("WHERE checkMeFlag = True")

     logging.info('Checking %i videos', videos_to_check.count())

     for video in videos_to_check:
         # get the current number of views
         newViewsEntry = self.getEntryData(video.token)

         # NOTE(review): collection_name is normally an argument of a
         # ReferenceProperty declaration, not of a model constructor;
         # confirm it has any effect here before relying on it.
         new_views_data = VideoViewsData(video=video, dateTime=now, views=newViewsEntry, collection_name="views")
         new_views_data.put()

         # Un-flag the video now that a sample has been stored for it.
         video.checkMeFlag = False
         video.put()
Esempio n. 2
0
    def searchThis(self, search):
        """Run ``search.queryText`` on YouTube and store every result found.

        Opens youtube.com with a mechanize browser, submits the query through
        the second form on the page, follows one hard-coded link on the
        results page and parses the response with BeautifulSoup. For every
        matching result ``div`` it stores a ``VideoViewsData`` sample, a
        ``VideoSearchIndex`` entity referencing ``search.key()`` and the
        ``VideoData`` entity itself (keyed by the video token).

        ``search`` must expose ``queryText``, ``lastQuery`` and ``key()``.
        """
        from models import VideoData, VideoViewsData, VideoSearchIndex

        # NOTE(review): lastQuery is only changed in memory; this method
        # never saves `search`, so presumably the caller does -- confirm.
        search.lastQuery = datetime.datetime.now()
        br = gaemechanize.Browser()

        # Browser options
        br.set_handle_equiv(True)
        br.set_handle_gzip(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)

        # User-Agent (this is cheating, ok?)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

        # The site we will navigate into, handling its session
        br.open('http://www.youtube.com')

        # Scrape first page looking for forms (nr=1 is the second form)
        br.select_form(nr=1)

        # Executes query with given word
        br.form['search_query'] = search.queryText
        br.submit()

        # Finds all links on the page
        search_links = list(br.links())

        # Selects by upload rate.
        # HACK: depends on the 17th link of the results page; raises
        # IndexError if the page has fewer links. Needs to be made
        # independent of the page layout.
        br.follow_link(search_links[16])

        html = br.response().read()
        soup = BeautifulSoup(html)

        # Creates video list for results
        search_results = soup.findAll('div', attrs = {'class': "result-item *sr "})

        # Store in DB
        for result in search_results:

            # Scrape once per result and reuse it for both token and JSON.
            video_info = self.scrapeVideoInfo(result)

            # Strip token from youtube url.
            # HACK: fixed slice; assumes the token occupies characters
            # 31..41 of the scraped url -- TODO confirm against the scraper.
            vidtoken = video_info['url'][31:42]

            # Create a new VideoData object with the video token
            new_video = VideoData(key_name=vidtoken)

            # If it doesn't exist already. TODO
            #if VideoData.get(new_video.key()) is None:
            new_video.token = vidtoken
            new_video.json = simplejson.dumps(video_info)

            viewsDate, views = self.scrapeVideoViews(result)
            views_object = VideoViewsData(dateTime=viewsDate, views=views, video=new_video)
            views_object.put()

            new_video_searchlist = VideoSearchIndex(key_name=new_video.token, parent=new_video)
            new_video_searchlist.searchTerms.append(search.key())
            new_video_searchlist.put()

            new_video.alertLevel = "initial"
            new_video.checkMeFlag = False
            new_video.put()