Example #1
0
 def get(self):
     """Run a scrape for the ``search`` request parameter and echo its URL.

     Looks up the SearchData entity whose ``queryText`` equals the search
     term, creating and storing a new one when none exists. The entity is
     then handed to ``ScrapePage().searchThis`` and the string
     ``HOST + "search?search=<term>"`` (spaces replaced by ``+``) is
     written to the response.

     The original author's note says the scrape retrieves the 20 most
     recent YouTube videos for the term and stores them in the datastore
     using Mechanize and Beautiful Soup; that work is not visible in this
     method -- presumably it lives in ScrapePage.

     Resource usage:

     /tasks/scrape_page?search=term
     """
     from models import SearchData

     search_term = self.request.get("search")

     # Keys-only query: the result (if any) is used directly as the key below.
     existing_search = db.GqlQuery(
         "SELECT __key__ FROM SearchData WHERE queryText = :1", search_term
     ).get()
     if existing_search is None:
         logging.info("No existing search_term matches: %s", search_term)
         new_search = SearchData()
         new_search.queryText = search_term
         new_search.put()
         search_query_key = new_search.key()
     else:
         logging.info("Found existing search_term: %s", existing_search)
         search_query_key = existing_search

     # Fetch by key through the class; no throwaway instance is needed.
     ScrapePage().searchThis(SearchData.get(search_query_key))
     self.response.out.write(HOST + "search?search=" + search_term.replace(" ", "+"))
Example #2
0
 def get(self):
     """Re-run the scrape for every SearchData entity whose ``active`` is True.

     Logs how many active searches the query reports, then passes each
     one to a fresh ``ScrapePage().searchThis`` and saves it afterwards.
     """
     from models import SearchData

     active_query = SearchData.all().filter("active =", True)
     logging.info("No. of active searches %i", active_query.count())

     for active_search in active_query:
         scraper = ScrapePage()
         scraper.searchThis(active_search)
         # Persist the entity after the scrape has been given the chance
         # to touch it.
         active_search.put()
Example #3
0
	def get(self):
		"""Write the number of distinct ``Sid`` values among all SearchData entities."""
		# A set de-duplicates in a single pass instead of rescanning a list
		# for every entity. Assumes Sid values are hashable (e.g. strings).
		distinct_sids = set(sd.Sid for sd in SearchData.all())
		self.response.out.write(len(distinct_sids))
Example #4
0
    def list(self):
        """Return every saved search, newest first, annotated for display.

        Each SearchData entity in the returned list carries two extra
        attributes set here:

        * ``count`` -- number of VideoSearchIndex entities whose
          ``searchTerms`` match the search.
        * ``urlSafeQueryText`` -- the query text with spaces replaced by
          ``+``.
        """
        from models import SearchData, VideoSearchIndex

        annotated = []
        for saved_search in SearchData.all().order('-created'):
            # One count query per saved search.
            matching_videos = VideoSearchIndex.all().filter('searchTerms = ', saved_search)
            saved_search.count = matching_videos.count()

            query_text = str(saved_search.queryText)
            saved_search.urlSafeQueryText = query_text.replace(' ', '+')

            annotated.append(saved_search)

        return annotated
Example #5
0
    def dictionary(self, search_term):
        """Build the view-history dictionary for a saved search term.

        Args:
            search_term: text matched exactly against ``SearchData.queryText``.

        Returns:
            A dict mapping each matching search's ``queryText`` to a list
            with one entry per video found through that search. Each entry
            is ``{"info": <evaluated video.json>, "data": [...]}`` where
            ``data`` holds one dict per views record (ordered by
            ``dateTime``) with the keys ``datetime``, ``views``, ``speed``
            and ``acceleration``.

            Returns None when no saved search matches ``search_term``.
        """
        from models import VideoSearchIndex, SearchData
        import logging
        from main import DATE_STRING_FORMAT

        # query to find all the saved searches that match the string
        searchesQuery = SearchData.all().filter('queryText = ', search_term)  # order by priority TODO
        logging.info('number of searches for %s: %i (should only ever be one)', search_term, searchesQuery.count())

        # Nothing saved for this term: callers get None, not an empty dict.
        if not searchesQuery.get():
            return None

        displayDictionary = {}

        for search in searchesQuery:
            # all the videos that were found using this search term
            videosBySearch = VideoSearchIndex.all().filter('searchTerms = ', search)
            logging.info('number of videos for this search: %i', videosBySearch.count())

            videoList = []

            for videoSearchIndex in videosBySearch:
                video = videoSearchIndex.parent()

                dataList = []
                previousRecord = None
                previousSpeed = 0.

                # date-stamped views records for this video, oldest first
                for record in video.views.order('dateTime'):
                    # Defaults are floats; the first record has no
                    # predecessor, so its speed/acceleration stay at zero.
                    viewsSpeed = 0.
                    viewsAcceleration = 0.

                    if previousRecord is not None:
                        viewsSpeed, viewsAcceleration = CalculateViewData().viewData(record, previousRecord, previousSpeed)

                    # remember this record for the next iteration
                    previousRecord = record
                    previousSpeed = viewsSpeed

                    dataList.append({
                        "datetime": record.dateTime.strftime(DATE_STRING_FORMAT),
                        "views": record.views,
                        "speed": viewsSpeed,
                        "acceleration": viewsAcceleration,
                    })

                # NOTE(review): eval() executes whatever expression is stored
                # in video.json. If that text can originate from scraped or
                # otherwise untrusted content this is a code-execution risk;
                # confirm the stored format and switch to a real parser.
                videoInfo = eval(video.json)

                videoList.append({"info": videoInfo, "data": dataList})

            displayDictionary[search.queryText] = videoList

        return displayDictionary
Example #6
0
def banSD(sid):
	"""Delete every SearchData entity whose ``Sid`` equals ``sid``."""
	matching = SearchData.all().filter("Sid =", sid)
	for entry in matching.run():
		db.delete(entry)
Example #7
0
        def post(self):
            """Rebuild the SearchData index from the OfertaPalabra keywords.

            Deletes every existing SearchData entity, then stores one new
            SearchData (Kind 'Oferta', Field 'OfertaPalabra') for each
            OfertaPalabra whose ``IdOft`` matches an Oferta. The keyword is
            stored lower-cased in ``Value``.

            Only the 'OfertaPalabra' field is regenerated here; entries for
            the 'Oferta' and 'Descripcion' fields are not recreated by this
            method.
            """
            for sd in SearchData.all():
                db.delete(sd)

            for oferta in Oferta.all():
                # Same for every keyword of this offer, so compute it once.
                sid = str(oferta.key())
                for palabra in OfertaPalabra.all().filter("IdOft =", oferta.IdOft):
                    newsd = SearchData()
                    newsd.Enlinea = oferta.Enlinea
                    # H is a module-level hour offset -- presumably a
                    # timezone correction; confirm against its definition.
                    newsd.FechaHora = datetime.now() - timedelta(hours=H)
                    newsd.Field = 'OfertaPalabra'
                    newsd.IdCat = oferta.IdCat
                    newsd.Kind = 'Oferta'
                    newsd.Sid = sid
                    newsd.Value = palabra.Palabra.lower()
                    newsd.put()
Example #8
0
	def token(self):
		"""Refresh SearchData entries for recently added or modified offers.

		Request parameters:
			token: must equal the expected secret; otherwise an error is
				logged and nothing else happens.
			minutes, hours, days: optional integers giving how far back to
				look for ChangeControl records. A missing value counts as
				0; if any supplied value is not an integer the window
				falls back to 30 minutes.

		For every ChangeControl of Kind 'Oferta' with Status 'A' or 'M'
		inside the window, each Oferta whose ``IdOft`` equals the record's
		``Id`` is re-indexed:

		* Status 'M': existing SearchData entities for that offer (matched
		  on ``Sid``) are deleted first.
		* One SearchData is stored per word longer than three characters
		  taken from ``Descripcion``, from ``Oferta`` (the name) and from
		  the offer's OfertaPalabra keywords, lower-cased in ``Value``.

		The totals added and removed are logged at the end.
		"""
		token = self.request.get('token')
		# NOTE(review): the secret is hard-coded in source and compared with
		# a plain equality test -- consider moving it to configuration.
		# Comparing directly (no str() conversion) avoids an encoding error
		# when a non-ASCII token is supplied on Python 2.
		if token != 'ZWJmbWV4LXB1YnIeCxISX0FoQWRtaW5Yc3JmVG9rZW5fIgZfWFNSRl8M':
			logging.error('Wrong token given.')
			return

		try:
			gminutes = int(self.request.get('minutes') or 0)
			ghours = int(self.request.get('hours') or 0)
			gdays = int(self.request.get('days') or 0)
		except ValueError:
			# Any unparsable value resets the whole window to 30 minutes.
			gminutes, ghours, gdays = 30, 0, 0
		since = datetime.now() - timedelta(days=gdays, hours=ghours, minutes=gminutes)

		# NOTE(review): the content type is declared as JSON but this method
		# writes no response body.
		self.response.headers['Content-Type'] = 'application/json'

		# The query already restricts Status to 'A'/'M', so no further
		# status exclusion is needed inside the loop.
		changecontrol = ChangeControl.all().filter("FechaHora >=", since).filter("Kind =", 'Oferta').filter("Status IN", ["A", "M"])
		nbadded = 0
		nbremoved = 0
		for cc in changecontrol:
			for oferta in Oferta.all().filter("IdOft =", cc.Id):
				sid = str(oferta.key())

				if cc.Status == 'M':
					# Drop the stale index entries before re-creating them.
					for sd in SearchData.all().filter("Sid =", sid):
						db.delete(sd)
						nbremoved += 1

				desc = oferta.Descripcion.replace('\n', ' ').replace('\r', ' ').replace('.', ' ').replace(',', ' ').split(' ')
				nombre = oferta.Oferta.replace('.', ' ').replace(',', ' ').split(' ')
				# Iterate the keyword entities themselves; the query object
				# has no Palabra attribute, each result entity does.
				palabraclave = OfertaPalabra.all().filter("IdOft =", oferta.IdOft)

				# (Field, word) pairs in the order they are stored.
				entries = [('Descripcion', palabra) for palabra in desc if len(palabra) > 3]
				entries += [('Oferta', palabra) for palabra in nombre if len(palabra) > 3]
				entries += [('OfertaPalabra', clave.Palabra) for clave in palabraclave if len(clave.Palabra) > 3]

				for field, palabra in entries:
					newsd = SearchData()
					newsd.Enlinea = oferta.Enlinea
					# H is a module-level hour offset -- presumably a
					# timezone correction; confirm against its definition.
					newsd.FechaHora = datetime.now() - timedelta(hours=H)
					newsd.Field = field
					newsd.IdCat = oferta.IdCat
					newsd.Kind = 'Oferta'
					newsd.Sid = sid
					newsd.Value = palabra.lower()
					newsd.put()
					nbadded += 1

		logging.info("Finished updating. Added: %s. Removed: %s.", str(nbadded), str(nbremoved))