Code example #1
File: taskworker.py  Project: frankk00/TweetHit
 def post(self):
   # The posted 'data' parameter is a repr'd list of dicts; eval() is
   # tolerable here only on the assumption that this handler is reachable
   # solely by the internal task queue, never by untrusted clients.
   payloads = [Payload(simple_url['url'], simple_url['user_id'])
               for simple_url in eval(self.request.get('data'))]

   cached_urls = Url.get_by_key_name([payload.url for payload in payloads],
                                     _storage=[LOCAL, MEMCACHE],
                                     _result_type=NAME_DICT)

   user_ban_list = Banlist.retrieve(
       _storage=[LOCAL, MEMCACHE, DATASTORE],
       _local_expiration=time_util.minute_expiration(minutes=10)).users

   fetch_targets = []    # URLs that are not in the lookup list
   counter_targets = []  # Product URLs that were fetched before

   for payload in payloads:
     if payload.user_id in user_ban_list:
       # Don't take banned users' URLs into account
       continue

     # Look for an existing cached instance with the same short URL
     cached_url = cached_urls[payload.url]
     if cached_url is not None:
       if cached_url.is_product:  # Cached URL points to a valid product page
         counter_targets.append(Payload(cached_url.product_url,
                                        payload.user_id))
     else:
       fetch_targets.append(payload)

   if fetch_targets:
     urlfetch_payload = Payload.serialize(fetch_targets)
     enqueue_url_fetch(urlfetch_payload)
   if counter_targets:
     counter_payload = Payload.serialize(counter_targets)
     enqueue_counter(counter_payload)
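Payload itself is defined elsewhere in the project. A minimal sketch of what it might look like, assuming it only carries a url/user_id pair and round-trips through the task queue as a string (the serialize/deserialize names come from the snippets; the JSON encoding is an assumption):

import json

class Payload(object):
  """Hypothetical url/user_id pair passed between task queue workers."""

  def __init__(self, url, user_id):
    self.url = url
    self.user_id = user_id

  @staticmethod
  def serialize(payloads):
    # Encode a list of payloads as a single task-queue-safe string.
    return json.dumps([{'url': p.url, 'user_id': p.user_id}
                       for p in payloads])

  @staticmethod
  def deserialize(data):
    # Inverse of serialize(); rebuilds the Payload instances.
    return [Payload(d['url'], d['user_id']) for d in json.loads(data)]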
Code example #2
 def get(self):
   date = yesterday()
   enqueue_renderer_update(WEEKLY, date)
   enqueue_renderer_update(MONTHLY, date)
   enqueue_cleanup(UserCounter.kind(), DAILY, date)
   enqueue_cleanup(ProductCounter.kind(), DAILY, date)
   enqueue_cleanup(ProductRenderer.kind(), DAILY, date)
   enqueue_cleanup(ProductRenderer.kind(), WEEKLY, date, countdown=3600)
   enqueue_cleanup(ProductRenderer.kind(), MONTHLY, date, countdown=3600)

   # Delete the banlist so it is refreshed with the latest banned entities
   banlist = Banlist.retrieve()
   banlist.delete()
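The enqueue_* helpers are not shown in these snippets. On App Engine they would typically be thin wrappers around taskqueue.add; a rough sketch of enqueue_cleanup under that assumption (the handler path, queue name, and parameter names are all hypothetical):

from google.appengine.api import taskqueue

def enqueue_cleanup(kind, frequency, date, countdown=0):
  # Queue a cleanup task; countdown delays execution by that many seconds,
  # matching the one-hour offsets used for the WEEKLY and MONTHLY runs above.
  taskqueue.add(url='/tasks/cleanup',    # hypothetical worker path
                queue_name='cleanup',    # hypothetical queue name
                countdown=countdown,
                params={'kind': kind,
                        'frequency': frequency,
                        'date': str(date)})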
Code example #3
 def get(self):
   USER_SPAM_COUNTERS.bind(spam_count_limit=SPAM_COUNT_LIMIT)

   user_counters = USER_SPAM_COUNTERS.fetch(100)
   users = []
   if len(user_counters):
     for counter in user_counters:
       counter.is_banned = True
       users.append(TwitterUser(key_name=counter.key_root))

     targets = [user.key().name() for user in users]
     ban_list = Banlist.retrieve()
     ban_list.users += targets
     ban_list.put(_storage=[MEMCACHE, DATASTORE])
     #TwitterUser.update_banlist([user.key().name() for user in users])
     logging.info('Banning users with keys: %s' % targets)
     pdb.put(user_counters + users)
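Banlist.retrieve() and put(_storage=...) point at a singleton entity served through the project's caching layer. Ignoring the multi-storage arguments, a plain google.appengine.ext.db sketch might look like this (the key name and property defaults are assumptions):

from google.appengine.ext import db

class Banlist(db.Model):
  # Hypothetical singleton holding banned Twitter user ids and product URLs.
  users = db.StringListProperty(default=[])
  products = db.StringListProperty(default=[])

  @classmethod
  def retrieve(cls, **storage_args):
    # One well-known entity; get_or_insert is idempotent, so concurrent
    # callers all end up with the same banlist.
    return cls.get_or_insert('banlist')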
Code example #4
File: taskworker.py  Project: frankk00/TweetHit
    def post(self):
        logging.info('UrlFetchWorker started')
        payloads = Payload.deserialize(self.request.get('payload'))
        product_ban_list = Banlist.retrieve(
            _storage=[LOCAL, MEMCACHE, DATASTORE],
            _local_expiration=time_util.minute_expiration(minutes=10)).products

        fetch_targets = list(set([payload.url for payload in payloads]))
        result_dict = UrlFetcher.fetch_urls(fetch_targets)
        urls = []
        counter_targets = []

        for payload in payloads:
            request_url = payload.url
            final_url = result_dict[request_url]
            user_id = payload.user_id

            urls.append(
                Url(key_name=request_url, final_url=final_url,
                    user_id=user_id))

        for url in urls:
            if url.final_url is not None:
                try:
                    product_url = AmazonURLParser.product_url(url.final_url)

                    if product_url in product_ban_list:
                        logging.info(
                            'Mention creation prevented for banned product url: %s'
                            % product_url)
                        continue  #no action for banned product

                    url.is_product = True  #No exceptions for product_url => valid product reference
                    counter_targets.append(Payload(product_url, url.user_id))
                except ParserException:
                    pass

        logging.info('UrlFetchWorker finished, counter targets: %s' %
                     counter_targets)
        pdb.put(urls, _storage=[LOCAL,
                                MEMCACHE])  #Urls are stored in cache only

        if len(counter_targets):
            enqueue_counter(Payload.serialize(counter_targets))
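AmazonURLParser.product_url() evidently canonicalizes a resolved URL into a product URL and raises ParserException for anything that is not a product page. A simplified sketch that keys off the ASIN in /dp/ or /gp/product/ paths (the regex and the canonical URL format are assumptions, not the project's actual parser):

import re

class ParserException(Exception):
  pass

class AmazonURLParser(object):
  # Matches the 10-character ASIN in /dp/<ASIN> or /gp/product/<ASIN> paths.
  _ASIN_RE = re.compile(
      r'amazon\.[^/]+/(?:[^/]+/)?(?:dp|gp/product)/([A-Z0-9]{10})')

  @classmethod
  def product_url(cls, final_url):
    match = cls._ASIN_RE.search(final_url)
    if match is None:
      raise ParserException('Not a product page: %s' % final_url)
    return 'http://www.amazon.com/dp/%s' % match.group(1)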
Code example #5
 def get(self):
   renderers = PRODUCT_RENDERER_BAN_TARGETS.fetch(100)
   products = [Product(key_name=renderer.key_root) for renderer in renderers]
   product_counters = []
   for renderer in renderers:
     product_counters.append(ProductCounter(key_name=renderer.key().name(),
                                            is_banned=True,
                                            day=renderer.day,
                                            week=renderer.week,
                                            month=renderer.month,
                                            year=renderer.year))
     renderer.is_ban_synched = True

   targets = [product.key().name() for product in products]
   ban_list = Banlist.retrieve()
   ban_list.products += targets
   ban_list.put(_storage=[MEMCACHE, DATASTORE])
   pdb.put(products + renderers + product_counters,
           _storage=[MEMCACHE, DATASTORE])
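pdb.put() with a _storage list shows up throughout these snippets and looks like a write-through wrapper over local memory, memcache, and the datastore. The project's real module presumably also handles expirations and read paths; a minimal sketch of just the put dispatch, assuming the storage constants are plain strings:

from google.appengine.api import memcache
from google.appengine.ext import db

LOCAL, MEMCACHE, DATASTORE = 'local', 'memcache', 'datastore'
_local_cache = {}  # process-local cache, cleared on instance restart

def put(models, _storage=(MEMCACHE, DATASTORE)):
  # Write the same entities to each requested layer, keyed by key name.
  if LOCAL in _storage:
    for model in models:
      _local_cache[model.key().name()] = model
  if MEMCACHE in _storage:
    # Real code might serialize via db.model_to_protobuf for efficiency.
    memcache.set_multi(dict((m.key().name(), m) for m in models))
  if DATASTORE in _storage:
    db.put(models)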