コード例 #1
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
 def post(self):
   """Dispatch incoming (url, user_id) payloads to the fetch or counter queues.

   Parses the queued payload, drops entries from banned users, routes URLs
   whose cached resolution is already known to be a product page straight to
   the counter queue, and enqueues the remaining URLs for fetching.
   """
   import ast

   # SECURITY: the payload was previously parsed with eval(), which executes
   # arbitrary code from the task body. ast.literal_eval only accepts Python
   # literals (lists/dicts/strings/numbers), which is all this payload holds.
   payloads = [Payload(simple_url['url'], simple_url['user_id'])
               for simple_url in ast.literal_eval(self.request.get('data'))]

   # Batch lookup of previously resolved URLs (cache layers only).
   cached_urls = Url.get_by_key_name([payload.url for payload in payloads],
                                     _storage = [LOCAL,MEMCACHE],
                                     _result_type = NAME_DICT)

   # Banned users, cached locally for 10 minutes.
   user_ban_list = Banlist.retrieve(_storage=[LOCAL,MEMCACHE,DATASTORE],
                                       _local_expiration=time_util.minute_expiration(minutes=10)).users

   fetch_targets = [] #Urls that are not in lookup list
   counter_targets = [] #Product urls that were fetched before

   for payload in payloads:
     if payload.user_id in user_ban_list:
       #Don't take banned users' URLs into account
       continue

     #Look for existing cached instance with same short_url
     cached_url = cached_urls[payload.url]
     if cached_url is not None:
       if cached_url.is_product: #cached url points to a valid product page
         counter_targets.append(Payload(cached_url.product_url,
                                        payload.user_id))
     else:
       fetch_targets.append(payload)

   # Empty lists are falsy; only enqueue follow-up tasks when there is work.
   if fetch_targets:
     enqueue_url_fetch(Payload.serialize(fetch_targets))
   if counter_targets:
     enqueue_counter(Payload.serialize(counter_targets))
コード例 #2
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
    def post(self):
        """Resolve fetched URLs into product references and enqueue counts.

        Fetches every distinct short URL from the payload, records each
        resolution as a Url entity (cache only), and queues one counter
        update per URL that parses as a non-banned Amazon product.
        """
        logging.info('UrlFetchWorker started')
        payloads = Payload.deserialize(self.request.get('payload'))
        product_ban_list = Banlist.retrieve(
            _storage=[LOCAL, MEMCACHE, DATASTORE],
            _local_expiration=time_util.minute_expiration(minutes=10)).products

        # Fetch each distinct short URL exactly once.
        result_dict = UrlFetcher.fetch_urls(
            list(set(payload.url for payload in payloads)))

        # One Url entity per payload, keyed by the requested (short) URL.
        urls = [Url(key_name=payload.url,
                    final_url=result_dict[payload.url],
                    user_id=payload.user_id)
                for payload in payloads]

        counter_targets = []
        for url in urls:
            if url.final_url is None:
                continue  # fetch failed; nothing to parse
            try:
                product_url = AmazonURLParser.product_url(url.final_url)

                if product_url in product_ban_list:
                    logging.info(
                        'Mention creation prevented for banned product url: %s'
                        % product_url)
                    continue  #no action for banned product

                url.is_product = True  #No exceptions for product_url => valid product reference
                counter_targets.append(Payload(product_url, url.user_id))
            except ParserException:
                pass  # final_url is not an Amazon product page

        logging.info('UrlFetchWorker finished, counter targets: %s' %
                     counter_targets)
        pdb.put(urls, _storage=[LOCAL,
                                MEMCACHE])  #Urls are stored in cache only

        if len(counter_targets):
            enqueue_counter(Payload.serialize(counter_targets))
コード例 #3
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
 def post(self):
   """Resolve fetched URLs into product references and enqueue counter updates.

   Fetches every distinct short URL in the payload, stores each resolution
   as a Url entity (cache only), and queues a counter update for each URL
   that parses as a non-banned Amazon product page.
   """
   logging.info('UrlFetchWorker started')
   payloads = Payload.deserialize(self.request.get('payload'))
   # Banned product URLs, cached locally for 10 minutes.
   product_ban_list = Banlist.retrieve(_storage=[LOCAL,MEMCACHE,DATASTORE],
                                   _local_expiration=time_util.minute_expiration(minutes=10)).products 
   
   # Deduplicate so each short URL is fetched only once.
   fetch_targets = list(set([payload.url for payload in payloads]))
   result_dict = UrlFetcher.fetch_urls(fetch_targets)
   urls = []
   counter_targets = []
   
   # Build one Url entity per payload, keyed by the requested (short) URL.
   for payload in payloads:
     request_url = payload.url
     final_url = result_dict[request_url]
     user_id = payload.user_id
     
     urls.append(Url(key_name=request_url,
                         final_url=final_url,
                         user_id = user_id))
       
   for url in urls:
     # final_url is None when the fetch failed; skip those silently.
     if url.final_url is not None:
       try:
         product_url = AmazonURLParser.product_url(url.final_url)
         
         if product_url in product_ban_list:
             logging.info('Mention creation prevented for banned product url: %s' %product_url)
             continue #no action for banned product
         
         url.is_product = True #No exceptions for product_url => valid product reference
         counter_targets.append(Payload(product_url,url.user_id))
       except ParserException:
         # Resolved URL is not an Amazon product page; no counter update.
         pass 
   
   logging.info('UrlFetchWorker finished, counter targets: %s' %counter_targets)   
   # NOTE(review): pdb here appears to be the project's datastore helper,
   # not the stdlib debugger — confirm against the file's imports.
   pdb.put(urls, _storage = [LOCAL,MEMCACHE]) #Urls are stored in cache only
   
   if len(counter_targets):
     enqueue_counter(Payload.serialize(counter_targets))
コード例 #4
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
    def post(self):
        """Aggregate queued mention payloads into product and user counters.

        Tallies one hit per payload for the daily/weekly/monthly product
        counters and the daily user counter, then applies the deltas to the
        stored counter entities, creating any counter seen for the first time.
        """
        from collections import defaultdict
        counter_targets = Payload.deserialize(self.request.get('payload'))
        today = time_util.today()

        product_targets = defaultdict(int)
        user_targets = defaultdict(int)

        # Accumulate per-key deltas: each payload counts once per frequency
        # bucket for its product, and once for the user's daily counter.
        for payload in counter_targets:
            for frequency in (DAILY, WEEKLY, MONTHLY):
                key_name = ProductCounter.build_key_name(
                    payload.url, frequency, today)
                product_targets[key_name] += 1
            user_targets[UserCounter.build_key_name(payload.user_id, DAILY,
                                                    today)] += 1

        product_counters = ProductCounter.get_by_key_name(
            product_targets.keys(),
            _storage=[MEMCACHE, DATASTORE],
            _result_type=NAME_DICT)
        user_counters = UserCounter.get_by_key_name(user_targets.keys(),
                                                    _result_type=NAME_DICT)

        # Apply product deltas; a None dict entry raises AttributeError,
        # meaning the counter does not exist yet and must be created.
        for key_name, delta in product_targets.iteritems():
            try:
                product_counters[key_name].count += delta
            except AttributeError:  #Value is None in dict
                product_counters[key_name] = ProductCounter.new(
                    key_name,
                    ProductCounter.frequency_from_key_name(key_name),
                    today,
                    count=delta,
                    _build_key_name=False)

        # Same pattern for user counters (always daily frequency).
        for key_name, delta in user_targets.iteritems():
            try:
                user_counters[key_name].count += delta
            except AttributeError:  #Value is None in dict
                user_counters[key_name] = UserCounter.new(
                    key_name, DAILY, today, count=delta, _build_key_name=False)

        ProductCounter.filtered_update(product_counters.values())
        UserCounter.filtered_update(user_counters.values())
コード例 #5
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
    def post(self):
        """Dispatch incoming (url, user_id) payloads to fetch or counter queues.

        Parses the queued payload, drops entries from banned users, routes
        URLs whose cached resolution is already known to be a product page
        straight to the counter queue, and enqueues the rest for fetching.
        """
        import ast

        # SECURITY: the payload was previously parsed with eval(), which
        # executes arbitrary code from the task body. ast.literal_eval only
        # accepts Python literals (lists/dicts/strings/numbers), which is
        # all this payload contains.
        payloads = [
            Payload(simple_url['url'], simple_url['user_id'])
            for simple_url in ast.literal_eval(self.request.get('data'))
        ]

        # Batch lookup of previously resolved URLs (cache layers only).
        cached_urls = Url.get_by_key_name(
            [payload.url for payload in payloads],
            _storage=[LOCAL, MEMCACHE],
            _result_type=NAME_DICT)

        # Banned users, cached locally for 10 minutes.
        user_ban_list = Banlist.retrieve(
            _storage=[LOCAL, MEMCACHE, DATASTORE],
            _local_expiration=time_util.minute_expiration(minutes=10)).users

        fetch_targets = []  #Urls that are not in lookup list
        counter_targets = []  #Product urls that were fetched before

        for payload in payloads:
            if payload.user_id in user_ban_list:
                #Don't take banned users' URLs into account
                continue

            #Look for existing cached instance with same short_url
            cached_url = cached_urls[payload.url]
            if cached_url is not None:
                if cached_url.is_product:  #cached url points to a valid product page
                    counter_targets.append(
                        Payload(cached_url.product_url, payload.user_id))
            else:
                fetch_targets.append(payload)

        # Empty lists are falsy; only enqueue follow-up tasks when needed.
        if fetch_targets:
            enqueue_url_fetch(Payload.serialize(fetch_targets))
        if counter_targets:
            enqueue_counter(Payload.serialize(counter_targets))
コード例 #6
0
ファイル: taskworker.py プロジェクト: frankk00/TweetHit
  def post(self):
    """Aggregate queued mention payloads into product and user counters.

    Tallies one hit per payload for the daily/weekly/monthly product
    counters and the daily user counter, then applies the deltas to the
    stored counter entities, creating any counter seen for the first time.
    """
    from collections import defaultdict
    payload_string = self.request.get('payload')
    counter_targets = Payload.deserialize(payload_string)
    today = time_util.today()
    
    # Per-key-name deltas accumulated from this batch of payloads.
    product_targets = defaultdict(int)
    user_targets = defaultdict(int)
    
    # Each payload counts once per frequency bucket for its product,
    # and once for the user's daily counter.
    for payload in counter_targets:
      daily_product_key = ProductCounter.build_key_name(payload.url, DAILY, today)
      weekly_product_key = ProductCounter.build_key_name(payload.url, WEEKLY, today)
      monthly_product_key = ProductCounter.build_key_name(payload.url, MONTHLY, today)
      user_key = UserCounter.build_key_name(payload.user_id, DAILY, today)
      product_targets[daily_product_key] += 1
      product_targets[weekly_product_key] += 1
      product_targets[monthly_product_key] += 1
      user_targets[user_key] += 1
        
    # Batch-load existing counters keyed by name; missing keys map to None.
    product_counters = ProductCounter.get_by_key_name(product_targets.keys(),
                                                      _storage = [MEMCACHE,DATASTORE],
                                                      _result_type=NAME_DICT)
    user_counters = UserCounter.get_by_key_name(user_targets.keys(),
                                                _result_type=NAME_DICT)
        
    # Apply deltas; None.count raises AttributeError, signalling that the
    # counter does not exist yet and must be created with the delta.
    for key_name,delta in product_targets.iteritems():
      try:
        product_counters[key_name].count += delta
      except AttributeError: #Value is None in dict
        frequency = ProductCounter.frequency_from_key_name(key_name)
        product_counters[key_name] = ProductCounter.new(key_name, frequency, today,
                                                   count=delta,_build_key_name = False)

    # Same pattern for user counters (always daily frequency).
    for key_name,delta in user_targets.iteritems():  
      try:
        user_counters[key_name].count += delta
      except AttributeError: #Value is None in dict
        user_counters[key_name] = UserCounter.new(key_name, DAILY, today,
                                             count=delta,_build_key_name = False)
                
    ProductCounter.filtered_update(product_counters.values())
    UserCounter.filtered_update(user_counters.values())