def post(self):
    payloads = [Payload(simple_url['url'], simple_url['user_id'])
                for simple_url in eval(self.request.get('data'))]
    cached_urls = Url.get_by_key_name([payload.url for payload in payloads],
                                      _storage=[LOCAL, MEMCACHE],
                                      _result_type=NAME_DICT)
    user_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).users

    fetch_targets = []    # URLs that are not in the lookup list
    counter_targets = []  # Product URLs that were fetched before

    for payload in payloads:
        if payload.user_id in user_ban_list:
            # Don't take banned users' URLs into account
            continue
        # Look for an existing cached instance with the same short URL
        cached_url = cached_urls[payload.url]
        if cached_url is not None:
            if cached_url.is_product:
                # Cached URL points to a valid product page
                counter_targets.append(Payload(cached_url.product_url,
                                               payload.user_id))
        else:
            fetch_targets.append(payload)

    if len(fetch_targets):
        urlfetch_payload = Payload.serialize(fetch_targets)
        enqueue_url_fetch(urlfetch_payload)
    if len(counter_targets):
        counter_payload = Payload.serialize(counter_targets)
        enqueue_counter(counter_payload)
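# Note: eval() on the task payload executes arbitrary Python, which is risky
# even for internally-enqueued tasks. A minimal sketch of a safer variant,
# assuming the enqueuing side were changed to emit JSON (hypothetical helper;
# the original producers would need to switch from repr() to json.dumps()):
import json

def parse_url_payloads(raw_data):
    """Parse the task payload into Payload objects without eval()."""
    return [Payload(item['url'], item['user_id'])
            for item in json.loads(raw_data)]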
def get(self):
    date = yesterday()
    enqueue_renderer_update(WEEKLY, date)
    enqueue_renderer_update(MONTHLY, date)
    enqueue_cleanup(UserCounter.kind(), DAILY, date)
    enqueue_cleanup(ProductCounter.kind(), DAILY, date)
    enqueue_cleanup(ProductRenderer.kind(), DAILY, date)
    enqueue_cleanup(ProductRenderer.kind(), WEEKLY, date, countdown=3600)
    enqueue_cleanup(ProductRenderer.kind(), MONTHLY, date, countdown=3600)
    # Delete the banlist so it is refreshed with the latest banned entities
    banlist = Banlist.retrieve()
    banlist.delete()
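# yesterday() is not shown in these excerpts; a minimal sketch of such a
# helper, assuming it returns a plain date object for the previous UTC day:
import datetime

def yesterday():
    return datetime.datetime.utcnow().date() - datetime.timedelta(days=1)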
def get(self):
    USER_SPAM_COUNTERS.bind(spam_count_limit=SPAM_COUNT_LIMIT)
    user_counters = USER_SPAM_COUNTERS.fetch(100)
    users = []
    for counter in user_counters:  # no-op when the fetch comes back empty
        counter.is_banned = True
        users.append(TwitterUser(key_name=counter.key_root))

    targets = [user.key().name() for user in users]
    ban_list = Banlist.retrieve()
    ban_list.users += targets
    ban_list.put(_storage=[MEMCACHE, DATASTORE])
    #TwitterUser.update_banlist([user.key().name() for user in users])
    logging.info('Banning users with keys: %s' % targets)
    pdb.put(user_counters + users)
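# Banlist.retrieve() / put(_storage=...) come from the app's layered storage
# helpers, which are not shown here. A rough sketch of the underlying
# memcache-then-datastore fallback pattern on App Engine (illustrative only;
# SimpleBanlist is a hypothetical stand-in, not the project's actual model):
from google.appengine.api import memcache
from google.appengine.ext import db

class SimpleBanlist(db.Model):
    users = db.StringListProperty()
    products = db.StringListProperty()

    @classmethod
    def retrieve(cls, cache_key='banlist'):
        banlist = memcache.get(cache_key)           # fast path: memcache
        if banlist is None:
            banlist = cls.get_or_insert(cache_key)  # slow path: datastore
            memcache.set(cache_key, banlist)        # repopulate the cache
        return banlist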
def post(self):
    logging.info('UrlFetchWorker started')
    payloads = Payload.deserialize(self.request.get('payload'))
    product_ban_list = Banlist.retrieve(
        _storage=[LOCAL, MEMCACHE, DATASTORE],
        _local_expiration=time_util.minute_expiration(minutes=10)).products
    fetch_targets = list(set([payload.url for payload in payloads]))
    result_dict = UrlFetcher.fetch_urls(fetch_targets)
    urls = []
    counter_targets = []

    for payload in payloads:
        request_url = payload.url
        final_url = result_dict[request_url]
        user_id = payload.user_id
        urls.append(Url(key_name=request_url,
                        final_url=final_url,
                        user_id=user_id))

    for url in urls:
        if url.final_url is not None:
            try:
                product_url = AmazonURLParser.product_url(url.final_url)
                if product_url in product_ban_list:
                    logging.info('Mention creation prevented for banned '
                                 'product url: %s' % product_url)
                    continue  # No action for a banned product
                # No exception from product_url => valid product reference
                url.is_product = True
                counter_targets.append(Payload(product_url, url.user_id))
            except ParserException:
                pass

    logging.info('UrlFetchWorker finished, counter targets: %s'
                 % counter_targets)
    pdb.put(urls, _storage=[LOCAL, MEMCACHE])  # URLs are stored in cache only
    if len(counter_targets):
        enqueue_counter(Payload.serialize(counter_targets))
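# UrlFetcher.fetch_urls() maps each shortened URL to the final URL it
# redirects to. A minimal sketch of one way to resolve a single URL with App
# Engine's urlfetch, following redirects by hand so the final location is
# observable (illustrative; the real worker likely batches with async RPCs):
from google.appengine.api import urlfetch

def resolve_final_url(url, max_hops=5):
    """Follow redirects manually; return the final URL, or None on failure."""
    for _ in range(max_hops):
        try:
            response = urlfetch.fetch(url, method=urlfetch.HEAD,
                                      follow_redirects=False)
        except urlfetch.Error:
            return None
        location = response.headers.get('Location')
        if response.status_code in (301, 302, 303, 307) and location:
            url = location  # keep walking the redirect chain
        else:
            return url
    return None  # gave up after too many hops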
def get(self):
    renderers = PRODUCT_RENDERER_BAN_TARGETS.fetch(100)
    products = [Product(key_name=renderer.key_root) for renderer in renderers]
    product_counters = []
    for renderer in renderers:
        product_counters.append(ProductCounter(key_name=renderer.key().name(),
                                               is_banned=True,
                                               day=renderer.day,
                                               week=renderer.week,
                                               month=renderer.month,
                                               year=renderer.year))
        renderer.is_ban_synched = True

    targets = [product.key().name() for product in products]
    ban_list = Banlist.retrieve()
    ban_list.products += targets
    ban_list.put(_storage=[MEMCACHE, DATASTORE])
    pdb.put(products + renderers + product_counters,
            _storage=[MEMCACHE, DATASTORE])
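# AmazonURLParser.product_url() (used by the fetch worker above) canonicalizes
# an Amazon link and raises ParserException for non-product pages. A rough
# sketch under the assumption that product pages are identified by a 10-char
# ASIN in a /dp/ or /gp/product/ path segment (illustrative; the real parser
# may handle more URL shapes and marketplaces):
import re

_ASIN_RE = re.compile(r'/(?:dp|gp/product)/([A-Z0-9]{10})')

def product_url_sketch(final_url):
    match = _ASIN_RE.search(final_url)
    if match is None:
        raise ParserException('Not an Amazon product page: %s' % final_url)
    # Canonical form keyed by the ASIN alone
    return 'http://www.amazon.com/dp/%s' % match.group(1)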