def next_product_url():
    """Poll the product queue until an entry is accepted; return its review URL.

    Entries are popped with ``lpop`` from ``KEY_PRODUCTS`` on the client
    returned by ``get_cli()``. An empty queue causes a 5 second sleep before
    the next poll. Each entry is decoded as JSON and its ``'asin'`` value is
    passed to ``product_is_timeout``; when that returns a falsy value the
    entry is logged and dropped. Otherwise a task record is written, the
    stored reviews for the product are removed, and the URL is returned.

    Any exception raised while handling a popped entry is logged with its
    traceback and polling resumes, so this function only ever exits by
    returning a URL (or by an error raised from ``lpop`` itself).
    """
    queue_cli = get_cli()

    while True:
        raw_entry = queue_cli.lpop(KEY_PRODUCTS)

        if raw_entry is None:
            logger.info('no product in queue ...')
            time.sleep(5)
            continue

        try:
            asin = json.loads(raw_entry)['asin']

            if product_is_timeout(asin):
                set_product_task(asin)
                del_product_reviews(asin)
                logger.info('next product : ' + asin)
                return 'http://www.amazon.com/ss/customer-reviews/' + asin

            # Not accepted: report it and fall through to the next poll.
            skipped = 'product : ' + asin
            logger.info(
                skipped
                + ' is not timeout, {day} days'.format(day=CRAWL_PRODUCT_TIMEOUT)
            )
        except Exception as err:
            # Deliberately broad: one bad entry must not stop the polling loop.
            logger.exception(repr(err))
def next_product_url():
    """Return the customer-reviews URL of the next accepted queued product.

    Loops forever over ``lpop`` on ``KEY_PRODUCTS``:

    * nothing popped -> log, sleep 5 seconds, poll again;
    * ``product_is_timeout(asin)`` falsy -> log the skip, poll again;
    * otherwise -> ``set_product_task``, ``del_product_reviews``, log, and
      return ``'http://www.amazon.com/ss/customer-reviews/' + asin``.

    Exceptions raised while processing a popped entry (bad JSON, missing
    ``'asin'`` key, helper failures) are logged via ``logger.exception`` and
    the loop carries on with the next entry.
    """
    client = get_cli()

    while True:
        entry = client.lpop(KEY_PRODUCTS)
        if entry is None:
            logger.info('no product in queue ...')
            time.sleep(5)
            continue

        try:
            asin = json.loads(entry)['asin']

            skip = not product_is_timeout(asin)
            if skip:
                skip_msg = 'product : ' + asin
                skip_msg += ' is not timeout, {day} days'.format(
                    day=CRAWL_PRODUCT_TIMEOUT)
                logger.info(skip_msg)
                continue

            set_product_task(asin)
            del_product_reviews(asin)

            logger.info('next product : ' + asin)
            review_url = 'http://www.amazon.com/ss/customer-reviews/' + asin
            return review_url
        except Exception as exc:
            # Broad on purpose so a single bad entry does not end the loop.
            logger.exception(repr(exc))
def set_product_task(prdid):
    """Store a JSON task record for ``prdid``.

    The record holds the product id under ``'prdid'`` and the current
    ``time.time()`` value under ``'ctime'``. It is written with ``set`` to the
    key produced by ``KEY_PRODUCT_TASK.format(p=prdid)``, replacing whatever
    value that ``set`` call overwrites.
    """
    record = dict(prdid=prdid, ctime=time.time())
    key = KEY_PRODUCT_TASK.format(p=prdid)
    get_cli().set(key, json.dumps(record))
def get_product_task(prdid):
    """Fetch and decode the stored task record for ``prdid``.

    Returns the JSON-decoded value stored under
    ``KEY_PRODUCT_TASK.format(p=prdid)``, or ``None`` when nothing is stored
    or when the stored value cannot be decoded (the failure is logged at
    info level).
    """
    raw = get_cli().get(KEY_PRODUCT_TASK.format(p=prdid))

    if raw is not None:
        try:
            return json.loads(raw)
        except Exception as err:
            logger.info('get task: {p} {d} {e}'.format(
                p=prdid, d=repr(raw), e=repr(err)))

    # Either nothing was stored or decoding failed.
    return None
def get_product_task(prdid):
    """Return the decoded task record for ``prdid``, or ``None``.

    ``None`` is returned both when the key
    ``KEY_PRODUCT_TASK.format(p=prdid)`` has no value and when the value
    fails to decode as JSON; in the latter case the product id, the raw
    value and the error are logged at info level.
    """
    store = get_cli()
    task_key = KEY_PRODUCT_TASK.format(p=prdid)
    stored = store.get(task_key)

    if stored is None:
        return None

    try:
        task = json.loads(stored)
    except Exception as exc:
        message = 'get task: {p} {d} {e}'.format(
            p=prdid, d=repr(stored), e=repr(exc))
        logger.info(message)
        return None

    return task
def process_item_amazon_review(self, item, spider):
    """Append ``item`` as JSON to the review list of its product.

    The item's ``'prdid'`` value selects the key
    ``KEY_REVIEW.format(p=prdid)``; the item, converted to a plain dict and
    JSON-encoded, is pushed onto it with ``rpush`` and then logged at debug
    level. Items with a missing or falsy ``'prdid'`` are ignored.

    ``spider`` is not used. Always returns ``None``.
    """
    # The client is obtained before the guard, as in the original flow.
    store = get_cli()

    prdid = item.get('prdid')
    if prdid:
        payload = dict(item)
        store.rpush(KEY_REVIEW.format(p=prdid), json.dumps(payload))
        logger.debug(repr(payload))
def del_product_reviews(prdid):
    """Remove the stored reviews for ``prdid``.

    Deletes the key ``KEY_REVIEW.format(p=prdid)`` on the client returned by
    ``get_cli()``. Returns ``None``.

    The key is deleted directly instead of calling ``exists()`` first: the
    separate check cost an extra round trip and left a window in which the
    key could change between the check and the delete.

    NOTE(review): assumes ``get_cli()`` returns a Redis-style client whose
    ``delete()`` silently ignores keys that do not exist -- confirm.
    """
    review_key = KEY_REVIEW.format(p=prdid)
    get_cli().delete(review_key)
def set_product_task(prdid):
    """Write the task record for ``prdid``.

    A dict with the product id (``'prdid'``) and the current ``time.time()``
    timestamp (``'ctime'``) is JSON-encoded and stored with ``set`` under
    ``KEY_PRODUCT_TASK.format(p=prdid)``.
    """
    created_at = time.time()
    task_record = {'prdid': prdid, 'ctime': created_at}
    task_key = KEY_PRODUCT_TASK.format(p=prdid)

    store = get_cli()
    encoded = json.dumps(task_record)
    store.set(task_key, encoded)
def del_product_reviews(prdid):
    """Delete the review list stored for ``prdid``.

    The key is ``KEY_REVIEW.format(p=prdid)`` on the client returned by
    ``get_cli()``. Returns ``None``.

    The earlier ``exists()`` check before ``delete()`` has been dropped: it
    was a check-then-act sequence that another writer could interleave with,
    and it doubled the number of calls to the store.

    NOTE(review): relies on ``delete()`` being a no-op for a missing key, as
    it is for Redis ``DEL`` -- confirm ``get_cli()`` returns such a client.
    """
    key = KEY_REVIEW.format(p=prdid)
    get_cli().delete(key)