def get_product(product_id):
    """Look up one product by id and annotate it with local crawl state.

    The product is fetched through ``AmazonAPI.lookup`` and converted with
    ``_dict_product``. Three keys are then set or adjusted on the result:

    * ``crawl``   -- True when the review key for the product's ASIN exists.
    * ``delay``   -- ``time_delay`` of the stored task's ``ctime``, or None
                     when no task record is stored for the ASIN.
    * ``reviews`` -- when the value is truthy and its first element is
                     truthy, it is replaced by its second element.

    Returns the annotated dict.
    """
    logger.info('get : ' + product_id)

    handle = AmazonAPI(
        conf.aws_key,
        conf.aws_secret,
        conf.aws_associate_tag,
        Version="2013-08-01",
    )
    product = handle.lookup(ItemId=product_id)
    d = _dict_product(product)

    cli = get_cli()

    d['crawl'] = bool(cli.exists(conf.KEY_REVIEW.format(p=product.asin)))

    # Fetch the task record with a single GET instead of EXISTS followed by
    # GET: the key could vanish between the two calls, which would hand None
    # to json.loads. Assumes the client returns None for a missing key
    # (redis-py behaviour) -- confirm against get_cli().
    raw_task = cli.get(conf.KEY_PRODUCT_TASK.format(p=product.asin))
    if raw_task is not None:
        d['delay'] = time_delay(json.loads(raw_task)['ctime'])
    else:
        d['delay'] = None

    # Presumably 'reviews' is a (has_reviews, url) pair -- verify against
    # _dict_product. Only the second element is kept when the first is truthy.
    if d['reviews'] and d['reviews'][0]:
        d['reviews'] = d['reviews'][1]

    return d
def get_search_products(Keywords, SearchIndex, num=10, **argkv):
    """Search products and return at most ``num`` annotated product dicts.

    Keywords, SearchIndex and any extra keyword arguments are forwarded to
    ``AmazonAPI.search``. Each hit is converted with ``_dict_product`` and
    annotated with:

    * ``crawl``   -- True when the review key for the product's ASIN exists.
    * ``delay``   -- ``time_delay`` of the stored task's ``ctime``, or None
                     when no task record is stored for the ASIN.
    * ``reviews`` -- when the value is truthy and its first element is
                     truthy, it is replaced by its second element.

    Always returns a list. It is shorter than ``num`` when the search yields
    fewer hits, and empty when ``num`` is not positive.
    """
    logger.info('search : ' + repr((Keywords, SearchIndex, num, argkv)))

    items = []
    if num <= 0:
        # Nothing was asked for; skip the remote search entirely.
        return items

    handle = AmazonAPI(
        conf.aws_key,
        conf.aws_secret,
        conf.aws_associate_tag,
        Version="2013-08-01",
    )
    products = handle.search(Keywords=Keywords, SearchIndex=SearchIndex, **argkv)

    cli = get_cli()

    for product in products:
        d = _dict_product(product)

        d['crawl'] = bool(cli.exists(conf.KEY_REVIEW.format(p=product.asin)))

        # One GET instead of EXISTS + GET, so a key that disappears between
        # the two calls cannot feed None into json.loads. Assumes the client
        # returns None for a missing key (redis-py behaviour) -- confirm.
        raw_task = cli.get(conf.KEY_PRODUCT_TASK.format(p=product.asin))
        if raw_task is not None:
            d['delay'] = time_delay(json.loads(raw_task)['ctime'])
        else:
            d['delay'] = None

        if d['reviews'] and d['reviews'][0]:
            d['reviews'] = d['reviews'][1]

        items.append(d)
        if len(items) >= num:
            # Stop consuming the result iterator once enough hits are held.
            break

    # Return whatever was collected even when the search ran out before
    # reaching ``num``; previously that path fell off the end and gave None.
    return items
def get_product(product_id):
    """Look up one product by id and annotate it with local crawl state.

    The product is fetched through ``AmazonAPI.lookup`` and converted with
    ``_dict_product``. Three keys are then set or adjusted on the result:

    * ``crawl``   -- True when the review key for the product's ASIN exists.
    * ``delay``   -- ``time_delay`` of the stored task's ``ctime``, or None
                     when no task record is stored for the ASIN.
    * ``reviews`` -- when the value is truthy and its first element is
                     truthy, it is replaced by its second element.

    Returns the annotated dict.
    """
    logger.info('get : ' + product_id)

    handle = AmazonAPI(
        conf.aws_key,
        conf.aws_secret,
        conf.aws_associate_tag,
        Version="2013-08-01",
    )
    product = handle.lookup(ItemId=product_id)
    d = _dict_product(product)

    cli = get_cli()

    review_key = conf.KEY_REVIEW.format(p=product.asin)
    d['crawl'] = bool(cli.exists(review_key))

    # A single GET replaces the EXISTS-then-GET pair: if the key were removed
    # between those two calls, json.loads would receive None and raise.
    # Assumes a missing key reads back as None (redis-py behaviour) --
    # confirm against get_cli().
    task_key = conf.KEY_PRODUCT_TASK.format(p=product.asin)
    raw_task = cli.get(task_key)
    if raw_task is None:
        d['delay'] = None
    else:
        d['delay'] = time_delay(json.loads(raw_task)['ctime'])

    # Presumably 'reviews' is a (has_reviews, url) pair -- verify against
    # _dict_product. Only the second element is kept when the first is truthy.
    if d['reviews'] and d['reviews'][0]:
        d['reviews'] = d['reviews'][1]

    return d
def add(product_id=None):
    """Append a product to the pending-products list.

    Pushes the JSON document ``{"asin": product_id}`` onto the list at
    ``conf.KEY_PRODUCTS`` with RPUSH and logs the push.

    Returns None on success. On any failure the exception is logged as a
    warning and its ``repr`` is returned instead of being raised, so callers
    detect errors by checking for a non-None result.
    """
    try:
        cli = get_cli()
        cli.rpush(conf.KEY_PRODUCTS, json.dumps({'asin': product_id}))
        logger.info('rpush {p} to {k}'.format(p=product_id, k=conf.KEY_PRODUCTS))
    # "except ... as e" is accepted by Python 2.6+ and Python 3; the old
    # comma form is a syntax error on Python 3.
    except Exception as e:
        # Deliberately broad: this function reports failure through its
        # return value rather than by raising.
        logger.warn(
            repr(e) + ', when rpush {p} to {k}'.format(p=product_id, k=conf.KEY_PRODUCTS)
        )
        return repr(e)
    return None
def add(product_id=None):
    """Queue a product id by pushing it onto the products list.

    The payload is the JSON document ``{"asin": product_id}``, appended with
    RPUSH to the list at ``conf.KEY_PRODUCTS``.

    Returns None when the push succeeds. If anything raises, the error is
    logged as a warning and ``repr(error)`` is returned; nothing propagates.
    """
    try:
        cli = get_cli()
        payload = json.dumps({'asin': product_id})
        cli.rpush(conf.KEY_PRODUCTS, payload)
        logger.info('rpush {p} to {k}'.format(p=product_id, k=conf.KEY_PRODUCTS))
    # The "as" form parses on Python 2.6+ and on Python 3, unlike the
    # Python-2-only "except Exception, e".
    except Exception as e:
        # Broad on purpose: failures are surfaced via the return value.
        logger.warn(
            repr(e) + ', when rpush {p} to {k}'.format(p=product_id, k=conf.KEY_PRODUCTS)
        )
        return repr(e)
    return None
def yield_review(prdid):
    """Yield every stored review of a product, decoded from JSON.

    Reviews are read from the list at ``conf.KEY_REVIEW`` (formatted with
    the product id) in pages of ``PAGE_ITEMS`` entries per LRANGE call.
    Nothing is yielded when ``prdid`` is None or the key does not exist.

    The list length is read once up front; entries appended while iterating
    are not guaranteed to be yielded.
    """
    if prdid is None:
        return

    cli = get_cli()
    k = conf.KEY_REVIEW.format(p=prdid)
    if not cli.exists(k):
        return

    total = cli.llen(k)

    # Page starts stop strictly below the length. The previous "<=" bound
    # issued one extra, always-empty LRANGE whenever the length was an exact
    # multiple of PAGE_ITEMS.
    for start in range(0, total, PAGE_ITEMS):
        # LRANGE's stop index is inclusive, hence the "- 1".
        for r in cli.lrange(k, start, start + PAGE_ITEMS - 1):
            yield json.loads(r)
def yield_review(prdid):
    """Generate the JSON-decoded reviews stored for a product.

    The reviews live in the list at ``conf.KEY_REVIEW`` (formatted with the
    product id) and are fetched ``PAGE_ITEMS`` at a time via LRANGE. The
    generator is empty when ``prdid`` is None or the key is absent.

    The length is sampled once before paging starts, so entries pushed
    during iteration may not be seen.
    """
    if prdid is None:
        return

    cli = get_cli()
    k = conf.KEY_REVIEW.format(p=prdid)
    if not cli.exists(k):
        return

    length = cli.llen(k)
    start = 0
    # Strict "<": a page starting at or beyond the length holds no entries,
    # so the old "<=" bound only added a wasted round trip.
    while start < length:
        # The stop index of LRANGE is inclusive.
        page = cli.lrange(k, start, start + PAGE_ITEMS - 1)
        start += PAGE_ITEMS
        for r in page:
            yield json.loads(r)
def get_reviews(prdid, page):
    """Return one page of stored reviews plus paging totals.

    Args:
        prdid: product id used to format ``conf.KEY_REVIEW``.
        page: 1-based page number.

    Returns:
        ``(reviews, pages, nums)`` where ``reviews`` is the list of
        JSON-decoded entries on the requested page, ``pages`` is the total
        page count and ``nums`` is the total number of stored reviews.
        ``(None, None, None)`` when ``prdid`` is None or the key does not
        exist. ``reviews`` is empty for a page outside ``1..pages``.
    """
    if prdid is None:
        return None, None, None

    cli = get_cli()
    k = conf.KEY_REVIEW.format(p=prdid)
    if not cli.exists(k):
        return None, None, None

    nums = cli.llen(k)

    # Ceiling division with "//" keeps the page count an integer on both
    # Python 2 and Python 3 (plain "/" yields a float under true division).
    pages = (nums + PAGE_ITEMS - 1) // PAGE_ITEMS

    if page < 1:
        # A page below 1 would produce negative LRANGE indexes, which Redis
        # interprets as offsets from the tail of the list.
        return [], pages, nums

    first = (page - 1) * PAGE_ITEMS
    # LRANGE's stop index is inclusive.
    rws = [json.loads(r) for r in cli.lrange(k, first, first + PAGE_ITEMS - 1)]
    return rws, pages, nums
def get_reviews(prdid, page):
    """Fetch a single page of a product's reviews together with totals.

    Args:
        prdid: product id used to format ``conf.KEY_REVIEW``.
        page: 1-based page number.

    Returns:
        A ``(reviews, pages, nums)`` tuple: the JSON-decoded entries of the
        requested page, the total number of pages, and the total number of
        stored reviews. All three are None when ``prdid`` is None or the
        review key does not exist. ``reviews`` is an empty list when
        ``page`` lies outside ``1..pages``.
    """
    if prdid is None:
        return None, None, None

    cli = get_cli()
    k = conf.KEY_REVIEW.format(p=prdid)
    if not cli.exists(k):
        return None, None, None

    nums = cli.llen(k)

    # Integer ceiling division; "/" would give a float page count whenever
    # true division is in effect (Python 3, or __future__ division).
    pages = (nums + PAGE_ITEMS - 1) // PAGE_ITEMS

    if page < 1:
        # Negative LRANGE indexes count from the end of the list, so a
        # non-positive page must not reach Redis.
        rws = []
    else:
        start = (page - 1) * PAGE_ITEMS
        stop = page * PAGE_ITEMS - 1  # inclusive upper bound
        rws = [json.loads(r) for r in cli.lrange(k, start, stop)]

    return rws, pages, nums
def get_search_products(Keywords, SearchIndex, num=10, **argkv):
    """Run a product search and return up to ``num`` annotated results.

    Keywords, SearchIndex and any extra keyword arguments are passed on to
    ``AmazonAPI.search``. Every hit is converted with ``_dict_product`` and
    then given:

    * ``crawl``   -- True when the review key for the product's ASIN exists.
    * ``delay``   -- ``time_delay`` of the stored task's ``ctime``, or None
                     when no task record is stored for the ASIN.
    * ``reviews`` -- when the value is truthy and its first element is
                     truthy, it is replaced by its second element.

    A list is always returned: shorter than ``num`` if the search is
    exhausted first, empty if ``num`` is not positive.
    """
    logger.info('search : ' + repr((Keywords, SearchIndex, num, argkv)))

    items = []
    if num <= 0:
        # No results requested, so no remote search is needed.
        return items

    handle = AmazonAPI(
        conf.aws_key,
        conf.aws_secret,
        conf.aws_associate_tag,
        Version="2013-08-01",
    )
    products = handle.search(Keywords=Keywords, SearchIndex=SearchIndex, **argkv)

    cli = get_cli()

    for product in products:
        d = _dict_product(product)

        review_key = conf.KEY_REVIEW.format(p=product.asin)
        d['crawl'] = bool(cli.exists(review_key))

        # Read the task record with one GET rather than EXISTS + GET; the
        # key may vanish between the two and json.loads(None) would raise.
        # Assumes a missing key reads back as None (redis-py behaviour) --
        # confirm against get_cli().
        task_key = conf.KEY_PRODUCT_TASK.format(p=product.asin)
        raw_task = cli.get(task_key)
        if raw_task is None:
            d['delay'] = None
        else:
            d['delay'] = time_delay(json.loads(raw_task)['ctime'])

        if d['reviews'] and d['reviews'][0]:
            d['reviews'] = d['reviews'][1]

        items.append(d)
        if len(items) >= num:
            # Enough hits collected; stop pulling from the result iterator.
            break

    # Also reached when the search ends early: the partial list is returned
    # instead of the implicit None the function used to fall through to.
    return items
import rediscli import amazonapi handle = amazonapi.AmazonAPI( conf.aws_key, conf.aws_secret, conf.aws_associate_tag, Version='2013-08-01', ) #product = handle.lookup( ItemId = 'B00IQ8MWBS' ) products = handle.search(Keywords='Barbie', SearchIndex='Toys') #products = handle.search( Keywords='iPhone', SearchIndex='Electronics' ) cli = rediscli.get_cli() key = 'amazon.products' i = 0 for product in products: i += 1 d = {} d['asin'] = product.asin #cli.rpush( key, json.dumps( d ) ) attrs = ( 'price_and_currency', 'asin', 'sales_rank',
import conf import rediscli import amazonapi handle = amazonapi.AmazonAPI( conf.aws_key, conf.aws_secret, conf.aws_associate_tag, Version = '2013-08-01', ) #product = handle.lookup( ItemId = 'B00IQ8MWBS' ) products = handle.search( Keywords='Barbie', SearchIndex='Toys' ) #products = handle.search( Keywords='iPhone', SearchIndex='Electronics' ) cli = rediscli.get_cli() key = 'amazon.products' i = 0 for product in products: i += 1 d = {} d[ 'asin' ] = product.asin #cli.rpush( key, json.dumps( d ) ) attrs = ( 'price_and_currency', 'asin', 'sales_rank', 'offer_url',