Beispiel #1
0
def custom_query():
    '''Run a caller-supplied MongoDB query and return one page of results.

    Reads from ``request.args``:
      * ``query`` -- Python-literal filter handed to ``find`` (default ``{}``)
      * ``ret``   -- Python-literal projection (default ``{}``); ``_id`` is
        always forced to 0 before it is used
      * ``page``  -- 1-based page number (default 1); anything below 1 is
        treated as the first page

    Returns a ``jsonify`` response: ``status`` ``'ok'`` with the page of
    documents under ``data``, or ``status`` ``'error'`` when neither
    ``query`` nor ``ret`` was supplied or when evaluating/running the query
    fails.
    '''
    invalid = {'status': 'error', 'data': 'not valid custom query'}

    if "query" not in request.args and "ret" not in request.args:
        return jsonify(invalid)

    try:
        # SECURITY: eval() on request parameters executes whatever code the
        # client sends. It is kept here only to preserve the accepted input
        # syntax. NOTE(review): replace with ast.literal_eval or JSON parsing.
        query = eval(request.args.get('query', "{}"))
        ret = eval(request.args.get('ret', "{}"))
        ret['_id'] = 0

        db = mongo_util.get_mongo_db()

        # Pages are 1-based, so page 1 must skip nothing. Clamping keeps
        # page=0 (which used to be the only way to reach the first page)
        # working instead of producing a negative skip.
        page = max(int(request.args.get('page', '1')), 1)
        offset = (page - 1) * conf.ITEM_PERPAGE

        custom_cursor = (db[conf.MONGO_COL].find(query, ret)
                         .skip(offset)
                         .limit(conf.ITEM_PERPAGE)
                         .batch_size(2000))

        return jsonify({'status': 'ok', 'data': list(custom_cursor)})
    except Exception:
        # Any failure (bad literal, bad page number, driver error) is
        # reported to the client as an invalid query.
        return jsonify(invalid)
Beispiel #2
0
def multi_category_fetch(field):
    '''Return the most-reviewed commodities across a group of categories.

    ``field`` is looked up in ``conf.CATEGORY_DICT`` to get a list of
    ``'A>B>C'`` category paths; unknown keys fall back to
    ``['Pet Supplies>Dogs']``.

    Reads from ``request.args``:
      * ``topn``   -- number of items to return (default 5)
      * ``page``   -- page index applied to each per-category query
        (default 0)
      * ``fields`` -- Python-literal list of field names to project

    Each category is queried for its ``topn`` documents ordered by
    ``stats_info.review_count``; the merged list is then ranked by that same
    count and cut to ``topn``.

    Returns a ``jsonify`` response with ``status`` ``'ok'`` and the selected
    documents under ``data``.
    '''
    review_field = 'stats_info.review_count'

    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])
    db = mongo_util.get_mongo_db()

    top_number = int(request.args.get('topn', 5))
    page = int(request.args.get('page', '0'))

    # SECURITY: eval() on a request parameter executes whatever code the
    # client sends. Kept to preserve the accepted input syntax.
    # NOTE(review): replace with ast.literal_eval or JSON parsing.
    all_fields = eval(request.args.get(
        'fields', "['ASIN', 'productInfo.name', 'productInfo.img']"))
    query_fields = dict(zip(all_fields + ['_id'],
                            [1] * len(all_fields) + [0]))

    # Ranking the merged list needs the review count on every document.
    # Fetch it as a helper field when the caller's inclusion list does not
    # already cover it. An empty field list yields an exclusion-only
    # projection that already returns the count, so it is left alone.
    needs_helper = bool(all_fields) and not any(
        name in ('stats_info', review_field) for name in all_fields)
    if needs_helper:
        query_fields[review_field] = 1

    final_data = []
    for category in target_category_set:
        current_data = (db['commodity']
                        .find({'category': {'$elemMatch':
                                            {'$all': category.split('>')}}},
                              query_fields)
                        .sort(review_field, -1)
                        .skip(page * top_number)
                        .limit(top_number)
                        .batch_size(3000))
        final_data.extend(current_data)

    # Sorting the raw dicts (as before) compared whole documents, which is
    # unrelated to review count and is a TypeError on Python 3.
    final_data.sort(key=_review_count_of, reverse=True)
    top_n_commodity = final_data[:top_number]

    if needs_helper:
        # The caller did not ask for the count; do not leak the helper field.
        for doc in top_n_commodity:
            _strip_review_count(doc)

    return jsonify({'status': 'ok',
                    'data': top_n_commodity})


def _review_count_of(doc):
    '''Return ``doc['stats_info']['review_count']`` as a number, else 0.'''
    stats = doc.get('stats_info')
    count = stats.get('review_count') if isinstance(stats, dict) else None
    try:
        return 0 + count
    except TypeError:
        return 0


def _strip_review_count(doc):
    '''Remove the helper review count, and ``stats_info`` if left empty.'''
    stats = doc.get('stats_info')
    if isinstance(stats, dict):
        stats.pop('review_count', None)
        if not stats:
            del doc['stats_info']
Beispiel #3
0
def all_categories():
    '''Return every distinct ``category.0`` value joined with ``'>'``.

    The response is ``{'status': 'ok', 'data': [{'name': ...}, ...]}``.
    '''
    collection = mongo_util.get_mongo_db()[conf.MONGO_COL]

    names = []
    for path in collection.distinct('category.0'):
        # Falsy values (for example None or an empty list) are skipped.
        if not path:
            continue
        names.append({'name': '>'.join(path)})

    return jsonify({'status': 'ok', 'data': names})
Beispiel #4
0
def all_categories():
    '''Return every distinct ``category.0`` value joined with ``'>'``.

    The response is ``{'status': 'ok', 'data': [{'name': ...}, ...]}``.
    '''
    db = mongo_util.get_mongo_db()

    all_cate = db[conf.MONGO_COL].distinct('category.0')
    # Skip falsy values (None, empty list): '>'.join(None) would raise and
    # an empty path carries no category name.
    all_cate_info = [{'name': '>'.join(path)} for path in all_cate if path]

    return jsonify({'status': 'ok', 'data': all_cate_info})
Beispiel #5
0
def category_commodity(category):
    '''Report how many documents match the given category path.

    ``category`` is a ``'>'``-separated path in which every ``'$'`` is
    turned back into ``'&'`` before the lookup. The response carries the
    decoded name and the count of documents whose ``category.0`` equals the
    split path.
    '''
    name = category.replace('$', '&')

    db = mongo_util.get_mongo_db()
    matches = db[conf.MONGO_COL].find({'category.0': name.split('>')})

    payload = {'name': name, 'count': matches.count()}
    return jsonify({'status': 'ok', 'data': payload})
Beispiel #6
0
def single_commodity(asin):
    '''Look up one commodity document by its ASIN.

    The ``field`` request argument is a Python-literal list of field names
    to return (default ``['ASIN']``); ``_id`` is always excluded. The
    response is ``{'status': 'ok', 'data': <document or None>}``.
    '''
    db = mongo_util.get_mongo_db()

    # SECURITY NOTE(review): eval() on a request parameter executes whatever
    # code the client sends; consider ast.literal_eval or JSON instead.
    requested = eval(request.args.get('field', "['ASIN']"))

    # Every requested field is included (1); _id comes last and is set to 0.
    keys = requested + ['_id']
    flags = [1] * len(requested) + [0]
    projection = dict(zip(keys, flags))

    document = db[conf.MONGO_COL].find_one({'ASIN': asin}, projection)

    return jsonify({'status': 'ok', 'data': document})
def seller_mobile_field(field, **kwargs):
    '''Aggregate seller statistics for a group of categories.

    ``field`` is looked up in ``conf.CATEGORY_DICT`` to get a list of
    ``'A>B>C'`` category paths (default ``['Pet Supplies>Dogs']``). Every
    commodity whose ``category.0`` equals one of those paths is scanned;
    each element of its ``seller`` list is expected to hold its own
    ``seller`` list of entries with ``name`` and optionally ``link`` and
    ``img``.

    ``kwargs`` is accepted for interface compatibility and not used.

    Returns a list of ``{'name': ..., 'seller_info': {'count', 'keywords',
    'img', 'link'}}`` dicts, one per distinct seller link. Entries named
    ``'Amazon'`` or with a ``None`` name are left out.
    '''
    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])

    db = mongo_util.get_mongo_db()

    seller_set = {}
    for category in target_category_set:
        cur_data = db['commodity'].find({'category.0': category.split('>')},
                                        {'seller': 1,
                                         'ASIN': 1}).batch_size(3000)
        for data in cur_data:
            for time_seller in data.get('seller') or []:
                for single_seller in time_seller['seller']:
                    try:
                        _tally_seller(seller_set, single_seller)
                    except (AttributeError, KeyError, TypeError):
                        # Best effort: a malformed seller entry (wrong type,
                        # missing 'name') is skipped, not fatal.
                        continue

    result = []
    for info in seller_set.values():
        name = info.get('name', '')
        # Filter before the keyword lookup so no database query is spent on
        # entries that are dropped anyway.
        if name == 'Amazon' or name is None:
            continue
        result.append({'name': name,
                       'seller_info': {
                           'count': info['count'],
                           'keywords': get_keywords_info(info['name'],
                                                         type='seller'),
                           'img': info.get('img', ''),
                           'link': info.get('link', '')}})
    return result


def _tally_seller(seller_set, single_seller):
    '''Fold one seller entry into ``seller_set``, keyed by seller link.'''
    # The link itself is the key: the key never leaves this module, so
    # hashing it first added nothing.
    amazon_link = 'http://www.amazon.com'

    if 'link' in single_seller:
        link = single_seller['link']
        known = seller_set.get(link)
        if known is None:
            seller_set[link] = {'count': 1,
                                'name': single_seller['name'],
                                'link': link,
                                'img': single_seller.get('img', '')}
            return
        known['count'] += 1
        if single_seller['name'] != 'Null':
            # A real name replaces whatever was recorded earlier.
            known['name'] = single_seller['name']
            known['link'] = link
            known['img'] = single_seller.get('img', '')
        return

    # Entries without a link are only attributable when they are Amazon
    # itself. Previously any other link-less seller reset the Amazon record.
    if single_seller['name'] != 'Amazon':
        return
    amazon = seller_set.get(amazon_link)
    if amazon is None:
        seller_set[amazon_link] = {'count': 1,
                                   'name': 'Amazon',
                                   'link': amazon_link,
                                   'img': ''}
    else:
        amazon['count'] += 1
Beispiel #8
0
def single_commodity(asin):
    '''Fetch the commodity document whose ``ASIN`` equals ``asin``.

    Which fields come back is controlled by the ``field`` request argument,
    a Python-literal list defaulting to ``['ASIN']``; ``_id`` is never
    returned. ``data`` in the response is the document, or None when
    nothing matched.
    '''
    # SECURITY NOTE(review): eval() runs client-supplied text as code;
    # consider ast.literal_eval or JSON parsing instead.
    wanted = eval(request.args.get('field', "['ASIN']"))

    # Mark every key as included, then switch _id (always last) off.
    projection = dict.fromkeys(wanted + ['_id'], 1)
    projection['_id'] = 0

    collection = mongo_util.get_mongo_db()[conf.MONGO_COL]
    found = collection.find_one({'ASIN': asin}, projection)

    return jsonify({'status': 'ok', 'data': found})
Beispiel #9
0
def category_commodity(category):
    '''Count the documents filed under one category path.

    ``'$'`` in ``category`` is decoded to ``'&'``, the result is split on
    ``'>'`` and matched against ``category.0``. The decoded name and the
    match count are returned under ``data``.
    '''
    decoded = category.replace('$', '&')
    path = decoded.split('>')

    collection = mongo_util.get_mongo_db()[conf.MONGO_COL]
    total = collection.find({'category.0': path}).count()

    return jsonify({'status': 'ok',
                    'data': {'name': decoded, 'count': total}})
def get_keywords_info(seller, type='seller'):
    '''Return up to three keyword strings stored for a seller or brand.

    Args:
        seller: value matched against the ``name`` field.
        type: ``'seller'`` or ``'brand'``; selects both the collection
            (``db[type]``) and the keyword field (``seller_keywords`` /
            ``brand_keywords``).

    Returns:
        The ``word`` of the first three stored keyword entries, or
        ``['good', 'great', 'bad']`` when no document matches or the
        document has no keyword field.

    Raises:
        ValueError: if ``type`` is neither ``'seller'`` nor ``'brand'``
            (this used to surface as an unbound-variable error).
    '''
    default_words = ['good', 'great', 'bad']
    keyword_fields = {'seller': 'seller_keywords', 'brand': 'brand_keywords'}

    if type not in keyword_fields:
        raise ValueError('unknown keyword type: %r' % (type,))
    target_col = keyword_fields[type]

    db = mongo_util.get_mongo_db()
    # Only the first match is ever used, so ask for exactly one document
    # instead of loading the whole result set.
    record = db[type].find_one({'name': seller}, {target_col: 1})
    if not record:
        return default_words

    keywords = record.get(target_col)
    if keywords is None:
        return default_words

    return [entry['word'] for entry in keywords[:3]]
Beispiel #11
0
def worker_process():
    '''Download the file for one pending request and notify its callback.

    Takes one ``file_request`` document whose ``state`` is
    ``'not_processed'`` and returns immediately when there is none.
    Otherwise the body fetched from the record's ``url`` is written to
    ``settings.DATA_DIR/<_id>``, the record is saved with ``state`` set to
    ``'downloaded'``, and the record's ``callback`` URL is requested with
    the record id sent as both ``file_id`` and ``request_id``.
    '''
    col = mongo_util.get_mongo_db()['file_request']
    record = col.find_one({'state': 'not_processed'})
    if record is None:
        return

    record_id = str(record['_id'])

    # NOTE(review): the HTTP status is not checked, so an error page would
    # be stored as the file content.
    res = requests.get(record['url'])

    filepath = os.path.join(settings.DATA_DIR, record_id)
    # The context manager closes the handle even if the write fails.
    with open(filepath, 'wb') as f:
        f.write(res.content)

    record['state'] = 'downloaded'
    col.save(record)

    requests.get(record['callback'], params={
        'file_id': record_id,
        'request_id': record_id,
    })
Beispiel #12
0
def worker_process():
    '''Process a single pending ``file_request`` record, if any.

    Steps, in order: find one record with ``state == 'not_processed'``
    (return if none), download ``record['url']``, store the response body in
    ``settings.DATA_DIR`` under the record id, save the record with
    ``state = 'downloaded'``, then call ``record['callback']`` passing the
    record id as ``file_id`` and ``request_id``.
    '''
    col = mongo_util.get_mongo_db()['file_request']
    record = col.find_one({'state': 'not_processed'})
    if record is None:
        return

    url = record['url']
    # NOTE(review): no status check here; a failed download's body is still
    # written and the record still marked as downloaded.
    res = requests.get(url)

    filepath = os.path.join(settings.DATA_DIR, str(record['_id']))
    # Use a context manager so the file is closed even when write() raises.
    with open(filepath, 'wb') as f:
        f.write(res.content)

    record['state'] = 'downloaded'
    col.save(record)

    callback = record['callback']
    requests.get(callback,
                 params={
                     'file_id': str(record['_id']),
                     'request_id': str(record['_id']),
                 })
Beispiel #13
0
def category_commodity_info(category):
    '''Return one page of commodities for a category, most reviewed first.

    ``category`` is a ``'>'``-separated path with ``'$'`` standing in for
    ``'&'``.

    Reads from ``request.args``:
      * ``fields`` -- Python-literal list of fields to project
        (default ``['ASIN']``); ``_id`` is always excluded
      * ``page``   -- 1-based page number (default 1); values below 1 are
        treated as page 1

    Returns a ``jsonify`` response with ``status``, the effective ``page``
    and the documents under ``data``.
    '''
    db = mongo_util.get_mongo_db()

    category_name = category.replace('$', '&').split('>')

    # SECURITY: eval() on a request parameter executes whatever code the
    # client sends. Kept to preserve the accepted input syntax.
    # NOTE(review): replace with ast.literal_eval or JSON parsing.
    all_fields = eval(request.args.get('fields', "['ASIN']"))
    query_fields = dict(zip(all_fields + ['_id'],
                            [1] * len(all_fields) + [0]))

    # A page below 1 would give a negative skip, which the driver rejects.
    page = max(int(request.args.get('page', '1')), 1)
    offset = (page - 1) * conf.ITEM_PERPAGE

    current_data_cursor = (db[conf.MONGO_COL]
                           .find({'category.0': category_name}, query_fields)
                           .sort('stats_info.review_count', -1)
                           .skip(offset)
                           .limit(conf.ITEM_PERPAGE)
                           .batch_size(2000))

    return jsonify({'status': 'ok',
                    'page': page,
                    'data': list(current_data_cursor)})
Beispiel #14
0
def dispatch_by_asin(asin):
    '''Return the product image URL and chart image paths for an ASIN.

    The commodity's first ``category`` entry and the ASIN form the chart
    directory ``ROOT_DIR + '<cat>/<cat>/.../<asin>'``. File names in it are
    grouped by the text before their first ``'-'`` (the "type").

    Unless the ``filter`` request argument is ``'all'``, only the
    lexicographically last file of each type is returned (presumably the
    newest one -- assumes the names sort chronologically; verify against the
    writer of these files).

    Returns ``status`` ``'ok'`` with ``data`` holding ``img`` (a usable URL
    or ``''``) and ``charts`` (a list of ``{'path': ...}``), or ``status``
    ``'error'`` when the ASIN is unknown or has no category.
    '''
    db = mongo_util.get_mongo_db()

    target_data = db['commodity'].find_one(
        {'ASIN': asin}, {'category': 1, 'productInfo.img': 1})

    if not target_data or not target_data.get('category'):
        return jsonify({'status': 'error', 'data': 'target data not exists'})

    category = target_data['category'][0]
    relative_dir = '/'.join(category) + '/' + asin

    try:
        all_target_files = os.listdir(ROOT_DIR + relative_dir)
    except OSError:
        # No chart directory for this commodity: report no charts instead
        # of failing the whole request.
        all_target_files = []

    if request.args.get('filter', '') != 'all':
        # Keep the last file name per exact type. Matching on the exact
        # prefix (not a regex built from it) avoids mixing types such as
        # 'stat' and 'stats' and is safe for names containing regex
        # metacharacters.
        newest_by_type = {}
        for filename in all_target_files:
            file_type = filename.split('-')[0]
            if filename > newest_by_type.get(file_type, ''):
                newest_by_type[file_type] = filename
        filter_img_files = [newest_by_type[file_type]
                            for file_type in sorted(newest_by_type)]
    else:
        filter_img_files = all_target_files

    all_access_imgs = [{'path': ACCESS_DIR + relative_dir + '/' + filename}
                       for filename in filter_img_files]

    # A missing productInfo / img simply yields an empty image URL.
    product_info = target_data.get('productInfo') or [{}]
    img = product_info[0].get('img') or ''

    asin_url = ''
    if img.startswith(('http:', 'https:')):
        # Already a web URL: use it as is.
        asin_url = img
    elif img.startswith('/mnt/'):
        # Local path: drop the leading '/mnt' and serve from ACCESS_DIR.
        asin_url = ACCESS_DIR + '/'.join(img.split('/')[2:])

    return jsonify({'status': 'ok',
                    'data': {'img': asin_url, 'charts': all_access_imgs}})
Beispiel #15
0
def category_commodity_info(category):
    '''Return one page of a category's commodities ordered by review count.

    ``category`` is decoded (``'$'`` becomes ``'&'``) and split on ``'>'``
    before being matched against ``category.0``.

    Request arguments:
      * ``fields`` -- Python-literal list of field names to include
        (default ``['ASIN']``); ``_id`` is always excluded
      * ``page``   -- 1-based page number (default 1); anything below 1 is
        served as page 1

    The response contains ``status``, the page actually served and the
    documents under ``data``.
    '''
    db = mongo_util.get_mongo_db()

    category_name = category.replace('$', '&').split('>')

    # SECURITY: eval() runs client-supplied text as code. It is left in
    # place so existing callers keep working.
    # NOTE(review): switch to ast.literal_eval or JSON parsing.
    all_fields = eval(request.args.get('fields', "['ASIN']"))
    query_fields = dict(
        zip(all_fields + ['_id'], [1] * len(all_fields) + [0]))

    # Clamp so that page <= 0 cannot turn into a negative skip, which the
    # driver rejects with an exception.
    page = max(int(request.args.get('page', '1')), 1)

    current_data_cursor = (
        db[conf.MONGO_COL]
        .find({'category.0': category_name}, query_fields)
        .sort('stats_info.review_count', -1)
        .skip((page - 1) * conf.ITEM_PERPAGE)
        .limit(conf.ITEM_PERPAGE)
        .batch_size(2000))

    return jsonify({
        'status': 'ok',
        'page': page,
        'data': list(current_data_cursor)
    })
def brand_mobile_field(field, **kwargs):
    '''Aggregate per-brand product and review counts for a category group.

    ``field`` is looked up in ``conf.CATEGORY_DICT`` to get a list of
    ``'A>B>C'`` category paths (default ``['Pet Supplies>Dogs']``). For
    every commodity whose ``category.0`` equals one of them, the brand name
    is read from ``productInfo[0]['brand']['name']`` and the review count
    from ``stats_info['review_count']``.

    Keyword arguments:
        limit: when given, only that many top brands are returned. Without
            it every brand is returned, as before (the previous code read a
            ``limit`` but never applied it).

    Returns a list of ``{'name': ..., 'brand_info': {'count',
    'review_count', 'keywords'}}`` ordered by ``review_count`` descending.
    '''
    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])

    db = mongo_util.get_mongo_db()
    limit = kwargs.get('limit')
    brand_set = {}

    for category in target_category_set:
        cur_data = db.commodity.find({'category.0': category.split('>')},
                                     {'productInfo': 1,
                                      'offer': 1,
                                      'ASIN': 1,
                                      'stats_info': 1}).batch_size(3000)
        for data in cur_data:
            brand_name = _brand_name_of(data)
            if brand_name is None:
                continue
            # Both counters are updated together, so an entry can never be
            # left without 'review_count' (which used to break the sort).
            totals = brand_set.setdefault(brand_name,
                                          {'count': 0, 'review_count': 0})
            totals['count'] += 1
            totals['review_count'] += _brand_review_count(data)

    ranked = sorted(brand_set.items(),
                    key=lambda item: item[1]['review_count'],
                    reverse=True)
    if limit is not None:
        # Cut before the keyword lookups to avoid needless queries.
        ranked = ranked[:limit]

    return [{'name': name,
             'brand_info': {'count': totals['count'],
                            'review_count': totals['review_count'],
                            'keywords': get_keywords_info(name,
                                                          type='brand')}}
            for name, totals in ranked]


def _brand_name_of(data):
    '''Return the document's brand name, or None when it has no brand.'''
    try:
        return data['productInfo'][0]['brand']['name']
    except (KeyError, IndexError, TypeError):
        return None


def _brand_review_count(data):
    '''Return the document's review count as a number, 0 when unusable.'''
    stats = data.get('stats_info')
    count = stats.get('review_count') if isinstance(stats, dict) else None
    try:
        return 0 + count
    except TypeError:
        return 0
Beispiel #17
0
def custom_query():
    '''Run a caller-supplied MongoDB query and return one page of results.

    Request arguments:
      * ``query`` -- Python-literal filter for ``find`` (default ``{}``)
      * ``ret``   -- Python-literal projection (default ``{}``); ``_id`` is
        always set to 0 in it
      * ``page``  -- 1-based page number (default 1)

    Returns ``status`` ``'ok'`` with the documents under ``data``. Returns
    ``status`` ``'error'`` when neither ``query`` nor ``ret`` is present or
    when evaluating or running the query raises.
    '''
    invalid = {'status': 'error',
               'data': 'not valid custom query'}

    if "query" in request.args or "ret" in request.args:
        try:
            # SECURITY: eval() on request parameters executes whatever code
            # the client sends. Kept to preserve the accepted input syntax.
            # NOTE(review): replace with ast.literal_eval or JSON parsing.
            query = eval(request.args.get('query', "{}"))
            ret = eval(request.args.get('ret', "{}"))
            ret['_id'] = 0

            db = mongo_util.get_mongo_db()

            page = int(request.args.get('page', '1'))
            offset = (page - 1) * conf.ITEM_PERPAGE

            custom_cursor = (db[conf.MONGO_COL].find(query, ret)
                             .skip(offset)
                             .limit(conf.ITEM_PERPAGE)
                             .batch_size(2000))

            return jsonify({'status': 'ok',
                            'data': list(custom_cursor)})
        except Exception:
            # Narrower than a bare except: interpreter-exit signals such as
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            return jsonify(invalid)

    return jsonify(invalid)
def seller_info(seller_name):
    '''Return the first ``seller`` document named ``seller_name``.

    ``_id`` is excluded from the result. An empty dict is returned when no
    document matches.
    '''
    db = mongo_util.get_mongo_db()
    # Only the first match is used, so fetch a single document rather than
    # loading every match into a list.
    record = db['seller'].find_one({'name': seller_name}, {'_id': 0})

    return record or {}
def brand_info(brand_name):
    '''Return the first ``brand`` document named ``brand_name``.

    ``_id`` is excluded from the result. An empty dict is returned when no
    document matches.
    '''
    db = mongo_util.get_mongo_db()

    # Only the first match is used, so fetch a single document rather than
    # loading every match into a list.
    record = db['brand'].find_one({'name': brand_name}, {'_id': 0})

    return record or {}