def custom_query():
    '''Run a caller-supplied MongoDB query and return one page of results.

    Request arguments:
        query: Python-literal dict used as the find() filter (default "{}").
        ret:   Python-literal dict used as the projection (default "{}");
               ``_id`` is always excluded from the result.
        page:  1-based page number (default 1).

    Returns jsonify({'status': 'ok', 'data': [...]}) on success. Returns
    jsonify({'status': 'error', 'data': 'not valid custom query'}) when
    neither ``query`` nor ``ret`` is supplied, or when building or running
    the query raises.
    '''
    if "query" not in request.args and "ret" not in request.args:
        return jsonify({'status': 'error', 'data': 'not valid custom query'})
    try:
        # SECURITY: eval() executes arbitrary expressions taken straight from
        # the request. It is kept so existing callers keep working, but it
        # should be replaced by a literal-only parser (ast.literal_eval/JSON).
        query = eval(request.args.get('query', "{}"))
        ret = eval(request.args.get('ret', "{}"))
        ret['_id'] = 0
        db = mongo_util.get_mongo_db()
        page = int(request.args.get('page', '1'))
        # Pages are 1-based: page 1 starts at offset 0. Skipping
        # page * ITEM_PERPAGE silently dropped the first page.
        offset = (page - 1) * conf.ITEM_PERPAGE
        custom_cursor = (db[conf.MONGO_COL].find(query, ret)
                         .skip(offset)
                         .limit(conf.ITEM_PERPAGE)
                         .batch_size(2000))
        return jsonify({'status': 'ok', 'data': list(custom_cursor)})
    except Exception:
        # Any failure while parsing or running the query is reported as an
        # invalid query; SystemExit/KeyboardInterrupt are no longer swallowed.
        return jsonify({'status': 'error', 'data': 'not valid custom query'})
def _doc_review_count(doc):
    '''Return doc['stats_info']['review_count'], or 0 when it is unavailable.'''
    stats = doc.get('stats_info')
    if isinstance(stats, dict):
        return stats.get('review_count') or 0
    return 0


def multi_category_fetch(field):
    '''Fetch the top commodities across the categories grouped under *field*.

    *field* is looked up in conf.CATEGORY_DICT (falling back to
    ['Pet Supplies>Dogs']); each entry is a '>'-separated category path.

    Request arguments:
        topn:   number of items per category query and in the final result
                (default 5).
        page:   0-based page number applied to each category query
                (default 0).
        fields: Python-literal list of field names to project
                (default ['ASIN', 'productInfo.name', 'productInfo.img']).

    Returns jsonify({'status': 'ok', 'data': [...]}) with at most ``topn``
    documents.
    '''
    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])
    db = mongo_util.get_mongo_db()
    top_number = int(request.args.get('topn', 5))
    page = int(request.args.get('page', '0'))
    # SECURITY: eval() executes arbitrary expressions taken straight from the
    # request. Kept for compatibility; should become a literal-only parser.
    all_fields = eval(request.args.get(
        'fields', "['ASIN', 'productInfo.name', 'productInfo.img']"))
    query_fields = dict((name, 1) for name in all_fields)
    query_fields['_id'] = 0

    final_data = []
    for category in target_category_set:
        current_data = (db['commodity']
                        .find({'category': {'$elemMatch':
                                            {'$all': category.split('>')}}},
                              query_fields)
                        .sort('stats_info.review_count', -1)
                        .skip(page * top_number)
                        .limit(top_number)
                        .batch_size(3000))
        final_data.extend(current_data)

    # Comparing the dicts themselves (plain sorted()) gives an arbitrary order
    # on Python 2 and raises TypeError on Python 3. Rank by review count
    # instead. The sort is stable, so documents whose projection does not
    # carry stats_info.review_count keep their fetch order (category order,
    # each category already sorted by review count in the query).
    final_data.sort(key=_doc_review_count, reverse=True)
    return jsonify({'status': 'ok', 'data': final_data[:top_number]})
def all_categories():
    '''Return the distinct ``category.0`` values as '>'-joined names.

    Falsy entries returned by distinct() are skipped. The result is
    jsonify({'status': 'ok', 'data': [{'name': ...}, ...]}).
    '''
    collection = mongo_util.get_mongo_db()[conf.MONGO_COL]
    category_paths = collection.distinct('category.0')
    named = [{'name': '>'.join(path)} for path in category_paths if path]
    return jsonify({'status': 'ok', 'data': named})
def all_categories():
    '''Fetch all categories.

    Reads the distinct ``category.0`` values from the configured collection
    and returns jsonify({'status': 'ok', 'data': [{'name': ...}, ...]}),
    where each name is the category path joined with '>'.
    '''
    db = mongo_util.get_mongo_db()
    all_cate = db[conf.MONGO_COL].distinct('category.0')
    # Skip falsy entries: '>'.join(None) raises TypeError and an empty path
    # would produce a nameless category.
    all_cate_info = [{'name': '>'.join(path)} for path in all_cate if path]
    return jsonify({'status': 'ok', 'data': all_cate_info})
def category_commodity(category):
    '''Count the commodities whose ``category.0`` matches *category*.

    Every '$' in *category* is replaced by '&' and the result is split on
    '>' to form the category path used in the query. Returns
    jsonify({'status': 'ok', 'data': {'name': ..., 'count': ...}}).
    '''
    db = mongo_util.get_mongo_db()
    category_name = category.replace('$', '&')
    matches = db[conf.MONGO_COL].find({'category.0': category_name.split('>')})
    payload = {'name': category_name, 'count': matches.count()}
    return jsonify({'status': 'ok', 'data': payload})
def single_commodity(asin):
    '''Return the requested fields of the commodity with the given ASIN.

    Request arguments:
        field: Python-literal list of field names to project
               (default ['ASIN']); ``_id`` is always excluded.

    Returns jsonify({'status': 'ok', 'data': <document or None>}).
    '''
    db = mongo_util.get_mongo_db()
    # NOTE(review): eval() runs the raw request argument as Python code;
    # a literal-only parser would be safer.
    requested = eval(request.args.get('field', "['ASIN']"))
    projection = dict.fromkeys(requested + ['_id'], 1)
    projection['_id'] = 0
    document = db[conf.MONGO_COL].find_one({'ASIN': asin}, projection)
    return jsonify({'status': 'ok', 'data': document})
def seller_mobile_field(field, **kwargs):
    '''Aggregate the sellers seen on commodities of the categories in *field*.

    *field* is looked up in conf.CATEGORY_DICT (falling back to
    ['Pet Supplies>Dogs']). Every commodity whose ``category.0`` equals one
    of those paths is scanned; sellers are de-duplicated by the MD5 digest of
    their ``link``. Seller records without a ``link`` are attributed to a
    single Amazon bucket, which is excluded from the result.

    Extra keyword arguments are accepted but not used.

    Returns a list of
    {'name': ..., 'seller_info': {'count', 'keywords', 'img', 'link'}}
    dicts, omitting sellers named 'Amazon' or with a None name.
    '''
    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])
    db = mongo_util.get_mongo_db()
    total_data = []
    for category in target_category_set:
        cur_data = db['commodity'].find(
            {'category.0': category.split('>')},
            {'seller': 1, 'ASIN': 1}).batch_size(3000)
        total_data.extend(cur_data)

    amazon_link = 'http://www.amazon.com'
    seller_set = {}
    for data in total_data:
        # .get(): documents without a 'seller' field are simply skipped
        # instead of raising KeyError.
        for time_seller in data.get('seller') or []:
            for single_seller in time_seller.get('seller') or []:
                try:
                    if 'link' in single_seller:
                        link = single_seller['link']
                        key = md5(link).digest()
                        entry = seller_set.get(key)
                        if entry is None:
                            seller_set[key] = {
                                'count': 1,
                                'name': single_seller['name'],
                                'link': link,
                                'img': single_seller.get('img', ''),
                            }
                        else:
                            entry['count'] += 1
                            # 'Null' is a placeholder name; keep the last
                            # real name seen for this link.
                            if single_seller['name'] != 'Null':
                                entry['name'] = single_seller['name']
                            entry['link'] = link
                            entry['img'] = single_seller.get('img', '')
                    else:
                        is_amazon = single_seller['name'] == 'Amazon'
                        amazon_key = md5(amazon_link).digest()
                        if amazon_key not in seller_set:
                            seller_set[amazon_key] = {
                                'count': 1,
                                'name': 'Amazon',
                                'link': amazon_link,
                                'img': '',
                            }
                        elif is_amazon:
                            # Only Amazon-named records bump the counter; a
                            # link-less record with another name no longer
                            # resets the existing bucket to 1.
                            seller_set[amazon_key]['count'] += 1
                except (AttributeError, KeyError, TypeError, ValueError):
                    # Best effort: a malformed seller record is skipped.
                    continue

    result = []
    for info in seller_set.values():
        name = info['name']
        # Filter before looking up keywords so no query is spent on entries
        # that are dropped anyway.
        if name == 'Amazon' or name is None:
            continue
        result.append({
            'name': name,
            'seller_info': {
                'count': info['count'],
                'keywords': get_keywords_info(name, type='seller'),
                'img': info.get('img', ''),
                'link': info.get('link', ''),
            },
        })
    return result
def single_commodity(asin):
    '''Look up one commodity by ASIN and return the fields the caller asked for.

    The ``field`` request argument is a Python-literal list of field names
    (default ['ASIN']); ``_id`` is never returned. The response is
    jsonify({'status': 'ok', 'data': <document or None>}).
    '''
    db = mongo_util.get_mongo_db()
    # NOTE(review): eval() on a request argument executes arbitrary code;
    # consider a literal-only parser.
    field_names = eval(request.args.get('field', "['ASIN']"))
    projection = {}
    for field_name in field_names + ['_id']:
        projection[field_name] = 1
    projection['_id'] = 0
    collection = db[conf.MONGO_COL]
    return jsonify({
        'status': 'ok',
        'data': collection.find_one({'ASIN': asin}, projection),
    })
def category_commodity(category):
    '''Return the name of a category and how many commodities it contains.

    *category* has each '$' replaced by '&'; the '>'-separated parts of the
    result are matched against ``category.0``. The response is
    jsonify({'status': 'ok', 'data': {'name': ..., 'count': ...}}).
    '''
    db = mongo_util.get_mongo_db()
    restored_name = category.replace('$', '&')
    path_parts = restored_name.split('>')
    total = db[conf.MONGO_COL].find({'category.0': path_parts}).count()
    return jsonify({
        'status': 'ok',
        'data': {'name': restored_name, 'count': total},
    })
def get_keywords_info(seller, type='seller'):
    '''Return up to three keywords stored for a seller or a brand.

    Args:
        seller: value matched against the ``name`` field.
        type: 'seller' or 'brand'. Selects both the collection (same name as
            the type) and the keyword field ('seller_keywords' or
            'brand_keywords').

    Returns:
        The ``word`` values of the first three keyword entries of the first
        matching record. ['good', 'great', 'bad'] is returned when no record
        matches or the record has no keyword field.

    Raises:
        ValueError: if *type* is neither 'seller' nor 'brand'. Previously
            this surfaced as an UnboundLocalError.
    '''
    keyword_columns = {'seller': 'seller_keywords', 'brand': 'brand_keywords'}
    if type not in keyword_columns:
        raise ValueError('unknown keyword type: %r' % (type,))
    target_col = keyword_columns[type]

    db = mongo_util.get_mongo_db()
    # Only the first match is used, so ask for a single document instead of
    # materialising the whole cursor.
    record = db[type].find_one({'name': seller}, {target_col: 1})
    if not record or target_col not in record:
        return ['good', 'great', 'bad']
    # Slice before extracting so only the three returned entries are touched.
    return [entry['word'] for entry in record[target_col][:3]]
def worker_process():
    '''Download the file of one pending request and notify its callback.

    Picks one ``file_request`` record whose state is 'not_processed' and
    returns immediately if there is none. Otherwise the record's ``url`` is
    fetched, the response body is written to settings.DATA_DIR/<record id>,
    the record is saved with state 'downloaded', and a GET is sent to the
    record's ``callback`` with ``file_id`` and ``request_id`` both set to the
    record id.
    '''
    col = mongo_util.get_mongo_db()['file_request']
    record = col.find_one({'state': 'not_processed'})
    if record is None:
        return

    record_id = str(record['_id'])
    res = requests.get(record['url'])
    filepath = os.path.join(settings.DATA_DIR, record_id)
    # The with-block closes the handle even when the write fails.
    with open(filepath, 'wb') as f:
        f.write(res.content)

    record['state'] = 'downloaded'
    col.save(record)

    callback = record['callback']
    requests.get(callback, params={
        'file_id': record_id,
        'request_id': record_id,
    })
def category_commodity_info(category):
    '''Return one page of commodities whose ``category.0`` equals *category*.

    *category* has each '$' replaced by '&' and is split on '>' to form the
    category path. Results are ordered by ``stats_info.review_count``
    descending.

    Request arguments:
        fields: Python-literal list of field names to project
                (default ['ASIN']); ``_id`` is always excluded.
        page:   1-based page number (default 1), conf.ITEM_PERPAGE items each.

    Returns jsonify({'status': 'ok', 'page': page, 'data': [...]}).
    '''
    db = mongo_util.get_mongo_db()
    category_path = category.replace('$', '&').split('>')
    # NOTE(review): eval() runs the raw request argument as Python code;
    # a literal-only parser would be safer.
    wanted = eval(request.args.get('fields', "['ASIN']"))
    projection = dict.fromkeys(wanted + ['_id'], 1)
    projection['_id'] = 0
    page = int(request.args.get('page', '1'))

    cursor = db[conf.MONGO_COL].find({'category.0': category_path}, projection)
    cursor = cursor.sort('stats_info.review_count', -1)
    cursor = cursor.skip((page - 1) * conf.ITEM_PERPAGE)
    cursor = cursor.limit(conf.ITEM_PERPAGE).batch_size(2000)
    return jsonify({'status': 'ok', 'page': page, 'data': list(cursor)})
def dispatch_by_asin(asin):
    '''Return the product image URL and chart image paths for one ASIN.

    The commodity's first category path locates its chart directory under
    ROOT_DIR (``ROOT_DIR + 'a/b/c' + '/' + asin``). Chart file names are
    grouped by the text before their first '-' (the chart type).

    Request arguments:
        filter: 'all' returns every file in the directory; any other value
                (the default) returns, per chart type, the file whose name
                sorts last (the original comment treats this as the newest).

    Returns:
        jsonify({'status': 'ok', 'data': {'img': ..., 'charts': [...]}}), or
        jsonify({'status': 'error', 'data': 'target data not exists'}) when
        the commodity is missing or has no category.
    '''
    db = mongo_util.get_mongo_db()
    target_data = db['commodity'].find_one(
        {'ASIN': asin}, {'category': 1, 'productInfo.img': 1})
    if not target_data or not target_data.get('category'):
        return jsonify({'status': 'error', 'data': 'target data not exists'})

    category_path = '/'.join(target_data['category'][0])
    img_dir = ROOT_DIR + category_path + '/' + asin
    try:
        all_target_files = os.listdir(img_dir)
    except OSError:
        # No chart directory for this product: report no charts rather than
        # failing the whole request.
        all_target_files = []

    if request.args.get('filter', '') != 'all':
        # Group by the exact type prefix. Building a regex from the prefix
        # let type 'a' also match 'ab-...' files and broke on prefixes that
        # contain regex metacharacters.
        last_by_type = {}
        for file_name in all_target_files:
            chart_type = file_name.split('-')[0]
            current = last_by_type.get(chart_type)
            if current is None or file_name > current:
                last_by_type[chart_type] = file_name
        filter_img_files = [last_by_type[chart_type]
                            for chart_type in sorted(last_by_type)]
    else:
        filter_img_files = all_target_files

    access_prefix = ACCESS_DIR + category_path + '/' + asin + '/'
    all_access_imgs = [{'path': access_prefix + file_name}
                       for file_name in filter_img_files]

    # A document without productInfo/img yields an empty image URL instead
    # of a KeyError/IndexError.
    product_info = target_data.get('productInfo') or [{}]
    img = product_info[0].get('img') or ''
    asin_url = ''
    if img.startswith('http:'):
        # Already a remote URL: use it as is.
        asin_url = img
    elif img.startswith('/mnt/'):
        # Local path: drop the leading '/mnt' component and serve the rest
        # from ACCESS_DIR.
        asin_url = ACCESS_DIR + '/'.join(img.split('/')[2:])

    return jsonify({'status': 'ok',
                    'data': {'img': asin_url, 'charts': all_access_imgs}})
def category_commodity_info(category):
    '''Fetch a page of commodity documents for the given category name.

    The category name has '$' replaced by '&' and is split on '>' before
    being matched against ``category.0``. Documents are sorted by
    ``stats_info.review_count`` in descending order.

    Request arguments:
        fields: Python-literal list of field names to return
                (default ['ASIN']); ``_id`` is never returned.
        page:   1-based page number (default 1); each page holds
                conf.ITEM_PERPAGE documents.

    Returns jsonify({'status': 'ok', 'page': page, 'data': [...]}).
    '''
    db = mongo_util.get_mongo_db()
    path_parts = category.replace('$', '&').split('>')
    # NOTE(review): eval() on a request argument executes arbitrary code;
    # consider a literal-only parser.
    field_names = eval(request.args.get('fields', "['ASIN']"))
    projection = {}
    for field_name in field_names + ['_id']:
        projection[field_name] = 1
    projection['_id'] = 0
    page = int(request.args.get('page', '1'))

    documents = list(
        db[conf.MONGO_COL]
        .find({'category.0': path_parts}, projection)
        .sort('stats_info.review_count', -1)
        .skip((page - 1) * conf.ITEM_PERPAGE)
        .limit(conf.ITEM_PERPAGE)
        .batch_size(2000)
    )
    return jsonify({'status': 'ok', 'page': page, 'data': documents})
def brand_mobile_field(field, **kwargs):
    '''Aggregate brand statistics over the categories grouped under *field*.

    *field* is looked up in conf.CATEGORY_DICT (falling back to
    ['Pet Supplies>Dogs']). For every commodity whose ``category.0`` equals
    one of those paths, the brand name of its first ``productInfo`` entry is
    counted and its ``stats_info.review_count`` is summed.

    NOTE(review): keyword arguments (including ``limit``) are accepted but
    not applied, so every brand is returned; confirm whether truncation is
    wanted.

    Returns a list of
    {'name': ..., 'brand_info': {'count', 'review_count', 'keywords'}}
    dicts ordered by review_count, highest first.
    '''
    target_category_set = conf.CATEGORY_DICT.get(field, ['Pet Supplies>Dogs'])
    db = mongo_util.get_mongo_db()
    final_data = []
    for category in target_category_set:
        cur_data = db.commodity.find(
            {'category.0': category.split('>')},
            {'productInfo': 1, 'offer': 1, 'ASIN': 1, 'stats_info': 1},
        ).batch_size(3000)
        final_data.extend(cur_data)

    brand_set = {}
    for data in final_data:
        try:
            brand_name = data['productInfo'][0]['brand']['name']
            entry = brand_set.setdefault(
                brand_name, {'count': 0, 'review_count': 0})
        except (KeyError, IndexError, TypeError):
            # No usable brand on this product: skip it.
            continue
        entry['count'] += 1
        try:
            entry['review_count'] += data['stats_info']['review_count']
        except (KeyError, TypeError):
            # A product without a numeric review count still counts toward
            # the brand but adds no reviews. Every entry therefore always has
            # a 'review_count', which the sort below relies on.
            pass

    ranked = sorted(brand_set.items(),
                    key=lambda item: item[1]['review_count'],
                    reverse=True)
    return [{'name': name,
             'brand_info': {
                 'count': info['count'],
                 'review_count': info['review_count'],
                 'keywords': get_keywords_info(name, type='brand'),
             }}
            for name, info in ranked]
def custom_query():
    '''Custom query for data.

    Request arguments:
        query: Python-literal dict used as the find() filter (default "{}").
        ret:   Python-literal dict used as the projection (default "{}");
               ``_id`` is always excluded from the result.
        page:  1-based page number (default 1), conf.ITEM_PERPAGE items each.

    Returns jsonify({'status': 'ok', 'data': [...]}) on success, and
    jsonify({'status': 'error', 'data': 'not valid custom query'}) when
    neither ``query`` nor ``ret`` is present or the query fails.
    '''
    if "query" in request.args or "ret" in request.args:
        try:
            # SECURITY: eval() executes arbitrary expressions taken straight
            # from the request. Kept so existing callers keep working; it
            # should be replaced by a literal-only parser.
            query = eval(request.args.get('query', "{}"))
            ret = eval(request.args.get('ret', "{}"))
            ret['_id'] = 0
            db = mongo_util.get_mongo_db()
            page = int(request.args.get('page', '1'))
            custom_cursor = (db[conf.MONGO_COL].find(query, ret)
                             .skip((page - 1) * conf.ITEM_PERPAGE)
                             .limit(conf.ITEM_PERPAGE)
                             .batch_size(2000))
            return jsonify({'status': 'ok', 'data': list(custom_cursor)})
        except Exception:
            # Report any parsing/query failure as an invalid query, without
            # also swallowing SystemExit/KeyboardInterrupt as a bare
            # ``except:`` would.
            return jsonify({'status': 'error',
                            'data': 'not valid custom query'})
    return jsonify({'status': 'error', 'data': 'not valid custom query'})
def seller_info(seller_name):
    '''Return the first ``seller`` record named *seller_name*, without ``_id``.

    Returns an empty dict when no record matches.
    '''
    db = mongo_util.get_mongo_db()
    # Only the first match is returned, so fetch a single document instead
    # of loading every match into a list.
    record = db['seller'].find_one({'name': seller_name}, {'_id': 0})
    return record if record else {}
def brand_info(brand_name):
    '''Return the first ``brand`` record named *brand_name*, without ``_id``.

    Returns an empty dict when no record matches.
    '''
    db = mongo_util.get_mongo_db()
    # Only the first match is returned, so fetch a single document instead
    # of loading every match into a list.
    record = db['brand'].find_one({'name': brand_name}, {'_id': 0})
    return record if record else {}