def local_search(params):
    """Search goods for the mini-app and return them in search-rank order.

    Reads the following keys from ``params``:
        keyword: the search text (unicode is encoded to UTF-8 bytes).
        page:    page number, default 1.
        count:   page size, default 20.
        tid:     sort selector, default 1 (see below).

    Flow:
        1. If ``find_goods_info(keyword)`` yields a record, that record is
           stored via ``save_goods_info`` and returned as the only result.
        2. ``tid == 9`` hands the request over to ``super_search_miniapp``
           with ``yq`` and ``tid`` both forced to 0.
        3. Otherwise the index is queried through ``searcher.search`` with a
           sort spec chosen by ``tid`` (8: coupon_amount -1, 6: sales -1,
           7: coupon_fee 1, anything else: no sort spec), the hits are
           loaded per table through ``TbkGoods`` and emitted in the order
           the searcher returned them.

    Returns:
        ``{'errcode': 0, 'data': [...]}`` or whatever
        ``super_search_miniapp`` returns for ``tid == 9``.
    """
    keyword = params.get("keyword")
    page = int(params.get("page", 1))
    count = int(params.get("count", 20))
    sort = int(params.get('tid', 1))
    if isinstance(keyword, unicode):
        keyword = keyword.encode("utf-8")

    # Direct hit on a single goods record: persist it and answer right away.
    share_text_info = find_goods_info(keyword)
    if share_text_info:
        save_goods_info(share_text_info['num_id'], share_text_info)
        return {'errcode': 0, 'data': [_ship_miniapp(share_text_info)]}

    if sort == 9:
        return super_search_miniapp({
            'keyword': keyword,
            'page': page,
            'count': count,
            'yq': 0,
            'tid': 0
        })

    # The literal is rebuilt on every call, so the chosen dict is never shared.
    sort_dict = {
        8: {'coupon_amount': -1},
        6: {'sales': -1},
        7: {'coupon_fee': 1},
    }.get(sort, {})

    hits = searcher.search(keyword, sort_dict, page=page, count=count)
    LOG.info('keyword: %s, ret: %s', keyword, len(hits))

    # Bucket the hit ids by table while remembering the overall rank order.
    ids_by_table = {}
    ordered_id = []
    for hit in hits:
        table_name = hit.get("table", 'goods')
        goods_id = int(hit['id'])
        ids_by_table.setdefault(table_name, []).append(goods_id)
        ordered_id.append(goods_id)

    goods_obj = TbkGoods()
    shipped_by_id = {}
    for table_name, ids in ids_by_table.items():
        goods_obj.__table__ = table_name
        cond = {'num_id': {'$in': ids}}
        for goods in goods_obj.find_goods_by_cond(cond, 1, count=100):
            if goods.get('num_id') is None:
                LOG.info(goods['_id'])
                continue
            shipped_by_id[goods['num_id']] = _ship_miniapp(goods)

    # Re-emit in searcher order, skipping ids with no (or empty) payload.
    candidates = (shipped_by_id.get(goods_id) for goods_id in ordered_id)
    result = [payload for payload in candidates if payload]
    return {'errcode': 0, 'data': result}
def crawler_one_page(link, table, mid):
    """Fetch one listing page and store/index every goods entry found on it.

    The domain of ``link`` selects an entry of ``DATA_FIELD`` whose
    ``res_data`` key names the list field in the response and whose ``id``
    key names the id field of each list element. When the domain has no
    entry the function logs that fact and returns ``None`` without fetching.

    For each listed id, ``_ship_goods`` builds the record; falsy results are
    skipped. Every kept record is tagged with ``mid`` and ``table``, pushed
    to ``searcher.update_index`` and saved through ``TbkGoods`` into
    ``table``. The elapsed time for the whole page is logged at the end.
    """
    domain = urlparse(link).netloc
    config = DATA_FIELD.get(domain)
    if not config:
        LOG.info("domain: %s not config", domain)
        return

    res_data_field = config.get("res_data")
    id_field = config.get("id")

    start = time.time()
    response = HttpClient().get(link)
    for entry in response.get(res_data_field, []):
        record = _ship_goods(entry.get(id_field))
        if not record:
            continue
        # After the first conversion ``table`` is no longer unicode, so this
        # check is a no-op on later iterations.
        if isinstance(table, unicode):
            table = table.encode("utf-8")
        record.update({'mid': mid, 'table': table})
        searcher.update_index(record)
        goods_obj = TbkGoods(**record)
        goods_obj.__table__ = table
        goods_obj.save()
    LOG.info("link: %s takes: %s", link, time.time() - start)
def get_data(self, params):
    """Return one page of goods from the ``haitao`` table.

    ``self.validate(params)`` runs first; a result whose ``errcode`` is not
    0 is returned unchanged. Otherwise the page identified by ``self.page``
    and ``self.count`` is loaded with an empty condition and, when
    ``self.get_sort_field()`` returns something truthy, ``.sort(...)`` is
    called on the loaded result with that value.

    Returns:
        The validation result on failure, else
        ``{'errcode': 0, 'data': <loaded goods>}``.
    """
    checked = self.validate(params)
    if checked['errcode'] != 0:
        return checked

    store = TbkGoods()
    store.__table__ = 'haitao'
    sort_field = self.get_sort_field()
    rows = store.find_goods_by_cond({}, self.page, self.count)
    if sort_field:
        rows.sort(sort_field)
    return {'errcode': 0, 'data': rows}
def miniapp_goods_detail(gid, mid):
    """Build the mini-app detail payload for a single goods id.

    ``gid`` is converted with ``int``. The goods record is looked up through
    ``get_goods_info_by_id`` first. If that yields nothing, ``TbkGoods`` is
    queried instead, using the table returned by ``get_table_by_mid(mid)``
    when ``mid`` is a non-empty digit string and a table is found.

    Returns:
        ``{'errcode': 0, 'data': <detail>}`` on success, or
        ``{'errcode': -1, 'errmsg': ...}`` when neither lookup finds the
        goods.
    """
    num_id = int(gid)

    goods = get_goods_info_by_id(num_id)
    if goods:
        return {'errcode': 0, 'data': _ship_miniapp_detail(goods)}

    finder = TbkGoods(num_id=num_id)
    if mid and mid.isdigit():
        table_name = get_table_by_mid(mid)
        if table_name:
            finder.__table__ = table_name

    goods = finder.find_goods_by_id()
    if not goods:
        return {'errcode': -1, 'errmsg': u"找不到商品"}
    return {'errcode': 0, 'data': _ship_miniapp_detail(goods)}
def update_one_by_one(table):
    """Walk ``table`` page by page and refresh every goods record that is stale.

    Pages of 1000 records are read through ``TbkGoods.find_goods_by_cond``
    with an empty condition until a page comes back empty. A record is
    skipped when it has a truthy ``update_time`` less than 3600000 away from
    the current time in milliseconds; every other record is passed to
    ``update_goods(title, num_id, table)``.

    The number of ``update_goods`` calls is printed when the walk finishes.
    Previously the counter was never incremented, so the printed value was
    always 0.

    Args:
        table: name of the goods table to process.
    """
    # NOTE(review): assumes ``update_time`` is stored in epoch milliseconds,
    # matching how ``now`` is computed below -- confirm against the writer.
    fresh_window_ms = 3600000

    page = 1
    count = 1000
    update_count = 0
    goods_obj = TbkGoods()
    goods_obj.__table__ = table
    LOG.info(table)

    have_data = True
    while have_data:
        # Flipped back to True by the first record of the page, so an empty
        # page ends the walk.
        have_data = False
        goods_list = goods_obj.find_goods_by_cond({}, page, count)
        now = int(time.time() * 1000)
        for goods in goods_list:
            have_data = True
            update_time = goods.get('update_time')
            if update_time and now - update_time < fresh_window_ms:
                continue
            update_goods(goods['title'], goods['num_id'], table)
            update_count += 1
        page += 1
        # Logs the number of the page about to be fetched next.
        LOG.info("page: %s", page)
    print(update_count)