def taobao_spider():
    """Run the Taobao search spider once for every stored commodity.

    Loads all rows through ``commodity_base_info`` and, for each of them,
    prints the commodity name and passes its name, type and brand to
    ``search_taobao``.
    """
    model = commodity_base_info()
    for item in model.query.all():
        print(item.commodity_name)
        search_taobao(
            item.commodity_name,
            item.commodity_type,
            item.commodity_brand,
        )
def zhihu_review_spider():
    """Run the Zhihu search spider for every stored commodity.

    For each commodity the value returned by ``get_x_zse_86`` is written to
    ``APP/spider/zhihu_<MMDD>.txt`` as ``<name>,<value minus its first two
    characters>`` (one commodity per line) and then handed, together with the
    commodity's type, brand and name, to ``zhihu_search_spider``.

    Raises:
        FileExistsError: if today's log file already exists, because the
            file is opened in exclusive-creation mode (``'x'``).
    """
    model = commodity_base_info()
    commodities = model.query.all()
    log_path = 'APP/spider/zhihu_{}.txt'.format(time.strftime('%m%d'))
    # The context manager closes the log even when a spider call raises
    # part-way through the run.
    with open(log_path, 'x') as log_file:
        for commodity in commodities:
            print(commodity.commodity_name)
            text = get_x_zse_86(commodity.commodity_name)
            # A real newline terminates each record; the former '/n' literal
            # glued every record onto a single line.
            log_file.write(commodity.commodity_name + ',' + text[2:] + '\n')
            zhihu_search_spider(
                commodity.commodity_type,
                commodity.commodity_brand,
                commodity.commodity_name,
                text,
            )
def index():
    """Render the home page with summary cards for three commodities.

    Each card holds the commodity's name, base price, image path and type,
    plus a ``base_info`` dict built from at most the first four
    ``;``-separated entries of its ``info`` text.

    Returns:
        The rendered ``index.html`` template, with the cards passed as
        ``index_list``.
    """
    commodity = commodity_base_info()
    commodity_list = []
    for row in commodity.query.limit(3):
        # Columns are read through attributes instead of popping them out of
        # vars(row): popping removed the loaded values from the live model
        # instance.
        base_info = {}
        for entry in row.info.replace('>', '').split(';')[:4]:
            parts = entry.split(':')
            if len(parts) < 2:
                # Not a "key:value" entry; it still counts towards the four.
                continue
            values = parts[1].split(',')
            if len(values) >= 3:
                # Entries with three or more comma-separated parts lose
                # their last two parts.
                values = values[:-2]
            base_info[parts[0]] = ''.join(v + ';' for v in values if v != '')
        commodity_list.append({
            'name': row.commodity_name,
            'price': row.commodity_base_price,
            'img_path': row.img_path,
            'commodity_type': row.commodity_type,
            'base_info': base_info,
        })
    return render_template('index.html', index_list=commodity_list)
def base_infoz():
    """Scrape the detail.zol.com.cn listing pages and store each commodity.

    Every URL below is passed to ``phone_info_spider``; each dict it returns
    is copied field by field onto a new ``commodity_base_info`` record, which
    is saved, and the dict is printed.
    """
    # NOTE(review): this list is filled below but never returned or read.
    collected = []
    listing_urls = (
        # Phones
        'http://detail.zol.com.cn/cell_phone_index/subcate57_613_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_1795_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_1673_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_544_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_98_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_34645_list_1.html',
        # Laptops
        'http://detail.zol.com.cn/notebook_index/subcate16_160_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_21_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_544_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_223_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_227_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_613_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_1191_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_34645_list_1.html',
        # CPUs
        'http://detail.zol.com.cn/cpu/amd/',
        'http://detail.zol.com.cn/cpu/intel/',
        # Motherboards
        'http://detail.zol.com.cn/motherboard/gigabyte/',
        'http://detail.zol.com.cn/motherboard/asus/',
        # Graphics cards
        'http://detail.zol.com.cn/vga/colorful/',
        'http://detail.zol.com.cn/vga/galaxy/',
        # Memory
        'http://detail.zol.com.cn/memory/corsair/',
        'http://detail.zol.com.cn/memory/gskill/',
        # Solid-state drives
        'http://detail.zol.com.cn/solid_state_drive/samsung/',
        'http://detail.zol.com.cn/solid_state_drive/toshiba/',
        # Power supplies
        'http://detail.zol.com.cn/power/corsair/',
        'http://detail.zol.com.cn/power/coolermaster/',
    )
    # Scraped keys copied onto the record, in the order they are assigned.
    record_fields = (
        'commodity_brand',
        'commodity_name',
        'commodity_type',
        'img_path',
        'info',
        'commodity_base_price',
    )
    for listing_url in listing_urls:
        for scraped in phone_info_spider(listing_url):
            record = commodity_base_info()
            for field in record_fields:
                setattr(record, field, scraped[field])
            record.save()
            print(scraped)
            collected.append(scraped)
def Contrast():
    """Render the side-by-side comparison page for two commodities.

    Query-string parameters read from ``request.args``:
        commodity_type: type shared by both commodities.
        commodity_name: the primary commodity.
        commodity_contrast_name: the commodity it is compared against.
        contrast_info: comma-separated spec keys to show for both; a missing
            parameter is treated as an empty selection.

    The page also receives the primary commodity's price offers, ordered by
    descending price and flagged with ``collection_flag`` 1 when the
    logged-in user has collected them, and the primary commodity's reviews
    with ``<em>`` tags stripped.

    Returns:
        The rendered ``commodity_contrast.html`` template.

    Raises:
        werkzeug.exceptions.NotFound: via ``abort(404)`` when either
            commodity does not exist for the given type.
    """
    # Function-scope import so the 404 handling does not depend on the
    # module's import block.
    from flask import abort

    commodity_type = request.args.get('commodity_type')
    commodity_name = request.args.get('commodity_name')
    commodity_contrast_name = request.args.get('commodity_contrast_name')
    # "or ''" keeps a missing parameter from raising AttributeError.
    contrast_info = set((request.args.get('contrast_info') or '').split(','))

    commodity = commodity_base_info()
    commodity_review = commodity_review_info()
    commodity_price = commodity_price_info()
    user = User()

    # Names of every commodity of this type, passed to the template.
    same_type = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type).all()
    phone_list = [row.commodity_name for row in same_type]

    primary = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type,
        commodity_base_info.commodity_name == commodity_name).first()
    other = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type,
        commodity_base_info.commodity_name == commodity_contrast_name).first()
    if primary is None or other is None:
        # An unknown commodity used to surface as IndexError (HTTP 500).
        abort(404)

    primary_keys, base_info = _contrast_spec_fields(primary.info, contrast_info)
    other_keys, other_base_info = _contrast_spec_fields(other.info, contrast_info)
    info = {
        'name': primary.commodity_name,
        'price': primary.commodity_base_price,
        'img_path': primary.img_path,
        'commodity_type': primary.commodity_type,
        'base_info': base_info,
        'phone_list': phone_list,
        # NOTE(review): keys present on both commodities appear twice,
        # exactly as before this rewrite.
        'info_list': primary_keys + other_keys,
    }
    other_info = {
        'name': other.commodity_name,
        'price': other.commodity_base_price,
        'img_path': other.img_path,
        'commodity_type': other.commodity_type,
        'base_info': other_base_info,
    }

    # Ids of the price offers the logged-in user has collected.
    collected_ids = set()
    if 'username' in session:
        account = user.query.filter(
            User.username == session['username']).first()
        # A session naming a user that no longer exists is treated as
        # anonymous instead of raising IndexError.
        if account is not None:
            collected = db.session.query(
                commodity_price_info, user_collection).outerjoin(
                    user_collection,
                    commodity_price_info.id == user_collection.commodity_info_id
                ).filter(
                    commodity_price_info.commodity_type == commodity_type,
                    commodity_price_info.commodity_name == commodity_name,
                    user_collection.user_id == account.user_id).all()
            collected_ids = {pair[0].id for pair in collected}

    price_infos = commodity_price.query.filter(
        commodity_price_info.commodity_type == commodity_type,
        commodity_price_info.commodity_name == commodity_name).order_by(
            -commodity_price_info.price).all()
    price_list = [
        {
            'price': offer.price,
            'price_title': offer.price_title,
            'price_url': offer.price_url,
            'price_img_path': offer.price_img_path,
            'price_id': offer.id,
            'commodity_name': offer.commodity_name,
            'collection_flag': 1 if offer.id in collected_ids else 0,
        }
        for offer in price_infos
    ]

    review_infos = commodity_review.query.filter(
        commodity_review_info.commodity_type == commodity_type,
        commodity_review_info.commodity_name == commodity_name).all()
    review_list = [
        {
            'review_url': review.review_url,
            'review_title': review.review_title.replace('<em>', '').replace('</em>', ''),
            'review_img_path': review.review_img_path,
            'review_excerpt': review.review_excerpt.replace('<em>', '').replace('</em>', ''),
        }
        for review in review_infos
    ]

    return render_template('commodity_contrast.html',
                           commodity_type=commodity_type,
                           commodity_name=commodity_name,
                           info=info,
                           price_list=price_list,
                           review_list=review_list,
                           other_info=other_info)


def _contrast_spec_fields(raw_info, wanted):
    """Parse a commodity's raw ``info`` text for the comparison page.

    Args:
        raw_info: ``;``-separated ``key:value`` entries, where a value is a
            ``,``-separated list.
        wanted: collection of keys whose values should be returned.

    Returns:
        A ``(keys, selected)`` tuple: ``keys`` lists every key found, in
        order; ``selected`` maps each key in ``wanted`` to its non-empty
        value parts, each followed by ``;``.
    """
    # Remove each span from "击败" up to the next comma, then stray '>'.
    cleaned = re.sub(r'(击败.*?),', '', raw_info).replace('>', '')
    keys = []
    selected = {}
    for entry in cleaned.split(';'):
        parts = entry.split(':')
        if len(parts) < 2:
            continue
        key = parts[0]
        keys.append(key)
        if key not in wanted:
            continue
        values = parts[1].split(',')
        if len(values) >= 3:
            # Values with three or more parts lose their last two parts.
            values = values[:-2]
        selected[key] = ''.join(v + ';' for v in values if v != '')
    return keys, selected
def commodity_brand(commodity_type, commodity_brand):
    """Render the commodity listing for a type/brand, or for a search.

    On ``POST`` the ``search_keyword`` form field is matched (substring)
    against commodity type, brand and name. Otherwise the listing is
    filtered by ``commodity_type`` and, unless ``commodity_brand`` is
    ``'*'``, by ``commodity_brand`` as well.

    Args:
        commodity_type: commodity type to list.
        commodity_brand: brand to list, or ``'*'`` for every brand.

    Returns:
        The rendered ``commodity_brand.html`` template with the summary
        cards (``index_list``), the breadcrumb (``path``) and the number of
        matches (``commodity_sum``).
    """
    commodity = commodity_base_info()
    path_list = []
    if request.method == 'POST':
        key_word = request.form.get('search_keyword')
        res = commodity.query.filter(
            or_(commodity_base_info.commodity_type.contains(key_word),
                commodity_base_info.commodity_brand.contains(key_word),
                commodity_base_info.commodity_name.contains(key_word))).all()
        path_list.append('搜索')
        path_list.append(key_word)
    elif commodity_brand == '*':
        res = commodity.query.filter(
            commodity_base_info.commodity_type == commodity_type).all()
        path_list.append(commodity_type)
    else:
        res = commodity.query.filter(
            commodity_base_info.commodity_type == commodity_type,
            commodity_base_info.commodity_brand == commodity_brand).all()
        path_list.append(commodity_type)
        path_list.append(commodity_brand)

    commodity_list = []
    for row in res:
        # Columns are read through attributes instead of popping them out of
        # vars(row), which removed loaded values from the live model
        # instance; this also stops the loop rebinding the commodity_type
        # parameter.
        base_info = {}
        for entry in row.info.replace('>', '').split(';')[:4]:
            parts = entry.split(':')
            if len(parts) < 2:
                # Not a "key:value" entry; it still counts towards the four.
                continue
            values = parts[1].split(',')
            if len(values) >= 3:
                # Entries with three or more comma-separated parts lose
                # their last two parts.
                values = values[:-2]
            base_info[parts[0]] = ''.join(v + ';' for v in values if v != '')
        commodity_list.append({
            'name': row.commodity_name,
            'price': row.commodity_base_price,
            'img_path': row.img_path,
            'commodity_type': row.commodity_type,
            'base_info': base_info,
        })
    return render_template('commodity_brand.html',
                           index_list=commodity_list,
                           path=path_list,
                           commodity_sum=len(res))