def taobao_spider():
    """Run the Taobao search for every stored commodity.

    Loads all rows through ``commodity_base_info().query``, echoes each
    commodity name to stdout, and passes the row's name, type and brand to
    ``search_taobao``.
    """
    base_model = commodity_base_info()
    for item in base_model.query.all():
        print(item.commodity_name)
        search_taobao(
            item.commodity_name,
            item.commodity_type,
            item.commodity_brand,
        )
def zhihu_review_spider():
    """Run the Zhihu review spider for every stored commodity.

    For each row returned by ``commodity_base_info().query.all()`` the value
    produced by ``get_x_zse_86`` is recorded in a dated log file (one
    ``name,value`` line per commodity) and then handed, together with the
    row's type, brand and name, to ``zhihu_search_spider``.

    Raises:
        FileExistsError: if the log file for today's date already exists,
            because the file is opened in exclusive-create mode (``'x'``).
    """
    commodities = commodity_base_info().query.all()
    log_path = 'APP/spider/zhihu_{}.txt'.format(time.strftime('%m%d'))
    # The context manager guarantees the log is flushed and closed even when
    # one of the spider calls below raises.
    with open(log_path, 'x') as log_file:
        for commodity in commodities:
            print(commodity.commodity_name)
            text = get_x_zse_86(commodity.commodity_name)
            # The first two characters of the value are left out of the log.
            # Records end with a real newline; the previous '/n' literal
            # wrote the two characters "/n" and produced one endless line.
            log_file.write(commodity.commodity_name + ',' + text[2:] + '\n')
            zhihu_search_spider(commodity.commodity_type,
                                commodity.commodity_brand,
                                commodity.commodity_name, text)
Exemple #3
0
def index():
    """Render the landing page with a preview of the first three commodities.

    Each commodity is passed to the ``index.html`` template as a dict with
    the keys ``name``, ``price``, ``img_path``, ``commodity_type`` and
    ``base_info`` (a mapping of spec name to a ``;``-terminated value string).

    Returns:
        The result of ``render_template('index.html', index_list=...)``.
    """
    commodity = commodity_base_info()
    # Attributes are read directly from the row. Popping them out of
    # ``vars(row)`` removed loaded state from the ORM instance itself and
    # failed with KeyError for any attribute that was not currently loaded.
    commodity_list = [
        {
            'name': item.commodity_name,
            'price': item.commodity_base_price,
            'img_path': item.img_path,
            'commodity_type': item.commodity_type,
            'base_info': _index_preview_specs(item.info),
        }
        for item in commodity.query.limit(3)
    ]
    return render_template('index.html', index_list=commodity_list)


def _index_preview_specs(raw_info, max_entries=4):
    """Parse the leading entries of a raw spec string into a name->text dict.

    ``raw_info`` is expected to look like ``name:v1,v2,...;name:...``; every
    ``>`` character is removed before parsing. Only the first ``max_entries``
    ``;``-separated entries are examined, and entries without a ``:`` are
    skipped but still count towards that limit. ``None`` is treated as an
    empty string.
    """
    specs = {}
    entries = (raw_info or '').replace('>', '').split(';')
    for entry in entries[:max_entries]:
        parts = entry.split(':')
        if len(parts) < 2:
            continue
        values = parts[1].split(',')
        # With three or more comma-separated pieces the last two are dropped
        # (presumably trailing non-spec fragments from the scrape - TODO
        # confirm against the spider output).
        if len(values) >= 3:
            values = values[:-2]
        # The pieces come from a split on ',', so they can never contain the
        # ',;' sequence the old code tried to strip afterwards.
        specs[parts[0]] = ''.join(value + ';' for value in values if value)
    return specs
def base_infoz():
    """Scrape the ZOL listing pages and store every commodity found.

    Each listing URL is handed to ``phone_info_spider``. Every dict it yields
    is copied field by field onto a fresh ``commodity_base_info`` object,
    which is then saved; the dict is also printed. The dicts are collected in
    a local list that is not returned.
    """
    listing_urls = (
        # Phones
        'http://detail.zol.com.cn/cell_phone_index/subcate57_613_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_1795_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_1673_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_544_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_98_list_1.html',
        'http://detail.zol.com.cn/cell_phone_index/subcate57_34645_list_1.html',
        # Laptops
        'http://detail.zol.com.cn/notebook_index/subcate16_160_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_21_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_544_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_223_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_227_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_613_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_1191_list_1.html',
        'http://detail.zol.com.cn/notebook_index/subcate16_34645_list_1.html',
        # CPUs
        'http://detail.zol.com.cn/cpu/amd/',
        'http://detail.zol.com.cn/cpu/intel/',
        # Motherboards
        'http://detail.zol.com.cn/motherboard/gigabyte/',
        'http://detail.zol.com.cn/motherboard/asus/',
        # Graphics cards
        'http://detail.zol.com.cn/vga/colorful/',
        'http://detail.zol.com.cn/vga/galaxy/',
        # Memory
        'http://detail.zol.com.cn/memory/corsair/',
        'http://detail.zol.com.cn/memory/gskill/',
        # Solid-state drives
        'http://detail.zol.com.cn/solid_state_drive/samsung/',
        'http://detail.zol.com.cn/solid_state_drive/toshiba/',
        # Power supplies
        'http://detail.zol.com.cn/power/corsair/',
        'http://detail.zol.com.cn/power/coolermaster/',
    )
    # Fields copied from each scraped dict onto the model, in this order.
    field_names = (
        'commodity_brand',
        'commodity_name',
        'commodity_type',
        'img_path',
        'info',
        'commodity_base_price',
    )
    collected = []
    for listing_url in listing_urls:
        for scraped in phone_info_spider(listing_url):
            row = commodity_base_info()
            for field in field_names:
                setattr(row, field, scraped[field])
            row.save()
            print(scraped)
            collected.append(scraped)
Exemple #5
0
def Contrast():
    """Render the comparison page for two commodities of the same type.

    Query-string parameters read from ``request.args``:
        commodity_type: type shared by both commodities.
        commodity_name: the primary commodity.
        commodity_contrast_name: the commodity it is compared with.
        contrast_info: comma-separated spec names to show; when the
            parameter is absent it is treated as an empty string.

    The template ``commodity_contrast.html`` receives the parsed specs of
    both commodities, the price offers and reviews stored for the primary
    commodity, and - when ``session`` holds a ``username`` - a
    ``collection_flag`` of 1 on every offer that appears in that user's
    ``user_collection`` rows.

    Raises:
        werkzeug.exceptions.NotFound: via ``abort(404)`` when either
            commodity cannot be found (this used to surface as IndexError).
    """
    # Imported locally so this function does not depend on which Flask names
    # the module header happens to import.
    from flask import abort

    commodity_type = request.args.get('commodity_type')
    commodity_name = request.args.get('commodity_name')
    commodity_contrast_name = request.args.get('commodity_contrast_name')
    # Default to '' so a missing parameter no longer fails on None.split.
    contrast_info = request.args.get('contrast_info', '').split(',')

    commodity = commodity_base_info()
    commodity_review = commodity_review_info()
    commodity_price = commodity_price_info()
    user = User()

    # Names of every commodity of the requested type.
    same_type = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type).all()
    phone_list = [row.commodity_name for row in same_type]

    primary_matches = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type,
        commodity_base_info.commodity_name == commodity_name).all()
    other_matches = commodity.query.filter(
        commodity_base_info.commodity_type == commodity_type,
        commodity_base_info.commodity_name == commodity_contrast_name).all()
    if not primary_matches or not other_matches:
        abort(404)
    primary = primary_matches[0]
    other = other_matches[0]

    # Attributes are read directly instead of being popped out of
    # ``vars(row)``, which stripped loaded state from the ORM instance.
    primary_names, primary_specs = _contrast_parse_specs(
        primary.info, contrast_info)
    other_names, other_specs = _contrast_parse_specs(
        other.info, contrast_info)

    info = {
        'name': primary.commodity_name,
        'price': primary.commodity_base_price,
        'img_path': primary.img_path,
        'commodity_type': primary.commodity_type,
        'base_info': primary_specs,
        'phone_list': phone_list,
        # NOTE(review): kept from the original - the list holds the spec
        # names of BOTH commodities, so shared names appear twice. Confirm
        # whether the template relies on that before de-duplicating.
        'info_list': primary_names + other_names,
    }
    other_info = {
        'name': other.commodity_name,
        'price': other.commodity_base_price,
        'img_path': other.img_path,
        'commodity_type': other.commodity_type,
        'base_info': other_specs,
    }

    # Ids of the price offers the logged-in user has collected. The set stays
    # empty for anonymous visitors and for a session whose username no longer
    # matches a User row (previously an IndexError).
    collected_ids = set()
    if 'username' in session:
        accounts = user.query.filter(
            User.username == session['username']).all()
        if accounts:
            user_id = accounts[0].user_id
            collected_rows = db.session.query(
                commodity_price_info, user_collection).outerjoin(
                    user_collection,
                    commodity_price_info.id ==
                    user_collection.commodity_info_id).filter(
                        commodity_price_info.commodity_type == commodity_type,
                        commodity_price_info.commodity_name == commodity_name,
                        user_collection.user_id == user_id).all()
            collected_ids = {row[0].id for row in collected_rows}

    # Offers for the primary commodity, ordered by the negated price column.
    price_infos = commodity_price.query.filter(
        commodity_price_info.commodity_type == commodity_type,
        commodity_price_info.commodity_name == commodity_name).order_by(
            -commodity_price_info.price).all()
    price_list = [
        {
            'price': price_info.price,
            'price_title': price_info.price_title,
            'price_url': price_info.price_url,
            'price_img_path': price_info.price_img_path,
            'price_id': price_info.id,
            'commodity_name': price_info.commodity_name,
            'collection_flag': 1 if price_info.id in collected_ids else 0,
        }
        for price_info in price_infos
    ]

    review_infos = commodity_review.query.filter(
        commodity_review_info.commodity_type == commodity_type,
        commodity_review_info.commodity_name == commodity_name).all()
    # The <em> tags embedded in the stored titles and excerpts are removed.
    review_list = [
        {
            'review_url': review_info.review_url,
            'review_title': review_info.review_title.replace(
                '<em>', '').replace('</em>', ''),
            'review_img_path': review_info.review_img_path,
            'review_excerpt': review_info.review_excerpt.replace(
                '<em>', '').replace('</em>', ''),
        }
        for review_info in review_infos
    ]

    return render_template('commodity_contrast.html',
                           commodity_type=commodity_type,
                           commodity_name=commodity_name,
                           info=info,
                           price_list=price_list,
                           review_list=review_list,
                           other_info=other_info)


def _contrast_parse_specs(raw_info, wanted):
    """Split a raw spec string into (all spec names, selected spec texts).

    ``raw_info`` is expected to look like ``name:v1,v2,...;name:...``. Before
    parsing, every run starting at ``击败`` up to and including the next comma
    is removed, as is every ``>`` character. ``None`` is treated as an empty
    string.

    Returns a tuple ``(names, selected)``: ``names`` lists the name of every
    entry that contains a ``:``, in order; ``selected`` maps each name that
    is also in ``wanted`` to its values, each followed by ``;``.
    """
    cleaned = re.sub(r'(击败.*?),', '', raw_info or '').replace('>', '')
    names = []
    selected = {}
    for entry in cleaned.split(';'):
        parts = entry.split(':')
        if len(parts) < 2:
            continue
        name = parts[0]
        names.append(name)
        if name not in wanted:
            continue
        values = parts[1].split(',')
        # With three or more comma-separated pieces the last two are dropped
        # (presumably trailing non-spec fragments from the scrape - TODO
        # confirm against the spider output).
        if len(values) >= 3:
            values = values[:-2]
        # The pieces come from a split on ',', so they can never contain the
        # ',;' sequence the old code tried to strip afterwards.
        selected[name] = ''.join(value + ';' for value in values if value)
    return names, selected
Exemple #6
0
def commodity_brand(commodity_type, commodity_brand):
    """Render the commodity listing for a type/brand, or for a keyword search.

    On a POST request the form field ``search_keyword`` is matched (via
    ``contains``) against commodity type, brand and name. Otherwise the
    listing is filtered by ``commodity_type`` and, unless ``commodity_brand``
    is ``'*'``, by ``commodity_brand`` as well.

    Args:
        commodity_type: commodity type to list (not used for POST searches).
        commodity_brand: brand to list, or ``'*'`` for every brand.

    Returns:
        The result of rendering ``commodity_brand.html`` with ``index_list``
        (one dict per commodity), ``path`` (the breadcrumb entries) and
        ``commodity_sum`` (the number of matches).
    """
    commodity = commodity_base_info()
    if request.method == 'POST':
        key_word = request.form.get('search_keyword')
        res = commodity.query.filter(
            or_(commodity_base_info.commodity_type.contains(key_word),
                commodity_base_info.commodity_brand.contains(key_word),
                commodity_base_info.commodity_name.contains(key_word))).all()
        path_list = ['搜索', key_word]
    elif commodity_brand == '*':
        res = commodity.query.filter(
            commodity_base_info.commodity_type == commodity_type).all()
        path_list = [commodity_type]
    else:
        res = commodity.query.filter(
            commodity_base_info.commodity_type == commodity_type,
            commodity_base_info.commodity_brand == commodity_brand).all()
        path_list = [commodity_type, commodity_brand]

    # Attributes are read directly from each row. Popping them out of
    # ``vars(row)`` removed loaded state from the ORM instance itself, and
    # the old loop also overwrote the ``commodity_type`` argument.
    commodity_list = [
        {
            'name': item.commodity_name,
            'price': item.commodity_base_price,
            'img_path': item.img_path,
            'commodity_type': item.commodity_type,
            'base_info': _brand_preview_specs(item.info),
        }
        for item in res
    ]

    return render_template('commodity_brand.html',
                           index_list=commodity_list,
                           path=path_list,
                           commodity_sum=len(res))


def _brand_preview_specs(raw_info, max_entries=4):
    """Parse the leading entries of a raw spec string into a name->text dict.

    ``raw_info`` is expected to look like ``name:v1,v2,...;name:...``; every
    ``>`` character is removed before parsing. Only the first ``max_entries``
    ``;``-separated entries are examined, and entries without a ``:`` are
    skipped but still count towards that limit. ``None`` is treated as an
    empty string.
    """
    specs = {}
    entries = (raw_info or '').replace('>', '').split(';')
    for entry in entries[:max_entries]:
        parts = entry.split(':')
        if len(parts) < 2:
            continue
        values = parts[1].split(',')
        # With three or more comma-separated pieces the last two are dropped
        # (presumably trailing non-spec fragments from the scrape - TODO
        # confirm against the spider output).
        if len(values) >= 3:
            values = values[:-2]
        # The pieces come from a split on ',', so they can never contain the
        # ',;' sequence the old code tried to strip afterwards.
        specs[parts[0]] = ''.join(value + ';' for value in values if value)
    return specs