コード例 #1
0
ファイル: mail.py プロジェクト: rumjashka/journalist
def add_contact(name, email, theme, question):
    """Insert one contact request into the 'journalist' collection.

    The four arguments are stored unchanged under the keys
    'name', 'email', 'theme' and 'question'.
    """
    record = dict(name=name, email=email, theme=theme, question=question)
    Mongo.insert('journalist', record)
コード例 #2
0
def add_file(title, description, file, datetime):
    """Insert one file record into the 'img' collection.

    The arguments are stored unchanged under the keys
    'title', 'file', 'description' and 'datetime'.
    """
    record = dict(
        title=title,
        file=file,
        description=description,
        datetime=datetime,
    )
    Mongo.insert('img', record)
コード例 #3
0
class GoodsList:
    """Collect JD search-result product URLs per keyword and store them.

    Results are written to the 'JDGoodsUrl' collection of the 'bayan'
    database through the project's ``Mongo`` wrapper.
    """

    def __init__(self):
        # Number of URLs gathered by the most recent fetch attempt.
        self.count = 0
        db_name = 'bayan'
        self.collection_name = 'JDGoodsUrl'
        self.db = Mongo(db_name)

    def get_list(self, list):
        """Fetch every result page for one keyword and return the URL records.

        Args:
            list: dict describing the keyword. It must provide 'keyword',
                'pages' and 'count'; an '_id' entry, if present, is removed.
                The dict is mutated in place ('pageUrl' is overwritten for
                every product found).

        Returns:
            A list of deep copies of ``list``, one per product, each with
            its own 'pageUrl'.

        The whole fetch is repeated until the number of collected URLs
        equals ``list['count']``.
        """
        headers = {
            'referer': 'https://search.jd.com/Search?keyword=minecraft&enc=utf-8&pvid=b55d6cb7986748d6a32da02876cc9874',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        # pop() instead of del: the key may already be gone (e.g. the same
        # dict is passed in again), which previously raised KeyError.
        list.pop('_id', None)
        # Two requests are issued per 'pages' unit, numbered 1..2*pages.
        last_page = list['pages'] * 2

        # Retry with a loop rather than recursion: the old recursive retry
        # dropped the return value and could exhaust the recursion limit.
        # NOTE(review): the retry is still unbounded, as in the original.
        while True:
            urllist = []
            for page in range(1, last_page + 1):
                url = 'https://search.jd.com/s_new.php?keyword={}&psort=3&enc=utf-8&page={}&s={}&scrolling=y&log_id={}'.format(
                    list['keyword'], page, page * 31, int(time.time()))
                response = requests.get(url, headers=headers).text
                selector = Selector(text=response)
                for good in selector.xpath('//li[@class="gl-item"]'):
                    hrefs = good.xpath('.//a[@target="_blank"]/@href').extract()
                    if not hrefs:
                        # A result item without a link cannot be stored.
                        continue
                    list['pageUrl'] = 'https:' + hrefs[0]
                    print(list)
                    urllist.append(deepcopy(list))

            self.count = len(urllist)
            if self.check(list['count'], urllist) == 'success':
                print(list['keyword'] + ":成功")
                return urllist
            print(list['keyword'] + ":失败,重新抓取")

    def check(self, count, urllist):
        """Store ``urllist`` when the gathered count matches ``count``.

        Returns:
            'success' after inserting the records and resetting
            ``self.count``; ``None`` when the counts differ.
        """
        if self.count != count:
            return None
        self.db.insert(self.collection_name, urllist)
        self.count = 0
        return 'success'

    def run(self):
        """Drop the target collection, then crawl every keyword in turn."""
        self.db.drop(self.collection_name)
        for entry in AllNumber().get_number():
            print("正在抓取关键词:" + entry['keyword'])
            # Reuse this instance instead of building a new GoodsList (and a
            # new Mongo handle) for every keyword.
            self.get_list(entry)
            time.sleep(1)
            time.sleep(1)
コード例 #4
0
class GoodsList:
    """Collect Tmall mobile-search product URLs per keyword and store them.

    Keywords are read from the 'TMkey' collection and results are written
    to the 'TMGoodsUrl' collection of the 'bayan' database.
    """

    # NOTE(review): hard-coded session cookie. It is kept so existing
    # behaviour is unchanged, but it should live in configuration rather
    # than source; override this attribute to supply a different one.
    # The literal is split into two adjacent strings because the original
    # single-quoted literal was broken across two lines (a SyntaxError).
    cookie = (
        'cna=l9oDFcy6VQ4CAd6AqijwI13O; _med=dw:1366&dh:768&pw:1366&ph:768&ist:0; lid=%E8%91%AC%E4%BB%AA%E4%B8%BF%E5%A4%9C%E7%A5%9E%E6%9C%88; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; _uab_collina=156877667696726520994751; hng=CN%7Czh-CN%7CCNY%7C156; enc=twxIgD2w8bZQSql4cagTND22VE%2FhUTEOaq2XkcEtDvxxkz37BO5Mh25gdOoNdNoJF5i9aTpzn%2BrzEdT6wQL1qA%3D%3D; sm4=110100; _m_h5_tk=b33074b6753c714e2c5a32fd78d6c426_1571630922757; _m_h5_tk_enc=db58b55df9684e14b5c1aaef72e5c979; t=68aeed5a9ead7edb2d26b8d916cdf5be; _tb_token_=33b56e71b803e; cookie2=1d0efb3fed419cc8c79c62ce27633524; dnk=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; cq=ccp%3D0; tracknick=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; _l_g_=Ug%3D%3D; unb=2646574036; lgc=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; cookie1=BxpRR3m3mq6u2SKR8tMIAV5PbfT0Mkqa7hIMcGbyJO8%3D; login=true; cookie17=UU6lS5IHpNO1Zw%3D%3D; _nk_=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; sg=%E6%9C%886c; uc1=cookie14=UoTbnKU5vO8R1A%3D%3D&cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&pas=0&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&lng=zh_CN&existShop=false&tag=8&cookie21=U%2BGCWk%2F7p4mBoUyS4E9C; uc3=id2=UU6lS5IHpNO1Zw%3D%3D&nk2=tzejKGxa%2FgjcE9Gg&vt3=F8dByuckA3AzTRgxjWY%3D&lg2=VT5L2FSpMGV7TQ%3D%3D; uc4=nk4=0%40tUQ6%2FECahntTXqHnI5ioo65kDQrdQ7Y%3D&id4=0%40U2xo%2B4EAVHijItFSb4zrqlzll9lp; csg=66cb9310; x5sec=7b22746d616c6c7365617263683b32223a226433303262376437326166623365333865613263653965323938316134613934434a5846744f3046455048736b71766867504c3234674561444449324e4459314e7a51774d7a59374d673d3d227d; pnm_cku822=098%23E1hvevvUvbpvUvCkvvvvvjiPRszOAj1En2sWljD2PmPpsjrUnLSU1jE2PFMWlj3vRphvChCvvvvPvpvhvv2MMQhCvvXvovvvvvvEvpCWpSuUv8ROjovDN%2BClHdUf8B69D70Ode%2BRVA3l%2Bb8rwAtYmq0DW3CQcmx%2Fsj7J%2B3%2BijLjEIEkffvyf8j7yHdBYLjnv6nQ7RAYVEvLvq8yCvv3vpvolaufqRIyCvvXmp99he1KtvpvIphvvvvvvphCvpCBXvvCCN6CvHHyvvhn2phvZ7pvvpiivpCBXvvCmeuwCvvBvpvpZ; '
        'res=scroll%3A1349*5314-client%3A1349*318-offset%3A1349*5314-screen%3A1366*768; isg=BBkZMDWmoCVFC3xjiPXyRrtzKAUzDg1k1P50YDvO3sC_QjjUg_W7KLoQREaRf6WQ; l=dBLiViNPqB8DGUOzBOCZZuI8amQTKIRbSuPRwN4pi_5CG68_WbQOkM1H9FJ6cjWAGn8B4JuaUMvTCFJgJsl0NE8xDfpFlkM2B'
    )

    def __init__(self):
        db_name = 'bayan'
        self.save_collection_name = 'TMGoodsUrl'
        self.collection_name = 'TMkey'
        self.db = Mongo(db_name)

    def get_list(self, list):
        """Fetch the search pages for one keyword and store the products found.

        Args:
            list: dict describing the keyword. It must provide 'keyword'
                and 'page' (number of result pages to request); 'count' is
                read only when at least one product was found. The dict is
                mutated in place ('productName' and 'pageUrl' are
                overwritten for every product).

        Returns:
            None. Collected records are inserted into
            ``self.save_collection_name``; nothing is inserted when no
            product was found.
        """
        # The keyword is GBK percent-encoded once and reused for the
        # referer and for every page URL.
        quoted_keyword = quote(list['keyword'], encoding="gbk")
        headers = {
            'referer':
            'http://list.tmall.com/search_product.htm?q={}&type=p&spm=a220m.6910245.a2227oh.d100&from=mallfp..m_1_searchbutton&sort=d'
            .format(quoted_keyword),
            'user-agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'cookie': self.cookie,
        }
        urllist = []
        for page_no in range(1, list['page'] + 1):
            url = 'http://list.tmall.com/m/search_items.htm?page_size=20&page_no={}&q={}&type=p&sort=d'.format(
                page_no, quoted_keyword)
            print(url)
            response = requests.get(url, headers=headers)
            print(response.text)
            for data in json.loads(response.text)['item']:
                list['productName'] = data['title']
                list['pageUrl'] = 'https:' + data['url']
                print(list)
                urllist.append(deepcopy(list))
            # Pause between pages to throttle requests.
            time.sleep(10)

        if urllist:
            self.db.insert(self.save_collection_name, urllist)
            print('抓取数量:' + str(len(urllist)), '显示数量:' + str(list['count']))
        else:
            print(list['keyword'] + '无商品')

    def run(self):
        """Crawl every keyword stored in ``self.collection_name``."""
        # self.db.drop(self.save_collection_name)
        keys = self.db.get(self.collection_name)
        for i, key in enumerate(keys):
            print(i)
            entry = AllNumber().get_number(key)
            print("正在抓取关键词:" + entry['keyword'])
            # Reuse this instance instead of building a new GoodsList (and a
            # new Mongo handle) for every keyword.
            self.get_list(entry)
            time.sleep(10)
            time.sleep(10)
コード例 #5
0
def add_reservation(name, email, date):
    """Insert one reservation into the 'reservation' collection.

    The arguments are stored unchanged under the keys
    'name', 'email' and 'date'.
    """
    reservation = dict(name=name, email=email, date=date)
    Mongo.insert('reservation', reservation)
コード例 #6
0
ファイル: newsletter.py プロジェクト: rumjashka/dogs_cafe
def add_contact(email):
    """Insert one newsletter subscriber e-mail into the 'dognews' collection."""
    subscriber = dict(email=email)
    Mongo.insert('dognews', subscriber)
コード例 #7
0
class GoodsContent:
    """Enrich stored Tmall product URLs with detail data and save the result.

    Records are read from the 'TMGoodsUrl' collection and the enriched
    records are written to 'TMGoodsData' in the 'bayan' database.
    """

    def __init__(self):
        db_name = 'bayan'
        self.collection_name = 'TMGoodsUrl'
        self.save_collection_name = 'TMGoodsData'
        self.db = Mongo(db_name)
        self.conn = connRedis.OPRedis()

    def get(self, item):
        """Fetch the detail data for one product and merge it into ``item``.

        Args:
            item: dict with at least 'pageUrl' (containing ``id=<digits>``)
                and 'keyword'. It is mutated in place.

        Returns:
            The same ``item`` dict, enriched with price, stock, shop,
            promotion, SKU and comment fields. (Previously nothing was
            returned, so ``run`` stored ``None``.)

        Raises:
            IndexError: if 'pageUrl' contains no ``id=<digits>`` part.
            KeyError: if a mandatory field is missing from the response.
        """
        goods_id = re.findall(r'id=(\d+)', item['pageUrl'])[0]
        item['pageUrl'] = 'https://detail.tmall.com/item.htm?id={}'.format(
            goods_id)
        item['_id'] = urlmd5(item['pageUrl'] + item['keyword'])
        item['productId'] = int(goods_id)
        item['custom'] = 2  # platform
        item['platformType'] = 2
        url = 'https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=%7B%22itemNumId%22%3A%22{}%22%7D'.format(
            goods_id)
        response = requests.get(url)
        print(response.url)
        data = json.loads(response.text)['data']
        dataitem = data['item']
        dataprops = data['props']
        # 'apiStack' carries a second JSON document encoded as a string.
        apiStack = json.loads(data['apiStack'][0]['value'])

        # Product specification: best effort, the property layout varies.
        try:
            item['productParam'] = dataprops['groupProps'][0]['基本信息'][12][
                '食品口味']
        except (KeyError, IndexError, TypeError):
            item['productParam'] = ''

        price_info = apiStack['price']
        item['currentPrice'] = price_info['price']['priceText']
        # Original price: first extra price, else the transmit price, else
        # fall back to the current price.
        try:
            item['originalPrice'] = price_info['extraPrices'][0]['priceText']
        except (IndexError, KeyError):
            try:
                item['originalPrice'] = price_info['transmitPrice'][
                    'priceText']
            except (KeyError, IndexError, TypeError):
                # The key set above is 'currentPrice'; the old fallback read
                # 'current_price' and raised KeyError.
                item['originalPrice'] = item['currentPrice']

        # Monthly sales.
        try:
            item['salesNumMonth'] = int(apiStack['item']['sellCount'])
        except (KeyError, IndexError, TypeError, ValueError):
            item['salesNumMonth'] = 0

        # Service promises: titles joined, each followed by a space.
        item['servicePromise'] = ''.join(
            prom['title'] + ' '
            for prom in apiStack['consumerProtection']['items'])
        item['paymentInformation'] = '信用卡快捷支付蚂蚁花呗余额宝'  # payment methods

        sku_core = apiStack['skuCore']['sku2info']
        item['stockNum'] = int(sku_core['0']['quantity'])  # stock
        item['stock'] = '有货' if item['stockNum'] else '无货'

        seller = data['seller']
        item['shopName'] = seller['shopName']  # shop name
        item['shopUrl'] = "https://shop{}.taobao.com".format(
            seller['shopId'])  # shop link
        item['shopId'] = seller['shopId']
        # Shop scores as a flat list: title, score, title, score, ...
        item['shopScore'] = []
        for shop in seller['evaluates']:
            item['shopScore'].append(shop['title'])
            item['shopScore'].append(shop['score'])
        item['commentsCount'] = dataitem['commentCount']  # number of comments
        item['collectionNum'] = dataitem['favcount']  # number of favourites
        item['departureAddress'] = apiStack['delivery']['from']  # ships from

        # Promotion text: shop promotion lines, skipping any that contain
        # '登录', followed by the channel title when there is one.
        try:
            shop_prom = price_info['shopProm']
        except (KeyError, TypeError):
            shop_prom = []
        promo_parts = []
        for prom in shop_prom:
            for con in prom['content']:
                if '登录' not in con:
                    promo_parts.append(con + ',')
        try:
            channel_title = apiStack['consumerProtection']['channel']['title']
        except (KeyError, TypeError):
            channel_title = ''
        item['promotion'] = ''.join(promo_parts) + channel_title

        item['crawlTime'] = int(time.time() * 1000)
        craw_date = time.strftime(
            "%Y-%m-%d", time.localtime(item['crawlTime'] / 1000))
        # NOTE(review): 'platform' is not set in this method; presumably it
        # is already present on the stored record. Confirm against the data.
        item['connectGoodsId'] = urlmd5(
            item['shopId'] + str(item['productId']) + craw_date +
            item['platform'])

        # Per-SKU details.
        item['productSkuDetail'] = []
        try:
            skuprops = data['skuBase']['props']
        except KeyError:
            skuprops = []
        try:
            skus = data['skuBase']['skus']
        except KeyError:
            skus = []
        for sku in skus:
            sku_info = sku_core[sku['skuId']]
            sku_price = sku_info['price']['priceText']
            sku_stock = sku_info['quantity']
            # Only the last "pid:vid" pair of the property path is used.
            pid, vid = sku['propPath'].split(";")[-1].split(":")[:2]
            for prop in skuprops:
                if prop['pid'] != pid:
                    continue
                for col in prop['values']:
                    if col['vid'] != vid:
                        continue
                    sku_name = col['name']
                    item['productParam'] += sku_name + ' '
                    item['productSkuDetail'].append({
                        'sku_id': sku['skuId'],
                        'sku_name': sku_name,
                        'sku_price': sku_price,
                        'sku_stock': sku_stock,
                    })

        comments, crawlCommentsTime = GoodsComment().get(item, 1)
        item['commentsData'] = comments
        item['crawlCommentsCount'] = len(comments)
        print('评论数:' + str(len(comments)))
        item['crawlCommentsTime'] = crawlCommentsTime
        print(item)
        return item

    def run(self):
        """Enrich and store every record found in ``self.collection_name``."""
        items = self.db.get(self.collection_name)
        for i, item in enumerate(items):
            print(i)
            goodsitem = self.get(item)
            self.db.insert(self.save_collection_name, goodsitem)