예제 #1
0
def add_contact(name, email, theme, question):
    """Save a contact request to the 'journalist' collection.

    The four arguments are stored unchanged under the keys ``name``,
    ``email``, ``theme`` and ``question`` via ``Mongo.insert``.
    """
    contact = dict(name=name, email=email, theme=theme, question=question)
    Mongo.insert('journalist', contact)
예제 #2
0
def process_program(self, id: int) -> Optional[str]:
    """Compute and store the rating of one program.

    The rating is the sum of ``weight * int(value)`` over the program's
    parameters, truncated to an integer before it is saved. After the
    rating is committed, the program id is pulled from the ``planned``
    list of the state document identified by ``PROCESS_KEY``.

    Args:
        id: Primary key of the program to rate.

    Returns:
        A human-readable summary, or ``None`` when no program with that
        id exists.
    """
    session = Session()
    try:
        program = session.query(Program).filter(Program.id == id).one_or_none()
        if not program:
            return None

        state_db = Mongo(STATE_COLL)

        # TODO: type processing
        rating = sum(
            param['weight'] * int(param['value'])
            for param in program.parameters)
        program.rating = int(rating)  # round score

        session.add(program)
        session.commit()
    finally:
        # Always hand the connection back, including on the early return
        # and on errors.
        session.close()

    state_db.update_one({'key': PROCESS_KEY}, {'$pull': {'planned': id}})

    return f'New rating for program <{id}>: {rating}'
예제 #3
0
def add_file(title, description, file, datetime):
    """Save the metadata of an uploaded file to the 'img' collection.

    The document holds the keys ``title``, ``file``, ``description`` and
    ``datetime`` with the values passed in.
    """
    record = {}
    record['title'] = title
    record['file'] = file
    record['description'] = description
    record['datetime'] = datetime
    Mongo.insert('img', record)
예제 #4
0
class GoodsList:
    """Collect Tmall search-result URLs for the keywords stored in 'TMkey'.

    Results are written to the 'TMGoodsUrl' collection of the 'bayan'
    database.
    """

    def __init__(self):
        db_name = 'bayan'
        self.save_collection_name = 'TMGoodsUrl'
        self.collection_name = 'TMkey'
        self.db = Mongo(db_name)

    def get_list(self, list):
        """Fetch every result page for one keyword and store the item URLs.

        Args:
            list: Keyword record. The keys ``keyword``, ``page`` and
                ``count`` are read; ``productName`` and ``pageUrl`` are
                overwritten on this dict for every item found, and a deep
                copy of the record is kept per item.

        The collected copies are inserted into ``save_collection_name``
        when at least one item was found; otherwise a message is printed.
        """
        urllist = []
        # The keyword is not modified inside the loop, so encode it once.
        keyword = quote(list['keyword'], encoding="gbk")
        headers = {
            'referer':
            'http://list.tmall.com/search_product.htm?q={}&type=p&spm=a220m.6910245.a2227oh.d100&from=mallfp..m_1_searchbutton&sort=d'
            .format(keyword),
            'user-agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            # NOTE(review): hard-coded session cookie; consider loading it
            # from configuration. It is written as two adjacent literals so
            # the string is properly terminated.
            'cookie':
            'cna=l9oDFcy6VQ4CAd6AqijwI13O; _med=dw:1366&dh:768&pw:1366&ph:768&ist:0; lid=%E8%91%AC%E4%BB%AA%E4%B8%BF%E5%A4%9C%E7%A5%9E%E6%9C%88; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; _uab_collina=156877667696726520994751; hng=CN%7Czh-CN%7CCNY%7C156; enc=twxIgD2w8bZQSql4cagTND22VE%2FhUTEOaq2XkcEtDvxxkz37BO5Mh25gdOoNdNoJF5i9aTpzn%2BrzEdT6wQL1qA%3D%3D; sm4=110100; _m_h5_tk=b33074b6753c714e2c5a32fd78d6c426_1571630922757; _m_h5_tk_enc=db58b55df9684e14b5c1aaef72e5c979; t=68aeed5a9ead7edb2d26b8d916cdf5be; _tb_token_=33b56e71b803e; cookie2=1d0efb3fed419cc8c79c62ce27633524; dnk=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; cq=ccp%3D0; tracknick=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; _l_g_=Ug%3D%3D; unb=2646574036; lgc=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; cookie1=BxpRR3m3mq6u2SKR8tMIAV5PbfT0Mkqa7hIMcGbyJO8%3D; login=true; cookie17=UU6lS5IHpNO1Zw%3D%3D; _nk_=%5Cu846C%5Cu4EEA%5Cu4E3F%5Cu591C%5Cu795E%5Cu6708; sg=%E6%9C%886c; uc1=cookie14=UoTbnKU5vO8R1A%3D%3D&cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&pas=0&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&lng=zh_CN&existShop=false&tag=8&cookie21=U%2BGCWk%2F7p4mBoUyS4E9C; uc3=id2=UU6lS5IHpNO1Zw%3D%3D&nk2=tzejKGxa%2FgjcE9Gg&vt3=F8dByuckA3AzTRgxjWY%3D&lg2=VT5L2FSpMGV7TQ%3D%3D; uc4=nk4=0%40tUQ6%2FECahntTXqHnI5ioo65kDQrdQ7Y%3D&id4=0%40U2xo%2B4EAVHijItFSb4zrqlzll9lp; csg=66cb9310; x5sec=7b22746d616c6c7365617263683b32223a226433303262376437326166623365333865613263653965323938316134613934434a5846744f3046455048736b71766867504c3234674561444449324e4459314e7a51774d7a59374d673d3d227d; pnm_cku822=098%23E1hvevvUvbpvUvCkvvvvvjiPRszOAj1En2sWljD2PmPpsjrUnLSU1jE2PFMWlj3vRphvChCvvvvPvpvhvv2MMQhCvvXvovvvvvvEvpCWpSuUv8ROjovDN%2BClHdUf8B69D70Ode%2BRVA3l%2Bb8rwAtYmq0DW3CQcmx%2Fsj7J%2B3%2BijLjEIEkffvyf8j7yHdBYLjnv6nQ7RAYVEvLvq8yCvv3vpvolaufqRIyCvvXmp99he1KtvpvIphvvvvvvphCvpCBXvvCCN6CvHHyvvhn2phvZ7pvvpiivpCBXvvCmeuwCvvBvpvpZ; '
            'res=scroll%3A1349*5314-client%3A1349*318-offset%3A1349*5314-screen%3A1366*768; isg=BBkZMDWmoCVFC3xjiPXyRrtzKAUzDg1k1P50YDvO3sC_QjjUg_W7KLoQREaRf6WQ; l=dBLiViNPqB8DGUOzBOCZZuI8amQTKIRbSuPRwN4pi_5CG68_WbQOkM1H9FJ6cjWAGn8B4JuaUMvTCFJgJsl0NE8xDfpFlkM2B'
        }
        for page in range(list['page']):
            url = 'http://list.tmall.com/m/search_items.htm?page_size=20&page_no={}&q={}&type=p&sort=d'.format(
                page + 1, keyword)
            print(url)
            response = requests.get(url, headers=headers)
            print(response.text)
            datas = json.loads(response.text)['item']
            for data in datas:
                list['productName'] = data['title']
                list['pageUrl'] = 'https:' + data['url']
                print(list)
                urllist.append(deepcopy(list))
            # Pause between result pages.
            time.sleep(10)
        if urllist:
            self.db.insert(self.save_collection_name, urllist)
            print('抓取数量:' + str(len(urllist)), '显示数量:' + str(list['count']))
        else:
            print(list['keyword'] + '无商品')

    def run(self):
        """Process every keyword stored in ``collection_name``."""
        # self.db.drop(self.save_collection_name)
        keys = self.db.get(self.collection_name)
        for i, key in enumerate(keys):
            print(i)
            keyword_info = AllNumber().get_number(key)
            print("正在抓取关键词:" + keyword_info['keyword'])
            # Reuse this instance instead of opening a new database handle
            # for every keyword.
            self.get_list(keyword_info)
            time.sleep(10)
예제 #5
0
class GoodsList:
    """Collect JD search-result URLs and store them in 'JDGoodsUrl'."""

    def __init__(self):
        self.count = 0
        db_name = 'bayan'
        self.collection_name = 'JDGoodsUrl'
        self.db = Mongo(db_name)

    def _fetch_urls(self, list):
        """Run one full pass over the result pages and return the copies."""
        urllist = []
        headers = {
            'referer': 'https://search.jd.com/Search?keyword=minecraft&enc=utf-8&pvid=b55d6cb7986748d6a32da02876cc9874',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        # Request pages 1 .. 2 * list['pages'].
        last_page = list['pages'] * 2 + 1
        for page in range(1, last_page):
            url = 'https://search.jd.com/s_new.php?keyword={}&psort=3&enc=utf-8&page={}&s={}&scrolling=y&log_id={}'.format(list['keyword'], page, page*31, int(time.time()))
            response = requests.get(url, headers=headers).text
            selector = Selector(text=response)
            goods = selector.xpath('//li[@class="gl-item"]')
            for good in goods:
                goodsurl = good.xpath('.//a[@target="_blank"]/@href').extract()[0]
                list['pageUrl'] = 'https:' + goodsurl
                print(list)
                urllist.append(deepcopy(list))
        return urllist

    def get_list(self, list):
        """Fetch the item URLs for one keyword until the count matches.

        Args:
            list: Keyword record. ``keyword``, ``pages`` and ``count`` are
                read, ``_id`` is removed if present and ``pageUrl`` is
                overwritten for every item found.

        Returns:
            The list of per-item copies of the record, once the number of
            collected URLs equals ``list['count']`` and they have been
            stored by ``check``.

        The fetch is repeated, without an upper bound, for as long as the
        counts differ. Retrying in a loop (rather than recursing) means a
        retried call still returns the result and does not try to delete
        ``_id`` a second time.
        """
        list.pop('_id', None)
        while True:
            urllist = self._fetch_urls(list)
            self.count = len(urllist)
            if self.check(list['count'], urllist) == 'success':
                print(list['keyword'] + ":成功")
                return urllist
            print(list['keyword'] + ":失败,重新抓取")

    def check(self, count, urllist):
        """Store ``urllist`` when ``self.count`` equals ``count``.

        Returns:
            ``'success'`` after inserting and resetting ``self.count``;
            ``None`` (and nothing is stored) when the counts differ.
        """
        if self.count != count:
            return None
        self.db.insert(self.collection_name, urllist)
        self.count = 0
        return 'success'

    def run(self):
        """Drop the target collection and process every keyword."""
        self.db.drop(self.collection_name)
        lists = AllNumber().get_number()
        for keyword_info in lists:
            print("正在抓取关键词:" + keyword_info['keyword'])
            # Reuse this instance instead of opening a new database handle
            # for every keyword.
            self.get_list(keyword_info)
            time.sleep(1)
예제 #6
0
    def __init__(self, *args, **kwargs):
        """Build the main window: menu, shared services, fonts and pages.

        All positional and keyword arguments are forwarded to
        ``tk.Tk.__init__``.
        """
        tk.Tk.__init__(self, *args, **kwargs)

        # Menu bar
        self.config(menu=MenuBar(self))

        # Database and graphing components
        self.database = Mongo()
        self.graphs = Graphs()

        # Fonts are defined once here so sizes stay consistent everywhere
        family = 'Arial'
        self.title_font = tkfont.Font(
            family=family, size=18, weight="bold", slant="italic")
        self.button_font = tkfont.Font(family=family, size=16)
        self.datepicker_font = tkfont.Font(family=family, size=10)
        self.label_font = tkfont.Font(family=family, size=10)
        self.option_font = tkfont.Font(family=family, size=12)

        # Main container and its grid configuration
        container = tk.Frame(self)
        container.pack(side="top", fill="both", expand=True)
        container.grid_rowconfigure(0, weight=1)
        container.grid_columnconfigure(0, weight=1)

        # Every page is instantiated from its class and registered under
        # the class name; all pages occupy the same grid cell.
        self.frames = {}
        page_classes = (StartPage, IndividualGraph, GroupTable)
        for page_class in page_classes:
            page = page_class(parent=container, controller=self)
            self.frames[page_class.__name__] = page
            page.grid(row=0, column=0, sticky="nsew")

        self.show_frame("StartPage")
예제 #7
0
class TelegramBot():
    """Telegram bot that stores links posted to one channel in Mongo."""

    def __init__(self, token, channel_name="denemeeedeee"):
        self.__updater = Updater(token=token)
        self.__dispatcher = self.__updater.dispatcher
        self.__mongo = Mongo()
        self.channel_name = channel_name

    def get_link(self, bot, update):
        """Parse a ``title-[lang]-link`` channel post and store it.

        Args:
            bot: Bot instance passed by the dispatcher (unused).
            update: Incoming update; only ``channel_post`` is read.

        Returns:
            ``True`` when the post was stored. ``False`` when the update
            has no channel post, the post comes from another channel, or
            the text does not contain the two ``-`` separators.
        """
        post = update.channel_post
        if post is None or self.channel_name != post.chat.username:
            return False

        # At most two splits, so dashes inside the link are preserved.
        split_msg = str(post.text).split("-", 2)
        if len(split_msg) < 3:
            # Malformed message: ignore it instead of raising IndexError.
            return False

        title, raw_lang, link = split_msg
        lang = raw_lang.replace("[", "").replace("]", "")
        self.__mongo.Insert(title, lang, link)
        return True

    def set_handler(self):
        """Create the channel-post handler that calls ``get_link``."""
        f = LinkFilter()
        self.__get_link_handler = MessageHandler(f,
                                                 self.get_link,
                                                 channel_post_updates=True)

    def set_dispatcher(self):
        """Register the handler; ``set_handler`` must have run first."""
        self.__dispatcher.add_handler(self.__get_link_handler)

    def start(self):
        """Start polling for updates."""
        self.__updater.start_polling()
def get_mongo():
    """Yield a ``Mongo`` instance and close its client afterwards.

    Written as a generator: the code after ``yield`` runs when the
    consumer finishes with the instance or raises. If ``Mongo()`` itself
    fails there is nothing to close and the error propagates.
    """
    mongo = Mongo()
    try:
        yield mongo
    finally:
        # Close unconditionally; the cleanup must not depend on the
        # truthiness of the Mongo object.
        mongo.mongo_client.close()
예제 #9
0
def scan_programs() -> List[int]:
    """Schedule rating jobs for programs that have no rating yet.

    Program ids already listed under ``planned`` in the state document
    (``PROCESS_KEY``) are skipped. The remaining ids are appended to that
    list (or a new state document is created) and one
    ``process_program`` task is queued per id.

    Returns:
        The ids queued by this call, in query order; an empty list when
        every program already has a rating.
    """
    session = Session()
    try:
        not_rated_programs = [
            prog_id
            for (prog_id,
                 ) in session.query(Program.id).filter(Program.rating.is_(None))
        ]
    finally:
        # Hand the connection back as soon as the ids are loaded.
        session.close()

    if not not_rated_programs:
        return []

    state_db = Mongo(STATE_COLL)
    processing = state_db.find_one({'key': PROCESS_KEY})
    if processing:
        already_planned = set(processing['planned'])
        # Keep query order so the result is deterministic.
        to_process = [
            prog_id for prog_id in not_rated_programs
            if prog_id not in already_planned
        ]
        state_db.update_one({'_id': processing['_id']}, {
            '$set': {
                'key': PROCESS_KEY,
                'planned': processing['planned'] + to_process
            }
        })
    else:
        to_process = not_rated_programs
        state_db.insert_one({'key': PROCESS_KEY, 'planned': to_process})

    for program_id in to_process:
        process_program.delay(program_id)

    return to_process
예제 #10
0
def init() -> None:
    """Probe both backing stores; log and re-raise the first failure.

    Executes ``SELECT 1`` on a new ``Session`` and lists the collections
    of a new ``Mongo`` instance.
    """
    try:
        Session().execute('SELECT 1')
        Mongo().mongo_db.list_collections()
    except Exception as error:
        logger.error(error)
        raise
예제 #11
0
파일: app.py 프로젝트: rumjashka/dogs_cafe
def book():
    """Store a reservation from the posted form and render the outcome.

    On POST the ``name``, ``email`` and ``date`` form fields are saved with
    ``add_reservation``. The success page is rendered (and ``send`` is
    called) while fewer than 10 reservations exist for that date; otherwise
    the failure page is rendered.
    """
    # NOTE(review): `email` and `date` are only bound for POST requests, so
    # any other method fails below with a NameError. The reservation is
    # also stored before the limit is checked — confirm both are intended.
    if request.method == 'POST':
        form = request.form
        name, email, date = form['name'], form['email'], form['date']
        add_reservation(name, email, date)

    booked = Mongo.get('reservation', {'date': date}).count()
    if booked >= 10:
        return render_template('bookfall.html')

    send(email, date)
    return render_template('booksuccess.html')
예제 #12
0
class AllNumber:
    """Look up JD page and result counts for the stored keywords."""

    def __init__(self):
        self.collection_name = 'key'
        self.db = Mongo('bayan')

    def get_number(self):
        """Return a copy of every keyword record with counts attached.

        For each record read from ``collection_name`` the JD search page
        is fetched; ``pages`` (first number in the ``fp-text`` span) and
        ``count`` (the ``result_count`` value in the page source) are set
        on the record, and a deep copy is appended to the result.
        """
        records = self.db.get(self.collection_name)
        request_headers = {
            'referer': 'https://search.jd.com/Search?keyword=iphone&enc=utf-8',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        enriched = []
        for record in records:
            search_url = 'https://search.jd.com/Search?keyword={}&enc=utf-8'.format(record['keyword'])
            html = requests.get(search_url, headers=request_headers).text
            soup = BeautifulSoup(html, "html.parser")
            result_counts = re.findall(r"result_count:\'(\d+)\'", html)
            pager = soup.find("span", class_="fp-text")
            page_numbers = re.findall(r'\d+', pager.find("i").text)
            record['pages'] = int(page_numbers[0])
            record['count'] = int(result_counts[0])
            enriched.append(deepcopy(record))
        return enriched
예제 #13
0
def get_contact():
    """Return everything stored in the 'journalist' collection."""
    contacts = Mongo.get_all('journalist')
    return contacts
예제 #14
0
파일: app.py 프로젝트: rumjashka/journalist
def check_user(login, password):
    """Return True when ``password`` matches the one stored for ``login``.

    Args:
        login: Login passed to ``Mongo.get_user``.
        password: Password supplied by the caller.

    Returns:
        ``False`` when no user is found or the passwords differ,
        ``True`` otherwise.
    """
    import hmac

    user = Mongo.get_user(login)
    if not user:
        return False

    stored = user['password']
    if isinstance(stored, str) and isinstance(password, str):
        # Constant-time comparison so the response time does not reveal
        # how many leading characters matched. Encoding to bytes lets
        # compare_digest accept non-ASCII text.
        return hmac.compare_digest(
            stored.encode('utf-8', 'surrogatepass'),
            password.encode('utf-8', 'surrogatepass'))
    return bool(stored == password)
예제 #15
0
파일: __init__.py 프로젝트: danquack/xray
def main():
    """Parse the CLI arguments, analyze the host and post the result to Mongo.

    Packages and mounts are read from the files given with ``-p`` / ``-m``;
    when a file cannot be read the local host is inspected instead. Unless
    ``--test`` is given, the host record is posted to the prod or non-prod
    database and the process exits with status 0 on success or 1 on a
    database error.

    Raises:
        ValueError: A packages file was read but the host could not be
            built from ``--server``.
        SystemExit: After the database step (status 0 or 1).
    """
    parser = argparse.ArgumentParser(
        description='A module to parse and record host pacakges to MongoDB')
    parser.add_argument('-p',
                        '--packages',
                        action="store",
                        help='host packages file',
                        type=str)
    parser.add_argument('-s',
                        '--server',
                        action='store',
                        help='server hostname',
                        type=str)
    parser.add_argument('-m',
                        '--mounts',
                        action='store',
                        help='mount points file',
                        type=str)
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='Debug output')
    parser.add_argument('-r',
                        '--runtime',
                        action='store',
                        default=False,
                        help='Run time of ansible execution')
    parser.add_argument('-t',
                        '--test',
                        action='store_true',
                        default=False,
                        help='Test host capture without pointing to db')
    parser.add_argument('--prod',
                        action='store_true',
                        default=False,
                        help='Write to prod db')
    parser.add_argument('--sleep',
                        action='store_true',
                        default=False,
                        help='Sleep a random time')
    results = parser.parse_args()

    if results.debug:
        logging.basicConfig(stream=stdout,
                            format=' %(levelname)s: %(asctime)s %(message)s',
                            level=logging.NOTSET)
    else:
        logging.basicConfig(stream=stdout,
                            format='%(levelname)s: %(asctime)s %(message)s',
                            level=logging.INFO)

    if results.sleep:
        logging.debug("Taking a light nap as requested")
        time.sleep(randint(0, 60))
        logging.debug("Starting Execution")

    start = time.time()
    try:
        with open(results.packages, 'r') as file_resource:
            packages = file_resource.read()
    except ValueError:
        # ValueError raised while reading is propagated, not treated as
        # "no packages file".
        raise
    except Exception:
        # Assume if they are running locally they dont provide packages.
        # Exception (not BaseException) so Ctrl-C / SystemExit still work.
        host = Host(socket.getfqdn())
        packages = get_local_packages()
    else:
        try:
            host = Host(results.server)
        except Exception as error:
            raise ValueError(
                "X-RAY: Packages were provided without hostname. Please use the -s flag and provide a hostname"
            ) from error

    try:
        with open(results.mounts, 'r') as file_resource:
            mounts = file_resource.read()
    except Exception:
        # Assume if they are running locally they dont provide mounts
        mounts = get_local_mounts()

    if results.runtime:
        host.last_update_duration = float(results.runtime)

    host.software = analyze_packages(packages)
    if mounts is not None:
        host.mounts = analyze_mounts(mounts)

    if results.test:
        # Test mode: show the captured host instead of writing to a db.
        logging.debug(host.__dict__)
        return

    # The connection settings are imported lazily so only the selected
    # environment's module has to exist.
    if results.prod:
        import mongo_info_prod
        connection = mongo_info_prod.CONNECTION_STRING
    else:
        import mongo_info_nonprod
        connection = mongo_info_nonprod.CONNECTION_STRING

    try:
        mongo = Mongo(connection)
        mongo.post(host.__dict__, start)
    except Exception as error:
        logging.error(f"X-RAY:Error with db: {error}")
        # SystemExit is raised directly; the `exit` helper is only
        # installed by the `site` module.
        raise SystemExit(1)
    raise SystemExit(0)
예제 #16
0
파일: app.py 프로젝트: rumjashka/journalist
        file = request.files['file']
        title=request.form['title']
        description=request.form['description']
        file.save(path.join ('static/img', file.filename))
        add_file(title, description, file.filename, datetime.utcnow())
        send_emails()
        return redirect('/')
    return render_template('add_post.html')


def send(email):
    """Send the 'New post' notification to ``email`` through Mailgun.

    Returns the response of the ``requests.post`` call.
    """
    message = {
        "from": mailgun.FROM,
        "to": email,
        "subject": "New post",
        "text": "New post is on the site! Click here to read http://127.0.0.1:5001/"
    }
    credentials = ("api", mailgun.API_KEY)
    return requests.post(mailgun.URL, auth=credentials, data=message)

def send_emails():
    """Call ``send`` for the email of every 'journalist' document."""
    for subscriber in Mongo.get_all('journalist'):
        send(subscriber['email'])

# Script entry point: connect to Mongo, then serve on port 5001 in debug mode.
if __name__ == "__main__":
    Mongo.connect()
    app.run(port=5001, debug=True)
예제 #17
0
파일: app.py 프로젝트: rumjashka/journalist
def send_emails():
    """Notify every address stored in the 'journalist' collection."""
    recipients = Mongo.get_all('journalist')
    for recipient in recipients:
        address = recipient['email']
        send(address)
예제 #18
0
class GoodsContent:
    """Scrape JD product details for the URLs stored in 'JDGoodsUrl'."""

    def __init__(self):
        db_name = 'bayan'
        self.collection_name = 'JDGoodsUrl'
        self.save_collection_name = 'JDGoodsData'
        self.db = Mongo(db_name)
        self.conn = connRedis.OPRedis()

    def _get_with_retry(self, url, headers, timeout, attempts=5,
                        retry_message=None):
        """GET ``url`` through a random proxy, retrying on any error.

        A new proxy is drawn for every attempt. ``retry_message`` is
        printed after each failed attempt when given.

        Raises:
            RuntimeError: All ``attempts`` failed; the last error is
                attached as the cause.
        """
        last_error = None
        for _ in range(attempts):
            try:
                return requests.get(
                    url,
                    headers=headers,
                    proxies={
                        'https': self.conn.randomOneIp('proxy:new_ip_list')
                    },
                    timeout=timeout)
            except Exception as error:
                last_error = error
                if retry_message is not None:
                    print(retry_message)
        raise RuntimeError('request to {} failed after {} attempts'.format(
            url, attempts)) from last_error

    def get(self, item):
        """Fill ``item`` with the data scraped for its product page.

        Args:
            item: URL record; ``pageUrl``, ``keyword`` and ``count`` are
                read (``count`` is removed). The dict is modified in
                place.

        Returns:
            The same ``item`` dict with product, shop, price, stock,
            score, promotion and comment fields set.

        Raises:
            RuntimeError: The product page could not be fetched.
        """
        goods_id = re.findall(r'\d+', item['pageUrl'])
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        url = 'https://item.jd.com/{}.html'.format(goods_id[0])
        response = self._get_with_retry(url,
                                        headers,
                                        timeout=5,
                                        retry_message='重新获取商品')
        print(response.url)
        if response.url == 'https://m.jd.com/404.htm?errcode=10001':
            # NOTE(review): processing continues after this message and
            # will most likely fail on the missing page elements below.
            print('该商品不存在')
            # db.goodsData.remove({"_id": item['_id']})
        current_price, original_price, compAddress, productParam, stockNum = self.get_price(
            goods_id[0])
        selector = Selector(text=response.text)
        item['pageUrl'] = "https://item.jd.com/{}.html".format(goods_id[0])
        item['_id'] = urlmd5(item['pageUrl'] + item['keyword'])
        product_name = selector.xpath(
            '//div[@class="item ellipsis"]/@title').extract()[0]
        item['productName'] = product_name  # product name
        item['productId'] = int(goods_id[0])  # product id
        item['platform'] = '京东'  # platform
        item['custom'] = 2
        item['platformType'] = 3
        item['brand'] = ''
        item['servicePromise'] = ''
        item['paymentInformation'] = ''
        # product specification: every entry followed by ', '
        item['productParam'] = ''.join(parm + ', ' for parm in productParam)
        shop_url = selector.xpath(
            '//div[@class="J-hove-wrap EDropdown fr"]//div[@class="name"]/a/@href'
        ).extract()[0]
        item['shopUrl'] = 'https:' + shop_url  # shop link
        shop_id = selector.xpath(
            '//div[@class="J-hove-wrap EDropdown fr"]//div[@class="follow J-follow-shop"]/@data-vid'
        ).extract()[0]
        item['shopId'] = str(shop_id)  # shop id
        shop_name = selector.xpath(
            '//div[@class="J-hove-wrap EDropdown fr"]//div[@class="name"]/a/@title'
        ).extract()[0]
        item['shopName'] = shop_name  # shop name
        item['departureAddress'] = compAddress  # shipping origin
        item['currentPrice'] = current_price  # current price
        item['originalPrice'] = original_price  # original price
        item['stockNum'] = stockNum  # stock state code
        # 33: in stock, 39/40: available, 36: pre-order, anything else: none
        if item['stockNum'] in (33, 39, 40):
            item['stock'] = '有货'
        elif item['stockNum'] == 36:
            item['stock'] = '预订'
        else:
            item['stock'] = '无货'
        item['salesNumMonth'] = 0  # monthly sales
        item['categories'] = ''  # product category
        item['couponDescription'] = ''  # product description
        item['collectionNum'] = 0  # number of favourites
        # SKU details, stored as a JSON string
        item['productSkuDetail'] = json.dumps([{
            'sku_id': str(item['productId']),
            'sku_name': item['productName'],
            'sku_price': item['currentPrice'],
            'sku_stock': item['stockNum']
        }],
                                              ensure_ascii=False)
        count, level = self.commetn_count(goods_id)
        item['cmtStarLevel'] = level  # product rating
        item['commentsCount'] = count  # number of comments
        item['crawlTime'] = int(time.time() * 1000)
        # The extracted texts alternate between a label and its value.
        score_texts = selector.xpath(
            "//div[@class='score-part']/span[@class='score-desc']/text() | //div[@class='score-part']/span[@class='score-detail']//text()"
        ).extract()
        shopScore = {'服务': '0.0', '描述': '0.0', '物流': '0.0'}
        for index in range(0, len(score_texts), 2):
            score = re.sub(r'\r+|\n+| +|\t+|n+|,+', '', score_texts[index])
            if '评价' in score or '描述' in score:
                score = '描述'
            elif '物流' in score:
                score = '物流'
            elif '服务' in score:
                score = '服务'
            try:
                shopScore[score] = re.search(r'\d+.\d+',
                                             score_texts[index + 1]).group(0)
            except (AttributeError, IndexError, TypeError):
                # No value text, or no number in it.
                shopScore[score] = '0.0'
        item['shopScore'] = '描述:{},物流:{},服务:{}'.format(shopScore['描述'],
                                                       shopScore['物流'],
                                                       shopScore['服务'])
        attrs = selector.xpath(
            "//div[@class='p-parameter']//li//text()").extract()  # detail text
        item['params'] = ''.join(
            re.sub(r'\n+| +', '', attr) + ', ' for attr in attrs)

        cat = re.findall(r'\[(.+)\]',
                         re.findall(r'cat:(.+)', response.text)[0])[0]
        cat = cat.replace(',', '%2C')
        promotion = self.get_promotion(item['productId'], item['shopId'], cat)
        item['promotion'] = promotion  # product promotions
        del item['count']
        craw_date = time.localtime(item['crawlTime'] / 1000)
        craw_date = time.strftime("%Y-%m-%d", craw_date)
        url = item['shopId'] + str(
            item['productId']) + craw_date + item['platform']
        item['connectGoodsId'] = urlmd5(url)
        page = 0
        comment_list, crawlCommentsTime = GoodsComment().get(item, page)
        item['commentsData'] = comment_list
        item['crawlCommentsCount'] = len(comment_list)
        item['crawlCommentsTime'] = crawlCommentsTime
        return item

    def commetn_count(self, goods_id):
        """Return ``(comment count, good-rate value)`` for a product.

        Args:
            goods_id: Sequence whose first element is the product id.

        The page-comments endpoint is tried first; when it answers with an
        empty body the comment-summaries endpoint is used instead.

        Raises:
            RuntimeError: The first endpoint could not be fetched.
        """
        headers = {
            'Referer':
            'https://item.m.jd.com/product/{}.html'.format(goods_id[0]),
            'User-Agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        url = 'http://sclub.jd.com/comment/productPageComments.action?productId={}&score=0&sortType=6&page=0&pageSize=10'.format(
            goods_id[0])
        url2 = 'https://club.jd.com/comment/productCommentSummaries.action?referenceIds={}'.format(
            goods_id[0])
        response = self._get_with_retry(url, headers, timeout=5)
        if response.text == '':
            response = requests.get(
                url2,
                headers=headers,
                proxies={'https': self.conn.randomOneIp('proxy:new_ip_list')},
                timeout=5)
            data = json.loads(response.text)
            count = data['CommentsCount'][0]['CommentCount']
            level = data['CommentsCount'][0]['GoodRateShow']
        else:
            data = json.loads(response.text)
            count = data['productCommentSummary']['commentCount']
            level = data['productCommentSummary']['goodRateShow']
        return count, level

    def get_price(self, goods_id):
        """Read price, stock and specification data from the mobile page.

        Args:
            goods_id: Product id used in the mobile product URL.

        Returns:
            ``(current_price, original_price, compAddress, productParam,
            stockNum)``.

        Raises:
            RuntimeError: The mobile page could not be fetched.
        """
        headers = {
            'referer':
            'https://list.jd.com/list.html?cat=1320,1584,13789&tid=17675&ev=exbrand_8179',
            'user-agent':
            'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        url = 'https://item.m.jd.com/product/{}.html'.format(goods_id)
        response = self._get_with_retry(url, headers, timeout=3)
        price = re.findall(r"\"price\":(.+),", response.text)[0]
        price = json.loads(price)
        current_price = price['p']
        original_price = price['op']
        try:
            stock = re.findall(r"\"stock\":(.+),", response.text)[0]
            stock = json.loads(stock)
        except Exception:
            # No usable stock block: fall back to "in stock" (33).
            stock = {'ir': [], 'serviceInfo': '', 'StockState': 33}
        # Shipping origin: try the structured fields first, then the text
        # inside the serviceInfo markup, then serviceInfo itself.
        try:
            compAddress = stock['self_D']['df']
        except KeyError:
            try:
                compAddress = stock['D']['df']
            except Exception:
                try:
                    compAddress = re.search(r"<.+>(.+)<",
                                            stock['serviceInfo']).group(1)
                except Exception:
                    compAddress = stock['serviceInfo']
        try:
            salePropSeq = re.findall(
                r"\"salePropSeq\":{.*\"1\":(\[.+?\]).*\]}", response.text)[0]
        except Exception:
            salePropSeq = '[]'
        try:
            salePropSeq = json.loads(salePropSeq)
        except Exception:
            # Second, stricter pattern; a miss here yields an empty list
            # instead of an uncaught IndexError.
            try:
                salePropSeq = json.loads(
                    re.findall(r"\"salePropSeq\":{.*\"1\":(\[.+?\]),.*\]}",
                               response.text)[0])
            except Exception:
                salePropSeq = []
        productParam = salePropSeq
        if productParam == [""]:
            productParam = []
        # 33: in stock, 39/40: available, 36: pre-order, anything else: none
        stockNum = int(stock['StockState'])
        return current_price, original_price, compAddress, productParam, stockNum

    def get_promotion(self, productId, shopId, cat):
        """Return the promotion texts of a product as one string.

        Every entry of ``pickOneTag`` and ``tags`` whose content does not
        contain '登录' is appended, each followed by a comma.
        """
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        promotionUrl = 'https://cd.jd.com/promotion/v2?skuId={}&area=1_2800_2848&shopId={}&cat={}'.format(
            productId, shopId, cat)
        response = requests.get(url=promotionUrl, headers=headers)
        response.encoding = 'gbk'
        text = json.loads(response.text)['prom']
        promotion = ''
        for te in text['pickOneTag']:
            if '登录' not in te['content']:
                promotion += te['content'] + ','
        for te in text['tags']:
            if '登录' not in te['content']:
                promotion += te['content'] + ','
        return promotion

    def run(self):
        """Scrape and upload every stored item from index 4358 onwards."""
        items = self.db.get(self.collection_name)
        # NOTE(review): the hard-coded offset looks like a manual resume
        # point — confirm before reuse.
        for i, item in enumerate(items[4358:]):
            print(i)
            goodsitem = self.get(item)
            print(goodsitem)
            post.uploadData(goodsitem)
            # self.db.insert(self.save_collection_name, goodsitem)
예제 #19
0
 def __init__(self):
     """Set the collection names and open the Mongo and Redis handles."""
     self.save_collection_name = 'JDGoodsData'
     self.collection_name = 'JDGoodsUrl'
     self.db = Mongo('bayan')
     self.conn = connRedis.OPRedis()
예제 #20
0
def add_contact(email):
    """Store ``email`` as a new document in the 'dognews' collection."""
    subscriber = dict(email=email)
    Mongo.insert('dognews', subscriber)
예제 #21
0
def get_post():
    """Return everything stored in the 'img' collection."""
    posts = Mongo.get_all('img')
    return posts
예제 #22
0
def get_contact():
    """Return everything stored in the 'dognews' collection."""
    subscribers = Mongo.get_all('dognews')
    return subscribers
예제 #23
0
import sys

from database import Mongo
from analysis import Analysis
import os

if __name__ == '__main__':
    # Interactive console menu: connect to the 'cw_db2' database, then
    # repeatedly show the list of functions and run the selected one.
    mongo = Mongo()
    db = mongo.connect('cw_db2')
    analysis = Analysis()
    while 1:
        os.system('CLS')
        print("-------Аналіз новин--------")
        print(
            "Список функцій: \n1. Дістати новини з ХML.\n2. Дістати новини з CSV.\n3. Аналіз обраного слова за популярністю в кожній категорії\n"
            "4. Кількість випущених статей з певним тегом.\n5. Аналіз популярності тегів.\n6. Аналіз кількості індексів з усіх статей\n7. ТОП 5 статей за переглядами."
            "\n8. Відсортований список статей за об'ємом тексту.\n9. Вихід")

        num = input("\n Обрати функцію: ")

        # Compare by value: `is` tests object identity, which is not
        # guaranteed to hold for equal strings.
        if num == '1':
            os.system('CLS')
            print("Дістаю новини з news2.xml...")
            mongo.fromXMLtoDB(db, "news2.xml")
            print("Done.")
        elif num == '2':
            os.system('cls')
            print("Дістаю новини з train.csv")
            mongo.fromCSVtoDB(db, "train.csv")
        elif num == '3':
            os.system('cls')
예제 #24
0
 def __init__(self, token, channel_name="denemeeedeee"):
     """Create the updater for ``token`` and remember the channel name."""
     updater = Updater(token=token)
     self.__updater = updater
     self.__dispatcher = updater.dispatcher
     self.__mongo = Mongo()
     self.channel_name = channel_name
# Module-level logger, also passed to the retry hooks below.
logger = logging.getLogger(__name__)

# Retry settings for init(): number of attempts and delay between them.
max_tries = 60  # 1 minute
wait_seconds = 1


@retry(
    stop=stop_after_attempt(max_tries),
    wait=wait_fixed(wait_seconds),
    before=before_log(logger, logging.INFO),
    after=after_log(logger, logging.WARN),
)
def init() -> None:
    """Check that the backing services selected in the template respond.

    This is a cookiecutter template: the Postgres check (``SELECT 1``) and
    the Mongo check (listing collections) are only rendered when the
    matching ``use_postgres`` / ``use_mongo`` option is "Yes". Any error is
    logged and re-raised; the ``retry`` decorator is configured with
    ``max_tries`` attempts and ``wait_seconds`` between them.
    """
    try:
{%- if cookiecutter.use_postgres == "Yes" %}
        pg = Session()
        pg.execute('SELECT 1')
{%- endif %}
{%- if cookiecutter.use_mongo == "Yes" %}
        mongo = Mongo()
        mongo.mongo_db.list_collections()
{%- endif %}
        # Keeps the try body non-empty when both checks are left out.
        pass
    except Exception as e:
        logger.error(e)
        raise e


# Run the service check when the module is executed as a script.
if __name__ == '__main__':
    init()
예제 #26
0
 def __init__(self):
     """Reset the URL counter and open the 'bayan' database handle."""
     self.collection_name = 'JDGoodsUrl'
     self.count = 0
     self.db = Mongo('bayan')
예제 #27
0
import telebot
from telebot import types
import config
from database import Mongo

# Shared database handle used by the handlers below.
mongo_instance = Mongo(uri=config.mongo_uri)
# NOTE(review): this drops the 'users' collection every time the module is
# loaded — confirm that is intended outside of development.
mongo_instance.db.drop_collection('users')

# Bot client created from the configured token.
bot = telebot.TeleBot(config.token)


@bot.message_handler(commands=['hello'])
def say_hello(message):
    """Answer the 'hello' command with a fixed greeting."""
    chat_id = message.chat.id
    bot.send_message(chat_id, 'Hello, master')


@bot.message_handler(commands=['mongo'])
def get_mongo(message):
    """Answer the 'mongo' command with the 'users' collection count."""
    chat_id = message.chat.id
    user_total = mongo_instance.db['users'].count()
    bot.send_message(chat_id, 'count is {}'.format(user_total))


@bot.message_handler(commands=['new'])
def add_user(message):
    """Handle the 'new' command.

    When a 'users' document already exists for the sender, the stored
    email is reported back and the handler returns.
    """
    # The sender of a message is exposed as `from_user`.
    res = mongo_instance.db['users'].find_one(
        {'user_id': message.from_user.id})
    # find_one returns a single document or None, not a cursor.
    if res is not None:
        bot.send_message(
            message.chat.id,
            'You already registered, email: {}'.format(res['email']))
        return
예제 #28
0
def add_reservation(name, email, date):
    """Store a reservation document in the 'reservation' collection."""
    reservation = dict(name=name, email=email, date=date)
    Mongo.insert('reservation', reservation)
예제 #29
0
from database import Mongo

# Demo script: create three documents and print the resulting listing.
# Presumably the arguments are the database and collection names — verify
# against database.Mongo.
db = Mongo("university", "test_mongo")
document1 = {"name": "document1"}
document2 = {"name": "document2"}
document3 = {"name": "document3"}
db.create(document1)
db.create(document2)
db.create(document3)
# Print whatever db.list() returns after the three inserts.
print(db.list())
예제 #30
0
def get_reservation():
    """Return everything stored in the 'reservation' collection."""
    reservations = Mongo.get_all('reservation')
    return reservations