Ejemplo n.º 1
0
 def count_articles(count_article):
     """Add ``count_article`` to the article counter of the current run.

     Counters are kept in the ``run_counts`` collection, one document per
     ``save_name()`` value.  Nothing is written when ``count_article`` is 0.
     Every error raised while talking to the database is logged and
     swallowed, so this bookkeeping never interrupts the caller.

     :param count_article: number of articles collected in this batch.
     :return: None
     """
     collection_name = 'run_counts'
     log.info('文章数量:{}'.format(count_article))
     if count_article == 0:
         return
     try:
         db = mongo_conn()
         collection = db[collection_name]
         # Resolve the run key once so the lookup, the insert and the update
         # are guaranteed to address the same document.
         name = save_name()
         # Ask the server for the run document instead of pulling the whole
         # collection and comparing save_name client-side.
         item = collection.find_one({'save_name': name})
         if item is None:
             # No document for this run yet (empty collection or a new
             # save_name): create it in the same shape as the other writers
             # so the count below is not silently dropped.
             collection.insert({
                 'save_name': name,
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None
             })
             log.info('插入文章数成功')
             previous = 0
         else:
             # A missing or None counter is treated as zero.
             previous = item.get('article_count') or 0
         # NOTE(review): insert()/update() look like the legacy pymongo
         # collection API -- confirm the installed driver version before
         # migrating to insert_one()/update_one().
         collection.update(
             {'save_name': name},
             {'$set': {
                 'article_count': count_article + previous
             }},
             upsert=True)
         log.info('更新文章数量成功')
     except Exception as e:
         log.exception(e)
Ejemplo n.º 2
0
 def get_account(self):
     """Fetch the next account from the dispatcher and record it in run stats.

     Requests one task from the dispatch service, reads its ``account``
     field, then increments ``account_count`` (and refreshes ``end``) on the
     ``run_counts`` document of the current ``save_name()``.  The document is
     created when it does not exist yet.

     :return: ``[account]`` on success (``account`` is ``None`` if the task
         carries no such key); ``None`` when the dispatcher sends no ``data``
         or when any step raises -- the error is logged, not propagated.
     """
     collection_name = 'run_counts'
     try:
         url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
         # 'Connection: close' asks the server not to keep the socket alive
         # after this single request.
         resp = requests.get(url,
                             timeout=self.timeout,
                             headers={'Connection': 'close'})
         data = json.loads(resp.text).get('data')
         if not data:
             # Nothing dispatched: implicitly return None.
             return
         # ``data`` is itself a JSON-encoded string.
         account = json.loads(data).get('account')
         db = mongo_conn()
         collection = db[collection_name]
         # Resolve the run key once so lookup, update and insert agree.
         name = save_name()
         # Query the run document directly instead of scanning the whole
         # collection client-side.
         item = collection.find_one({'save_name': name})
         if item is None:
             log.info('找不到save_name,需要插入')
             collection.insert({
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None,
                 'save_name': name
             })
             log.info("插入mongo成功")
         else:
             # A document without account_count (or with None) used to raise
             # TypeError here, which the handler below turned into a lost
             # account; treat the missing counter as zero instead.
             count = (item.get('account_count') or 0) + 1
             log.info(item)
             collection.update(
                 {'save_name': name}, {
                     '$set': {
                         'account_count': count,
                         'end': time_strftime()
                     }
                 },
                 upsert=True)
             log.info("更新mongo成功")
     except Exception as e:
         log.info('调度获取account出错:{}'.format(e))
         return None
     return [account]
Ejemplo n.º 3
0
 def urls_article(html):
     """Extract mp.weixin.qq.com article URLs from ``html`` and count them.

     Every ``"content_url":"...","copyright_stat"`` fragment found in
     ``html`` is turned into one URL.  Fragments that cannot be parsed are
     logged and skipped instead of aborting the whole extraction.  The number
     of URLs is then added to ``article_count`` of the current run in the
     ``run_counts`` collection; database errors are logged and never prevent
     the URLs from being returned.

     :param html: page source containing the ``content_url`` fragments.
     :return: list of article URL strings (possibly empty).
     """
     collection_name = 'run_counts'
     items = re.findall('"content_url":".*?,"copyright_stat"', html)
     urls = []
     for item in items:
         # Cut off the leading '"content_url":"' (15 chars) and the trailing
         # '","copyright_stat"' (18 chars); dropping 'amp;' turns '&amp;'
         # back into '&'.
         url_last = item[15:-18].replace('amp;', '')
         # Some entries already hold a permanent, absolute link.
         if '_biz' in url_last:
             match = re.search('http://mp.weixin.qq.*?wechat_redirect',
                               url_last)
             if match is None:
                 # Previously .group() on None raised AttributeError and
                 # discarded every URL of the page.
                 log.info('永久链接解析失败,跳过:{}'.format(url_last))
                 continue
             urls.append(match.group())
             continue
         url = 'https://mp.weixin.qq.com' + url_last
         # Sometimes the real link is nested inside the value and has to be
         # matched a second time.
         if 'content_url' in url:
             match = re.search('"content_url":".*?wechat_redirect', url)
             if match is None:
                 log.info('嵌套文章链接解析失败,跳过:{}'.format(url))
                 continue
             url = match.group()[15:].replace('amp;', '')
         urls.append(url)
     # Record how many articles were found.
     count_article = len(urls)
     log.info('文章数量:{}'.format(count_article))
     if count_article == 0:
         return urls
     try:
         db = mongo_conn()
         collection = db[collection_name]
         # Resolve the run key once so lookup, insert and update agree.
         name = save_name()
         # Query the run document directly instead of scanning the whole
         # collection client-side.
         record = collection.find_one({'save_name': name})
         if record is None:
             # No document for this run yet: create it so the count below is
             # not silently dropped.
             collection.insert({
                 'save_name': name,
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None
             })
             log.info('插入文章数成功')
             previous = 0
         else:
             # A missing or None counter is treated as zero.
             previous = record.get('article_count') or 0
         collection.update(
             {'save_name': name},
             {'$set': {
                 'article_count': count_article + previous
             }},
             upsert=True)
         log.info('更新文章数量成功')
     except Exception as e:
         log.exception(e)
     return urls
Ejemplo n.º 4
0
 def get_account():
     """Fetch the next account from the dispatcher and record it in run stats.

     Requests one task from the dispatch service, reads its ``account``
     field, then increments ``account_count`` (and refreshes ``end``) on the
     ``run_counts`` document of the current ``save_name()``.  The document is
     created when it does not exist yet.

     :return: ``[account]`` on success (``account`` is ``None`` if the task
         carries no such key); ``None`` when the dispatcher sends no ``data``
         or when any step raises -- the error is logged, not propagated.
     """
     # Earlier revisions fetched accounts from other endpoints (a
     # Schedule/dispatch service and a 'nextaccount?label=5' list API); only
     # the dispatcher below is used now.
     collection_name = 'run_counts'
     try:
         url = 'http://dispatch.yunrunyuqing.com:38082/ScheduleDispatch/dispatch?type=8'
         resp = requests.get(url, timeout=30)
         data = json.loads(resp.text).get('data')
         if not data:
             # Nothing dispatched: implicitly return None.
             return
         # ``data`` is itself a JSON-encoded string.
         account = json.loads(data).get('account')
         db = mongo_conn()
         collection = db[collection_name]
         # Resolve the run key once so lookup, update and insert agree.
         name = save_name()
         # Query the run document directly instead of scanning the whole
         # collection client-side.
         item = collection.find_one({'save_name': name})
         if item is None:
             log.info('找不到save_name,需要插入')
             collection.insert({
                 'account_count': 1,
                 'article_count': 0,
                 'start': time_strftime(),
                 'end': None,
                 'save_name': name
             })
             log.info("插入mongo成功")
         else:
             # A document without account_count (or with None) used to raise
             # TypeError here, which the handler below turned into a lost
             # account; treat the missing counter as zero instead.
             count = (item.get('account_count') or 0) + 1
             log.info(item)
             collection.update(
                 {'save_name': name}, {
                     '$set': {
                         'account_count': count,
                         'end': time_strftime()
                     }
                 },
                 upsert=True)
             log.info("更新mongo成功")
     except Exception as e:
         log.info('调度获取account出错:{}'.format(e))
         return None
     return [account]
Ejemplo n.º 5
0
def demo_test(text_model, image_model, label_dict, label_dict_en):
    """
    Download one captcha, classify its images, submit the answer and dump
    a report.

    The captcha is split into a text crop and image tiles, all of which are
    saved under ``demo_path``.  Each tile is classified with ``image_model``;
    the wanted label is read from the captcha via ``GoogleLens``.  Tiles
    whose label equals the wanted one are submitted; if none matches
    exactly, the closest label according to ``process.extractOne`` is used
    instead.  The outcome is written as JSON to ``file.txt`` in
    ``demo_path``.

    :param text_model: not used by this function.
    :param image_model: object whose ``predict`` scores the tile images.
    :param label_dict: maps a predicted class index to the label reported
        under the ``cn`` key.
    :param label_dict_en: maps a predicted class index to the label reported
        under the ``en`` key.
    :return: None
    """
    captcha_path = utils.download_captcha()
    target_size = (image_shape[0], image_shape[1])
    raw_texts, raw_images = utils.process_raw_images(captcha_path,
                                                     target_size)

    # Keep the text crop and every tile next to the demo output.
    utils.save_name(raw_texts[0], demo_path, 'text')
    for position, tile in enumerate(raw_images):
        utils.save_name(tile, demo_path, position)

    shutil.copy(captcha_path, os.path.join(demo_path, 'demo.png'))

    batch = np.array([np.asarray(tile) for tile in raw_images])
    scores = image_model.predict(batch)
    predicted = np.argmax(scores, 1)
    # Best score per tile; computed here but not used further below.
    image_prob = np.max(scores, 1)

    image_label = [
        label_dict[class_id].replace("\xa0", "") for class_id in predicted
    ]
    image_label_en = [label_dict_en[class_id] for class_id in predicted]
    text_label = GoogleLens.get_target_text(captcha_path)

    print(text_label)
    print(image_label)
    print(image_label_en)

    def positions_of(wanted):
        # Indices of all tiles whose label equals ``wanted``.
        return {
            position
            for position, name in enumerate(image_label) if wanted == name
        }

    selected = positions_of(text_label)
    if not selected:
        # No exact hit: fall back to the closest recognised label.
        closest, _score = process.extractOne(text_label, image_label)
        print(text_label, closest)
        text_label = closest
        selected = positions_of(closest)

    result = utils.submit_captcha(selected)
    utils.draw_circle(selected, demo_path, 'demo.png')

    labels_by_position = {
        position: {
            'cn': name,
            'en': image_label_en[position]
        }
        for position, name in enumerate(image_label)
    }
    report = {
        'text_label': text_label,
        'text_label_en': utils.find_en_word(image_label, image_label_en,
                                            text_label),
        'label': labels_by_position,
        # The submission counts as successful when the response contains
        # this marker.
        'result': "成功" in result,
    }
    with open(os.path.join(demo_path, 'file.txt'), 'w') as report_file:
        report_file.write(json.dumps(report, indent=4, ensure_ascii=False))