Ejemplo n.º 1
0
    def feature(self, site):
        """Fingerprint ``site`` against the CMS signatures in ``self.cmsdata``.

        The landing page of ``site`` is downloaded once. For every signature
        whose ``'re'`` text occurs in that page (plain substring test), the
        signature's ``'url'`` path is requested; an HTTP 200 reply is reported
        on stdout and ``site`` is written to ``feature/output/<name>.txt``.

        :param site: base URL to probe; signature paths are appended verbatim.
        :return: None. Failed requests are skipped silently (best effort).
        """
        # The landing page does not depend on the signature, so fetch it a
        # single time instead of once per signature.
        try:
            res = requests.get(site, headers=self.headers, timeout=5)
        except Exception:
            return

        pageContent = res.text

        # Check every known signature
        for cmsjson in self.cmsdata:

            # Path whose presence confirms the CMS
            featureUrl = cmsjson['url']

            # Marker text expected in the landing page
            textRule = cmsjson['re']

            if textRule not in pageContent:
                continue

            try:
                res = requests.get(site + featureUrl,
                                   headers=self.headers,
                                   timeout=5)
                if res.status_code == 200:
                    print("[+] 找到 " + site + " 是一个 " + cmsjson['name'] +
                          " 程序!")
                    Tools.writeFile(
                        "feature/output/" + cmsjson['name'] + ".txt", site)
            except Exception:
                # Best effort: a failing probe must not abort the other
                # signatures.
                continue
Ejemplo n.º 2
0
 def ota_result_sync(self):
     """
     Synchronise the preferred-supplier results for every enabled OTA channel.

     For each channel whose ``config.OTA_*_STATUS`` flag is truthy, the online
     hotels are picked, their supplier data is updated and ``Tools.ota_hotels``
     is called with the channel's table name and numeric code.

     :return: True when every enabled channel ran without raising, False when
         an exception was raised (the traceback is logged).
     """
     try:
         self.logger.info('@ota_result_sync start')
         picker = Picker(self.logger)
         if config.OTA_FLIPPY_STATUS:
             self.logger.info('fliggy start ota_result_sync ')
             # Pick the hotels to rotate
             fliggy_online_hotels = picker.pick_fliggy_online_hotels()
             # Update the supplier data of the picked hotels
             self.update_fliggy_hotel_supplier(fliggy_online_hotels)
             # Bring this distributor's scheduling task online
             Tools.ota_hotels('haoqiao_fliggy_online_hotels', 206)
         if config.OTA_CTRIP_STATUS:
             self.logger.info('ctrip start ota_result_sync ')
             # Pick the hotels to rotate
             ctrip_online_hotels = picker.pick_ctrip_online_hotels_info()
             # Update the supplier data of the picked hotels
             self.update_ctrip_hotel_supplier(ctrip_online_hotels)
             # Bring this distributor's scheduling task online
             Tools.ota_hotels('haoqiao_ctrip_online_hotels', 205)
         if config.OTA_CTRIP_INTL_STATUS:
             # Distinct label so the international run can be told apart from
             # the domestic ctrip run in the log.
             self.logger.info('ctrip_intl start ota_result_sync ')
             # Pick the hotels to rotate
             ctrip_online_hotels = picker.pick_ctrip_online_hotels_info(
                 intl=True)
             # Update the supplier data of the picked hotels
             self.update_ctrip_hotel_supplier(ctrip_online_hotels,
                                              intl=True)
             # Bring this distributor's scheduling task online
             Tools.ota_hotels('haoqiao_ctrip_intl_online_hotels', 204)
         if config.OTA_QUNAR_STATUS:
             self.logger.info('qunar start ota_result_sync ')
             # Pick the hotels to rotate
             qunar_online_hotels = picker.pick_qunar_online_hotels()
             # Update the supplier data of the picked hotels
             self.update_qunar_hotel_supplier(qunar_online_hotels)
             # Bring this distributor's scheduling task online
             Tools.ota_hotels('haoqiao_qunar_online_hotels', 207)
         if config.OTA_QUNAR_SPEC_STATUS:
             self.logger.info('qunar_spec start ota_result_sync ')
             # Pick the hotels to rotate
             qunar_spec_online_hotels = \
                 picker.pick_qunar_spec_online_hotels()
             # Update the supplier data of the picked hotels
             self.update_qunar_spec_hotel_supplier(qunar_spec_online_hotels)
             # Bring this distributor's scheduling task online
             Tools.ota_hotels('haoqiao_ota_qunar_hotels', 208)
         self.logger.info('@ota_result_sync end')
         return True
     except:
         # Scheduler entry point: log everything and report failure instead
         # of letting the job die.
         self.logger.error(msg=traceback.format_exc())
         return False
Ejemplo n.º 3
0
def init_app(app):
    """Initialise logging, MySQL and the background compare server for ``app``.

    :param app: application object; only ``app.logger`` is used here.
    :return: True once ``CompareServer.run`` has been spawned with gevent,
        False when ``pymysql_init`` reports failure.

    The process exits with status 1 when ``Logger.init`` or ``AliLog.init``
    returns a falsy value, or when any exception is raised. Note that the
    ``sys.exit(1)`` calls inside the ``try`` raise ``SystemExit``, which the
    bare ``except`` below also catches before exiting again.
    """
    try:
        # Force utf-8 as the default encoding (Python 2 only idiom)
        reload(sys)
        sys.setdefaultencoding('utf-8')

        # Initialise the local file logger under <this file's dir>/log
        cur_path = os.path.dirname(os.path.abspath(__file__))
        log_path = os.path.join(cur_path, 'log')
        if not Logger.init(log_path,
                           log_level=config.APP_LOG_LEVEL,
                           log_name='compare.log'):
            sys.exit(1)

        # Initialise the Aliyun log service
        if not AliLog.init(to_ali_log=config.APP_LOG_TO_ALIYUN,
                           log_level=config.APP_LOG_LEVEL,
                           log_project=config.APP_LOG_PROJECT,
                           log_store=config.APP_LOG_STORE,
                           topic=config.APP_LOG_TOPIC,
                           endpoint=config.END_POINT,
                           access_key_id=config.ACCESS_KEY_ID,
                           access_key=config.ACCESS_KEY):
            sys.exit(1)

        # Hand the logger to Tools
        Tools.set_logger(AliLog)

        # Set the gunicorn (app) log level
        app.logger.setLevel(config.APP_LOG_LEVEL)
        # NOTE(review): Logger.logger is passed as a handler here — confirm it
        # really is a logging.Handler and not a logging.Logger.
        app.logger.addHandler(Logger.logger)

        # Quieten the requests library
        logging.getLogger("requests").setLevel(logging.WARNING)

        # Initialise mysql
        # cur_dir = os.path.dirname(os.path.abspath(__file__))
        # ini_dir = os.path.join(cur_dir, 'public')
        if not pymysql_init(logger=AliLog, hosts=config.mysql_ip_list):
            AliLog.error(msg="mysql_init failed")
            return False

        # Start the price-comparison worker; redis_handler is a name defined
        # outside this function.
        handler = CompareServer(AliLog, redis_handler)
        gevent.spawn(handler.run)
        return True
    except:
        # Start-up failed: report the traceback and terminate the process
        msg = traceback.format_exc()
        print(msg)
        Logger.logger.error(msg)
        sys.exit(1)
Ejemplo n.º 4
0
 def collection_price(self, params, days, ret_info):
     """
     Collect price data for one supplier/hotel mapping over several stays.

     :param params: tuple
         (supplier_id, hq_hotel_id, sp_hotel_code, sp_city_code,
          sp_city_name, hq_city_id)
     :param days: iterable of (checkin, checkout) pairs,
         e.g. [(checkin1, checkout1), (checkin2, checkout2)]
     :param ret_info: dict that receives the collected prices through
         ``collection_price_to_dict``
     :return: None; every exception is logged and swallowed
     """
     try:
         (supplier_id, hq_hotel_id, sp_hotel_code, sp_city_code,
          sp_city_name, hq_city_id) = params
         # Remember the city of every hotel seen (first value wins).
         hotel_key = int(hq_hotel_id)
         if hotel_key not in self.hotel_city_map:
             self.hotel_city_map[hotel_key] = hq_city_id
         # Black-listed suppliers are never queried.
         if int(supplier_id) in config.SUPPLIER_BLACK_LIST:
             return
         for checkin, checkout in days:
             try:
                 if int(supplier_id) not in self.download_suppliers:
                     continue
                 sequence = 'Compare_%s' % Tools.create_session_id()
                 # Both URL builders take the same arguments in the same order.
                 url_args = (supplier_id, hq_hotel_id, sp_hotel_code,
                             hq_city_id, checkin, checkout, sp_city_code,
                             sp_city_name, sequence)
                 # Try the cache first and fall back to the download service.
                 price_data = self.get_cache_price_data(
                     Tools.make_cache_url(*url_args))
                 if not price_data:
                     price_data = self.get_dls_price_data(
                         Tools.make_dls_url(*url_args))
                 # A falsy result from both sources means no rooms: skip.
                 if price_data:
                     self.collection_price_to_dict(supplier_id, price_data,
                                                   checkin, checkout,
                                                   ret_info)
             except:
                 # One failing stay must not stop the remaining ones.
                 self.logger.error(traceback.format_exc())
     except:
         self.logger.error(traceback.format_exc())
Ejemplo n.º 5
0
 def __init__(self, hq_hotel_id_set, logger, ota_hotels=None):
     """
     Store the collaborators and prepare the per-run caches.

     :param hq_hotel_id_set: hotel ids kept as ``self.hq_hotel_id_set``
     :param logger: logger kept as ``self.logger``
     :param ota_hotels: optional value kept as ``self.ota_hotels``; the
         original trailing note reads "records the hotel information of each
         OTA channel" (presumably about this attribute — confirm)
     """
     self.hq_hotel_id_set = hq_hotel_id_set
     self.logger = logger
     # Supplier list as returned by Tools.get_dls_suppliers()
     self.download_suppliers = Tools.get_dls_suppliers()
     self.ota_hotels = ota_hotels
     # Starts empty; filled elsewhere in the class
     self.hotel_city_map = {}
Ejemplo n.º 6
0
    def search(self):
        """Interactively query the ``sadness`` index and print the matches.

        Prompts for a keyword, a ``from`` offset, a result size and a save
        flag, runs the parsed query through ``self.es.search`` and prints one
        address per hit (``domain:port`` when a domain is present, otherwise
        ``http://ip:port``). Unless the save flag is ``"0"`` every address is
        also appended to ``elastic/output/elastic.txt``.

        :return: None
        """
        Tools.printAscii()

        keyword = input("请输入关键字:")
        fromCount = input("从第几页开始:")
        itemCount = input("查询几条记录:")
        isSave = input("是否同时保存:")
        print()

        # Parse the keyword into the query clauses
        query = self.queryParser(keyword)

        body = {
            "query": {
                "bool": {
                    "must": query
                }
            },
            "from": fromCount,
            "size": itemCount
        }

        res = self.es.search(index="sadness", body=body)

        # The total is normalised with int() because comparing it with the
        # string "0" is never true for an integer, which left the
        # "no records" branch unreachable.
        if int(res['hits']['total']['value']) == 0:
            print("[-] 没有找到相关记录!")
        else:
            infoList = [host['_source'] for host in res['hits']['hits']]

            # Print only the IP address or the domain
            for info in infoList:
                # str() keeps the concatenation valid even when the port is
                # not stored as a string.
                port = str(info['port'])
                if info['domain'] != "":
                    temp = info['domain'] + ":" + port
                else:
                    temp = "http://" + info['ip'] + ":" + port

                if isSave != "0":
                    Tools.writeFile("elastic/output/elastic.txt", temp)

                print("[+] " + temp)

        print()
        input("请按任意键继续...")
Ejemplo n.º 7
0
    def domain(self, ip):
        """Reverse-look-up the domains hosted on ``ip`` via site.ip138.com.

        Advances the shared progress counter, downloads the ip138 page for
        the host part of ``ip`` (everything before the first ``:``) and
        appends every link found under ``ul#list > li > a`` to
        ``domain/output/sites.txt``.

        :param ip: address string, optionally followed by ``:port``
        :return: None
        """
        # Progress output
        self.nowCount += 1
        percent = self.nowCount / self.maxCount * 100
        sys.stdout.write("[+] 当前进度:{:.2f}%".format(percent))
        sys.stdout.write("\r")

        try:
            lookup_url = "https://site.ip138.com/" + ip.split(":")[0]
            response = requests.get(lookup_url, headers=self.headers, timeout=5)
        except:
            # Best effort: an address that cannot be queried is skipped.
            return

        anchors = BeautifulSoup(response.text, "lxml").select("ul#list > li > a")

        # An empty result simply writes nothing.
        for anchor in anchors:
            # The href is prefixed with "http:/" exactly as before.
            Tools.writeFile("domain/output/sites.txt", "http:/" + anchor.get("href"))
Ejemplo n.º 8
0
    def host(self, ip):
        """Probe the common web ports of ``ip`` and record the open ones.

        Advances the shared progress counter, then attempts a TCP connect to
        ports 80, 81, 8000 and 8080 with a 10 ms timeout. Every port that
        accepts the connection is appended to ``host/output/ips.txt`` as
        ``ip:port``.

        :param ip: IPv4 address (or host name) to probe
        :return: None
        """
        # Progress output
        self.nowCount += 1
        sys.stdout.write("[+] 当前进度:{:.2f}%".format(self.nowCount /
                                                   self.maxCount * 100))
        sys.stdout.write("\r")

        ports = [80, 81, 8000, 8080]

        for port in ports:
            # The context manager closes the socket on every path, including
            # when connect_ex itself raises (e.g. address resolution errors).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sockfd:
                sockfd.settimeout(0.01)
                # connect_ex returns 0 on success and an errno otherwise
                is_open = sockfd.connect_ex((ip, port)) == 0

            if is_open:
                Tools.writeFile("host/output/ips.txt", ip + ":" + str(port))
Ejemplo n.º 9
0
    def start(self):
        """Scan a user-supplied address range for reachable hosts.

        Prompts for a start and an end address, expands them with
        ``self.ipcount`` and runs ``self.host`` on every address using a pool
        of 50 threads.

        :return: None
        """
        Tools.printAscii()

        startip = input("请输入起始地址:")
        endip = input("请输入结束地址:")

        print()
        print("[+] 正在扫描指定地址段中可用的地址...")

        ips = self.ipcount(startip, endip)
        self.maxCount = len(ips)

        pool = ThreadPool(processes=50)
        try:
            pool.map(self.host, ips)
        finally:
            # Release the workers and reset the progress counters even when
            # a worker raised, so a later run starts from a clean state.
            pool.close()
            pool.join()

            self.nowCount = 0
            self.maxCount = 0

        print("[+] 正在扫描指定地址段中可用的地址 done")
Ejemplo n.º 10
0
    def start(self):
        """Reverse-look-up the domains of every address in ``host/output/ips.txt``.

        Runs ``self.domain`` on each entry using a pool of 50 threads.

        :return: None
        """
        print("[+] 正在反查所有域名记录...")

        ips = Tools.getFile("host/output/ips.txt")
        self.maxCount = len(ips)

        pool = ThreadPool(processes=50)
        try:
            pool.map(self.domain, ips)
        finally:
            # Release the workers and reset the progress counters even when
            # a worker raised, so a later run starts from a clean state.
            pool.close()
            pool.join()

            self.nowCount = 0
            self.maxCount = 0

        print("[+] 正在反查所有域名记录 done")
Ejemplo n.º 11
0
def poi_search():
    """Handle a POI search request and return the comparison result as JSON.

    The query-string arguments are passed to ``handle_compare_request``
    together with a fresh session id; the resulting dict is serialised with
    ``json.dumps`` and wrapped in a response object.

    :return: the response object, or the literal string ``'error'`` when
        anything raises (the traceback is logged with the session id).
    """
    session_id = Tools.create_session_id()
    try:
        # Parse the request arguments
        request_dict = request.args.to_dict()
        # Process the request
        result_dict = handle_compare_request(request_dict, session_id)
        # Return the result
        return make_response(json.dumps(result_dict))
    except:
        # Handler boundary: never let an exception escape to the framework.
        # The label names this handler so the log entry can be traced back
        # to it.
        AliLog.error('poi_search except[%s]' % traceback.format_exc(),
                     session_id)
        return 'error'
Ejemplo n.º 12
0
    def startHost(self):
        """Collect the features of every address in ``host/output/ips.txt``.

        Runs ``self.collectHost`` on each entry using a pool of 50 threads.

        :return: None
        """
        print("[+] 正在提取可用地址的特征...")

        ips = Tools.getFile("host/output/ips.txt")
        self.maxCount = len(ips)

        pool = ThreadPool(processes=50)
        try:
            pool.map(self.collectHost, ips)
        finally:
            # Release the workers and reset the progress counters even when
            # a worker raised, so a later run starts from a clean state.
            pool.close()
            pool.join()

            self.nowCount = 0
            self.maxCount = 0

        print("[+] 正在提取可用地址的特征 done")
Ejemplo n.º 13
0
    def startSite(self):
        """Collect the features of every site in ``domain/output/sites.txt``.

        Runs ``self.collectSite`` on each entry using a pool of 50 threads,
        then waits for the user to press a key.

        :return: None
        """
        print("[+] 正在提取所有域名的特征...")

        sites = Tools.getFile("domain/output/sites.txt")
        self.maxCount = len(sites)

        pool = ThreadPool(processes=50)
        try:
            pool.map(self.collectSite, sites)
        finally:
            # Release the workers and reset the progress counters even when
            # a worker raised, so a later run starts from a clean state.
            pool.close()
            pool.join()

            self.nowCount = 0
            self.maxCount = 0

        print("[+] 正在提取所有域名的特征 done")
        print()

        input("按任意键继续...")
Ejemplo n.º 14
0
 def pick_interested_hotels(self, limit=2000):
     """
     Pick the haoqiao hotels users are most interested in.

     For every download supplier the ``summary<supplier_id>`` table is read
     for the last 30 days (at most ``limit`` rows, highest ``price_num``
     first) and ``price_num`` is summed per hotel across all suppliers. The
     ``limit`` hotels with the highest totals are returned.

     :param limit: upper bound on the number of hotels returned
     :return: set of hq_hotel_id; empty when there are no download suppliers
         or an unexpected exception occurs (the traceback is logged)
     """
     try:
         interested_hotels_info = dict()
         download_suppliers = Tools.get_dls_suppliers()
         if not download_suppliers:
             return set()
         day = datetime.datetime.today() - datetime.timedelta(days=30)
         for supplier_id in download_suppliers:
             try:
                 sql = "SELECT hq_hotel_id, price_num FROM summary%s WHERE stat_date >= '%s' " \
                       "ORDER BY price_num DESC limit %s;" % (supplier_id, day, limit)
                 ret = pymysql_query(sql, Const.summary)
                 if ret == DB_EXCEPTION:
                     # No exception is active at this point, so a traceback
                     # would be empty; log the failing statement instead.
                     self.logger.error('Exec sql=%s error' % sql)
                     continue
                 for row in ret:
                     hq_hotel_id = row[Const.hq_hotel_id]
                     price_num = row[Const.price_num]
                     # Rows are sorted descending: the first zero ends the
                     # useful part of the result.
                     if price_num == 0:
                         break
                     interested_hotels_info[hq_hotel_id] = \
                         interested_hotels_info.get(hq_hotel_id, 0) + price_num
             except:
                 # One failing supplier must not stop the remaining ones.
                 self.logger.error(traceback.format_exc())
         # Sort by accumulated request count, highest first. A key function
         # gives the same order as the former cmp-based call and also works
         # on Python 3, where sorted() no longer accepts a cmp argument.
         sorted_hotels = sorted(interested_hotels_info.items(),
                                key=lambda item: item[1],
                                reverse=True)
         return set(hq_hotel_id for hq_hotel_id, _ in sorted_hotels[:limit])
     except:
         self.logger.error(traceback.format_exc())
         return set()
Ejemplo n.º 15
0
 def __init__(self, logger, redis_handler):
     """
     Set up the instance state and start the background scheduler.

     :param logger: logger kept as ``self.logger``
     :param redis_handler: handler kept as ``self.redis_handler``
     """
     self.scheduler = BackgroundScheduler()
     self.logger = logger
     self.redis_handler = redis_handler
     self.hq_hotel_id_set = set()
     self.scheduled_supplier = Tools.get_schedule_supplier()
     # Weekly OTA cron jobs as (callback, hour). Both fire on Friday
     # ('fri'); the original comment said Monday evening at 19:00, which
     # does not match the code — confirm the intended day.
     weekly_jobs = (
         (self.ota_task_sync, 19),
         (self.ota_result_sync, 22),
     )
     for callback, hour in weekly_jobs:
         self.scheduler.add_job(callback,
                                'cron',
                                day_of_week='fri',
                                hour=hour)
     self.scheduler.start()
Ejemplo n.º 16
0
 def start(self):
     """
     Run ``self.feature`` on every site listed in ``domain/output/sites.txt``
     using a pool of 50 threads.

     :return: None
     """
     pool = ThreadPool(processes=50)
     try:
         pool.map(self.feature, Tools.getFile("domain/output/sites.txt"))
     finally:
         # Release the worker threads even when reading the list or a
         # worker raised.
         pool.close()
         pool.join()
Ejemplo n.º 17
0
 def update_fliggy_hotel_supplier(self, fliggy_online_hotels):
     """
     Refresh the preferred-supplier list of the hotels online on fliggy.

     :param fliggy_online_hotels: dict keyed by hq_hotel_id (string keys: they
         are joined into the SQL and looked up with ``str(...)``). Each value
         is a dict that is read for ``Const.supplier_blacklist`` and
         ``Const.currentsupplier`` and receives ``Const.top``,
         ``Const.ordered_suppliers`` and ``Const.new_suppliers``.
     :return: True on success or when the dict is empty; False when the rank
         query reports DB_EXCEPTION or an unexpected exception occurs.
     """
     try:
         if len(fliggy_online_hotels.keys()) == 0:
             return True
         # Only rank rows updated within the last 90 days are considered;
         # per hotel the ten best-scored suppliers are returned as one
         # comma-separated string.
         day = datetime.datetime.now() - datetime.timedelta(days=30 * 3)
         sql = "SELECT hq_hotel_id, substring_index(group_concat(supplier_id ORDER BY min_price_score DESC),',',10) AS 'top' " \
               "FROM hotel_supplier_rank WHERE update_time > '%s' AND hq_hotel_id in (%s) GROUP BY hq_hotel_id" % \
               (day, ','.join(fliggy_online_hotels.keys()))
         ret = pymysql_query(sql, Const.schedule)
         if ret == DB_EXCEPTION:
             self.logger.error('Exec sql=%s error' % sql)
             return False
         for row in ret:
             try:
                 hq_hotel_id = str(row[Const.hq_hotel_id])
                 ordered_suppliers = self.get_hotel_order_suppliers(
                     hq_hotel_id)
                 # Supplier list, already sorted by score
                 top_list = row[Const.top].split(',')
                 # A lone '0' is treated as "no ranked supplier": publish an
                 # empty list for this hotel.
                 if len(top_list) == 1 and top_list[0] == '0':
                     fliggy_online_hotels[hq_hotel_id][
                         Const.new_suppliers] = list()
                     continue
                 area = Tools.get_hotel_area(hq_hotel_id)
                 if area == -1:
                     self.logger.error('get country error, hq_hotel_id=%s' %
                                       hq_hotel_id)
                     continue
                 # Skip hotels whose area is black-listed for this channel.
                 # Area codes per the original note: 104 mainland China,
                 # 107 Taiwan, 216 Hong Kong/Macau.
                 if area in config.OTA_AREA_BLACK_LIST['fliggy']:
                     continue
                 fliggy_online_hotels[hq_hotel_id][Const.top] = top_list
                 fliggy_online_hotels[hq_hotel_id][
                     Const.ordered_suppliers] = ordered_suppliers
                 # Drop the suppliers black-listed for this hotel/channel to
                 # obtain the suppliers that may go online
                 new_supplier_list = self.filter_blacklist(
                     'fliggy', area, top_list, ordered_suppliers,
                     fliggy_online_hotels[hq_hotel_id][
                         Const.supplier_blacklist])
                 # Nothing left after filtering: keep the current suppliers
                 if new_supplier_list == [''
                                          ] or len(new_supplier_list) == 0:
                     new_supplier_list = fliggy_online_hotels[hq_hotel_id][
                         Const.currentsupplier]
                     self.logger.warn(
                         '@hq_hotel_id=%s get new supplier list None, stand by'
                         % hq_hotel_id)
                 fliggy_online_hotels[hq_hotel_id][
                     Const.new_suppliers] = new_supplier_list
                 self.logger.info(
                     '@fliggy hq_hotel_id=%s info=%s' %
                     (hq_hotel_id, fliggy_online_hotels[hq_hotel_id]))
                 # Short sleep between rows (yields to other greenlets)
                 gevent.sleep(0.01)
             except:
                 # One failing hotel must not stop the remaining ones
                 self.logger.error(msg=traceback.format_exc())
         # Update the supplier lists in the fliggy online table
         self.update_fliggy_hotel_table(fliggy_online_hotels)
         # Update the change-log table
         self.update_supplier_change_logger_table('fliggy',
                                                  fliggy_online_hotels)
         return True
     except:
         self.logger.error(msg=traceback.format_exc())
         return False
Ejemplo n.º 18
0
 def update_ctrip_hotel_supplier(self, ctrip_online_hotels, intl=False):
     """
     Refresh the preferred-supplier list of the hotels online on ctrip.

     :param ctrip_online_hotels: dict keyed by hq_hotel_id (string keys: they
         are joined into the SQL and looked up with ``str(...)``). Each value
         is a dict that is read for ``Const.supplier_blacklist`` and
         ``Const.currentsupplier`` and receives ``Const.top``,
         ``Const.ordered_suppliers`` and ``Const.new_suppliers``.
     :param intl: when true the UPDATE targets
         ``haoqiao_ctrip_intl_online_hotels`` instead of
         ``haoqiao_ctrip_online_hotels``; the change-log call uses the label
         'ctrip' in both cases.
     :return: True on success or when the dict is empty; False when the rank
         query reports DB_EXCEPTION or an unexpected exception occurs.
     """
     try:
         if len(ctrip_online_hotels.keys()) == 0:
             return True
         # Only rank rows updated within the last 90 days are considered;
         # per hotel the ten best-scored suppliers are returned as one
         # comma-separated string.
         day = datetime.datetime.now() - datetime.timedelta(days=30 * 3)
         sql = "SELECT hq_hotel_id, substring_index(group_concat(supplier_id ORDER BY min_price_score DESC),',',10) AS 'top' " \
               "FROM hotel_supplier_rank WHERE update_time > '%s' AND hq_hotel_id in (%s) GROUP BY hq_hotel_id" % \
               (day, ','.join(ctrip_online_hotels.keys()))
         ret = pymysql_query(sql, Const.schedule)
         if ret == DB_EXCEPTION:
             self.logger.error('Exec sql=%s error' % sql)
             return False
         for row in ret:
             try:
                 hq_hotel_id = str(row[Const.hq_hotel_id])
                 ordered_suppliers = self.get_hotel_order_suppliers(
                     hq_hotel_id)
                 # Supplier list, already sorted by score
                 top_list = row[Const.top].split(',')
                 # A lone '0' is treated as "no ranked supplier": skip
                 if len(top_list) == 1 and top_list[0] == '0':
                     continue
                 area = Tools.get_hotel_area(hq_hotel_id)
                 if area == -1:
                     self.logger.error('get country error, hq_hotel_id=%s' %
                                       hq_hotel_id)
                     continue
                 # Skip hotels whose area is black-listed for this channel.
                 # Area codes per the original note: 104 mainland China,
                 # 107 Taiwan, 216 Hong Kong/Macau.
                 if area in config.OTA_AREA_BLACK_LIST['ctrip']:
                     continue
                 ctrip_online_hotels[hq_hotel_id][Const.top] = top_list
                 ctrip_online_hotels[hq_hotel_id][
                     Const.ordered_suppliers] = ordered_suppliers
                 # Drop the suppliers black-listed for this hotel/channel to
                 # obtain the suppliers that may go online
                 new_supplier_list = self.filter_blacklist(
                     'ctrip', area, top_list, ordered_suppliers,
                     ctrip_online_hotels[hq_hotel_id][
                         Const.supplier_blacklist])
                 # Beta mode: additionally keep every currently-online
                 # supplier that is on the white list of the hotel's region.
                 if config.CTRIP_BETA:
                     mainland_white_list = [
                         str(i) for i in
                         config.OTA_WHITE_LIST['mainland'].get('ctrip')
                     ]
                     abroad_white_list = [
                         str(i) for i in
                         config.OTA_WHITE_LIST['abroad'].get('ctrip')
                     ]
                     hks_white_list = [
                         str(i)
                         for i in config.OTA_WHITE_LIST['hks'].get('ctrip')
                     ]
                     if area == 104:
                         mergy = set(mainland_white_list) & set(
                             ctrip_online_hotels[hq_hotel_id][
                                 Const.currentsupplier])
                     elif area in (107, 216):
                         mergy = set(hks_white_list) & set(
                             ctrip_online_hotels[hq_hotel_id][
                                 Const.currentsupplier])
                     else:
                         mergy = set(abroad_white_list) & set(
                             ctrip_online_hotels[hq_hotel_id][
                                 Const.currentsupplier])
                     # The set round-trip discards the ranking order
                     new_supplier_list = list(
                         set(new_supplier_list) | mergy)
                 ctrip_online_hotels[hq_hotel_id][
                     Const.new_suppliers] = new_supplier_list
                 self.logger.info(
                     '@ctrip hq_hotel_id=%s info=%s' %
                     (hq_hotel_id, ctrip_online_hotels[hq_hotel_id]))
                 # Short sleep between rows (yields to other greenlets)
                 gevent.sleep(0.01)
             except:
                 # One failing hotel must not stop the remaining ones
                 self.logger.error(msg=traceback.format_exc())
         # Update the supplier lists in the ctrip online table
         cursor = start_transaction(Const.ota)
         for hq_hotel_id, item in ctrip_online_hotels.items():
             try:
                 # NOTE(review): hotels skipped in the loop above never get
                 # Const.new_suppliers assigned there; unless the caller
                 # pre-populates it, the lookup below raises and is only
                 # logged by the except — confirm.
                 old_list = set(item[Const.currentsupplier])
                 new_list = set(item[Const.new_suppliers])
                 # Compared as sets: a pure reordering is not written back
                 if old_list == new_list:
                     continue
                 if intl:
                     sql = "UPDATE haoqiao_ctrip_intl_online_hotels SET supplier_id='%s', add_time=now() WHERE hotel_id=%s" % (
                         ','.join(new_list), hq_hotel_id)
                 else:
                     sql = "UPDATE haoqiao_ctrip_online_hotels SET supplier_id='%s', add_time=now() WHERE hotel_id=%s" % (
                         ','.join(new_list), hq_hotel_id)
                 pymysql_transaction_query(sql, cursor)
             except:
                 self.logger.error(msg=traceback.format_exc())
         commit_transaction(cursor)
         # Update the change-log table
         self.update_supplier_change_logger_table('ctrip',
                                                  ctrip_online_hotels)
         return True
     except:
         self.logger.error(msg=traceback.format_exc())
         return False
Ejemplo n.º 19
0
 def update_qunar_hotel_supplier(self, qunar_online_hotels):
     """
     Refresh the preferred-supplier list of the hotels online on qunar.

     :param qunar_online_hotels: dict keyed by hq_hotel_id (string keys: they
         are joined into the SQL and looked up with ``str(...)``). Each value
         is a dict that is read for ``Const.supplier_blacklist`` and
         ``Const.currentsupplier`` and receives ``Const.top``,
         ``Const.ordered_suppliers`` and ``Const.new_suppliers``.
     :return: True on success or when the dict is empty; False when the rank
         query reports DB_EXCEPTION or an unexpected exception occurs.
     """
     try:
         if len(qunar_online_hotels.keys()) == 0:
             return True
         # Only rank rows updated within the last 90 days are considered;
         # per hotel the ten best-scored suppliers are returned as one
         # comma-separated string.
         day = datetime.datetime.now() - datetime.timedelta(days=30 * 3)
         sql = "SELECT hq_hotel_id, substring_index(group_concat(supplier_id ORDER BY min_price_score DESC),',',10) AS 'top' " \
               "FROM hotel_supplier_rank WHERE update_time > '%s' AND hq_hotel_id in (%s) GROUP BY hq_hotel_id" % \
               (day, ','.join(qunar_online_hotels.keys()))
         ret = pymysql_query(sql, Const.schedule)
         if ret == DB_EXCEPTION:
             self.logger.error('Exec sql=%s error' % sql)
             return False
         for row in ret:
             try:
                 hq_hotel_id = str(row[Const.hq_hotel_id])
                 ordered_suppliers = self.get_hotel_order_suppliers(
                     hq_hotel_id)
                 # Supplier list, already sorted by score
                 top_list = row[Const.top].split(',')
                 # A lone '0' is treated as "no ranked supplier": skip
                 if len(top_list) == 1 and top_list[0] == '0':
                     continue
                 area = Tools.get_hotel_area(hq_hotel_id)
                 if area == -1:
                     self.logger.error('get country error, hq_hotel_id=%s' %
                                       hq_hotel_id)
                     continue
                 # Skip hotels whose area is black-listed for this channel.
                 # Area codes per the original note: 104 mainland China,
                 # 107 Taiwan, 216 Hong Kong/Macau.
                 if area in config.OTA_AREA_BLACK_LIST['qunar']:
                     self.logger.info('@hq_hotel_id=%s area=%s ignore' %
                                      (hq_hotel_id, area))
                     continue
                 qunar_online_hotels[hq_hotel_id][Const.top] = top_list
                 qunar_online_hotels[hq_hotel_id][
                     Const.ordered_suppliers] = ordered_suppliers
                 # Drop the suppliers black-listed for this hotel/channel to
                 # obtain the suppliers that may go online
                 new_supplier_list = self.filter_blacklist(
                     'qunar', area, top_list, ordered_suppliers,
                     qunar_online_hotels[hq_hotel_id][
                         Const.supplier_blacklist])
                 qunar_online_hotels[hq_hotel_id][
                     Const.new_suppliers] = new_supplier_list
                 self.logger.info(
                     '@qunar hq_hotel_id=%s info=%s' %
                     (hq_hotel_id, qunar_online_hotels[hq_hotel_id]))
                 # Short sleep between rows (yields to other greenlets)
                 gevent.sleep(0.01)
             except:
                 # One failing hotel must not stop the remaining ones
                 self.logger.error(msg=traceback.format_exc())
         # Write the changed supplier lists to the qunar online table
         cursor = start_transaction(Const.ota)
         for hq_hotel_id, item in qunar_online_hotels.items():
             try:
                 # NOTE(review): hotels skipped in the loop above never get
                 # Const.new_suppliers assigned there; unless the caller
                 # pre-populates it, the lookup below raises and is only
                 # logged by the except — confirm.
                 old_list = set(item[Const.currentsupplier])
                 new_list = set(item[Const.new_suppliers])
                 # Compared as sets: a pure reordering is not written back
                 if old_list == new_list:
                     continue
                 sql = "UPDATE haoqiao_qunar_online_hotels SET supplier_id='%s', add_time=now() WHERE hotel_id=%s" % (
                     ','.join(new_list), hq_hotel_id)
                 pymysql_transaction_query(sql, cursor)
             except:
                 self.logger.error(msg=traceback.format_exc())
         commit_transaction(cursor)
         # Update the change-log table
         self.update_supplier_change_logger_table('qunar',
                                                  qunar_online_hotels)
         return True
     except:
         self.logger.error(msg=traceback.format_exc())
         return False
Ejemplo n.º 20
0
#!/usr/bin/env python
# -- coding: utf-8 --

import os

from crawler.work import Crawler
from elastic.search import MyElasticSearch
from public.tools import Tools

if __name__ == '__main__':

    # Interactive main menu: redraw and dispatch until the user chooses 4.
    while True:
        # "cls" only exists on Windows; other platforms use "clear".
        os.system("cls" if os.name == "nt" else "clear")
        Tools.printAscii()
        print("1. 采集指纹")
        print("2. 检索指纹")
        print("3. 清除缓存")
        print("4. 退出程序")
        print()
        try:
            choise = int(input("请选择一个模式:"))
        except ValueError:
            # Non-numeric input: show the menu again instead of crashing.
            continue

        if choise == 1:
            Crawler().start()
        elif choise == 2:
            MyElasticSearch().search()
        elif choise == 3:
            Tools().clearAll()
        elif choise == 4:
            break
        # Any other number simply falls through to the next iteration.
Ejemplo n.º 21
0
 def update_qunar_spec_hotel_supplier(self, qunar_spec_online_hotels):
     """
     Refresh the suppliers of the qunar alias accounts: the second- and
     third-tier suppliers are written to ``haoqiao_ota_qunar_hotels`` for the
     account ids listed in ``qunar_account_id_list``.

     :param qunar_spec_online_hotels: dict keyed by hq_hotel_id (string keys:
         they are joined into the SQL and looked up with ``str(...)``). Each
         value is a dict that is read for ``Const.supplier_blacklist`` and
         receives ``Const.top`` and ``Const.ordered_suppliers``.
     :return: True on success or when the dict is empty; False when the rank
         query reports DB_EXCEPTION or an unexpected exception occurs.
     """
     try:
         # qunar alias account ids
         qunar_account_id_list = [3, 4]
         if len(qunar_spec_online_hotels.keys()) == 0:
             return True
         # Per hotel, the thirty best-scored suppliers as one comma-separated
         # string (no update_time filter here).
         sql = "SELECT hq_hotel_id, substring_index(group_concat(supplier_id ORDER BY min_price_score DESC),',',30) AS 'top' " \
               "FROM hotel_supplier_rank WHERE hq_hotel_id in (%s) GROUP BY hq_hotel_id" % ','.join(
             qunar_spec_online_hotels.keys())
         ret = pymysql_query(sql, Const.schedule)
         if ret == DB_EXCEPTION:
             self.logger.error('Exec sql=%s error' % sql)
             return False
         cursor = start_transaction(Const.ota)
         for row in ret:
             try:
                 hq_hotel_id = str(row[Const.hq_hotel_id])
                 ordered_suppliers = self.get_hotel_order_suppliers(
                     hq_hotel_id)
                 # Supplier list, already sorted by score
                 top_list = row[Const.top].split(',')
                 # A lone '0' is treated as "no ranked supplier": skip
                 if len(top_list) == 1 and top_list[0] == '0':
                     continue
                 area = Tools.get_hotel_area(hq_hotel_id)
                 if area == -1:
                     self.logger.error('get country error, hq_hotel_id=%s' %
                                       hq_hotel_id)
                     continue
                 # Skip hotels whose area is black-listed for this channel.
                 # Area codes per the original note: 104 mainland China,
                 # 107 Taiwan, 216 Hong Kong/Macau.
                 if area in config.OTA_AREA_BLACK_LIST['qunar']:
                     self.logger.info('@hq_hotel_id=%s area=%s ignore' %
                                      (hq_hotel_id, area))
                     continue
                 qunar_spec_online_hotels[hq_hotel_id][Const.top] = top_list
                 qunar_spec_online_hotels[hq_hotel_id][
                     Const.ordered_suppliers] = ordered_suppliers
                 # Drop the suppliers black-listed for this hotel/channel;
                 # the result is the first-tier group, excluded below
                 first_group = self.filter_blacklist(
                     'qunar', area, top_list, ordered_suppliers,
                     qunar_spec_online_hotels[hq_hotel_id][
                         Const.supplier_blacklist])
                 mainland_white_list = [
                     str(i)
                     for i in config.OTA_WHITE_LIST['mainland'].get('qunar')
                 ]
                 abroad_white_list = [
                     str(i)
                     for i in config.OTA_WHITE_LIST['abroad'].get('qunar')
                 ]
                 hks_white_list = [
                     str(i)
                     for i in config.OTA_WHITE_LIST['hks'].get('qunar')
                 ]
                 # "-" binds tighter than "&", so each expression below is
                 # white_list & (top_list - first_group - weighted suppliers).
                 # The set round-trip discards the ranking order.
                 if area == 104:
                     supplier_list = list(
                         set(mainland_white_list)
                         & set(top_list) - set(first_group) -
                         set(config.OTA_SUPPLIER_WEIGHT.keys()))
                 elif area in (107, 216):
                     supplier_list = list(
                         set(hks_white_list)
                         & set(top_list) - set(first_group) -
                         set(config.OTA_SUPPLIER_WEIGHT.keys()))
                 else:
                     supplier_list = list(
                         set(abroad_white_list)
                         & set(top_list) - set(first_group) -
                         set(config.OTA_SUPPLIER_WEIGHT.keys()))
                 # Hand out the suppliers in slices of five, one slice per
                 # account; with fewer than five suppliers every account
                 # receives the whole list.
                 x = 0
                 for qunar_account_id in qunar_account_id_list:
                     if len(supplier_list) < 5:
                         new_list = supplier_list
                     else:
                         new_list = supplier_list[x:x + 5]
                     x += 5
                     sql = "UPDATE haoqiao_ota_qunar_hotels SET supplier_id='%s', add_time=now() WHERE hotel_id=%s and qunar_account_id=%s" % (
                         ','.join(new_list), hq_hotel_id, qunar_account_id)
                     pymysql_transaction_query(sql, cursor)
                 self.logger.info(
                     '@qunar hq_hotel_id=%s info=%s' %
                     (hq_hotel_id, qunar_spec_online_hotels[hq_hotel_id]))
                 # Short sleep between rows (yields to other greenlets)
                 gevent.sleep(0.01)
             except:
                 # One failing hotel must not stop the remaining ones
                 self.logger.error(msg=traceback.format_exc())
         commit_transaction(cursor)
         return True
     except:
         self.logger.error(msg=traceback.format_exc())
         return False