def execute_sql(sql):
    service_platform_conn = service_platform_pool.connection()
    cursor = service_platform_conn.cursor()
    cursor.execute(sql, [])
    service_platform_conn.commit()
    cursor.close()
    service_platform_conn.close()
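Every example on this page checks a connection out of service_platform_pool, which Example #15 imports from proj.mysql_pool. A minimal sketch of how such a pool could be built with DBUtils' PooledDB on top of pymysql is below; the host, credentials, and pool sizes are placeholder assumptions, not the project's actual settings.

# proj/mysql_pool.py -- hypothetical sketch, not the project's real configuration
import pymysql
from DBUtils.PooledDB import PooledDB  # DBUtils 1.x import path

service_platform_pool = PooledDB(
    creator=pymysql,      # DB-API 2 module used to open connections
    mincached=2,          # idle connections kept ready in the pool
    maxconnections=20,    # hard cap on simultaneously open connections
    blocking=True,        # wait for a free connection instead of raising
    host='127.0.0.1',
    port=3306,
    user='spider',
    passwd='secret',
    db='service_platform',
    charset='utf8mb4',
)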
Example #2
    def _execute(self, **kwargs):
        dept_info = self.task.kwargs['dept_info']
        dest_info = self.task.kwargs['dest_info']
        source = self.task.kwargs['source']
        error_code, result, page_store_key = GT_to_database(
            tid=self.task.task_id,
            used_times=self.task.used_times,
            vacation_type=self.task.kwargs['vacation_type'],
            source=source,
            ticket=self.task.kwargs,
            need_cache=self.task.used_times == 0)

        db[source + 'GT_list'].save({
            'collections': self.task.collection,
            'task_id': self.task.task_id,
            'used_times': self.task.used_times[0],
            'stored_page_keys': page_store_key,
            'dept_info': dept_info,
            'dest_info': dest_info,
            'result': result,
            'source': self.task.kwargs['source'],
            'insert_time': datetime.datetime.now()
        })

        self.task.error_code = error_code

        sql = SQL.format(self.task.task_name)
        data = []
        if source == 'ctrip':
            for res in result:
                data.append(
                    (source, res['pid_3rd'], dept_info['id'], dest_info['id'],
                     json.dumps(res)))
        elif source == 'tuniu':
            for res in result:
                data.append(
                    (source, res['id'], dept_info['id'], dest_info['id'],
                     json.dumps(res)))
        try:
            service_platform_conn = service_platform_pool.connection()
            cursor = service_platform_conn.cursor()
            _res = cursor.executemany(sql, data)
            service_platform_conn.commit()
            cursor.close()
            service_platform_conn.close()
            self.task.get_data_per_times = len(data)
            self.task.list_task_insert_db_count = _res
        except Exception as e:
            raise ServiceStandardError(
                error_code=ServiceStandardError.MYSQL_ERROR,
                wrapped_exception=e)

        if error_code == 27 or len(data) > 0:
            self.task.error_code = 0
        else:
            raise ServiceStandardError(
                error_code=ServiceStandardError.EMPTY_TICKET)

        return len(result), error_code
Example #3
def insert(sql, data):
    # TODO: connection pool handling is not specified yet
    service_platform_conn = service_platform_pool.connection()
    cursor = service_platform_conn.cursor()
    _res = cursor.executemany(sql, data)
    service_platform_conn.commit()
    cursor.close()
    service_platform_conn.close()
    return _res
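A hedged usage sketch for this helper: the table and columns below are placeholders chosen to illustrate the executemany-style contract, not the project's real schema.

# Hypothetical caller: one parameterized statement plus a list of row tuples.
sql = 'INSERT INTO sample_list (source, source_id, city_id) VALUES (%s, %s, %s)'
rows = [('qyer', '1001', 10), ('qyer', '1002', 10)]
affected = insert(sql, rows)  # returns the executemany() affected-row count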
Example #4
def hilton_recover():
    sql = '''select * from detail_hotel_hilton_20180420a'''
    conn_s = service_platform_pool.connection()
    cursor = conn_s.cursor()
    cursor.execute(sql)
    query_result = cursor.fetchall()
    for i in query_result:
        print(i[4])
        collections.update_many({'args.source_id': i[4]}, {'$set': {'finished': 1}}, False, True)
Example #5
        def hotel_list_insert_daodao(table_name, res_data):
            try:
                service_platform_conn = service_platform_pool.connection()
                cursor = service_platform_conn.cursor()
                sel_sql = 'select id, localtion_id, source_list from {} where source_id = %s'.format(table_name)
                rep_sql = "replace into {} (id, name, name_en, city_id, country_id, `from`, source_id, localtion_id, status, source_list) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)".format(
                    table_name)
                ins_sql = "insert into {} (name, name_en, city_id, country_id, `from`, source_id, localtion_id, status, source_list) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)".format(
                    table_name)
                cursor.execute(sel_sql, source_id)
                exists_localtion = {}
                for id, localtion_id, source_list in cursor.fetchall():
                    print id, localtion_id, source_list
                    exists_localtion[localtion_id] = (id, json.loads(source_list or "{}"))

                new_hotels_and_not_id = []
                if exists_localtion:
                    for _i, line in enumerate(res_data):
                        new_hotels = {}
                        mylocaltion_id, myhotels = line[6], line[8]
                        if mylocaltion_id in exists_localtion:
                            yourid, yourhotels = exists_localtion[mylocaltion_id]
                            mykeys = myhotels.keys()
                            yourkeys = yourhotels.keys()
                            if len(mykeys) > len(yourkeys):
                                for k in mykeys:
                                    new_hotels[k] = myhotels.get(k) or yourhotels.get(k)
                            else:
                                for k in yourkeys:
                                    new_hotels[k] = yourhotels.get(k) or myhotels.get(k)

                            line[8] = json.dumps(new_hotels)
                            line.insert(0, yourid)
                        else:
                            line[8] = json.dumps(line[8] or {})
                            new_hotels_and_not_id.append(line)

                    res_data = filter(lambda x:len(x)==10, res_data)
                    cursor.executemany(rep_sql, res_data)
                    service_platform_conn.commit()

                if new_hotels_and_not_id:
                    cursor.executemany(ins_sql, new_hotels_and_not_id)
                    service_platform_conn.commit()
                if not exists_localtion:
                    for line in res_data:
                        line[8] = json.dumps(line[8])
                    cursor.executemany(ins_sql, res_data)
                    service_platform_conn.commit()

                cursor.close()
                service_platform_conn.close()
            except Exception as e:
                self.logger.exception(msg="[mysql error]", exc_info=e)
                raise ServiceStandardError(error_code=ServiceStandardError.MYSQL_ERROR, wrapped_exception=e)
Example #6
def insert_error_f_md5_file(file_name, error_md5, file_md5, img_type):
    conn = service_platform_pool.connection()
    cursor = conn.cursor()
    if img_type == 'hotel':
        insert_sql = '''INSERT IGNORE INTO error_f_md5_file (file_name, file_md5, error_md5) VALUES (%s, %s, %s);'''
    else:
        insert_sql = '''INSERT IGNORE INTO error_f_md5_file_poi (file_name, file_md5, error_md5) VALUES (%s, %s, %s);'''
    cursor.execute(insert_sql, (file_name, file_md5, error_md5))
    conn.commit()
    cursor.close()
    conn.close()
Example #7
    def _execute(self, **kwargs):
        city_id = self.task.kwargs['city_id']
        country_id = self.task.kwargs['country_id']
        check_in = self.task.kwargs['check_in']
        city_url = self.task.kwargs['city_url']

        error_code, result, page_store_key, types_result_num = qyer_list_to_database(
            tid=self.task.task_id,
            used_times=self.task.used_times,
            source=self.task.kwargs['source'],
            city_id=city_id,
            check_in=check_in,
            city_url=city_url,
            need_cache=self.task.used_times == 0)

        collections.save({
            'collections': self.task.collection,
            'task_id': self.task.task_id,
            'used_times': self.task.used_times[0],
            'total_num': types_result_num,
            'stored_page_keys': page_store_key,
            'result': result,
            'insert_time': datetime.datetime.now()
        })

        self.task.error_code = error_code

        sql = SQL.format(self.task.task_name)
        data = []
        for sid, url, page_id, branch, tag in result:
            data.append(('qyer', sid, city_id, country_id, url))
        try:
            service_platform_conn = service_platform_pool.connection()
            cursor = service_platform_conn.cursor()
            _res = cursor.executemany(sql, data)
            service_platform_conn.commit()
            cursor.close()
            service_platform_conn.close()
            self.task.get_data_per_times = len(data)
            self.task.list_task_insert_db_count = _res
        except Exception as e:
            raise ServiceStandardError(
                error_code=ServiceStandardError.MYSQL_ERROR,
                wrapped_exception=e)

        if len(data) > 0:
            self.task.error_code = 0
        else:
            raise ServiceStandardError(
                error_code=ServiceStandardError.EMPTY_TICKET)

        return result, error_code
Example #8
def get_seek(task_name):
    sql = """select seek, priority, sequence from task_seek where task_name = '%s'"""
    conn = service_platform_pool.connection()
    cursor = conn.cursor()
    cursor.execute(sql % task_name)
    result = cursor.fetchone()
    cursor.close()
    conn.close()
    logger.info('timestamp, priority: %s' % str(result))
    if not result or len(result) == 0:
        return get_default_timestramp(), PRIORITY, 0

    return result
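A caller would unpack the three-tuple contract (either the stored row or the defaults) roughly as sketched below; the task name is a placeholder.

# Hypothetical caller of get_seek
seek, priority, sequence = get_seek('list_hotel_sample_task')
logger.info('resume from seek=%s, priority=%s, sequence=%s' % (seek, priority, sequence))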
Example #9
 def hotel_list_insert_db(table_name, res_data):
     try:
         service_platform_conn = service_platform_pool.connection()
         cursor = service_platform_conn.cursor()
         sql = "replace into {} (name, name_en, city_id, country_id, `from`, status, source_list) VALUES (%s,%s,%s,%s,%s,%s,%s)".format(
             table_name)
         cursor.executemany(sql, res_data)
         service_platform_conn.commit()
         cursor.close()
         service_platform_conn.close()
     except Exception as e:
         self.logger.exception(msg="[mysql error]", exc_info=e)
         raise ServiceStandardError(error_code=ServiceStandardError.MYSQL_ERROR, wrapped_exception=e)
Example #10
def execute_sql(sql, commit=False):
    conn = service_platform_pool.connection()
    cursor = conn.cursor()
    logger.info(sql)
    cursor.execute(sql)
    if commit:
        conn.commit()
        cursor.close()
        conn.close()
        return

    result = cursor.fetchall()
    logger.info('row_count  :  %s ' % cursor.rowcount)
    cursor.close()
    conn.close()
    return result
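The helper has two modes keyed on commit; a minimal sketch of both paths is below, with sample_table as a placeholder name.

# Read path: commit=False (default) fetches and returns all rows.
rows = execute_sql('SELECT id, name FROM sample_table LIMIT 10')

# Write path: commit=True commits, closes, and returns None.
execute_sql('UPDATE sample_table SET status = 1 WHERE id = 42', commit=True)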
Example #11
 def hotel_list_insert_db():
     try:
         service_platform_conn = service_platform_pool.connection()
         cursor = service_platform_conn.cursor()
         sql = "INSERT IGNORE INTO {} (source, source_id, city_id, country_id, hotel_url) VALUES (%s,%s,%s,%s,%s)".format(
             self.task.task_name)
         _res = cursor.executemany(sql, res_data)
         service_platform_conn.commit()
         cursor.close()
         service_platform_conn.close()
         self.task.list_task_insert_db_count = _res
         self.task.get_data_per_times = len(res_data)
     except Exception as e:
         self.logger.exception(msg="[mysql error]", exc_info=e)
         raise ServiceStandardError(
             error_code=ServiceStandardError.MYSQL_ERROR,
             wrapped_exception=e)
Example #12
def create_table(table_name):
    conn = service_platform_pool.connection()
    cursor = conn.cursor()
    tab_args = table_name.split('_')
    if tab_args[0] == 'detail':
        sql_file = LOAD_FILES[tab_args[1]]
    elif tab_args[0] == 'list':
        sql_file = LOAD_FILES['list']
    elif tab_args[0] == 'images':
        if tab_args[1] == 'hotel':
            sql_file = LOAD_FILES['images_hotel']
        elif tab_args[2] in ('daodao', 'qyer'):
            sql_file = LOAD_FILES['images_daodao']

    cursor.execute(sql_file % table_name)
    logger.info('table created: %s' % table_name)
    cursor.close()
    conn.close()
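create_table indexes LOAD_FILES by the table-name prefix and then formats the chosen template with the table name, so LOAD_FILES is presumably a dict of DDL templates each containing a single %s slot. A hedged sketch of that shape (the DDL bodies are placeholders, not the real schemas):

# Hypothetical LOAD_FILES mapping; each value is a CREATE TABLE template
# with one %s placeholder for the table name.
LOAD_FILES = {
    'hilton': 'CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY AUTO_INCREMENT) DEFAULT CHARSET=utf8mb4',
    'list': 'CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY AUTO_INCREMENT) DEFAULT CHARSET=utf8mb4',
    'images_hotel': 'CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY AUTO_INCREMENT) DEFAULT CHARSET=utf8mb4',
    'images_daodao': 'CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY AUTO_INCREMENT) DEFAULT CHARSET=utf8mb4',
    # ... other detail_* keys elided
}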
Example #13
def insert_all_data():
    global update_data_list
    global task_data_list
    global is_new_task
    # init insert conn and cursor
    task_conn = service_platform_pool.connection()
    task_cursor = task_conn.cursor()

    data_conn = spider_data_poi_pool.connection()
    data_cursor = data_conn.cursor()

    # update task table and insert data table
    if is_new_task:
        _res = task_cursor.executemany(
            '''UPDATE pic_detect_task_new SET status=2 WHERE id=%s;''',
            update_data_list)
    else:
        _res = task_cursor.executemany(
            '''UPDATE pic_detect_task SET status=2 WHERE id=%s;''',
            update_data_list)
    logger.debug("[update status finished][update task: {}]".format(_res))
    _res = data_cursor.executemany(
        '''INSERT IGNORE INTO PoiPictureInformation (city_id, poi_id, pic_name, is_scaned) VALUES (%s, %s, %s, 0); ''',
        task_data_list)
    logger.debug(
        "[update status finished][insert poi face detect task: {}]".format(
            _res))

    # For transactional consistency, commit only after both SQL statements have succeeded; otherwise do not commit.
    task_conn.commit()
    task_cursor.close()
    task_conn.close()

    data_conn.commit()
    data_cursor.close()
    data_conn.close()
Example #14
    def _execute(self, **kwargs):
        url = self.task.kwargs['url']
        source = self.task.kwargs['source']
        source_id = self.task.kwargs['source_id']
        city_id = self.task.kwargs['city_id']
        country_id = self.task.kwargs['country_id']
        hid = self.task.kwargs['hid']

        headers = {}
        other_info = {'source_id': source_id, 'city_id': city_id}

        if source in ['starwood', 'hyatt', 'gha', 'shangrila', 'fourseasons']:
            error_code, res, page_store_key_list = hotel_detail_database(
                url, source)

            if error_code == 0:
                result = parse_hotel_info(res)
            else:
                raise ServiceStandardError(error_code=error_code)
        else:
            with MySession(need_cache=True) as session:

                # booking start
                if source == 'booking':
                    headers['Referer'] = 'http://www.booking.com'

                # booking end

                session.headers.update(headers)
                start = time.time()
                if source not in ('hilton', 'ihg', 'holiday', 'accor',
                                  'marriott'):
                    page = session.get(url, timeout=240)
                    page.encoding = 'utf8'
                    content = page.text
                elif source == 'ihg':
                    url1, url2 = url.split('#####')
                    page1 = session.get(url1, timeout=240)
                    page1.encoding = 'utf8'
                    content1 = page1.text

                    page2 = session.get(url2, timeout=240)
                    page2.encoding = 'utf8'
                    content2 = page2.text

                    content = [content1, content2]
                elif source == 'holiday':
                    url2, url1 = url.split('#####')
                    page1 = requests.get(
                        url1,
                        headers={
                            'x-ihg-api-key':
                            'se9ym5iAzaW8pxfBjkmgbuGjJcr3Pj6Y',
                            'ihg-language': 'zh-CN'
                        },
                        timeout=240)
                    page1.encoding = 'utf8'
                    content1 = page1.text

                    page2 = requests.get(
                        url2,
                        timeout=240,
                        headers={
                            'accept': 'application/json, text/plain, */*',
                            'Content-Type': 'application/json; charset=UTF-8',
                            'user-agent':
                            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
                            'ihg-language': 'zh-CN',
                        })
                    page2.encoding = 'utf8'
                    content2 = page2.text

                    page3 = requests.get(url1,
                                         headers={
                                             'x-ihg-api-key':
                                             'se9ym5iAzaW8pxfBjkmgbuGjJcr3Pj6Y'
                                         },
                                         timeout=240)
                    page3.encoding = 'utf8'
                    content3 = page3.text

                    content = (content1, content2, content3)
                elif source == 'accor':
                    proxy_url = "http://10.10.239.46:8087/proxy?source=pricelineFlight&user=crawler&passwd=spidermiaoji2014"
                    r = requests.get(proxy_url)
                    proxies = {'https': "socks5://" + str(r.text)}
                    headers = {
                        "User-Agent":
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
                    }
                    page = requests.get(url,
                                        headers=headers,
                                        verify=False,
                                        proxies=proxies)
                    page.encoding = 'utf8'
                    content = page.text
                elif source == 'marriott':
                    url_list = url.split('#####')
                    url = url_list[0]

                    for i in url_list:
                        if len(i.split('=')) > 1:
                            key, value = i.split('=')[0], i.split('=')[1]
                            if key == 'longtitude':
                                other_info['longtitude'] = value
                            if key == 'latitude':
                                other_info['latitude'] = value
                        else:
                            if url_list.index(i) == 1:
                                other_info['hotel_name_en'] = i

                    url2 = url.replace("travel", "hotel-photos")
                    url3 = url.replace("travel/", "maps/travel/")
                    url4 = url.replace("hotels/", "hotels/fact-sheet/")
                    headers = {
                        'User-Agent':
                        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0'
                    }
                    if "https://www.marriott.com" in url:
                        page1 = requests.get(url, headers=headers, timeout=240)
                        page2 = requests.get(url2,
                                             headers=headers,
                                             timeout=240)
                        page3 = requests.get(url3,
                                             headers=headers,
                                             timeout=240)
                        page4 = requests.get(url4,
                                             headers=headers,
                                             timeout=240)

                        page1.encoding = 'utf8'
                        page2.encoding = 'utf8'
                        page3.encoding = 'utf8'
                        page4.encoding = 'utf8'

                        content1 = page1.text
                        content2 = page2.text
                        content3 = page3.text
                        content4 = page4.text
                        content = (content1, content2, content3, content4)
                    else:
                        url2 = url + "/hotel-overview"
                        page1 = requests.get(url, headers=headers, timeout=240)
                        page2 = requests.get(url2,
                                             headers=headers,
                                             timeout=240)
                        page1.encoding = 'utf8'
                        page2.encoding = 'utf8'
                        content1 = page1.text
                        content2 = page2.text
                        content = (content1, content2)
                else:
                    session.auto_update_host = False
                    hilton_index = url.find('index.html')
                    if hilton_index > -1:
                        url = url[:hilton_index]
                    split_args = url.split('/')
                    detail_url = 'http://www3.hilton.com/zh_CN/hotels/{0}/{1}/popup/hotelDetails.html'.format(
                        split_args[-3], split_args[-2])
                    map_info_url = url + 'maps-directions.html'
                    desc_url = url + 'about.html'

                    page = session.get(url)
                    map_info_page = session.get(map_info_url)
                    desc_page = session.get(desc_url)

                    detail_page = session.get(detail_url, )
                    page.encoding = 'utf8'
                    detail_page.encoding = 'utf8'
                    map_info_page.encoding = 'utf8'
                    desc_page.encoding = 'utf8'
                    __content = page.text
                    logger.info(detail_url)
                    __detail_content = detail_page.text
                    __map_info_content = map_info_page.text
                    __desc_content = desc_page.text

                    content = [
                        __content, __detail_content, __map_info_content,
                        __desc_content
                    ]
                logger.debug("[crawl_data][Takes: {}]".format(time.time() -
                                                              start))

                start = time.time()
                result = parse_hotel(content=content,
                                     url=url,
                                     other_info=other_info,
                                     source=source,
                                     part=self.task.task_name,
                                     retry_count=self.task.used_times)
                logger.debug("[parse_hotel][func: {}][Takes: {}]".format(
                    parse_hotel.func_name,
                    time.time() - start))

        try:
            data_collections = mongo_data_client['ServicePlatform'][
                self.task.task_name]
            data_collections.create_index([('source', 1), ('source_id', 1)],
                                          unique=True,
                                          background=True)
            data_collections.create_index([('location', '2dsphere')],
                                          background=True)
            tmp_result = deepcopy(result.values(backdict=True))
            lon, lat = str(result.map_info).split(',')
            lon, lat = float(lon), float(lat)
            tmp_result.update(
                {'location': {
                    'type': "Point",
                    'coordinates': [lon, lat]
                }})
            data_collections.save(tmp_result)
        except pymongo.errors.DuplicateKeyError:
            # logger.exception("[result already in db]", exc_info=e)
            logger.warning("[result already in db]")
        except Exception as exc:
            raise ServiceStandardError(
                error_code=ServiceStandardError.MONGO_ERROR,
                wrapped_exception=exc)

        start = time.time()
        try:
            service_platform_conn = service_platform_pool.connection()
            cursor = service_platform_conn.cursor()
            others_info = json.loads(result.others_info)
            others_info['hid'] = hid
            result.others_info = json.dumps(others_info)
            sql = result.generation_sql()
            sql = sql.format(table_name=self.task.task_name)
            values = result.values()
            self.logger.info(result.__dict__)
            cursor.execute(sql, values)
            service_platform_conn.commit()
            cursor.close()
            service_platform_conn.close()
        except Exception as e:
            logger.exception(e)
            raise ServiceStandardError(
                error_code=ServiceStandardError.MYSQL_ERROR,
                wrapped_exception=e)

        logger.debug("[Insert DB][Takes: {}]".format(time.time() - start))
        self.task.error_code = 0
        return self.task.error_code
Example #15
    # h.is_parking_free = '0'
    # h.service = '只有床'
    # h.img_items = "https://www.'baidu.com|https:'//www.baidu.com"
    # h.description = '0.5星级酒店'
    # h.accepted_cards = 'AAA'
    # h.check_in_time = '2017-09-09 12:23:45'
    # h.check_out_time = '2017-09-09 18:23:45'
    # h.hotel_url = 'https://map.baidu.com'
    h.update_time = datetime.datetime.now()
    # h.continent = 'NULL'
    h.other_info = json.dumps({'json': '个性不'})

    print h
    sql = h.generation_sql()
    values = h.values()
    print values
    sql = sql.format(table_name='aaaa')
    # sql = sql % values
    print sql
    from proj.mysql_pool import service_platform_pool
    service_platform_conn = service_platform_pool.connection()
    cursor = service_platform_conn.cursor()
    cursor.execute(sql, values)

    cursor.close()
    service_platform_conn.close()
    # c = Coordinate()
    # c.latitude = '123.12343'
    # c.longitude = '2345.4123'
    # print c
Example #16
    def _execute(self, **kwargs):
        id = kwargs['id']
        source = kwargs['source']
        url = kwargs['url']
        table_name = kwargs['table_name']
        t1 = time.time()

        with MySession(need_cache=False, need_proxies=True) as session:
            resp_daodao = session.get(url)
            content_daodao = resp_daodao.text
            real_daodao_url = re.findall(r"redirectPage\('(.*)'\)", content_daodao)[0]

            # print content
            real_url = None
            print id, source, url
            # self.logger.info('%s\n%s\n%s\n' % (source, url, content[-600:]))
            try:
                if source == 'agoda':
                    url_obj = urlparse.urlsplit(real_daodao_url)
                    hid = urlparse.parse_qs(url_obj.query)['hid'][0]
                    line = SourceIDS.find_one({'source_id': 'agoda'+hid})
                    assert line, 'agoda hid not found'
                    real_url = line['url']
                elif source == 'booking':
                    real_url = real_daodao_url.replace('http', 'https').replace('.html', '.zh-cn.html').split('?', 1)[0]
                elif source == 'ctrip':
                    base_url = 'http://hotels.ctrip.com/international/{0}.html'
                    url_obj = urlparse.urlsplit(real_daodao_url)
                    try:
                        hotel_id = urlparse.parse_qs(url_obj.query)['hotelid'][0]
                    except:
                        jumpUrl = urlparse.parse_qs(url_obj.query)['jumpUrl'][0]
                        hotel_id = re.findall('/name(\w+)', urlparse.urlsplit(jumpUrl).path)[0]
                    real_url = base_url.format(hotel_id)
                elif source == 'expedia':
                    raise Exception('source is expedia')
                elif source == 'hotels':
                    base_url = 'https://ssl.hotels.cn/ho{0}/'
                    url_obj = urlparse.urlsplit(real_daodao_url)
                    hotel_id = urlparse.parse_qs(url_obj.query)['hotelid'][0]
                    real_url = base_url.format(hotel_id)
                else:
                    raise Exception('unrecognized source')
            except:
                resp = session.get(real_daodao_url)
                content = resp.text

                if source == 'agoda':
                    agoda_json = re.search(r'window.searchBoxReact = (.*)(?=;)', content).group(1)
                    agoda_json = json.loads(agoda_json)
                    url = agoda_json.get('recentSearches', [])[0].get('data', {}).get('url')
                    if url.startswith('/zh-cn'):
                        base_url = 'https://www.agoda.com'
                    elif url.startswith('zh-cn'):
                        base_url = 'https://www.agoda.com/'
                    else:
                        base_url = 'https://www.agoda.com/zh-cn'
                    real_url = ''.join([base_url, url])
                elif source == 'booking':
                    base_url = 'https://www.booking.com'
                    root = html.fromstring(content)
                    real_url = root.xpath('//div[@id="hotellist_inner"]//a')[0]
                    real_url = ''.join([base_url, real_url.attrib.get('href').replace('-cn', '-com')])
                elif source == 'ctrip':
                    ctrip_json = re.search(r'hotelPositionJSON: (.*)(?=,)', content).group(1)
                    ctrip_json = json.loads(ctrip_json)
                    url = ctrip_json[0].get('url')
                    base_url = "http://hotels.ctrip.com"
                    real_url = ''.join([base_url, url])
                elif source == 'elong':
                    # hotel_id = re.search(r'hotelId":"([0-9]+)"', content).group(1)
                    hotel_id = re.search(r'data-hotelid="(\w+)"', content).group(1)

                    real_url = 'http://ihotel.elong.com/{0}/'.format(hotel_id)
                elif source == 'expedia':
                    raise Exception('source is expedia')
                elif source == 'hotels':
                    root = html.fromstring(content)
                    hotel_id = root.xpath('//div[@id="listings"]//li')[0].attrib.get('data-hotel-id')
                    real_url = "https://ssl.hotels.cn/ho{0}/?pa=1&q-check-out=2018-04-16&tab=description&q-room-0-adults=2&YGF=7&q-check-in=2018-04-15&MGT=1&WOE=1&WOD=7&ZSX=0&SYE=3&q-room-0-children=0".format(
                        hotel_id)


        print real_url
        t2 = time.time()
        self.logger.info('crawl took: {}'.format(t2 - t1))

        service_platform_conn = service_platform_pool.connection()
        cursor = service_platform_conn.cursor()
        sql = "update {0} set source_list = JSON_REPLACE(source_list, '$.{1}', %s) where id = %s".format(table_name, source);
        cursor.execute(sql, (real_url, id))
        service_platform_conn.commit()
        cursor.close()
        service_platform_conn.close()

        t3 = time.time()
        self.logger.info('db insert took: {}'.format(t3 - t2))

        self.task.error_code = 0
        return id, source, self.task.error_code