Example #1
def fetch_A_secucode_innercode_map(juyuan_conn: sql_base.Connection):
    '''
    Given any SecuCode, whether listed or delisted, and whether before or after
    a code change, resolve it to the unique InnerCode.
    '''

    sql = """
    (
    select A.SecuCode, A.InnerCode, A.SecuAbbr, A.ListedDate, B.ChangeDate 'DelistDate'
    from gildata.SecuMain A
    left join 
    gildata.LC_ListStatus B 
    on A.InnerCode = B.InnerCode 
    and B.ChangeType = 4 
    where 
    A.SecuMarket in (83, 90) 
    and A.SecuCategory in (1, 41) 
    and A.ListedSector in (1, 2, 6, 7) 
    and A.ListedDate <= CURDATE()
    )
    UNION
    (
    SELECT
    A.SecuCode, A.InnerCode, B.SecuAbbr, A.BeginDate 'StartDate', A.StopDate 'StopDate'
    FROM gildata.LC_CodeChange A
    JOIN gildata.SecuMain B
    ON A.InnerCode = B.InnerCode
    AND B.SecuMarket IN (83,90)
    AND B.SecuCategory in (1, 41) 
    WHERE
    A.CodeDefine = 1
    AND A.SecuCode <> B.SecuCode
    ); 
    """

    exec_res = juyuan_conn.query(sql)
    map1 = {}
    for one in exec_res:
        map1[one['SecuCode']] = one['InnerCode']

    # NOTE: str() renders a one-element tuple as "(x,)", which is invalid SQL
    # IN syntax; this assumes map1 always holds at least two InnerCodes.
    sql = f'''select InnerCode, SecuMarket from secumain where InnerCode in {tuple(map1.values())};'''
    res = juyuan_conn.query(sql)
    map2 = {}
    for r in res:
        map2[r['InnerCode']] = r['SecuMarket']
    info = {}

    for k, v in map1.items():
        if map2[v] == 83:
            k = "SH" + k
            info[k] = v
        elif map2[v] == 90:
            k = "SZ" + k
            info[k] = v
        else:
            raise ValueError(f"unexpected SecuMarket {map2[v]} for InnerCode {v}")

    return info
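
A minimal usage sketch (the connection parameters below are placeholders; sql_base.Connection is assumed to expose the query interface used above):

import sql_base

# Hypothetical wiring; fill in real credentials for the juyuan database.
juyuan_conn = sql_base.Connection(host='...', port=3306, user='...',
                                  password='...', database='gildata')
secu_map = fetch_A_secucode_innercode_map(juyuan_conn)
# Keys carry an exchange prefix: SH for SecuMarket 83, SZ for 90.
print(secu_map.get('SH600000'))  # InnerCode, or None if the code is unknown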
Example #2
import datetime


class SourceAnnouncementBaseV1(object):
    """Merge the two crawler tables into the announcement base table."""
    def __init__(self):
        self.merge_table_name = 'announcement_base'
        self.his_table = 'juchao_ant'
        self.live_table = 'juchao_kuaixun'
        self.batch_number = 10000

        self._r_spider_conn = Connection(
            host=R_SPIDER_MYSQL_HOST,
            port=R_SPIDER_MYSQL_PORT,
            user=R_SPIDER_MYSQL_USER,
            password=R_SPIDER_MYSQL_PASSWORD,
            database=R_SPIDER_MYSQL_DB,
        )
        self._spider_conn = Connection(
            host=SPIDER_MYSQL_HOST,
            port=SPIDER_MYSQL_PORT,
            user=SPIDER_MYSQL_USER,
            password=SPIDER_MYSQL_PASSWORD,
            database=SPIDER_MYSQL_DB,
        )

    def daily_update(self, deadline: datetime.datetime = None):
        if deadline is None:
            deadline = datetime.datetime.now() - datetime.timedelta(days=1)

        load_sql = '''select id, SecuCode, SecuAbbr, AntTime as PubDatetime1, \
AntTitle as Title1, AntDoc as PDFLink, CREATETIMEJZ as InsertDatetime1 from {} where \
UPDATETIMEJZ > '{}'; '''.format(self.his_table, deadline)
        logger.info(load_sql)

        items = []
        datas = self._r_spider_conn.query(load_sql)
        logger.info(len(datas))
        for data in datas:
            data = utils.process_secucode(data)
            if data:
                items.append(data)
        self._spider_conn.batch_insert(
            items, self.merge_table_name,
            ['SecuCode', 'SecuAbbr', 'PubDatetime1', 'InsertDatetime1', 'Title1'])

        update_sql = '''select A.* from {} A, {} B where B.UPDATETIMEJZ > '{}' \
and A.code = B.SecuCode and A.link = B.AntDoc and A.type = '公告';  '''.format(
            self.live_table, self.his_table, deadline)
        datas = self._r_spider_conn.query(update_sql)
        for data in datas:
            item = {
                'PubDatetime2': data.get("pub_date"),
                'InsertDatetime2': data.get("CREATETIMEJZ"),
                'Title2': data.get("title"),
            }
            self._spider_conn.table_update(self.merge_table_name, item, 'PDFLink', data.get("link"))
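
The helper utils.process_secucode is not shown in this listing; a plausible sketch, inferred from comments elsewhere in the file ("only keep codes starting with 0/3/6 for now") and from the SH/SZ prefixing convention. The real implementation may differ:

def process_secucode(data: dict):
    # Assumed behavior: drop records whose code does not start with 0/3/6,
    # and store the code with an exchange prefix (6xxxxx -> SH, else SZ).
    code = data.get('SecuCode') or ''
    if not code or code[0] not in ('0', '3', '6'):
        return None
    data['SecuCode'] = ('SH' if code[0] == '6' else 'SZ') + code
    return data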
Example #3
# res2 = juyuan_conn.query(sql2)
# map2 = {}
# for one in res2:
#     map2[one['SecuCode']] = one['InnerCode']
#
#
# res3 = juyuan_conn.query(sql3)
# map3 = {}
# for one in res3:
#     map3[one['SecuCode']] = one['InnerCode']
#
# delta12 = set(map1.keys()) - set(map2.keys())
# # print(delta12)
#
# delta21 = set(map2.keys() - map1.keys())
# # print(delta21)
#
# delta23 = set(map2.keys() - map3.keys())
# print(delta23)
#
# delta32 = set(map3.keys() - map2.keys())
# print(delta32)

res4 = juyuan_conn.query(sql4)
map4 = {}

for one in res4:
    map4[one['SecuCode']] = one['InnerCode']
    if one['SecuCode'][0] not in ('0', '3', '6'):
        print(one)
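
For reference, dict.keys() returns a set-like view in Python 3, so the deltas in the commented block above work the same with or without the extra set() wrappers:

a = {'600000': 1, '000001': 2}
b = {'600000': 1, '300750': 3}
print(a.keys() - b.keys())  # {'000001'}
print(b.keys() - a.keys())  # {'300750'}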
Example #4
import json
import random
import time

import requests
from retrying import retry


class JuchaoCounter(object):
    def __init__(self):
        self.api = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'www.cninfo.com.cn',
            'Origin': 'http://www.cninfo.com.cn',
            'Pragma': 'no-cache',
            'Referer': 'http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36',
        }
        self._spider_conn = Connection(
            host=SPIDER_MYSQL_HOST,
            port=SPIDER_MYSQL_PORT,
            user=SPIDER_MYSQL_USER,
            password=SPIDER_MYSQL_PASSWORD,
            database=SPIDER_MYSQL_DB,
        )

        self._r_spider_conn = Connection(
            host=R_SPIDER_MYSQL_HOST,
            port=R_SPIDER_MYSQL_PORT,
            user=R_SPIDER_MYSQL_USER,
            password=R_SPIDER_MYSQL_PASSWORD,
            database=R_SPIDER_MYSQL_DB,
        )

    @property
    def codes_map(self):
        codes_map = {}
        sql = '''select code, OrgId from juchao_codemap; '''
        res = self._spider_conn.query(sql)
        for r in res:
            codes_map[r.get('OrgId')] = r.get("code")
        return codes_map

    def launch(self, org_id: str):
        codes_map = self.codes_map
        org_id_lst = sorted(list(codes_map.keys()))
        position = org_id_lst.index(org_id)
        print("position", position)
        for org_id in org_id_lst[position:]:
            code = codes_map.get(org_id)
            stock_str = ','.join([code, org_id])
            print(stock_str)
            self.get_count(stock_str)

    @retry(stop_max_attempt_number=3)
    def get_count(self, stock_str: str):
        time.sleep(random.randint(1, 3)/10)
        post_data = {
            'pageNum': 1,
            'pageSize': 30,
            'column': 'szse',
            'tabName': 'fulltext',
            'plate': '',
            'stock': stock_str,
            'searchkey': '',
            'secid': '',
            'category': '',
            'trade': '',
            'seDate': '',
            'sortName': '',
            'sortType': '',
            'isHLtitle': True,
        }
        resp = requests.post(self.api, headers=self.headers, data=post_data, timeout=3)
        if resp.status_code == 200:
            text = resp.text
            py_datas = json.loads(text)
            total_ann = py_datas.get("totalAnnouncement")
            total_rec = py_datas.get("totalRecordNum")
            record = {
                "SecuCode": stock_str.split(',')[0],
                "TotalAnn": total_ann,
                "TotalRec": total_rec,
            }
            # print(record)
            # record completion of a single code; file_name is assumed to be
            # defined at module level
            with open(file_name, "a") as f:
                f.write(json.dumps(record) + '\n')

    def check_count_bydate(self):
        """
        Count announcements per day.
        """
        sql = '''select AntTime, count(*) from juchao_ant2 group by AntTime ; '''

    def check_count(self):
        sql = '''select SecuCode, count(*) from juchao_ant2 group by SecuCode ; '''
        ret = self._r_spider_conn.query(sql)
        exist_map = {}
        for r in ret:
            exist_map[r.get('SecuCode')] = r.get("count(*)")
        # print(exist_map)

        web_map = {}
        with open(file_name, "r") as f:
            lines = f.readlines()
            for line in lines:
                r = json.loads(line)
                web_map[r.get("SecuCode")] = r.get("TotalAnn")

        no_lst = []
        big_delta_lst = []
        small_delta_lst = []

        for code in web_map:
            _count = 0
            # _map is assumed to be defined elsewhere: it maps a web code to an
            # alternate local code whose count should be folded in
            if code in _map:
                _code = _map.get(code)
                _count = exist_map.get(_code, 0)

            exist_num = exist_map.get(code, 0)
            exist_num += _count

            web_num = web_map.get(code)
            if not exist_num:
                no_lst.append(code)
            elif exist_num != web_num:
                delta = web_num - exist_num
                if delta > 0:
                    big_delta_lst.append((code, delta))
                    # big_delta_lst.append(code)
                else:
                    # small_delta_lst.append((code, delta))
                    small_delta_lst.append(code)

        # print(no_lst)
        # print(len(no_lst))

        print(big_delta_lst)
        print(len(big_delta_lst))
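
A minimal driver for the counter (hedged: file_name must point at a writable JSON-lines path, the OrgId below is hypothetical, and the connection constants are assumed to be configured):

file_name = 'juchao_counts.jsonl'  # assumed module-level output path

counter = JuchaoCounter()
counter.launch('9900000001')  # hypothetical OrgId; a key of juchao_codemap
counter.check_count()         # compare on-site totals with local juchao_ant2 counts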
Example #5
class UpdateInnerCode(object):
    def __init__(self):
        self._spider_conn = Connection(
            host=SPIDER_MYSQL_HOST,
            port=SPIDER_MYSQL_PORT,
            user=SPIDER_MYSQL_USER,
            password=SPIDER_MYSQL_PASSWORD,
            database=SPIDER_MYSQL_DB,
        )
        self._juyuan_conn = Connection(
            host=JUY_HOST,
            port=JUY_PORT,
            user=JUY_USER,
            password=JUY_PASSWD,
            database=JUY_DB,
        )
        self.batch_num = 10000

    def max_id(self):
        sql = '''select max(id) as max_id from announcement_base2; '''
        return self._spider_conn.get(sql).get("max_id")

    def load_innercode_map(self):
        sql = '''select secucode, innercode from secumain; '''
        _map = dict()
        ret = self._juyuan_conn.query(sql)
        for r in ret:
            _map[r.get("secucode")] = r.get("innercode")
        return _map

    def get_old_inner_code(self, secucode: str):
        sql = '''select InnerCode from LC_CodeChange where secucode = '{}';'''.format(
            secucode)
        r = self._juyuan_conn.get(sql)
        if r:
            inner_code = r.get("InnerCode")
            return inner_code

    def start(self, begin: int, end: int):
        inner_map = self.load_innercode_map()  # secucode has no exchange prefix
        # max_id = self.max_id()
        # print(max_id)

        base_sql = '''select id, secucode from announcement_base2 where id between {} and {} and InnerCode = 0; '''

        for i in range(begin, end):
            sql = base_sql.format(i * self.batch_num,
                                  i * self.batch_num + self.batch_num)
            print(sql)
            datas = self._spider_conn.query(sql)
            print(len(datas))
            for data in datas:
                secucode = data.get('secucode')[2:]  # strip the exchange prefix
                inner_code = inner_map.get(secucode)
                if inner_code is None:
                    inner_code = self.get_old_inner_code(secucode)
                    if inner_code is not None:
                        inner_map[secucode] = inner_code
                if inner_code is not None:
                    self._spider_conn.table_update('announcement_base2',
                                                   {'InnerCode': inner_code},
                                                   "id", data.get("id"))
Example #6
class SourceAnnouncementBase(object):
    """Merge the two crawler tables into the announcement base table."""
    def __init__(self):
        self.merge_table_name = 'announcement_base2'
        self.his_table = 'juchao_ant2'
        self.live_table = 'juchao_kuaixun'
        self.batch_number = 10000

        self._r_spider_conn = Connection(
            host=R_SPIDER_MYSQL_HOST,
            port=R_SPIDER_MYSQL_PORT,
            user=R_SPIDER_MYSQL_USER,
            password=R_SPIDER_MYSQL_PASSWORD,
            database=R_SPIDER_MYSQL_DB,
        )
        self._spider_conn = Connection(
            host=SPIDER_MYSQL_HOST,
            port=SPIDER_MYSQL_PORT,
            user=SPIDER_MYSQL_USER,
            password=SPIDER_MYSQL_PASSWORD,
            database=SPIDER_MYSQL_DB,
        )
        self._juyuan_conn = Connection(
            host=JUY_HOST,
            port=JUY_PORT,
            user=JUY_USER,
            password=JUY_PASSWD,
            database=JUY_DB,
        )

    def category_code_map(self):
        return {
            'category_bcgz_szsh': ('补充更正', 19),
            'category_bndbg_szsh': ('半年报', 2),
            'category_cqdq_szsh': ('澄清致歉', 12),
            'category_dshgg_szsh': ('董事会', 18),
            'category_fxts_szsh': ('风险提示', 21),
            'category_gddh_szsh': ('股东大会', 15),
            'category_gqbd_szsh': ('股权变动', 16),
            'category_gqjl_szsh': ('股权激励', 17),
            'category_gszl_szsh': ('公司治理', 24),
            'category_gszq_szsh': ('公司债', 25),
            'category_jj_szsh': ('解禁', 9),
            'category_jshgg_szsh': ('监事会', 14),
            'category_kzzq_szsh': ('可转债', 22),
            'category_ndbg_szsh': ('年报', 4),
            'category_pg_szsh': ('配股', 7),
            'category_qtrz_szsh': ('其他融资', 23),
            'category_qyfpxzcs_szsh': ('权益分派', 11),
            'category_rcjy_szsh': ('日常经营', 10),
            'category_sf_szsh': ('首发', 8),
            'category_sjdbg_szsh': ('三季报', 3),
            'category_tbclts_szsh': ('特别处理和退市', 13),
            'category_tszlq_szsh': ('退市整理期', 20),
            'category_yjdbg_szsh': ('一季报', 1),
            'category_yjygjxz_szsh': ('业绩预告', 5),
            'category_zf_szsh': ('增发', 6),
            'category_zj_szsh': ('中介报告', 26),
            'category_others': ('其他', 27),
        }

    def spider_max_id(self, table_name: str):
        sql = f'''select max(id) as max_id from {table_name}; '''
        max_id = self._r_spider_conn.get(sql).get("max_id")
        return max_id

    def check_ids(self):
        _delta = []
        max_id = self.spider_max_id(self.his_table)
        for i in range(max_id // self.batch_number + 1):
            begin_id = i * self.batch_number
            end_id = begin_id + self.batch_number
            print()
            print(begin_id, end_id)
            sql1 = '''select count(id) from {} where id between {} and {}; '''.format(
                self.his_table, begin_id, end_id)
            sql2 = '''select count(id) from {} where id between {} and {}; '''.format(
                self.merge_table_name, begin_id, end_id)
            count1 = self._r_spider_conn.get(sql1).get("count(id)")
            count2 = self._r_spider_conn.get(sql2).get("count(id)")
            print(count1, count2)
            if count1 != count2:
                sl1 = '''select id from {} where id between {} and {};'''.format(
                    self.his_table, begin_id, end_id)
                sl2 = '''select id from {} where id between {} and {};'''.format(
                    self.merge_table_name, begin_id, end_id)
                rs1 = self._r_spider_conn.query(sl1)
                ids1 = {r.get("id") for r in rs1}
                rs2 = self._r_spider_conn.query(sl2)
                ids2 = {r.get("id") for r in rs2}
                print(list(ids1 - ids2))
                _delta.extend(list(ids1 - ids2))

        print(_delta)
        return _delta

    def process_diff_ids(self, diff_lst):
        diff_lst = tuple(diff_lst)
        category_code_map = self.category_code_map()
        _sql = '''select id, SecuCode, CategoryCode, SecuAbbr, AntTime as PubDatetime1, \
AntTitle as Title1, AntDoc as PDFLink, CREATETIMEJZ as InsertDatetime1 from  {} where id in {}; '''.format(
            self.his_table, diff_lst)
        datas = self._r_spider_conn.query(_sql)
        print(len(datas))
        for data in datas:
            # fall back to 'category_others' when the category code is unknown
            data['CategoryCode'] = category_code_map.get(
                data.get("CategoryCode"), category_code_map['category_others'])[1]
            data = utils.process_secucode(data)  # only keep codes starting with 0/3/6 for now
            if data:
                print(data)
                ret = self._spider_conn.table_insert(
                    self.merge_table_name, data, [
                        'SecuCode', 'SecuAbbr', 'CategoryCode', 'PubDatetime1',
                        'InsertDatetime1', 'Title1'
                    ])
                print(ret)

    def first_load(self):
        inner_map = self.load_innercode_map()
        category_code_map = self.category_code_map()

        load_sql = '''select id, SecuCode, CategoryCode, SecuAbbr, AntTime as PubDatetime1, \
AntTitle as Title1, AntDoc as PDFLink, CREATETIMEJZ as InsertDatetime1 from  {} where \
id >= {} and id < {}; '''
        max_id = self.spider_max_id(self.his_table)
        for i in range(max_id // self.batch_number + 1):
            begin_id = i * self.batch_number
            end_id = begin_id + self.batch_number
            print(begin_id, end_id)
            _sql = load_sql.format(self.his_table, begin_id, end_id)
            print(_sql)
            datas = self._r_spider_conn.query(_sql)
            for data in datas:
                inner_code = inner_map.get(data.get("SecuCode"))
                if inner_code:
                    data['InnerCode'] = inner_code  # secucode in the crawler DB has no exchange prefix
                else:
                    inner_code = self.get_old_inner_code(data.get("SecuCode"))
                    if inner_code:
                        data['InnerCode'] = inner_code
                        inner_map.update({data.get("SecuCode"): inner_code})
                    else:
                        continue

                data['CategoryCode'] = category_code_map.get(
                    data.get("CategoryCode"), category_code_map['category_others'])[1]
                data = utils.process_secucode(data)  # only keep codes starting with 0/3/6 for now
                if data:
                    self._spider_conn.table_insert(
                        self.merge_table_name, data, [
                            'SecuCode', 'SecuAbbr', 'CategoryCode',
                            'PubDatetime1', 'InsertDatetime1', 'Title1',
                            'InnerCode'
                        ])

        # backfill announcements from the juchao_kuaixun live-news table
        update_sql = '''select A.* from {} A, {} B \
where A.code = B.SecuCode and A.link = B.AntDoc and A.type = '公告' \
and A.id between {} and  {}; '''
        max_id = self.spider_max_id(self.live_table)
        for j in range(max_id // self.batch_number + 1):
            begin_id = j * self.batch_number
            end_id = begin_id + self.batch_number
            print(begin_id, end_id)
            _sql = update_sql.format(self.live_table, self.his_table, begin_id,
                                     end_id)
            print(_sql)
            datas = self._r_spider_conn.query(_sql)
            for data in datas:
                item = {
                    'PubDatetime2': data.get("pub_date"),
                    'InsertDatetime2': data.get("CREATETIMEJZ"),
                    'Title2': data.get("title"),
                }
                self._spider_conn.table_update(self.merge_table_name, item,
                                               'PDFLink', data.get("link"))

    def load_innercode_map(self):
        sql = '''select secucode, innercode from secumain; '''
        _map = dict()
        ret = self._juyuan_conn.query(sql)
        for r in ret:
            _map[r.get("secucode")] = r.get("innercode")
        return _map

    def get_old_inner_code(self, secucode: str):
        sql = '''select InnerCode from LC_CodeChange where secucode = '{}';'''.format(
            secucode)
        r = self._juyuan_conn.get(sql)
        if r:
            inner_code = r.get("InnerCode")
            return inner_code
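
A sketch of the intended full-load sequence: bulk-merge the history table, verify per-window row parity, then re-insert any ids the merge missed:

source = SourceAnnouncementBase()
source.first_load()           # merge juchao_ant2 into announcement_base2
missing = source.check_ids()  # ids in the source but absent from the merge table
if missing:
    source.process_diff_ids(missing)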
Example #7
import datetime


class SourceAnnouncementBase(object):
    """Merge the two crawler tables into the announcement base table."""
    def __init__(self):
        self.merge_table_name = 'announcement_base2'
        self.his_table = 'juchao_ant2'
        self.live_table = 'juchao_kuaixun'
        self.batch_number = 10000

        self._r_spider_conn = Connection(
            host=R_SPIDER_MYSQL_HOST,
            port=R_SPIDER_MYSQL_PORT,
            user=R_SPIDER_MYSQL_USER,
            password=R_SPIDER_MYSQL_PASSWORD,
            database=R_SPIDER_MYSQL_DB,
        )
        self._spider_conn = Connection(
            host=SPIDER_MYSQL_HOST,
            port=SPIDER_MYSQL_PORT,
            user=SPIDER_MYSQL_USER,
            password=SPIDER_MYSQL_PASSWORD,
            database=SPIDER_MYSQL_DB,
        )
        self._juyuan_conn = Connection(
            host=JUY_HOST,
            port=JUY_PORT,
            user=JUY_USER,
            password=JUY_PASSWD,
            database=JUY_DB,
        )

    def category_code_map(self):
        return {
            'category_bcgz_szsh': ('补充更正', 19),
            'category_bndbg_szsh': ('半年报', 2),
            'category_cqdq_szsh': ('澄清致歉', 12),
            'category_dshgg_szsh': ('董事会', 18),
            'category_fxts_szsh': ('风险提示', 21),
            'category_gddh_szsh': ('股东大会', 15),
            'category_gqbd_szsh': ('股权变动', 16),
            'category_gqjl_szsh': ('股权激励', 17),
            'category_gszl_szsh': ('公司治理', 24),
            'category_gszq_szsh': ('公司债', 25),
            'category_jj_szsh': ('解禁', 9),
            'category_jshgg_szsh': ('监事会', 14),
            'category_kzzq_szsh': ('可转债', 22),
            'category_ndbg_szsh': ('年报', 4),
            'category_pg_szsh': ('配股', 7),
            'category_qtrz_szsh': ('其他融资', 23),
            'category_qyfpxzcs_szsh': ('权益分派', 11),
            'category_rcjy_szsh': ('日常经营', 10),
            'category_sf_szsh': ('首发', 8),
            'category_sjdbg_szsh': ('三季报', 3),
            'category_tbclts_szsh': ('特别处理和退市', 13),
            'category_tszlq_szsh': ('退市整理期', 20),
            'category_yjdbg_szsh': ('一季报', 1),
            'category_yjygjxz_szsh': ('业绩预告', 5),
            'category_zf_szsh': ('增发', 6),
            'category_zj_szsh': ('中介报告', 26),
            'category_others': ('其他', 27),
        }

    def load_innercode_map(self):
        sql = '''select secucode, innercode from secumain; '''
        _map = dict()
        ret = self._juyuan_conn.query(sql)
        for r in ret:
            _map[r.get("secucode")] = r.get("innercode")
        return _map

    def get_old_inner_code(self, secucode: str):
        sql = '''select InnerCode from LC_CodeChange where secucode = '{}';'''.format(secucode)
        r = self._juyuan_conn.get(sql)
        if r:
            inner_code = r.get("InnerCode")
            return inner_code

    def daily_update(self, deadline: datetime.datetime = None):
        inner_map = self.load_innercode_map()
        category_code_map = self.category_code_map()

        if deadline is None:
            deadline = datetime.datetime.now() - datetime.timedelta(days=1)

        load_sql = '''select id, SecuCode, CategoryCode, SecuAbbr, AntTime as PubDatetime1, \
AntTitle as Title1, AntDoc as PDFLink, CREATETIMEJZ as InsertDatetime1 from {} where \
UPDATETIMEJZ > '{}'; '''.format(self.his_table, deadline)
        logger.info(f"his load sql is {load_sql}")
        datas = self._r_spider_conn.query(load_sql)
        logger.info(f"load count is {len(datas)} from his table.")
        items = []
        for data in datas:
            # fall back to 'category_others' when the category code is unknown
            data['CategoryCode'] = category_code_map.get(
                data.get("CategoryCode"), category_code_map['category_others'])[1]
            inner_code = inner_map.get(data.get("SecuCode"))
            if inner_code:
                data['InnerCode'] = inner_code  # secucode in the crawler DB has no exchange prefix
            else:
                inner_code = self.get_old_inner_code(data.get("SecuCode"))
                if inner_code:
                    inner_map.update({data.get("SecuCode"): inner_code})
                    data['InnerCode'] = inner_code
                else:
                    continue
            data = utils.process_secucode(data)  # only keep codes starting with 0/3/6 for now
            if data:
                items.append(data)

        self._spider_conn.batch_insert(
            items, self.merge_table_name,
            ['SecuCode', 'SecuAbbr', 'CategoryCode', 'PubDatetime1', 'InsertDatetime1', 'Title1', 'InnerCode'])

        update_sql = '''select A.* from {} A, {} B where B.UPDATETIMEJZ > '{}' \
and A.code = B.SecuCode and A.link = B.AntDoc and A.type = '公告';  '''.format(
            self.live_table, self.his_table, deadline)
        logger.info(f"live sql is {update_sql}")
        datas = self._r_spider_conn.query(update_sql)
        logger.info(f'load count {len(datas)} from live table.')
        for data in datas:
            item = {
                'PubDatetime2': data.get("pub_date"),
                'InsertDatetime2': data.get("CREATETIMEJZ"),
                'Title2': data.get("title"),
            }
            self._spider_conn.table_update(self.merge_table_name, item, 'PDFLink', data.get("link"))