Ejemplo n.º 1
0
def simplePlot(code):
    """Plot the closing-price history of a single stock.

    Looks ``code`` up in ``stock_basic``; when a row exists, loads every row
    for that code from ``tsquant.daily`` ordered by ``trade_date`` and shows a
    line chart of the ``close`` column, titled with the stock's ``fullname``.

    :param code: value matched against the ``ts_code`` column of both tables.
    :return: None. Returns early, after logging, when ``stock_basic`` has no
        row for ``code``.
    """
    # ``code`` is bound as a query parameter rather than interpolated into the
    # SQL text, so quotes or other special characters in it cannot alter the
    # statement.
    # NOTE(review): assumes the connection returned by get_conn() uses the
    # "format" paramstyle (%s), as MySQL drivers do -- confirm.
    data_info = pd.read_sql("select * from stock_basic where ts_code=%s",
                            get_conn(),
                            params=(code,))
    if len(data_info) < 1:
        logger.info("没有该stock信息")
        return
    fullname = data_info['fullname'].iloc[0]
    logger.info(fullname)

    df = pd.read_sql(
        "SELECT * FROM tsquant.daily where ts_code=%s order by trade_date;",
        get_conn(),
        params=(code,))

    df['close'] = pd.to_numeric(df['close'])
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    # Index by date so the x axis of the plot is the trading day.
    df.set_index('trade_date', inplace=True)

    # Select a font with CJK glyphs for the Chinese labels used below.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    plt.plot(df['close'], label='收盘价')
    plt.legend()
    plt.title(fullname)
    plt.xlabel('日期')
    plt.ylabel('收盘价')
    plt.show()
Ejemplo n.º 2
0
def init_base_schedule(start=start1, end=end1):
    """Create the per-stock job documents for one ``start``/``end`` window.

    Does nothing (apart from logging) when ``job_basic`` already holds at
    least one document with this ``start`` and ``end``. Otherwise builds one
    upsert per ``ts_code`` found in ``stock_basic``, with every task flag set
    to ``False``, and hands the batch to ``save_data_update_one``.

    :param start: value stored in (and matched against) the ``start`` field.
    :param end: value stored in (and matched against) the ``end`` field.
    :return: None
    """
    existing_jobs = find_data(TSDB_CONN['job_basic'],
                              whereParam={'start': start, 'end': end})
    if len(existing_jobs) > 0:
        logger.info("已设置初始任务")
        return

    stocks = find_data(TSDB_CONN['stock_basic'],
                       selParam={'_id': False, 'ts_code': True})

    # Task flags, all initialised to False, in the order they are stored.
    task_flags = ('daily', 'adj_factor', 'suspend', 'daily_basic', 'cal_hfq',
                  'index')

    update_requests = []
    for stock in stocks:
        ts_code = stock['ts_code']
        job_doc = {'ts_code': ts_code, 'start': start, 'end': end}
        job_doc.update(dict.fromkeys(task_flags, False))
        # The upsert is keyed on ts_code only.
        update_requests.append(
            UpdateOne({'ts_code': ts_code}, {'$set': job_doc}, upsert=True))

    save_data_update_one(None,
                         TSDB_CONN['job_basic'],
                         update_requests=update_requests,
                         memo='设置基础任务')
Ejemplo n.º 3
0
def __get_fina_indicator(query_name, code, start, end):
    """Fetch ``fina_indicator`` data for one stock in date chunks and store it.

    The ``[start, end]`` range is cut into consecutive windows of at most
    ``360 * 10`` days. Each window is queried through ``pro.query`` and the
    result is passed to ``insert_many('fina_indicator', ...)``. When at least
    one row was inserted overall, the ``fina_indicator`` flag of the matching
    ``job_finance_basic`` row is set to 1.

    :param query_name: object whose ``.value`` is the API name to query.
    :param code: stock code passed as ``ts_code``.
    :param start: range start as a ``YYYYMMDD`` string.
    :param end: range end as a ``YYYYMMDD`` string.
    :return: None
    """
    range_end = datetime.strptime(end, '%Y%m%d')
    window_start = datetime.strptime(start, '%Y%m%d')
    max_span = timedelta(days=360 * 10)

    # Build the list of windows; the end of one window is the start of the
    # next, exactly as the loop has always produced them.
    date_list = []
    while window_start < range_end:
        window_end = min(window_start + max_span, range_end)
        date_list.append({'start': window_start.strftime('%Y%m%d'),
                          'end': window_end.strftime('%Y%m%d')})
        window_start = window_end

    inserted_total = 0
    window_count = len(date_list)
    for i, date in enumerate(date_list, start=1):
        memo = "%s子线程执行%s[%s-%s]任务[%d/%d]%s" % (
            threading.current_thread().name, code, date['start'], date['end'],
            i, window_count, query_name.value)
        logger.info(memo)
        data = pro.query(query_name.value,
                         ts_code=code,
                         start_date=date['start'],
                         end_date=date['end'])
        res = insert_many('fina_indicator', data, memo=memo)
        # This memo (for the last window processed) labels the status update
        # issued after the loop.
        memo = "更新执行任务" + query_name.value + "[%s][%s-%s]状态" % (
            code, date['start'], date['end'])
        inserted_total += res if res is not None else 0

    # inserted_total > 0 implies the loop ran, so ``memo`` is bound here.
    if inserted_total > 0:
        u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
        mysql_execute(u_sql % ('fina_indicator', code, start, end), memo=memo)
Ejemplo n.º 4
0
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Crawl one kind of daily data for each job row and mark finished jobs.

    For every entry of ``code_ls`` the matching ``pro.query`` call is made,
    the result is stored with ``insert_many(query_name, ...)`` and, when at
    least one row was inserted, the ``query_name`` column of the matching
    ``job_basic`` row is set to 1.

    :param query_name: one of ``'daily'``, ``'adj_factor'``, ``'suspend'``,
        ``'daily_basic'``. Any other value is logged and ends the function.
    :param code_ls: sequence of job rows indexed as
        ``(ts_code, start, end)``.
    :param fields: accepted for interface compatibility; not used.
    :return: None
    """
    total = len(code_ls)
    for i, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (threading.current_thread().name,
                                          query_name, code, i, total))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name in ('daily', 'daily_basic'):
            data = pro.query(query_name, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == 'adj_factor':
            data = pro.query(query_name, ts_code=ts_code)
        elif query_name == 'suspend':
            data = pro.query(query_name, ts_code=ts_code,
                             fields="ts_code,suspend_date,resume_date,ann_date,suspend_reason,reason_type")
        else:
            logger.info("不在列表之内")
            return

        memo = "执行任务%s[%s]" % (query_name, code)
        result = insert_many(query_name, data, memo=memo)

        if result is not None and result > 0:
            # ``code`` is wrapped in a 1-tuple: formatting "%s" directly with
            # a 3-element tuple row raises TypeError.
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            # The job_basic column to flag has the same name as the query.
            u_sql = "update job_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (query_name, ts_code, start, end), memo=memo)
Ejemplo n.º 5
0
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Crawl index data for each job row and mark finished jobs.

    For every entry of ``code_ls`` the matching ``pro`` call is made, the
    result is stored with ``insert_many(query_name, ...)`` and, when at least
    one row was inserted, the ``query_name`` column of the matching
    ``job_basic_index`` row is set to 1.

    :param query_name: ``'index_daily'`` or ``'index_weigth'``. Any other
        value is logged and ends the function.
    :param code_ls: sequence of job rows indexed as
        ``(index code, start, end)``.
    :param fields: accepted for interface compatibility; not used.
    :return: None
    """
    total = len(code_ls)
    for i, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' %
                    (threading.current_thread().name, query_name, code, i,
                     total))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name == 'index_daily':
            data = pro.index_daily(ts_code=ts_code,
                                   start_date=start,
                                   end_date=end)
        elif query_name == 'index_weigth':
            # NOTE(review): 'index_weigth' (sic) is also used as the column
            # name in the update below, so the spelling is kept on purpose.
            data = pro.index_weight(index_code=ts_code,
                                    start_date=start,
                                    end_date=end)
        else:
            logger.info("不在列表之内")
            return

        result = insert_many(query_name, data, memo="执行任务" + query_name)

        if result is not None and result > 0:
            # ``code`` is wrapped in a 1-tuple: formatting "%s" directly with
            # a 3-element tuple row raises TypeError.
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            u_sql = "update job_basic_index set `%s`=1 where `index_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (query_name, ts_code, start, end),
                          memo=memo)
Ejemplo n.º 6
0
def save_data_update_one(data,
                         collection,
                         update_requests=None,
                         indexs=None,
                         extra_fields=None,
                         memo=''):
    """Bulk-write update requests to ``collection`` and log the outcome.

    :param data: crawled data. When not ``None`` it is converted into update
        requests with ``__daily_obj_2_doc`` and replaces ``update_requests``.
    :param collection: collection object the requests are written to with
        ``bulk_write(..., ordered=False)``.
    :param update_requests: ready-made requests, used only when ``data`` is
        ``None``. Defaults to an empty list.
    :param indexs: key fields forwarded to ``__daily_obj_2_doc``.
    :param extra_fields: extra fields forwarded to ``__daily_obj_2_doc`` and
        included in the log line.
    :param memo: free text included in the log line.
    :return: tuple ``(bulk write result or None, number of requests)``. The
        result is ``None`` when there was nothing to write.
    """
    if data is not None:
        # extra_fields is forwarded here; it used to be dropped (a literal
        # None was passed) although the log line reports it.
        update_requests = __daily_obj_2_doc(data,
                                            indexs=indexs,
                                            extra_fields=extra_fields)
    elif update_requests is None:
        # A fresh list per call instead of a shared mutable default.
        update_requests = []

    update_result = None
    request_count = len(update_requests)
    if request_count > 0:
        update_result = collection.bulk_write(update_requests, ordered=False)
        logger.info('保存%s数据共%d条 匹配%d条 插入%d条, 更新%d条 额外字段:%s ' %
                    (memo, request_count, update_result.matched_count,
                     update_result.upserted_count,
                     update_result.modified_count, extra_fields))
    else:
        logger.info('无数据更新%d' % request_count)

    return update_result, request_count
Ejemplo n.º 7
0
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Crawl one kind of daily data for each job document and mark it done.

    For every entry of ``code_ls`` the matching ``pro.query`` call is made
    and the result is saved into ``TSDB_CONN[query_name]`` through
    ``save_data_update_one``. The ``query_name`` flag of the matching
    ``job_basic`` document is set to ``True`` when the bulk write reported any
    matched, upserted or modified document, or when there was nothing to
    write at all.

    :param query_name: one of ``'daily'``, ``'adj_factor'``, ``'suspend'``,
        ``'daily_basic'``. Any other value is logged and ends the function
        (previously it failed on an unbound local variable).
    :param code_ls: sequence of mappings with keys ``ts_code``, ``start`` and
        ``end``.
    :param fields: accepted for interface compatibility; not used.
    :return: None
    """
    total = len(code_ls)
    for i, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' %
                    (threading.current_thread().name, query_name, code, i,
                     total))
        ts_code = code['ts_code']
        start = code['start']
        end = code['end']

        # ``index`` lists the fields that identify one stored document.
        if query_name in ('daily', 'daily_basic'):
            index = ['ts_code', 'trade_date']
            data = pro.query(query_name,
                             ts_code=ts_code,
                             start_date=start,
                             end_date=end)
        elif query_name == 'adj_factor':
            index = ['ts_code', 'trade_date']
            data = pro.query(query_name, ts_code=ts_code)
        elif query_name == 'suspend':
            index = ['ts_code']
            data = pro.query(
                query_name,
                ts_code=ts_code,
                fields=
                "ts_code,suspend_date,resume_date,ann_date,suspend_reason,reason_type"
            )
        else:
            logger.info("不在列表之内")
            return

        result, rlen = save_data_update_one(data,
                                            TSDB_CONN[query_name],
                                            indexs=index,
                                            memo="执行任务" + query_name)

        wrote_something = result is not None and (
            result.matched_count > 0 or result.upserted_count > 0
            or result.modified_count > 0)
        # An empty request list also counts as done: there is nothing to
        # fetch for this job.
        if wrote_something or rlen == 0:
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            update_date(TSDB_CONN['job_basic'],
                        where_param={
                            'ts_code': ts_code,
                            'start': start,
                            'end': end
                        },
                        update_doc={query_name: True},
                        memo=memo)
Ejemplo n.º 8
0
def init_base_schedule(start=start1, end=end1):
    """Insert one pending ``job_basic`` row per stock for a date window.

    Skips the work (and logs) when the ``job_basic`` lookup for this
    ``start``/``end`` yields a value greater than 0. Otherwise builds one row
    per ``ts_code`` of ``stock_basic`` with every task flag set to ``'0'``
    and stores the rows with ``insert_many``.

    :param start: value written to / matched against the ``start`` column.
    :param end: value written to / matched against the ``end`` column.
    :return: None
    """
    existing = mysql_execute(
        "select * from job_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return

    # Task flag columns, all initialised to '0', in column order.
    flag_columns = ('daily', 'adj_factor', 'suspend', 'daily_basic',
                    'cal_hfq', 'index')

    job_rows = []
    stock_rows = mysql_search("select ts_code from stock_basic order by ts_code")
    for stock_row in stock_rows:
        job = {'ts_code': stock_row[0], 'start': start, 'end': end}
        job.update(dict.fromkeys(flag_columns, '0'))
        job_rows.append(job)

    insert_many('job_basic', pd.DataFrame(job_rows), memo='设置基础任务')
Ejemplo n.º 9
0
def init_finance_job_base(start=start1, end=end1):
    """Insert one pending ``job_finance_basic`` row per stock for a window.

    Skips the work (and logs) when the ``job_finance_basic`` lookup for this
    ``start``/``end`` yields a value greater than 0. Otherwise builds one row
    per ``ts_code`` of ``stock_basic`` with every finance task flag set to
    ``'0'`` and stores the rows with ``insert_many``.

    :param start: value written to / matched against the ``start`` column.
    :param end: value written to / matched against the ``end`` column.
    :return: None
    """
    existing = mysql_execute(
        "select * from job_finance_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已经设置财务任务[%s-%s]%d" % (start, end, existing))
        return

    # Finance task flag columns, all initialised to '0', in column order.
    flag_columns = ('income', 'balancesheet', 'cashflow', 'forecast',
                    'express', 'fina_indicator', 'fina_audit', 'fina_mainbz')

    job_rows = []
    stock_rows = mysql_search("select ts_code from stock_basic order by ts_code")
    for stock_row in stock_rows:
        job = {'ts_code': stock_row[0], 'start': start, 'end': end}
        job.update(dict.fromkeys(flag_columns, '0'))
        job_rows.append(job)

    insert_many('job_finance_basic', pd.DataFrame(job_rows), memo='设置财务数据基础任务')
Ejemplo n.º 10
0
def init_base_schedule(start=start1, end=end1):
    """Insert one pending ``job_basic_index`` row per index for a window.

    Skips the work (and logs) when the ``job_basic_index`` lookup for this
    ``start``/``end`` yields a value greater than 0. Otherwise builds one row
    per ``ts_code`` of ``index_basic`` with both task flags set to ``'0'``
    and stores the rows with ``insert_many``.

    :param start: value written to / matched against the ``start`` column.
    :param end: value written to / matched against the ``end`` column.
    :return: None
    """
    existing = mysql_execute(
        "select * from job_basic_index where `start`='%s' and `end`='%s'" %
        (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return

    job_rows = []
    index_rows = mysql_search(
        "select ts_code from index_basic order by ts_code")
    for index_row in index_rows:
        job = {'index_code': index_row[0], 'start': start, 'end': end}
        # Both task flags start out as '0'.
        job.update(dict.fromkeys(('index_daily', 'index_weigth'), '0'))
        job_rows.append(job)

    insert_many('job_basic_index', pd.DataFrame(job_rows), memo='设置指数基础任务')
Ejemplo n.º 11
0
def __finance_crawler_job(query_name, code_ls):
    """Crawl one kind of finance data for each job row and mark finished jobs.

    For every entry of ``code_ls`` the data selected by ``query_name`` is
    fetched, stored with ``insert_many(query_name.value, ...)`` and, when at
    least one row was inserted, the ``query_name.value`` column of the
    matching ``job_finance_basic`` row is set to 1.
    ``QueryName.fina_indicator`` is delegated to ``__get_fina_indicator``,
    which does its own storing and status update.

    :param query_name: a ``QueryName`` member. A member not handled below is
        logged and ends the function.
    :param code_ls: sequence of job rows indexed as ``(ts_code, start, end)``.
    :return: None
    """
    total = len(code_ls)
    for i, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (threading.current_thread().name,
                                          query_name.value, code, i, total))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name in (QueryName.income, QueryName.balance_sheet,
                          QueryName.cash_flow, QueryName.forecast,
                          QueryName.express):
            data = pro.query(query_name.value, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == QueryName.fina_indicator:
            __get_fina_indicator(query_name, ts_code, start, end)
            continue
        elif query_name == QueryName.fina_audit:
            data = pro.fina_audit(ts_code=ts_code, start_date=start,
                                  end_date=end)
        elif query_name == QueryName.fina_main_bz:
            # The result must be kept: it used to be discarded, so None was
            # handed to insert_many and this job could never complete.
            data = pro.fina_mainbz(ts_code=ts_code, start_date=start,
                                   end_date=end)
        else:
            logger.info("不在业务范围内")
            return

        res = insert_many(query_name.value, data,
                          memo="执行任务" + query_name.value)
        if res is not None and res > 0:
            # ``code`` is wrapped in a 1-tuple: formatting "%s" directly with
            # a 3-element tuple row raises TypeError.
            memo = "更新执行任务" + query_name.value + "[%s]状态" % (code,)
            u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (query_name.value, ts_code, start, end),
                          memo=memo)
Ejemplo n.º 12
0
    now_str = now_date.strftime('%Y%m%d')
    pre_str = pre_date.strftime('%Y%m%d')

    init_base_schedule(start=pre_str, end=now_str)
    daily_crawler_job(
        'daily',
        is_thread=True,
    )
    daily_crawler_job('adj_factor', is_thread=True)
    daily_crawler_job('suspend', is_thread=True)
    daily_crawler_job('daily_basic', is_thread=True)
    cal_hfq_close(is_thread=True, thread_n=2)


def week_baic_crawle():
    """Run ``stock_basic_crawler()`` followed by ``trade_cal_crawler()``."""
    stock_basic_crawler()
    trade_cal_crawler()


if __name__ == '__main__':

    # Register the recurring jobs: the daily stock crawl twice a day, the
    # adjusted-price calculation every 30 minutes, and the basic-data crawl
    # on Fridays at 11:30.
    schedule.every().day.at("15:30").do(stock_daily_crawler)
    schedule.every().day.at("20:30").do(stock_daily_crawler)
    schedule.every(30).minutes.do(cal_hfq_close)
    schedule.every().friday.at("11:30").do(week_baic_crawle)

    logger.info("启动stock_schedule_daily.")
    # Poll forever: run whatever is pending, then sleep 10 seconds.
    while True:
        schedule.run_pending()
        time.sleep(10)
Ejemplo n.º 13
0
def __cal_hfq_close(code_ls, fq_type='hfq'):
    """Compute adjusted prices for each job and write them to ``daily``.

    For every job the daily bars and the adjustment factors of the job's
    ``[start, end]`` window are loaded, each of close/high/low/open is
    multiplied by ``adj_factor`` and the products are written back to the
    ``daily`` collection as ``hfq_close``, ``hfq_high``, ``hfq_low`` and
    ``hfq_open`` (plus ``adj_factor``). When the bulk write touched at least
    one document, the job's ``cal_hfq`` flag in ``job_basic`` is set.

    A job is skipped (with a log line) when it has no daily bars, or when the
    number of factor rows differs from the number of daily bars.

    :param code_ls: sequence of mappings with keys ``ts_code``, ``start`` and
        ``end``.
    :param fq_type: only ``'hfq'`` is processed; ``'qfq'`` and any other
        value make the function return immediately without doing anything.
    :return: None
    """
    if fq_type != 'hfq':
        # 'qfq' has no implementation here; like any unknown type it is a
        # no-op.
        return

    update_field = 'hfq'
    memo = '后复权'
    price_columns = ('close', 'high', 'low', 'open')
    job_len = len(code_ls)
    thread_name = threading.current_thread().name

    for i, code in enumerate(code_ls, start=1):
        ts_code = code['ts_code']
        start = code['start']
        end = code['end']
        logger.info('%s计算%s %s[%s-%s]后复权数据 [%d/%d]\r' %
                    (thread_name, memo, ts_code, start, end, i, job_len))

        close_data = find_data(TSDB_CONN['daily'],
                               whereParam={
                                   'ts_code': ts_code,
                                   'trade_date': {
                                       '$gte': start,
                                       '$lte': end
                                   }
                               },
                               selParam={
                                   'ts_code': True,
                                   'trade_date': True,
                                   'close': True,
                                   'high': True,
                                   'open': True,
                                   'low': True,
                                   '_id': False
                               })
        factor_data = find_data(TSDB_CONN['adj_factor'],
                                whereParam={
                                    'ts_code': ts_code,
                                    'trade_date': {
                                        '$gte': start,
                                        '$lte': end
                                    }
                                },
                                selParam={
                                    'ts_code': True,
                                    'trade_date': True,
                                    'adj_factor': True,
                                    '_id': False
                                })

        if len(close_data) < 1:
            logger.info('不计算复权数据,没有日线数据')
            continue
        # Compare the row counts before building the frames: with an empty
        # factor result the frame has no 'trade_date' column and set_index
        # would raise KeyError instead of skipping the job.
        if len(factor_data) != len(close_data):
            logger.info('不计算复权数据,日线和复权因子没有同步')
            continue

        pd_close = pd.DataFrame(close_data)
        pd_close.set_index('trade_date', inplace=True)
        pd_factor = pd.DataFrame(factor_data)
        pd_factor.set_index('trade_date', inplace=True)

        # Join bars and factors side by side on trade_date.
        fq_data = pd.concat([pd_close, pd_factor], axis=1)
        for column in ('adj_factor',) + price_columns:
            fq_data[column] = pd.to_numeric(fq_data[column])
        for column in price_columns:
            fq_data[update_field + column] = (fq_data[column] *
                                              fq_data['adj_factor'])

        update_requests = []
        for trade_date, row in fq_data.iterrows():
            doc = {}
            for column in price_columns:
                doc[update_field + '_' + column] = row[update_field + column]
            doc['adj_factor'] = row['adj_factor']
            # upsert=False: only existing daily documents are updated.
            update_requests.append(
                UpdateOne({'ts_code': ts_code, 'trade_date': str(trade_date)},
                          {'$set': doc},
                          upsert=False))

        result, _ = save_data_update_one(None,
                                         TSDB_CONN['daily'],
                                         update_requests,
                                         memo='daily更新%s数据' % memo)
        wrote_something = result is not None and (
            result.matched_count > 0 or result.upserted_count > 0
            or result.modified_count > 0)
        if wrote_something:
            update_date(TSDB_CONN['job_basic'],
                        where_param={
                            'ts_code': ts_code,
                            'start': start,
                            'end': end
                        },
                        update_doc={'cal_hfq': True},
                        memo='更新复权任务状态')
Ejemplo n.º 14
0
def update_date(collection, where_param=None, update_doc=None, memo=''):
    """Apply a ``$set`` update to one matching document and log ``memo``.

    :param collection: collection object to update.
    :param where_param: filter selecting the document; defaults to ``{}``.
    :param update_doc: mapping of fields to set; defaults to ``{}``.
    :param memo: message written to the log after the update.
    :return: None
    """
    # Fresh dicts per call instead of shared mutable default arguments.
    where_param = {} if where_param is None else where_param
    update_doc = {} if update_doc is None else update_doc
    # update_one replaces the legacy Collection.update (removed in PyMongo 4);
    # with the legacy defaults (multi=False, upsert=False) it is equivalent.
    collection.update_one(where_param, {"$set": update_doc})
    logger.info(memo)