예제 #1
0
def QA_SU_save_report_calendar_day(client=DATABASE,
                                   ui_log=None,
                                   ui_progress=None):
    '''
    Save recent financial-report calendar data into ``client.report_calendar``.

    The query window runs from 12 months before today to 3 months before
    today. ``QA_util_getBetweenQuarter`` expands that window and the second
    column of the transposed result supplies the report dates to fetch
    (presumably quarter-end dates -- confirm against the helper).

    :param client: database handle exposing a ``report_calendar`` collection
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: progress sink forwarded to ``QA_util_log_info``
    :return: None. Dates whose fetch or insert raised are logged at the end.
    '''
    END_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -3))
    START_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -12))

    quarters = pd.DataFrame.from_dict(
        QA_util_getBetweenQuarter(START_DATE, END_DATE)).T
    date_list = list(quarters.iloc[:, 1])

    report_calendar = client.report_calendar
    report_calendar.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)
    err = []

    def _save_one(report_date):
        # Best effort: any exception marks this date as failed and the run
        # continues with the next one.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving Report_Calendar==== {}'.format(
                    str(report_date)), ui_log)

            report_calendar.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_financial_calendar(report_date)),
                                        ordered=False)
        except Exception as error0:
            print(error0)
            err.append(str(report_date))

    total = len(date_list)
    # enumerate gives the true position even when the list holds duplicates
    # and avoids a linear ``list.index`` scan on every iteration.
    for position, report_date in enumerate(date_list, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        QA_util_log_info('DOWNLOAD PROGRESS {}%'.format(str(percent)[0:4]),
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=int(percent))

        _save_one(report_date)

    if not err:
        QA_util_log_info('SUCCESS save report calendar ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
예제 #2
0
def date_func(date):
    '''
    Return the number of days between today and ``date``.

    :param date: a ``%Y%m%d`` string; ``None``, ``'None'``, ``0`` and ``'0'``
                 are treated as "today"
    :return: ``(today - date).days`` as an int (0 for the sentinel values)
    '''
    today_format = "%Y-%m-%d"
    if date is not None and date not in ['None', 0, '0']:
        reference = datetime.datetime.strptime(date, "%Y%m%d")
    else:
        reference = datetime.datetime.strptime(QA_util_today_str(),
                                               today_format)
    now = datetime.datetime.strptime(QA_util_today_str(), today_format)
    elapsed = now - reference
    return elapsed.days
예제 #3
0
def QA_SU_save_stock_shares_day(client=DATABASE,
                                ui_log=None,
                                ui_progress=None):
    '''
    Save share-change records for every listed stock into
    ``client.stock_shares``.

    Codes come from ``QA_fetch_stock_list_adv()``; each code is fetched with
    ``QA_fetch_get_stock_shares_sina`` and bulk-inserted.

    :param client: database handle exposing a ``stock_shares`` collection
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: progress sink forwarded to ``QA_util_log_info``
    :return: None. Codes whose fetch or insert raised are logged at the end.
    '''
    code = list(QA_fetch_stock_list_adv()['code'])
    stock_shares = client.stock_shares
    stock_shares.create_index([("code", pymongo.ASCENDING),
                               ("begin_date", pymongo.ASCENDING),
                               ('total_shares', pymongo.DESCENDING),
                               ('reason', pymongo.DESCENDING),
                               ('send_date', pymongo.DESCENDING)],
                              unique=True)
    err = []

    def _save_one(stock_code):
        # Best effort: any exception marks this code as failed and the run
        # continues with the next one.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving SSINA shares change==== {}'.format(
                    str(stock_code)), ui_log)

            stock_shares.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_shares_sina(stock_code)),
                                     ordered=False)
            gc.collect()
        except Exception as error0:
            print(error0)
            err.append(str(stock_code))

    total = len(code)
    # enumerate gives the true position even when the list holds duplicates
    # and avoids a linear ``list.index`` scan on every iteration.
    for position, stock_code in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        QA_util_log_info('DOWNLOAD PROGRESS {}%'.format(str(percent)[0:4]),
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=int(percent))

        _save_one(stock_code)

    if not err:
        QA_util_log_info('SUCCESS save SINA shares change ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
예제 #4
0
def QA_fetch_interest_rate_adv(start="all", end=None, format='pd'):
    '''
    Fetch stored interest-rate data for a date range.

    :param start: start date (only the first 10 characters are used), or
                  ``"all"`` for everything from 1991-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param format: output format forwarded to ``QA_fetch_interest_rate``
    :return: whatever ``QA_fetch_interest_rate`` returns for the range
    '''
    end = start if end is None else end
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        # "all" overrides any explicit end date as well.
        start = '1991-01-01'
        end = QA_util_today_str()
    # ``end`` is always a string here, so no further None handling is needed.
    return QA_fetch_interest_rate(start, end, format=format)
예제 #5
0
def QA_fetch_financial_TTM_adv(code,
                               start="all",
                               end=None,
                               format='pd',
                               collections=DATABASE.financial_TTM):
    '''
    Fetch trailing-twelve-month financial report data wrapped in
    ``QA_DataStruct_Financial``.

    :param code: stock code(s) passed through to ``QA_fetch_financial_TTM``
    :param start: start date (only the first 10 characters are used), or
                  ``"all"`` for everything from 2001-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param format: accepted for interface compatibility; not used here
    :param collections: accepted for interface compatibility; not used here
    :return: ``QA_DataStruct_Financial`` built from the query result
    '''
    end = start if end is None else end
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        # "all" overrides any explicit end date as well.
        start = '2001-01-01'
        end = QA_util_today_str()

    # ``end`` is always a string at this point, so the former
    # ``end is None`` branch could never run; the month-range series built
    # in the other branch was never used and has been dropped.
    return QA_DataStruct_Financial(QA_fetch_financial_TTM(code, start, end))
예제 #6
0
def QA_fetch_stock_fianacial_adv(
    code,
    start='all',
    end=None,
    if_drop_index=True,
):
    '''
    Fetch stock financial data and wrap it in ``QA_DataStruct_Stock_day``.

    :param code: stock code(s) passed through to ``QA_fetch_stock_fianacial``
    :param start: start date (first 10 characters used), or ``'all'`` for
                  everything from 1990-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param if_drop_index: forwarded as ``drop`` when indexing by date and code
    :return: ``QA_DataStruct_Stock_day``, or None when the query returns None
    '''
    if end is None:
        end = start
    start, end = str(start)[0:10], str(end)[0:10]

    if start == 'all':
        start, end = '1990-01-01', QA_util_today_str()

    res = QA_fetch_stock_fianacial(code, start, end, format='pd')
    if res is not None:
        indexed = res.set_index(['date', 'code'], drop=if_drop_index)
        return QA_DataStruct_Stock_day(indexed)

    # TODO: report whether it was the code or the date that was invalid.
    print(
        "QA Error QA_fetch_stock_fianacial_adv parameter code=%s , start=%s, end=%s call QA_fetch_stock_fianacial_adv return None"
        % (code, start, end))
    return None
def get_stock_report_ths(code):
    '''
    Download the 'cash', 'benefit' and 'debt' report exports for one stock
    from basic.10jqka.com.cn through Chrome and merge them into one frame.

    For each report type a browser is pointed at the export URL until the
    expected ``.xls`` file shows up in the root of the D: drive, waiting one
    second longer on every retry. The file is read, appended to the
    accumulated data and then deleted.

    :param code: stock code used in the export URL and the file name
    :return: DataFrame whose first column is ``report_date``, with ``code``
             and ``crawl_date`` columns added
    '''
    data = pd.DataFrame()

    for report_type in ['cash', 'benefit', 'debt']:
        excelFile = r'D:\{code}_{type}_report.xls'.format(code=code,
                                                          type=report_type)
        seconds = 1
        headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection': 'keep-alive',
            '''--proxy-server''': 'http://202.20.16.82:10152'
        }

        # Every browser started for this report type. They stay alive while
        # waiting (an earlier one may still be downloading) and are all
        # closed in the ``finally`` below, so none is leaked.
        drivers = []
        loaded = False
        try:
            while not os.path.exists(excelFile):
                options = webdriver.ChromeOptions()
                for (key, value) in headers.items():
                    options.add_argument('%s="%s"' % (key, value))
                prefs = {
                    'profile.default_content_settings.popups': 0,
                    'download.default_directory': 'd:\\'
                }
                options.add_experimental_option('prefs', prefs)
                driver = webdriver.Chrome(chrome_options=options)
                drivers.append(driver)
                driver.get(
                    'http://basic.10jqka.com.cn/api/stock/export.php?export={type}&type=report&code={code}'
                    .format(code=code, type=report_type))
                sleep(seconds)
                seconds = seconds + 1

            try:
                df1 = pd.DataFrame(
                    pd.read_excel(excelFile,
                                  sheet_name='Worksheet')).T.reset_index()
                # pd.concat replaces DataFrame.append, which no longer
                # exists in pandas 2.
                data = pd.concat([data, df1.T])
                loaded = True
            except Exception as error0:
                print('Error for reading')
                print(error0)
        finally:
            for opened in drivers:
                try:
                    opened.quit()
                except Exception as quit_error:
                    print(quit_error)

        # Delete only after the browsers are closed and the file was read.
        if loaded:
            try:
                os.remove(excelFile)
                print("Success Delete {code} {type} report file".format(
                    code=code, type=report_type))
            except OSError:
                print("NO {code} {type} report file to Delete".format(
                    code=code, type=report_type))

    res = data.T.iloc[1:, ]
    new_index = data.T[0:1].values.tolist()[0]
    new_index[0] = "report_date"
    res.columns = new_index
    res["code"] = code
    res['crawl_date'] = QA_util_today_str()
    return (res)
예제 #8
0
def QA_SU_save_stock_fianacial_momgo(start_date=None, end_date=None, ui_log=None, ui_progress=None):
    '''
    Save per-day stock financial analysis data into
    ``DATABASE.stock_financial_analysis``.

    Date defaults: with neither bound given, only today is processed; with
    only ``end_date`` given, the range starts at 2008-01-01; with only
    ``start_date`` given, the range ends today.

    :param start_date: first calendar day to process (string)
    :param end_date: last calendar day to process (string)
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: accepted for interface compatibility; not used here
    :return: None
    '''
    if start_date is None:
        if end_date is None:
            start_date = QA_util_today_str()
            end_date = start_date
        else:
            start_date = '2008-01-01'
    elif end_date is None:
        end_date = QA_util_today_str()
    elif end_date < start_date:
        # The resulting date range is empty, so nothing is processed below.
        print('end_date should large than start_date')

    col = DATABASE.stock_financial_analysis
    col.create_index(
        [("CODE", ASCENDING), ("date_stamp", ASCENDING)], unique=True)

    deal_date_list = [
        str(day.date()) for day in pd.date_range(start_date, end_date)
    ]
    for deal_date in deal_date_list:
        data = QA_util_etl_stock_quant(deal_date)
        if data is None:
            QA_util_log_info(
                '##JOB01 No Data stock_fianacial_data ============== {deal_date} '.format(deal_date=deal_date), ui_log)
            continue

        data = data.drop_duplicates((['CODE', 'date']))
        QA_util_log_info(
            '##JOB01 Pre Data stock financial data ============== {deal_date} '.format(deal_date=deal_date), ui_log)
        data = QA_util_to_json_from_pandas(data)
        print("got stock financial data ============== {deal_date}".format(deal_date=deal_date))
        QA_util_log_info(
            '##JOB02 Got Data stock financial data ============== {deal_date}'.format(deal_date=deal_date), ui_log)
        try:
            col.insert_many(data, ordered=False)
        except MemoryError:
            # Retry once with an ordered insert.
            col.insert_many(data, ordered=True)
        except pymongo.bulk.BulkWriteError:
            # Deliberately ignored, as before (presumably duplicate keys
            # from the unique index on a re-run -- confirm).
            pass
        except Exception as error0:
            # Previously swallowed without a trace; keep going but report it.
            QA_util_log_info(
                '##JOB03 Failed saving stock financial data ============== {deal_date} : {error}'.format(
                    deal_date=deal_date, error=error0), ui_log)
        else:
            QA_util_log_info(
                '##JOB03 Now stock financial data saved ============== {deal_date} '.format(deal_date=deal_date), ui_log)
def QA_SU_save_stock_divyield_his(client=DATABASE,
                                  ui_log=None,
                                  ui_progress=None):
    '''
    Save historical dividend data into ``client.stock_divyield``.

    The query window runs from 2007-01-01 to 3 months before today.
    ``QA_util_getBetweenQuarter`` expands that window and the second column
    of the transposed result supplies the report dates to fetch (presumably
    quarter-end dates -- confirm against the helper).

    :param client: database handle exposing a ``stock_divyield`` collection
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: progress sink forwarded to ``QA_util_log_info``
    :return: None. Dates whose fetch or insert raised are logged at the end.
    '''
    START_DATE = '2007-01-01'
    END_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -3))

    quarters = pd.DataFrame.from_dict(
        QA_util_getBetweenQuarter(START_DATE, END_DATE)).T
    date_list = list(quarters.iloc[:, 1])

    stock_divyield = client.stock_divyield
    stock_divyield.create_index([("a_stockcode", pymongo.ASCENDING),
                                 ("report_date", pymongo.ASCENDING),
                                 ("reg_date", pymongo.ASCENDING)],
                                unique=True)
    err = []

    def _save_one(report_date):
        # Best effort: any exception marks this date as failed and the run
        # continues with the next one.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving stock_divyield==== {}'.format(
                    str(report_date)), ui_log)
            stock_divyield.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_divyield(report_date)),
                                       ordered=False)
        except Exception as error0:
            print("error : {code}".format(code=error0))
            err.append(str(report_date))

    total = len(date_list)
    # enumerate gives the true position even when the list holds duplicates
    # and avoids a linear ``list.index`` scan on every iteration.
    for position, report_date in enumerate(date_list, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        QA_util_log_info('DOWNLOAD PROGRESS {}%'.format(str(percent)[0:4]),
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=int(percent))

        _save_one(report_date)

    if not err:
        QA_util_log_info('SUCCESS save stock divyield ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
예제 #10
0
def QA_fetch_stock_divyield_adv(code,
                                start="all",
                                end=None,
                                format='pd',
                                type='crawl',
                                collections=DATABASE.stock_divyield):
    '''
    Fetch stored dividend data wrapped in ``QA_DataStruct_Financial``.

    :param code: stock code(s) passed through to ``QA_fetch_stock_divyield``
    :param start: start date (only the first 10 characters are used), or
                  ``"all"`` for everything from 2007-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param format: accepted for interface compatibility; not used here
    :param type: forwarded to ``QA_fetch_stock_divyield``
    :param collections: accepted for interface compatibility; not used here
    :return: ``QA_DataStruct_Financial`` built from the query result
    '''
    end = start if end is None else end
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        # "all" overrides any explicit end date as well.
        start = '2007-01-01'
        end = QA_util_today_str()
    # ``end`` is always a string here, so no further None handling is needed.
    return QA_DataStruct_Financial(
        QA_fetch_stock_divyield(code, start, end, type=type))
예제 #11
0
def QA_SU_save_financial_report_day(client=DATABASE,
                                    ui_log=None,
                                    ui_progress=None):
    '''
    Save THS financial reports into ``client.stock_financial`` for the
    codes listed in today's report calendar.

    The codes are the ``code`` column of
    ``QA_fetch_stock_financial_calendar_adv`` queried for every listed stock
    with today's date as the start.

    :param client: database handle exposing a ``stock_financial`` collection
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: progress sink forwarded to ``QA_util_log_info``
    :return: None. Codes whose fetch or insert raised are logged at the end.
    '''
    code = list(
        QA_fetch_stock_financial_calendar_adv(
            list(QA_fetch_stock_list_adv()['code']),
            QA_util_today_str()).data['code'])
    stock_financial = client.stock_financial
    stock_financial.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)
    err = []

    def _save_one(stock_code):
        # Best effort: any exception marks this code as failed and the run
        # continues with the next one.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving THS financial_report==== {}'.format(
                    str(stock_code)), ui_log)

            stock_financial.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_report_ths(stock_code)),
                                        ordered=False)
            gc.collect()
        except Exception as error0:
            print(error0)
            err.append(str(stock_code))

    total = len(code)
    # enumerate gives the true position even when the list holds duplicates
    # and avoids a linear ``list.index`` scan on every iteration.
    for position, stock_code in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        QA_util_log_info('DOWNLOAD PROGRESS {}%'.format(str(percent)[0:4]),
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=int(percent))

        _save_one(stock_code)

    if not err:
        QA_util_log_info('SUCCESS save THS financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def get_stock_report_wy(code):
    '''
    Download the 'lrb', 'zcfzb' and 'xjllb' report CSVs for one stock from
    quotes.money.163.com through Chrome and merge them into one frame.

    For each report type a browser is pointed at the service URL until the
    expected CSV shows up in the root of the D: drive, waiting one second
    longer on every retry. The file is read, joined column-wise onto the
    accumulated data and then deleted. Column labels have the '(万元)' unit
    suffix stripped and the merged values are multiplied by 10000.

    :param code: stock code used in the service URL and the file name
    :return: DataFrame with ``report_date``, ``code`` and ``crawl_date``
             columns; only rows with a 10-character report date are kept
    '''
    data = pd.DataFrame()

    for report_type in ['lrb', 'zcfzb', 'xjllb']:
        excelFile = r'D:\{type}{code}.csv'.format(code=code, type=report_type)
        seconds = 1
        headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                   'Request URL': 'http://quotes.money.163.com/service/{type}_{code}.html'.format(code=code, type=report_type),
                   'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
                   'Remote Address': '59.111.160.246:80',
                   'Referrer Policy': 'no-referrer-when-downgrade'
                   }

        # Every browser started for this report type. They stay alive while
        # waiting (an earlier one may still be downloading) and are all
        # closed in the ``finally`` below, so none is leaked.
        drivers = []
        loaded = False
        try:
            while not os.path.exists(excelFile):
                options = webdriver.ChromeOptions()
                for (key, value) in headers.items():
                    options.add_argument('%s="%s"' % (key, value))
                prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': 'd:\\'}
                options.add_experimental_option('prefs', prefs)
                driver = webdriver.Chrome(chrome_options=options)
                drivers.append(driver)
                driver.get('http://quotes.money.163.com/service/{type}_{code}.html'.format(code=code, type=report_type))
                sleep(seconds)
                seconds = seconds + 1

            try:
                df1 = pd.read_csv(excelFile, encoding='ANSI', na_values=["--", " --", " "], header=0).T
                res = df1.reset_index().iloc[1:, :]
                res.columns = [x.replace('(万元)', '').replace(' ', '').strip() for x in df1.reset_index().iloc[:1].values.tolist()[0]]
                if report_type == 'xjllb':
                    # These three labels get a 'C' suffix for this report
                    # type (presumably to avoid clashing with same-named
                    # columns of the other reports -- confirm).
                    res.columns = [x + 'C' if x in ['财务费用', '净利润', '少数股东损益'] else x.replace('(万元)', '').replace(' ', '').strip() for x in list(res.columns)]
                res = res.set_index('报告日期')
                data = pd.concat([data, res], axis=1, sort=False).fillna(0)
                loaded = True
            except Exception as error0:
                print('Error for reading')
                print(error0)
        finally:
            for opened in drivers:
                try:
                    opened.quit()
                except Exception as quit_error:
                    print(quit_error)

        # Delete only after the browsers are closed and the file was read.
        if loaded:
            try:
                os.remove(excelFile)
                print("Success Delete {code} {type} report file".format(code=code, type=report_type))
            except OSError:
                print("NO {code} {type} report file to Delete".format(code=code, type=report_type))

    data = data * 10000
    res = data.reset_index()
    new_index = list(res.columns)
    new_index[0] = "report_date"
    res.columns = new_index
    res["code"] = code
    res['crawl_date'] = QA_util_today_str()
    res = res[res['report_date'].str.contains('Unnamed') == 0]
    res = res[res['report_date'].apply(len) == 10]
    return (res)
예제 #13
0
def QA_fetch_financial_code(ndays=10):
    '''
    Collect codes from the financial calendar since ``ndays`` trade days
    ago, preceded by codes whose listing date falls in the same window.

    :param ndays: how many trade days to look back from today
    :return: list of codes, listing-date matches first, then calendar codes
    '''
    window_start = str(QA_util_get_pre_trade_date(QA_util_today_str(), ndays))

    all_codes = QA.QA_fetch_stock_list_adv().code.tolist()
    calendar_codes = list(
        QA_fetch_stock_financial_calendar(all_codes,
                                          start=window_start)['code'])

    def _listing_day(raw):
        # Non-positive values become the literal string 'None'.
        if raw > 0:
            listed = QA_util_add_months(QA_util_date_int2str(int(raw)), 0)
        else:
            listed = None
        return str(listed)[0:10]

    basics = pd.DataFrame(
        QA_fetch_stock_basic_info_tushare())[['code', 'timeToMarket']]
    basics['TM'] = basics['timeToMarket'].apply(_listing_day)

    # NOTE(review): this is a string comparison, and 'None' sorts after any
    # digit-leading date, so rows without a listing date also pass the
    # filter -- confirm that is intended.
    recently_listed = basics[basics['TM'] >= window_start]
    return list(recently_listed['code'].values) + calendar_codes
예제 #14
0
def QA_fetch_stock_financial_calendar_adv(
        code,
        start="all",
        end=None,
        type='day',
        format='pd',
        collections=DATABASE.report_calendar):
    '''
    Fetch the stock financial-report calendar wrapped in
    ``QA_DataStruct_Financial``.

    :param code: stock code(s) passed through to
                 ``QA_fetch_stock_financial_calendar``
    :param start: start date (only the first 10 characters are used), or
                  ``"all"`` for everything from 2007-01-01 up to today
    :param end: end date; when omitted it defaults to today
    :param type: forwarded to ``QA_fetch_stock_financial_calendar``
    :param format: accepted for interface compatibility; not used here
    :param collections: accepted for interface compatibility; not used here
    :return: ``QA_DataStruct_Financial`` built from the query result
    '''
    if start == 'all':
        # "all" overrides any explicit end date as well.
        start = '2007-01-01'
        end = QA_util_today_str()
    if end is None:
        end = QA_util_today_str()

    # ``end`` can no longer be None here, so the former fallback to
    # ``start`` was dead code and has been removed.
    start = str(start)[0:10]
    end = str(end)[0:10]

    return QA_DataStruct_Financial(
        QA_fetch_stock_financial_calendar(code, start, end, type=type))
예제 #15
0
def QA_SU_save_financial_report_his(client=DATABASE,
                                    ui_log=None,
                                    ui_progress=None):
    '''
    Save historical SINA financial reports for every listed stock into
    ``client.stock_financial_sina``.

    The years requested are the values of ``QA_util_getBetweenYear`` from
    2002-01-01 to today; codes come from ``QA_fetch_stock_list_adv()``.

    :param client: database handle exposing a ``stock_financial_sina``
                   collection
    :param ui_log: log sink forwarded to ``QA_util_log_info``
    :param ui_progress: progress sink forwarded to ``QA_util_log_info``
    :return: None. Codes whose fetch or insert raised are logged at the end.
    '''
    YEARS = list(
        QA_util_getBetweenYear('2002-01-01', QA_util_today_str()).values())

    code = list(QA_fetch_stock_list_adv()['code'])
    stock_financial_sina = client.stock_financial_sina
    stock_financial_sina.create_index([("code", pymongo.ASCENDING),
                                       ("report_date", pymongo.ASCENDING)],
                                      unique=True)
    err = []

    def _save_one(stock_code):
        # Best effort: any exception marks this code as failed and the run
        # continues with the next one.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving SINA financial_report==== {}'.format(
                    str(stock_code)), ui_log)
            stock_financial_sina.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_report_sina(stock_code, YEARS)),
                                             ordered=False)
        except Exception as error0:
            print(error0)
            err.append(str(stock_code))

    total = len(code)
    # enumerate gives the true position even when the list holds duplicates
    # and avoids a linear ``list.index`` scan on every iteration.
    for position, stock_code in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        QA_util_log_info('DOWNLOAD PROGRESS {}%'.format(str(percent)[0:4]),
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=int(percent))

        _save_one(stock_code)

    if not err:
        QA_util_log_info('SUCCESS save SINA financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
예제 #16
0
def get_financial_report_date(report_date,
                              headers=None,
                              psize=2000,
                              vname="plsj",
                              page=1):
    '''
    Read the report-date table for ``report_date`` and keep only the rows
    that have a non-empty ``real_date``.

    :param report_date: report period, also stored in the ``report_date``
                        column of the result
    :param headers: forwarded to ``read_financial_report_date``
    :param psize: forwarded to ``read_financial_report_date``
    :param vname: forwarded to ``read_financial_report_date``
    :param page: forwarded to ``read_financial_report_date``
    :return: filtered DataFrame with ``report_date`` and ``crawl_date`` added
    '''
    # The second element of the reader's result is not needed here.
    frame, _page_count = read_financial_report_date(report_date, headers,
                                                    psize, vname, page)
    column_names = [
        'code', 'name', 'pre_date', 'first_date', 'second_date', 'third_date',
        'real_date', 'codes'
    ]
    frame.columns = column_names
    frame['report_date'] = report_date
    frame['crawl_date'] = QA_util_today_str()

    has_real_date = frame["real_date"].apply(lambda value: len(value) != 0)
    return frame[has_real_date]
def read_stock_divyield(report_date, headers=None, page=1):
    '''
    Load the dividend table for ``report_date`` from stock.jrj.com.cn
    through Chrome and return it as a DataFrame.

    :param report_date: report period used in the URL and stored in the
                        ``report_date`` column of the result
    :param headers: request headers passed to Chrome as arguments; a default
                    browser-like set is used when omitted
    :param page: when 1, the headers are first refreshed via ``get_headers``
    :return: ``(data, page_num)``; ``(None, None)`` when the payload has no
             rows
    '''
    if headers is None:
        headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection': 'keep-alive'
        }
    args = {
        "report_date": report_date,
        "unixstamp": int(round(time.time() * 1000))
    }

    strUrl1 = "http://stock.jrj.com.cn/report/js/sz/{report_date}.js?ts={unixstamp}".format(
        **args)
    if page == 1:
        headers = get_headers(report_date, headers)
    options = webdriver.ChromeOptions()
    for (key, value) in headers.items():
        options.add_argument('%s="%s"' % (key, value))

    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.get(strUrl1)
        soup = BeautifulSoup(driver.page_source, "html.parser").body.text
    finally:
        # Close the browser even when loading or parsing the page fails.
        driver.quit()

    start_str = 'var fhps = '
    # NOTE(review): str.strip treats ``start_str`` as a set of characters,
    # not as a prefix. Kept as-is because the payload format cannot be
    # checked from here -- confirm against a live response.
    payload = soup.strip(start_str).strip(';')
    # Empty fields that sit alone on a line are filled with 0 so the text
    # can be decoded.
    payload = payload.replace(',\n,\n,', ',0,0,').replace(',\n,', ',0,')
    res = demjson.decode(payload)

    data = pd.DataFrame(res['data'])
    if data.shape[0] > 0:
        page_num = res['summary']['total']
        data = data.drop_duplicates(keep='first')
        data.columns = [
            'a_stockcode', 'a_stocksname', 'div_info', 'div_type_code',
            'bonus_shr', 'cash_bt', 'cap_shr', 'epsp', 'ps_cr', 'ps_up',
            'reg_date', 'dir_dcl_date', 'a_stockcode1', 'ex_divi_date', 'prg'
        ]
        data['report_date'] = report_date
        data['crawl_date'] = QA_util_today_str()
        return (data, page_num)
    else:
        print("No divyield data for today")
        return (None, None)
예제 #18
0
def QA_fetch_index_quant_pre_adv(code, start="all", end=None, format='pd'):
    '''
    Query interface for the quantitative machine-learning data fetched by
    ``QA_fetch_index_quant_pre``, wrapped in ``QA_DataStruct_Stock_day``.

    :param code: code(s) passed through to ``QA_fetch_index_quant_pre``
    :param start: start date (only the first 10 characters are used);
                  ``"all"`` or ``None`` selects 2008-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param format: accepted for interface compatibility; not used here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    '''
    # Test for None before stringifying: once converted, None becomes the
    # string 'None' and would be sent to the query as a date.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_index_quant_pre(code, start, end)
    return QA_DataStruct_Stock_day(data)
예제 #19
0
def QA_fetch_interest_rate(start,
                           end=None,
                           format='pd',
                           collections=DATABASE.interest_rate):
    '''
    Query stored interest-rate rows whose ``date_stamp`` lies between
    ``start`` and ``end`` (both inclusive).

    :param start: start date; ``None`` means 1999-01-01
    :param end: end date; ``None`` means today
    :param format: ``P``/``p``/``pandas``/``pd`` for a DataFrame,
                   ``json``/``dict`` for ``QA_util_to_json_from_pandas``
                   output, ``n``/``N``/``numpy`` for an array,
                   ``list``/``l``/``L`` for a nested list
    :param collections: collection to query
    :return: the data in the requested format; None when ``end`` is not a
             valid date or ``format`` is not recognised. In the pandas
             format None is also returned when the bookkeeping columns are
             missing (e.g. no rows matched).
    '''
    if end is None:
        end = QA_util_today_str()
    if start is None:
        start = '1999-01-01'

    # Only ``end`` is validated, as before.
    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error Interest Rate data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        }, {"_id": 0},
        batch_size=10000)

    res = pd.DataFrame(list(cursor))
    try:
        res = res.drop(columns=['crawl_date', 'date_stamp'])
    except KeyError:
        # The columns are absent, e.g. because the query matched nothing.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error Interest Rate format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
예제 #20
0
def get_stock_shares_sina(code):
    '''
    Read share data for one stock through a headless Chrome session and
    stamp it with today's crawl date.

    :param code: stock code passed to ``read_data_from_sina``
    :return: the frame returned by ``read_data_from_sina`` with a
             ``crawl_date`` column added, or None when nothing was read
    '''
    headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
               'Accept-Language': 'zh-CN,zh;q=0.9',
               'Cache-Control': 'max-age=0',
               'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
               'Connection': 'keep-alive'
               }
    options = webdriver.ChromeOptions()
    for (key, value) in headers.items():
        options.add_argument('%s="%s"' % (key, value))
    options.add_argument('headless')

    res = read_data_from_sina(code, options)
    # Check for None before touching the result; assigning the column first
    # would raise TypeError and the None return could never be reached.
    if res is None:
        return None
    res['crawl_date'] = QA_util_today_str()
    return res
예제 #21
0
def QA_fetch_stock_target_adv(code,
                              start="all",
                              end=None,
                              type='close',
                              format='pd'):
    '''
    Query interface for the quantitative machine-learning data fetched by
    ``QA_fetch_stock_target``, wrapped in ``QA_DataStruct_Stock_day``.

    :param code: stock code(s) passed through to ``QA_fetch_stock_target``
    :param start: start date (only the first 10 characters are used);
                  ``"all"`` or ``None`` selects 2008-01-01 up to today
    :param end: end date; when omitted it defaults to ``start``
    :param type: forwarded to ``QA_fetch_stock_target``
    :param format: accepted for interface compatibility; not used here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    '''
    # Test for None before stringifying: once converted, None becomes the
    # string 'None' and would be sent to the query as a date.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_stock_target(code, start, end, type=type)
    return QA_DataStruct_Stock_day(data)
def read_stock_report_wy(code):
    '''
    Read the 'zcfzb', 'lrb' and 'xjllb' reports for one stock via
    ``read_data_data_from_wy`` and join them into a single frame.

    :param code: stock code passed to ``read_data_data_from_wy``
    :return: joined DataFrame with ``code`` and ``crawl_date`` columns and
             the index exposed as ``report_date`` (when the reset index
             column is named 'index'); None as soon as one report comes
             back as None
    '''
    res1 = pd.DataFrame()
    for report_type in ['zcfzb', 'lrb', 'xjllb']:
        res = read_data_data_from_wy(code, report_type)
        if res is None:
            # A None result used to crash the next ``.shape``/``.join``
            # call before the trailing None check could run.
            return None
        if res1.shape[0] == 0:
            res1 = res
        else:
            res1 = res1.join(res)

    res1['code'] = code
    res1['crawl_date'] = QA_util_today_str()
    res = res1.reset_index()
    # Same length as the existing columns, so the assignment cannot fail.
    res.columns = ['report_date' if x == 'index' else x for x in res.columns]
    return res
예제 #23
0
def get_interest_rate():
    '''
    Build one table of deposit and loan rates per date from
    ``ts.get_deposit_rate()`` and ``ts.get_loan_rate()``.

    Only fixed-term lump-sum deposit rows and short-term or medium/long-term
    loan rows are kept. Each set is pivoted to one column per term, the
    Chinese term labels are replaced by short codes, the two tables are
    placed side by side and gaps are forward-filled.

    :return: DataFrame with a ``date`` column, the rate columns and
             ``crawl_date``
    '''
    deposit = ts.get_deposit_rate()
    loan = ts.get_loan_rate()

    deposit_mask = [
        kind.startswith('定期存款整存整取') for kind in deposit['deposit_type']
    ]
    deposit_rates = deposit[deposit_mask].pivot_table(values="rate",
                                                      index=['date'],
                                                      columns='deposit_type',
                                                      aggfunc=sum)

    loan_mask = [
        kind.startswith(('短期贷款', '中长期贷款')) for kind in loan['loan_type']
    ]
    loan_rates = loan[loan_mask].pivot_table(values="rate",
                                             index=['date'],
                                             columns='loan_type',
                                             aggfunc=sum)

    # Term label (after stripping the category prefix and brackets) -> code.
    deposit_codes = {
        '一年': 'DOYEAR',
        '三个月': 'DTMON',
        '三年': 'DTYEAR',
        '二年': 'DSYEAR',
        '五年': 'DFYEAR',
        '半年': 'DHYEAR',
    }
    loan_codes = {
        '一至三年': 'SYEAR',
        '三至五年': 'LTMON',
        '五年以上': 'LFYEAR',
        '六个月以内': 'LHYEAR',
        '六个月至一年': 'LOYEAR',
    }

    def _deposit_term(label):
        return label.strip().replace('定期存款整存整取', '').replace(
            '(', '').replace(')', '')

    def _loan_term(label):
        return label.strip().replace('短期贷款', '').replace(
            '中长期贷款', '').replace('(', '').replace(')', '')

    deposit_terms = [_deposit_term(label) for label in deposit_rates.columns]
    deposit_rates.columns = [
        deposit_codes.get(term, term) for term in deposit_terms
    ]

    loan_terms = [_loan_term(label) for label in loan_rates.columns]
    loan_rates.columns = [loan_codes.get(term, term) for term in loan_terms]

    combined = pd.concat([loan_rates, deposit_rates], axis=1)
    res = combined.reset_index().fillna(method='ffill')
    res.columns = ['date' if name == 'index' else name for name in res.columns]
    res['crawl_date'] = QA_util_today_str()
    return res
예제 #24
0
def QA_fetch_index_alpha_adv(code,
                             start="all",
                             end=None,
                             format='pd',
                             collections=DATABASE.index_alpha):
    """Fetch index alpha data and wrap it in a ``QA_DataStruct_Stock_day``.

    :param code: forwarded unchanged to ``QA_fetch_index_alpha``.
    :param start: first date; only the first 10 characters of its string form
        are used. ``'all'`` or ``None`` selects '2005-01-01' through today and
        ignores ``end``.
    :param end: last date (first 10 characters of its string form); defaults
        to ``start`` when omitted.
    :param format: accepted for interface compatibility; not used here.
    :param collections: accepted for interface compatibility; not used here.
    :return: ``QA_DataStruct_Stock_day`` built from the fetched data.
    """
    # The None test has to happen before stringification: str(None) is the
    # literal 'None', which would otherwise be passed on as a date.
    if start is None or str(start)[0:10] == 'all':
        start = '2005-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_index_alpha(code, start, end)
    return QA_DataStruct_Stock_day(data)
def get_stock_report_sina(code, report_year):
    """Assemble the Sina report tables of ``code`` for several years.

    For every entry of ``report_year`` the 'BalanceSheet', 'ProfitStatement'
    and 'CashFlow' tables are requested through ``read_data_from_sina`` and
    concatenated side by side; the per-year frames are then stacked.

    :param code: stock code; forwarded to ``read_data_from_sina`` and written
        to the ``code`` column of the result.
    :param report_year: iterable of years to request.
    :return: DataFrame with ``code`` and ``crawl_date`` columns added and the
        column that ``reset_index`` names 'index' renamed to ``report_date``.
    """
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Connection': 'keep-alive'
    }
    options = webdriver.ChromeOptions()
    for key, value in headers.items():
        options.add_argument('%s="%s"' % (key, value))
    options.add_argument('headless')

    res2 = pd.DataFrame()
    for year in report_year:
        res1 = pd.DataFrame()
        for report_type in ['BalanceSheet', 'ProfitStatement', 'CashFlow']:
            table_name = '{report_type}NewTable0'.format(
                report_type=report_type)
            # NOTE(review): the cash-flow request reuses the profit-statement
            # table name; presumably that is the id used on that page - confirm.
            if report_type == 'CashFlow':
                table_name = 'ProfitStatementNewTable0'
            res = read_data_from_sina(code, year, report_type, table_name,
                                      options)
            res1 = pd.concat([res1, res], axis=1)
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        res2 = pd.concat([res2, res1])

    res2['code'] = code
    res2['crawl_date'] = QA_util_today_str()
    return res2.reset_index().rename(columns={'index': 'report_date'})
예제 #26
0
def QA_SU_save_financial_files():
    """Store the downloaded financial data files in the local database.

    Calls ``download_financialzip()``, then walks ``download_path``. Every
    file whose name starts with 'gpcw' is parsed with ``parse_filelist`` and
    its rows are inserted into ``DATABASE.financial``, unless the collection
    already holds at least 3600 documents for that file's report date.
    Progress is reported with ``print``.
    """
    download_financialzip()
    coll = DATABASE.financial
    coll.create_index([("code", ASCENDING), ("report_date", ASCENDING)],
                      unique=True)
    for item in os.listdir(download_path):
        if not item.startswith('gpcw'):
            print(
                "file ", item,
                " is not start with gpcw , seems not a financial file , ignore!"
            )
            continue

        # The report date is the last 8 characters of the file name stem.
        date = int(item.split('.')[0][-8:])
        print('QUANTAXIS NOW SAVING {}'.format(date))

        # One count query instead of two; count_documents replaces
        # Cursor.count(), which PyMongo 4 removed.
        # NOTE(review): the filter uses the integer date while the rows stored
        # below carry report_date converted by QA_util_date_stamp - confirm
        # that this lookup can actually match stored documents.
        existing = coll.count_documents({'report_date': date})
        if existing >= 3600:
            print('ALL READY IN DATABASE')
            continue

        print(existing)
        data = parse_filelist([item]).reset_index().drop_duplicates(
            subset=['code', 'report_date']).sort_index()
        data["crawl_date"] = QA_util_today_str()
        data = data.assign(report_date=data['report_date'].apply(
            lambda x: QA_util_date_stamp(QA_util_date_int2str(x))))
        data = data.assign(crawl_date=data['crawl_date'].apply(
            lambda x: QA_util_date_stamp(QA_util_date_int2str(x))))
        data = QA_util_to_json_from_pandas(data)

        try:
            coll.insert_many(data, ordered=False)
        except MemoryError:
            coll.insert_many(data, ordered=True)
        except pymongo.errors.BulkWriteError:
            # Presumably duplicate-key rejections from the unique index
            # created above; the remaining rows of the batch are still written.
            pass
        except Exception as error:
            # Keep the run best-effort, but do not hide unexpected failures.
            print('QUANTAXIS FAILED TO SAVE {}: {}'.format(date, error))

    print('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')
예제 #27
0
def QA_fetch_stock_quant_data_adv(code,
                                  start="all",
                                  end=None,
                                  block=True,
                                  format='pd',
                                  collections=DATABASE.stock_quant_data):
    """Fetch the stock quant machine-learning final indicators (V1).

    :param code: forwarded unchanged to ``QA_fetch_stock_quant_data``.
    :param start: first date; only the first 10 characters of its string form
        are used. ``'all'`` or ``None`` selects '2008-01-01' through today and
        ignores ``end``.
    :param end: last date (first 10 characters of its string form); defaults
        to ``start`` when omitted.
    :param block: forwarded unchanged to ``QA_fetch_stock_quant_data``.
    :param format: accepted for interface compatibility; not used here.
    :param collections: accepted for interface compatibility; not used here.
    :return: ``QA_DataStruct_Stock_day`` built from the fetched data.
    """
    # The None test has to happen before stringification: str(None) is the
    # literal 'None', which would otherwise be passed on as a date.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_stock_quant_data(code, start, end, block)
    return QA_DataStruct_Stock_day(data)
예제 #28
0
def ETL_stock_day(codes, start=None, end=None):
    """Build the per-code ``pct`` table of daily stock data for a date range.

    :param codes: forwarded to ``QA_fetch_stock_day_adv``.
    :param start: first date to keep; '2008-01-01' when omitted.
    :param end: last date to keep; ``QA_util_today_str()`` when omitted.
    :return: DataFrame indexed by (date, code) and restricted to the requested
        dates, with +/-inf replaced by 0 and null cells replaced by None.
    """
    start = '2008-01-01' if start is None else start
    end = QA_util_today_str() if end is None else end

    # Labels used to select the requested dates: one 'YYYY-MM-DD' string per
    # calendar day, or a single string when the range collapses to one day.
    if start == end:
        date_keys = str(start)[0:10]
    else:
        calendar_days = pd.Series(pd.date_range(start, end, freq='D'))
        date_keys = calendar_days.apply(lambda day: str(day)[0:10])

    # Data is loaded from an earlier date than `start`; presumably 100 trading
    # days of history for whatever `pct` computes - confirm against the helper.
    history_start = QA_util_get_pre_trade_date(start, 100)
    day_struct = QA_fetch_stock_day_adv(codes, history_start, end)

    # Adjusted columns carry a '_qfq' suffix next to the raw ones.
    qfq_frame = day_struct.to_qfq().data
    qfq_frame.columns = [name + '_qfq' for name in qfq_frame.columns]

    merged = day_struct.data.join(qfq_frame)
    merged = merged.fillna(0)
    merged = merged.reset_index()

    per_code = merged.groupby('code').apply(pct)
    per_code = per_code.reset_index(level=0, drop=True)
    per_code = per_code.reset_index()
    per_code = per_code.set_index(['date', 'code'])

    selected = per_code.loc[date_keys]
    selected = selected.replace([np.inf, -np.inf], 0)
    return selected.where(pd.notnull(selected), None)
예제 #29
0
 def __init__(self):
     """Create ``self.info`` with today's date and three empty status dicts."""
     self.info = {}
     self.info['date'] = QA_util_today_str()
     # Each status entry gets its own fresh dict.
     for status_key in ('train_status', 'test_status', 'rng_status'):
         self.info[status_key] = {}
예제 #30
0
def QA_SU_save_stock_fianacial_momgo_his(start_date=None, end_date=None):
    """Forward to ``save_stock_financial.QA_SU_save_stock_fianacial_momgo``.

    :param start_date: passed through unchanged.
    :param end_date: passed through; when omitted (None) it is resolved with
        ``QA_util_today_str()`` at call time, so a long-running process does
        not keep using the date on which this module was imported.
    :return: whatever the wrapped function returns.
    """
    if end_date is None:
        end_date = QA_util_today_str()
    return save_stock_financial.QA_SU_save_stock_fianacial_momgo(
        start_date, end_date)