Esempio n. 1
0
def move_stock_count_raw_datas_to_db():
    """Upsert yearly stock-count raw data into the TABLE_STOCK_COUNT collection.

    Iterates over every '上市' and '上櫃' stock code and every time line
    returned by ``get_time_lines`` (since 2013, to 2020, yearly offset).
    For each pair the raw data is read with ``get_raw_data``; pairs for
    which it returns ``None`` are skipped. Everything else is written as
    the ``content`` field of the document matching ``stock_id`` and
    ``time_line``, creating that document when it does not exist yet.
    """
    listed_codes = get_stock_codes(stock_type='上市')
    otc_codes = get_stock_codes(stock_type='上櫃')
    all_codes = listed_codes + otc_codes
    yearly_time_lines = get_time_lines(since={'year': 2013},
                                       to={'year': 2020},
                                       offset=Offset.YEAR)
    database = mongo_client[DB_TWSE]
    for code in all_codes:
        for time_line in yearly_time_lines:
            # Raw files live in one directory per year, named by stock id.
            year_dir = PATH_DIR_RAW_DATA_STOCK_COUNT + str(time_line['year'])
            content = get_raw_data(year_dir, str(code))
            if content is None:
                continue
            query = {
                '$and': [
                    {'stock_id': str(code)},
                    {'time_line': time_line},
                ]
            }
            change = {'$set': {"content": content}}
            database[TABLE_STOCK_COUNT].find_one_and_update(
                query, change, upsert=True)
Esempio n. 2
0
    def test_integrate(self):
        """Manually drive a dividend-policy sync over the filtered stock codes.

        Gathers the '上市' and '上櫃' code lists, removes every code that
        appears in ``excluded_codes``, prints the remaining codes and hands
        them to ``sync_statements`` with ``Option.DIVIDEND_POLICY``.
        """
        stock_code_list = get_stock_codes(stock_type='上市')
        stock_code_list += get_stock_codes(stock_type='上櫃')

        # Codes left out of the sync (presumably depositary receipts, going
        # by the original ``dr_list`` name -- confirm with the author).
        excluded_codes = [
            1262, 9103, 910322, 910482, 9105, 910708, 910861, 9110, 911608,
            911616, 911619, 911622, 911868, 912000, 912398, 9136, 9157, 9188,
            911613,
        ]
        stock_code_list = [
            code for code in stock_code_list if code not in excluded_codes
        ]
        print('stock_code = ', stock_code_list)

        sync_statements(
            stock_code_list,
            times_to_retry=2,
            break_after_retry=False,
            option=Option.DIVIDEND_POLICY,
            isSync=False,
        )
Esempio n. 3
0
def move_quarterly_raw_datas_to_db(raw_data_path, table_name):
    """Upsert per-quarter raw data files into a MongoDB collection.

    Args:
        raw_data_path: Path prefix; ``<year>Q<season>`` is appended to it to
            form the location passed to ``get_raw_data``.
        table_name: Name of the collection in ``DB_TWSE`` to write to.

    For every '上市' and '上櫃' stock code and every time line returned by
    ``get_time_lines(since={'year': 2013})``, the raw data is loaded and,
    unless ``get_raw_data`` returned ``None``, stored as the ``content``
    field of the document matching ``stock_id`` and ``time_line`` (the
    document is created when missing).
    """
    listed_codes = get_stock_codes(stock_type='上市')
    otc_codes = get_stock_codes(stock_type='上櫃')
    all_codes = listed_codes + otc_codes
    quarterly_time_lines = get_time_lines(since={'year': 2013})
    database = mongo_client[DB_TWSE]
    for code in all_codes:
        for time_line in quarterly_time_lines:
            print('put ', code, ' time_line = ', time_line)
            year = time_line['year']
            # NOTE(review): ``season`` is None when the key is absent, which
            # yields a "...QNone" location -- confirm time lines always
            # carry a season here.
            season = time_line.get('season')
            location = raw_data_path + str(year) + "Q" + str(season)
            content = get_raw_data(location, str(code))
            if content is None:
                continue
            query = {
                '$and': [
                    {'stock_id': str(code)},
                    {'time_line': time_line},
                ]
            }
            change = {'$set': {"content": content}}
            database[table_name].find_one_and_update(
                query, change, upsert=True)
Esempio n. 4
0
 def test_parse_dividend_policy2_raw_data(self):
     """Feed every stock's saved dividend-policy raw data to the parser.

     For each '上市' and '上櫃' stock code, loads
     ``raw_datas/dividend_policies/dividend_policy_<code>`` through
     ``self.get_raw_data`` and passes it to a fresh
     ``DividendPolicyProcessor2``'s ``_parse_raw_data``.
     """
     listed_codes = get_stock_codes(stock_type='上市')
     otc_codes = get_stock_codes(stock_type='上櫃')
     for code in listed_codes + otc_codes:
         print('parse ', code)
         payload = self.get_raw_data('raw_datas/dividend_policies',
                                     "dividend_policy_" + str(code))
         # A new processor is created per stock, as in the original flow.
         processor = DividendPolicyProcessor2()
         processor._parse_raw_data(str(code), payload)
Esempio n. 5
0
def move_dividend_policy_raw_datas_to_db():
    """Upsert dividend-policy raw data into the TABLE_DIVIDEND_POLICY collection.

    For every '上市' and '上櫃' stock code, reads the raw data from
    ``PATH_DIR_RAW_DATA_DIVIDEND_POLICY`` via ``get_raw_data``. Codes for
    which it returns ``None`` are skipped; otherwise the data becomes the
    ``content`` field of the document matching ``stock_id`` (created when
    it does not exist).
    """
    listed_codes = get_stock_codes(stock_type='上市')
    otc_codes = get_stock_codes(stock_type='上櫃')
    database = mongo_client[DB_TWSE]
    for code in listed_codes + otc_codes:
        content = get_raw_data(PATH_DIR_RAW_DATA_DIVIDEND_POLICY, str(code))
        if content is None:
            continue
        query = {'$and': [{'stock_id': str(code)}]}
        change = {'$set': {"content": content}}
        database[TABLE_DIVIDEND_POLICY].find_one_and_update(
            query, change, upsert=True)
Esempio n. 6
0
                }]}, {'$set': {
                    "content": raw_data
                }},
                upsert=True)


def get_prices():
    """Fetch one day's price table and save it as an Excel workbook.

    Asks a ``Crawler`` for the data of ``(2021, 9, 7)`` (presumably a
    year/month/day tuple -- confirm against ``Crawler.get_data``) and writes
    the resulting DataFrame to the path returned by
    ``gen_output_path('data', 'prices.xlsx')``.
    """
    crawler = Crawler()
    df = crawler.get_data((2021, 9, 7))
    # The ExcelWriter context manager saves and closes the workbook on exit.
    # Calling writer.close() inside the block would close it a second time
    # when the block ends, so no explicit close is made here.
    with pd.ExcelWriter(gen_output_path('data', 'prices.xlsx')) as writer:
        df.to_excel(writer)


if __name__ == "__main__":
    # Script entry point: sync statements and performance for every stock
    # code, refresh the price workbook, then generate predictions from the
    # closing-price column.
    logger.info('start')
    twse_code_list = get_stock_codes(stock_type='上市')
    tpex_code_list = get_stock_codes(stock_type='上櫃')
    sync_statements(twse_code_list)
    sync_statements(tpex_code_list)
    sync_performance(twse_code_list + tpex_code_list)
    get_prices()
    # The ``with`` statement closes the file on exit, so no explicit
    # file.close() is needed inside the block.
    with open(gen_output_path('data', 'prices.xlsx'), 'rb') as file:
        df = pd.read_excel(file)
    # '收盤價' is the closing-price column of the workbook.
    prices = df.loc[:, '收盤價']
    # The code lists are fetched again rather than reusing the ones above,
    # matching the original call sequence.
    errors = generate_predictions(
        prices,
        get_stock_codes(stock_type='上市') + get_stock_codes(stock_type='上櫃'))
    print('test_get_prediction errors = ', errors)
Esempio n. 7
0
 def __init__(self):
     """Create the TWSE/TPEx price transformers and load both stock-code lists."""
     self.__twsePriceTransformer = TWSEPriceMeasurementTransformer()
     self.__tpexPriceTransformer = TPEXPriceMeasurementTransformer()
     # Imported inside the method rather than at module level (possibly to
     # avoid an import cycle -- confirm before moving it).
     from evaluation_utils import get_stock_codes
     listed_codes = get_stock_codes(stock_type='上市')
     self.list_twse = listed_codes
     otc_codes = get_stock_codes(stock_type='上櫃')
     self.list_tpex = otc_codes