Example #1
0
def move_quarterly_raw_datas_to_db(raw_data_path, table_name):
    """Copy per-quarter raw data for every stock into a MongoDB collection.

    Stock codes are gathered from ``get_stock_codes`` for both the '上市' and
    '上櫃' stock types, and time lines from ``get_time_lines`` since 2013.
    For every (stock, time line) pair the raw data is read with
    ``get_raw_data`` and, when it is not ``None``, upserted into the
    collection under the ``content`` field, keyed by ``stock_id`` and
    ``time_line``.

    Args:
        raw_data_path: Prefix to which ``"<year>Q<season>"`` is appended to
            form the first argument passed to ``get_raw_data``.
        table_name: Name of the collection inside the ``DB_TWSE`` database.
    """
    code_list = get_stock_codes(stock_type='上市') + get_stock_codes(
        stock_type='上櫃')
    time_lines = get_time_lines(since={'year': 2013})
    # The target collection does not depend on the loop variables, so it is
    # resolved once instead of once per stored document.
    collection = mongo_client[DB_TWSE][table_name]
    for stock_id in code_list:
        stock_key = str(stock_id)
        for time_line in time_lines:
            print('put ', stock_id, ' time_line = ', time_line)
            year = time_line['year']
            season = time_line.get('season')
            raw_data = get_raw_data(
                raw_data_path + str(year) + "Q" + str(season), stock_key)
            if raw_data is None:
                continue
            collection.find_one_and_update(
                {'$and': [{'stock_id': stock_key}, {'time_line': time_line}]},
                {'$set': {"content": raw_data}},
                upsert=True)
Example #2
0
def move_stock_count_raw_datas_to_db():
    """Copy per-year stock-count raw data for every stock into MongoDB.

    Stock codes are gathered from ``get_stock_codes`` for both the '上市' and
    '上櫃' stock types. Time lines cover 2013 through 2020 with
    ``Offset.YEAR``. For every (stock, time line) pair the raw data is read
    from ``PATH_DIR_RAW_DATA_STOCK_COUNT + <year>`` and, when it is not
    ``None``, upserted into the ``TABLE_STOCK_COUNT`` collection under the
    ``content`` field, keyed by ``stock_id`` and ``time_line``.
    """
    code_list = get_stock_codes(stock_type='上市') + get_stock_codes(
        stock_type='上櫃')
    time_lines = get_time_lines(since={'year': 2013},
                                to={'year': 2020},
                                offset=Offset.YEAR)
    # The target collection does not depend on the loop variables, so it is
    # resolved once instead of once per stored document.
    collection = mongo_client[DB_TWSE][TABLE_STOCK_COUNT]
    for stock_id in code_list:
        stock_key = str(stock_id)
        for time_line in time_lines:
            year = time_line['year']
            raw_data = get_raw_data(PATH_DIR_RAW_DATA_STOCK_COUNT + str(year),
                                    stock_key)
            if raw_data is None:
                continue
            collection.find_one_and_update(
                {'$and': [{'stock_id': stock_key}, {'time_line': time_line}]},
                {'$set': {"content": raw_data}},
                upsert=True)
def _sync_balance_sheet(stock_id, start_year, to_year=None, df_balance_sheet=None):
    """Build, or top up, the combined balance-sheet / shareholder-equity frame.

    Args:
        stock_id: Stock identifier handed to both processors.
        start_year: First year to cover. When no frame is supplied the fetch
            starts one year earlier (``start_year - 1``).
        to_year: Optional last year to cover; ``None`` leaves the upper bound
            to ``get_time_lines`` / the processors.
        df_balance_sheet: Optional existing frame. When given, only periods
            absent from its row index are fetched and merged into it.

    Returns:
        The combined frame, the (possibly extended) supplied frame, or
        ``None`` when no frame was supplied and either processor returned
        ``None``.
    """
    to = {'year': to_year} if to_year is not None else None
    balance_sheet_processor = SimpleBalanceSheetProcessor(stock_id)
    shareholder_equity_processor = ShareholderEquityProcessor(stock_id)

    if df_balance_sheet is None:
        df_balance_statement = balance_sheet_processor.get_data_frames({'year': start_year - 1}, to=to)
        df_shareholder_equity = shareholder_equity_processor.get_data_frames({'year': start_year - 1}, to=to)
        if df_balance_statement is None or df_shareholder_equity is None:
            return None
        df_combine = df_balance_statement.join(df_shareholder_equity, how='outer')
        # Discard periods that have no '每股淨值' value after the outer join.
        indexes = df_combine[df_combine['每股淨值'].isna()].index
        df_combine.drop(indexes, inplace=True)
        print('合併 = ', df_combine)
        return df_combine

    time_lines = get_time_lines(since={'year': start_year}, to=to)
    # DataFrame.get() looks up *columns*, so asking it for a period label
    # reported every period as missing and re-fetched (and duplicated) rows
    # that were already present. Membership is tested against the stringified
    # row index instead, the same way _sync_cash_flow_statement does.
    existing_periods = set(df_balance_sheet.index.map(str))
    dfs_get = []
    for time_line in time_lines:
        row_index = "{}Q{}".format(time_line['year'], time_line['season'])
        if row_index in existing_periods:
            continue
        df_balance = balance_sheet_processor.get_data_frame(time_line['year'], time_line['season'])
        df_shareholder = shareholder_equity_processor.get_data_frame(time_line['year'], time_line['season'])
        # A period is only usable when both sources produced data.
        if df_balance is None or df_shareholder is None:
            continue
        dfs_get.append(df_balance.join(df_shareholder, how='outer'))

    if len(dfs_get) > 0:
        dfs_get.append(df_balance_sheet)
        df_balance_sheet = pd.concat(dfs_get, sort=False).sort_index()
    return df_balance_sheet
Example #4
0
def fetch_simple_balance_sheet_raw_datas(stock_ids, time_lines=None):
    """Fetch and store the simple balance-sheet raw pages for the given stocks.

    Args:
        stock_ids: Stock identifiers forwarded to ``__fetch_datas_and_store2``.
        time_lines: Periods to fetch. Defaults to
            ``get_time_lines(since={'year': 2013})``, evaluated on every call
            rather than once at import time, so the default never goes stale
            and is not a list shared between calls.
    """
    if time_lines is None:
        time_lines = get_time_lines(since={'year': 2013})

    def fetcher(stock_id, year, season):
        """Return the response body for one period, or None when it is empty."""
        # The remote form is queried with ``year - 1911`` (presumably the ROC
        # calendar year -- confirm against the data source).
        result = __simple_balance_sheet_data_fetcher.fetch(
            {"encodeURIComponent": 1, "step": 1, "firstin": 1, "off": 1, "queryName": "co_id",
             "inpuType": "co_id",
             "TYPEK": "all", "isnew": "false", "co_id": stock_id, "year": year - 1911, "season": season})
        # A <font> element whose text is exactly this marker means no data.
        fonts = BeautifulSoup(result.content, 'html.parser').find_all('font')
        has_result = not any(element.get_text() == "查詢無資料" for element in fonts)
        return result.content if has_result else None

    __fetch_datas_and_store2(stock_ids, time_lines, fetcher, __simple_balance_sheet_repository)
    def get_data_frames(self, since, to=None, source_policy=Source.CACHE_ONLY):
        """Concatenate the per-period frames between ``since`` and ``to``.

        Each time line from ``get_time_lines`` is passed to
        ``self.get_data_frame`` together with ``source_policy``; periods for
        which that returns ``None`` are skipped.

        Returns:
            The frames concatenated with ``sort=True``, or ``None`` when no
            period produced a frame.
        """
        collected = []
        for period in get_time_lines(since=since, to=to):
            frame = self.get_data_frame(period.get('year'),
                                        period.get('season'),
                                        source_policy)
            if frame is None:
                continue
            collected.append(frame)

        if not collected:
            return None
        return pd.concat(collected, sort=True)
Example #6
0
    def get_data_frames(self, since, to=None, source_policy=Source.CACHE_ONLY):
        """Build one single-row frame per period and concatenate them.

        Periods are walked from newest to oldest. For seasons after the
        first, every field in ``self._fetch_fields`` is replaced by its
        difference from the previous season's value (presumably because the
        raw figures are year-to-date -- confirm against the data source). If
        the previous season is unavailable the values are left as fetched and
        a diagnostic line is printed. Two derived columns, '業主盈餘現金流'
        and '自由現金流', are then added.

        Args:
            since: Lower bound passed to ``get_time_lines``.
            to: Optional upper bound passed to ``get_time_lines``.
            source_policy: Accepted for interface compatibility; not used by
                this implementation.

        Returns:
            The concatenated frame (rows in newest-to-oldest order), or
            ``None`` when no period produced data.
        """
        time_lines = get_time_lines(since=since, to=to)

        dfs = []
        # Holds the previous season's dict fetched while processing the
        # current period, so the next (older) iteration can reuse it.
        cache_data_dict = None
        for time_line in reversed(time_lines):
            print('In ', time_line)
            year = time_line.get('year')
            season = time_line.get('season')
            if cache_data_dict is None:
                data_dict = self._get_data_dict(self._fetch_fields, year,
                                                season)
            else:
                data_dict = cache_data_dict

            if data_dict is None:
                continue
            if season > 1:
                cache_data_dict = self._get_data_dict(self._fetch_fields, year,
                                                      season - 1)
                if cache_data_dict is None:
                    print('get None value in year ', year, ' season ', season,
                          " data_dict = ", data_dict, " cache_data_dic = ",
                          cache_data_dict)
                else:
                    for key in self._fetch_fields:
                        data_dict[key] = data_dict.get(
                            key, 0) - cache_data_dict.get(key, 0)
            else:
                cache_data_dict = None
            data_dict['業主盈餘現金流'] = data_dict.get('營業活動之淨現金流入', 0) + data_dict.get('取得不動產、廠房及設備', 0)\
                                   + data_dict.get('其他投資活動', 0)
            data_dict['自由現金流'] = data_dict.get(
                '營業活動之淨現金流入', 0) + data_dict.get('投資活動之淨現金流入', 0)
            print(data_dict)
            str_period = "{}Q{}".format(year, season)
            # PeriodIndex(start=..., end=...) was deprecated in pandas 0.24
            # and later removed; period_range is the supported replacement.
            period = pd.Period(str_period, freq='Q')
            period_index = pd.period_range(start=period, end=period, freq='Q')
            dfs.append(
                pd.DataFrame([list(data_dict.values())],
                             columns=list(data_dict.keys()),
                             index=period_index))
        return None if len(dfs) == 0 else pd.concat(dfs, sort=False)
Example #7
0
def fetch_stock_count_raw_datas(stock_ids, since_year=None, to_year=None):
    """Fetch and store the yearly stock-count raw pages for the given stocks.

    Args:
        stock_ids: Stock identifiers forwarded to ``__fetch_datas_and_store2``.
        since_year: First year to fetch; defaults to the current year.
        to_year: Last year to fetch; defaults to the current year.

    The "current year" defaults are resolved on every call. Evaluating
    ``datetime.now()`` in the signature would freeze them at import time and
    leave a long-running process on the wrong year after New Year.
    """
    current_year = datetime.now().year
    if since_year is None:
        since_year = current_year
    if to_year is None:
        to_year = current_year
    time_lines = get_time_lines(since={'year': since_year}, to={'year': to_year}, offset=Offset.YEAR)

    def fetcher(stock_id, year):
        """Return the raw response body for one stock and year."""
        # The remote form is queried with ``year - 1911`` (presumably the ROC
        # calendar year -- confirm against the data source).
        result = __stock_count_fetcher.fetch(
            {'encodeURIComponent': 1, 'step': 1, 'firstin': 1, 'off': 1, 'queryName': 'co_id',
             't05st29_c_ifrs': 'N',
             't05st30_c_ifrs': 'N', 'inpuType': 'co_id', 'TYPEK': 'all', 'isnew': 'false', 'co_id': stock_id,
             'year': (year - 1911)}
        )
        return result.content

    __fetch_datas_and_store2(stock_ids, time_lines, fetcher, __stock_count_repository)
Example #8
0
def get_predict_roe_by_relative(stock_id):
    """Estimate this year's ROE by scaling last year's ROE.

    The latest period of the current year for which ``_get_for_times``
    returns a value is located first. The estimate is then
    ``roe_last_year * (roe_current / roe_last_year_relative)``, where
    ``roe_current`` covers this year's periods up to that one and
    ``roe_last_year_relative`` covers last year from season 1 up to the same
    season.

    Returns:
        The estimate, or ``None`` when no period of the current year is
        available or any of the three inputs is ``None``.
    """
    now_year = datetime.now().year
    time_lines = get_time_lines(since={'year': now_year, 'season': 1})

    # Scan from the newest period backwards for the first one with data.
    last_time_available = None
    for candidate in reversed(time_lines):
        if _get_for_times(stock_id, [candidate]) is not None:
            last_time_available = candidate
            break
    print('last_time_available = ', last_time_available)
    if last_time_available is None:
        return None

    end_position = time_lines.index(last_time_available) + 1
    roe_current = _get_for_times(stock_id, time_lines[0:end_position])

    previous_year = now_year - 1
    previous_year_lines = get_time_lines(
        since={'year': previous_year, 'season': 1},
        to={'year': previous_year, 'season': last_time_available.get('season')})
    roe_last_year_relative = _get_for_times(stock_id, previous_year_lines)
    roe_last_year = get_roe_in_year(stock_id, previous_year)
    print('roe_current = ', roe_current, ' roe_last_year_relative = ', roe_last_year_relative, ' roe_last_year = ', roe_last_year)

    inputs = (roe_current, roe_last_year_relative, roe_last_year)
    if any(value is None for value in inputs):
        return None

    roe_relative = roe_last_year * (roe_current / roe_last_year_relative)

    print('roe_relative = ', roe_relative)
    return roe_relative
Example #9
0
def fetch_balance_sheet_raw_datas(stock_ids, time_lines=None):
    """Fetch and store the full balance-sheet raw pages for the given stocks.

    Args:
        stock_ids: Stock identifiers forwarded to ``__fetch_datas_and_store2``.
        time_lines: Periods to fetch. Defaults to
            ``get_time_lines(since={'year': 2013})``, evaluated on every call
            rather than once at import time, so the default never goes stale
            and is not a list shared between calls.
    """
    if time_lines is None:
        time_lines = get_time_lines(since={'year': 2013})

    def fetcher(stock_id, year, season):
        """Return the response body for one period, or None when it is empty."""
        # The remote form is queried with ``year - 1911`` (presumably the ROC
        # calendar year -- confirm against the data source).
        result = __balance_sheet_data_fetcher.fetch(
            {"encodeURIComponent": 1, "step": 1, "firstin": 1, "off": 1, "queryName": "co_id",
             "inpuType": "co_id",
             "TYPEK": "all", "isnew": "false", "co_id": stock_id, "year": year - 1911, "season": season})
        inputs = BeautifulSoup(result.content, 'html.parser').find_all('input')
        # Tag.get() yields None for an <input> without a ``type`` attribute,
        # whereas indexing would raise KeyError and abort the whole fetch.
        need_to_get_next = any(field.get('type') == 'button' for field in inputs)
        if need_to_get_next:
            # A button in the first response triggers a second, "step 2"
            # request whose body replaces the first one.
            result = __balance_sheet_data_fetcher.fetch(
                {"encodeURIComponent": 1, "step": 2, "firstin": 1, "TYPEK": "sii", "co_id": stock_id,
                 "year": year - 1911, "season": season})
        # A <font> element whose text is exactly this marker means no data.
        fonts = BeautifulSoup(result.content, 'html.parser').find_all('font')
        has_result = not any(element.get_text() == "查無所需資料!" for element in fonts)
        return result.content if has_result else None

    __fetch_datas_and_store2(stock_ids, time_lines, fetcher, __full_balance_sheet_repository)
Example #10
0
def fetch_cash_flow_raw_datas(stock_ids, time_lines=None):
    """Fetch and store the cash-flow statement raw pages for the given stocks.

    Args:
        stock_ids: Stock identifiers forwarded to ``__fetch_datas_and_store2``.
        time_lines: Periods to fetch. Defaults to
            ``get_time_lines(since={'year': 2013})``, evaluated on every call
            rather than once at import time, so the default never goes stale
            and is not a list shared between calls.
    """
    if time_lines is None:
        time_lines = get_time_lines(since={'year': 2013})

    # Either of these <font> texts marks a response that carries no data.
    no_data_markers = ("查詢無資料", '查無所需資料!')

    def fetcher(stock_id, year, season):
        """Return the response body for one period, or None when it is empty."""
        # The remote form is queried with ``year - 1911`` (presumably the ROC
        # calendar year -- confirm against the data source).
        result = __cash_flow_fetcher.fetch(
            {'encodeURIComponent': 1, 'step': 1, 'firstin': 1, 'off': 1, 'queryName': 'co_id', 'inpuType': 'co_id',
             'TYPEK': 'all', 'isnew': 'false', 'co_id': stock_id, 'year': year - 1911,
             'season': season}
        )
        inputs_tag = BeautifulSoup(result.content, 'html.parser').find_all('input')
        # Tag.get() yields None for an <input> without a ``type`` attribute,
        # whereas indexing would raise KeyError and abort the whole fetch.
        need_to_get_next = any(field.get('type') == 'button' for field in inputs_tag)
        if need_to_get_next:
            # A button in the first response triggers a second, "step 2"
            # request whose body replaces the first one.
            result = __cash_flow_fetcher.fetch(
                {"encodeURIComponent": 1, "step": 2, "firstin": 1, "TYPEK": "sii", "co_id": stock_id,
                 "year": year - 1911, "season": season}
            )
        fonts = BeautifulSoup(result.content, 'html.parser').find_all('font')
        has_result = not any(element.get_text() in no_data_markers for element in fonts)
        return result.content if has_result else None

    __fetch_datas_and_store2(stock_ids, time_lines, fetcher, __cash_flow_repository)
Example #11
0
def fetch_shareholder_equity_raw_datas(stock_ids, time_lines=None):
    """Fetch and store the shareholder-equity raw pages for the given stocks.

    Args:
        stock_ids: Stock identifiers forwarded to ``__fetch_datas_and_store2``.
        time_lines: Periods to fetch. Defaults to
            ``get_time_lines(since={'year': 2013})``, evaluated on every call
            rather than once at import time, so the default never goes stale
            and is not a list shared between calls.
    """
    if time_lines is None:
        time_lines = get_time_lines(since={'year': 2013})

    def has_data(parser):
        """Return True unless a <font> element carries the no-data marker."""
        return not any(element.get_text() == "查無資料!" for element in parser.find_all('font'))

    def fetcher(stock_id, year, season):
        """Return the response body for one period, or None when it is empty."""
        # The remote form is queried with ``year - 1911`` (presumably the ROC
        # calendar year -- confirm against the data source).
        result = __shareholder_equity_fetcher.fetch(
            {'encodeURIComponent': 1, 'step': 1, 'firstin': 1, 'off': 1, 'queryName': 'co_id', 'inpuType': 'co_id',
             'TYPEK': 'all', 'isnew': 'false', 'co_id': stock_id, 'year': year - 1911,
             'season': season})
        parser = BeautifulSoup(result.content, 'html.parser')
        has_result = has_data(parser)
        has_button = len(parser.find_all('input')) > 0
        if has_result and has_button:
            # Any <input> in a non-empty first response triggers a second,
            # "step 2" request whose body replaces the first one.
            result = __shareholder_equity_fetcher.fetch(
                {'encodeURIComponent': 1, 'TYPEK': 'sii', 'step': 2, 'year': year - 1911, 'season': season,
                 'co_id': stock_id, 'firstin': 1})
            has_result = has_data(BeautifulSoup(result.content, 'html.parser'))
        print('has result = ', has_result)
        return result.content if has_result else None

    __fetch_datas_and_store2(stock_ids, time_lines, fetcher, __shareholder_repository)
def _sync_profit_statement(stock_id, start_year, to_year=None, df_profit_statement=None):
    """Build, or top up, the income-statement frame for a stock.

    Args:
        stock_id: Stock identifier handed to the processor.
        start_year: First year to cover. When no frame is supplied the fetch
            starts one year earlier (``start_year - 1``).
        to_year: Optional last year to cover; ``None`` leaves the upper bound
            to ``get_time_lines`` / the processor.
        df_profit_statement: Optional existing frame. When given, only
            periods absent from its row index are fetched and merged into it.

    Returns:
        The fetched frame, or the (possibly extended) supplied frame. May be
        ``None`` if nothing was supplied and the processor returned ``None``.
    """
    to = {'year': to_year} if to_year is not None else None
    income_statement_processor = SimpleIncomeStatementProcessor()
    if df_profit_statement is None:
        return income_statement_processor.get_data_frames(stock_id, {'year': start_year - 1}, to)

    time_lines = get_time_lines(since={'year': start_year}, to=to)
    # DataFrame.get() looks up *columns*, so asking it for a period label
    # reported every period as missing and re-fetched (and duplicated) rows
    # that were already present. Membership is tested against the stringified
    # row index instead, the same way _sync_cash_flow_statement does.
    existing_periods = set(df_profit_statement.index.map(str))
    dfs_get = []
    for time_line in time_lines:
        row_index = "{}Q{}".format(time_line['year'], time_line['season'])
        if row_index in existing_periods:
            continue
        df_statement = income_statement_processor.get_data_frame(stock_id, time_line['year'], time_line['season'])
        if df_statement is not None:
            dfs_get.append(df_statement)

    if len(dfs_get) > 0:
        dfs_get.append(df_profit_statement)
        df_profit_statement = pd.concat(dfs_get, sort=False).sort_index()
    return df_profit_statement
def _sync_cash_flow_statement(stock_id, start_year, to_year=None, df_cash_flow_statement=None):
    """Build, or top up, the cash-flow statement frame for a stock.

    Without an existing frame, everything from ``start_year - 1`` onward is
    requested from the processor. With one, only the periods from
    ``start_year`` whose ``"<year>Q<season>"`` label is absent from the
    frame's stringified index are fetched, appended and the result re-sorted
    by index.

    Returns:
        The fetched frame, or the (possibly extended) supplied frame.
    """
    cash_flow_processor = CashFlowStatementProcessor(stock_id)
    to = None if to_year is None else {'year': to_year}

    if df_cash_flow_statement is None:
        return cash_flow_processor.get_data_frames({'year': start_year - 1}, to=to)

    time_lines = get_time_lines(since={'year': start_year}, to=to)
    known_labels = df_cash_flow_statement.index.map(str).values.tolist()
    missing_frames = []
    for period in time_lines:
        label = "{}Q{}".format(period['year'], period['season'])
        if label in known_labels:
            continue
        frame = cash_flow_processor.get_data_frame(period['year'], period['season'])
        if frame is not None:
            missing_frames.append(frame)

    if not missing_frames:
        return df_cash_flow_statement

    missing_frames.append(df_cash_flow_statement)
    return pd.concat(missing_frames, sort=False).sort_index()
    def get_data_frames(self,
                        stock_id,
                        since,
                        to=None,
                        source_policy=Source.CACHE_ONLY):
        """Build one single-row frame per period and concatenate them.

        Each period's values are reduced by the previous available period's
        values within the same year (the running reference is cleared after
        season 4), presumably because the raw figures are year-to-date --
        confirm against the data source. When the range starts after season
        1, the season just before it is fetched to seed that reference.

        Args:
            stock_id: Stock identifier passed to ``self._get_data_dict``.
            since: Lower bound passed to ``get_time_lines``.
            to: Optional upper bound passed to ``get_time_lines``.
            source_policy: Accepted for interface compatibility; not used by
                this implementation.

        Returns:
            The concatenated frame, or ``None`` when there are no periods or
            none produced data.
        """
        time_lines = get_time_lines(since=since, to=to)
        # Guard the time_lines[0] access below against an empty range.
        if len(time_lines) == 0:
            return None

        year = time_lines[0].get('year')
        season = time_lines[0].get('season')
        last_result = self._get_data_dict(stock_id, year, season -
                                          1) if season > 1 else None
        dfs = []

        for time_line in time_lines:
            year = time_line.get('year')
            season = time_line.get('season')
            data_dict = self._get_data_dict(stock_id, year, season)
            if data_dict is None:
                # A missing fourth season must still clear the reference,
                # otherwise next year's first season would be reduced by a
                # value from the previous year.
                if season == 4:
                    last_result = None
                continue

            if last_result is not None:
                result = {
                    k: (v - last_result[k])
                    for (k, v) in data_dict.items()
                }
            else:
                result = data_dict
            print('result = ', result, ' last_result', last_result)

            last_result = None if season == 4 else data_dict
            str_period = "{}Q{}".format(year, season)
            # PeriodIndex(start=..., end=...) was deprecated in pandas 0.24
            # and later removed; period_range is the supported replacement.
            period = pd.Period(str_period, freq='Q')
            period_index = pd.period_range(start=period, end=period, freq='Q')
            dfs.append(
                pd.DataFrame([list(result.values())],
                             columns=list(result.keys()),
                             index=period_index))

        return pd.concat(dfs) if len(dfs) > 0 else None
    def get_data_frames(self, since, to=None, source_policy=Source.CACHE_ONLY):
        """Build one single-row, MultiIndex-column frame per period.

        For every period after the first available one, each entry's
        '期初餘額' is overwritten with the previous period's '期末餘額'. When
        the range starts after season 1, the season just before it is fetched
        to seed that previous value.

        Args:
            since: Lower bound passed to ``get_time_lines``.
            to: Optional upper bound passed to ``get_time_lines``.
            source_policy: Accepted for interface compatibility; not used by
                this implementation.

        Returns:
            The concatenated frame, or ``None`` when no period produced data.
        """
        time_lines = get_time_lines(since=since, to=to)
        dfs = []
        column_index = pd.MultiIndex.from_product(
            [self.fields_to_get, self.items_to_get], names=['first', 'second'])
        print(column_index)

        # Seed with the season *before* the first requested one. Seeding with
        # the first period itself made that period's '期初餘額' equal to its
        # own '期末餘額'.
        last_result = None
        if len(time_lines) > 0 and time_lines[0].get('season') > 1:
            last_result = self._get_data_dict(
                time_lines[0].get('year'), time_lines[0].get('season') - 1)

        for time_line in time_lines:
            result = self._get_data_dict(time_line.get('year'),
                                         time_line.get('season'))
            if result is None:
                continue
            if last_result is not None:
                for key in result.keys():
                    result[key]['期初餘額'] = last_result[key]['期末餘額']
            last_result = result
            print(result)
            str_period = "{}Q{}".format(time_line.get('year'),
                                        time_line.get('season'))
            # PeriodIndex(start=..., end=...) was deprecated in pandas 0.24
            # and later removed; period_range is the supported replacement.
            period = pd.Period(str_period, freq='Q')
            period_index = pd.period_range(start=period, end=period, freq='Q')
            # Flatten the nested dicts in iteration order; the order is
            # assumed to line up with column_index -- verify against
            # _get_data_dict.
            data_list = []
            for inner in result.values():
                data_list.extend(inner.values())
            print(data_list)
            dfs.append(
                pd.DataFrame([data_list],
                             columns=column_index,
                             index=period_index))

        print(self.__tag, "dfs = ", dfs)
        return pd.concat(dfs) if len(dfs) > 0 else None
Example #16
0
def get_roe_in_year(stock_id, year):
    """Return the value of ``_get_for_times`` over seasons 1-4 of ``year``.

    The result is also printed for diagnostics before being returned.
    """
    first_season = {'year': year, 'season': 1}
    last_season = {'year': year, 'season': 4}
    full_year = get_time_lines(since=first_season, to=last_season)
    roe = _get_for_times(stock_id, time_lines=full_year)
    print("get in year ", year, ":", roe)
    return roe