Ejemplo n.º 1
0
 def __init__(self, stock_id):
     """Set up the processor for ``stock_id``.

     Forwards ``stock_id`` to the parent initialiser, then records the log
     tag, the SHARE_HOLDER repository handle and the names of the items and
     fields this processor extracts.
     """
     super().__init__(stock_id)
     self.__tag = "ShareholderEquityProcessor"
     collection_meta = MongoDBMeta.SHARE_HOLDER
     self.__repository = MongoDBRepository(collection_meta)
     # Items to extract: opening balance and closing balance.
     self.items_to_get = (
         '期初餘額',
         '期末餘額',
     )
     # Fields to extract: total equity.
     self.fields_to_get = (
         '權益總額',
     )
Ejemplo n.º 2
0
 def test_sync_performance(self):
     """Sync performance data for stock 2841, read it back and print it."""
     stock_id = 2841
     _sync_performance(stock_id)
     performance_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_PERFORMANCE)
     stored = performance_repository.get_data(stock_id)
     # Dump the stored object and both of its axes for manual inspection.
     print(stored)
     print('index = ', stored.index)
     print('columns = ', stored.columns)
Ejemplo n.º 3
0
class StockCountProcessor:
    """Extract yearly share counts from raw HTML kept in the STOCK_COUNT repository."""

    def __init__(self):
        self.__repository = MongoDBRepository(MongoDBMeta.STOCK_COUNT)

    def get_stock_count(self, stock_id, year):
        """Return the share count of ``stock_id`` for ``year``.

        The raw page is read from the repository; when it is absent,
        ``fetch_stock_count_raw_data`` is called once and the repository is
        queried again.

        Returns:
            * ``None`` when no raw data is available or the page contains no
              table matching ``has_table_width_no_class``;
            * the integer in the fourth cell of the first row whose second
              cell is '合計' (total);
            * ``0`` when the table has no such row.
        """
        raw_data = self.__repository.get_data(stock_id, {'year': year})
        if raw_data is None:
            fetch_stock_count_raw_data(stock_id, year, year)
            raw_data = self.__repository.get_data(stock_id, {'year': year})
        if raw_data is None:
            return None

        bs = BeautifulSoup(raw_data, 'html.parser')
        tables = bs.find_all(has_table_width_no_class)
        if not tables:
            return None

        for row in tables[0].find_all('tr'):
            # Normalise each cell: trim, drop inner spaces and thousands separators.
            cells = [
                cell.get_text().strip().replace(" ", "").replace(",", "")
                for cell in row.find_all('td')
            ]
            print(cells)

            if len(cells) > 3 and cells[1] == '合計':
                return int(cells[3])

        return 0

    def get_data_frame(self, stock_id, since, to=None):
        """Return a DataFrame of yearly share counts indexed by yearly periods.

        Args:
            stock_id: stock identifier passed through to ``get_stock_count``.
            since: first year to query.
            to: last year to query (inclusive). When ``None`` or earlier than
                ``since`` the current year is used.

        Returns:
            A DataFrame with a single '股數' (share count) column. Leading
            years without data are skipped; collection stops at the first
            missing year after data has been found. When no year has data the
            frame is empty.
        """
        if to is None or to < since:
            to = datetime.now().year

        stocks = []
        start_year = since
        end_year = since
        for year in range(since, to + 1):
            stock_count = self.get_stock_count(stock_id, year)
            print("StockCountProcessor year = ", year, " stocks = ",
                  stock_count)

            if stock_count is None:
                if not stocks:
                    # Still in the leading gap: move the start forward.
                    start_year = year + 1
                    continue
                # A gap after real data ends the contiguous range.
                break

            stocks.append(stock_count)
            end_year = year

        # ``pd.PeriodIndex(start=..., end=...)`` is no longer accepted by
        # pandas; ``period_range`` builds the same yearly index.
        period_index = pd.period_range(start=pd.Period(start_year, freq='Y'),
                                       end=pd.Period(end_year, freq='Y'),
                                       freq='Y')
        return pd.DataFrame(data={'股數': stocks}, index=period_index)
Ejemplo n.º 4
0
    def test_fetch_data_utils(self):
        """Fetch shareholder-equity raw data for one stock/quarter and check it was stored.

        The triple-quoted blocks in the body are disabled scenarios kept for
        manual runs; only the shareholder-equity scenario is active.
        """
        '''
        stock_code_list = get_stock_codes(stock_type='上市')
        fetch_twse_price_measurement_raw_datas(stock_code_list[0: 1])

        tpex_stock_code_list = get_stock_codes(stock_type='上櫃')
        fetch_tpex_price_measurement_raw_datas(tpex_stock_code_list[0:1])
        result = MongoDBRepository(MongoDBMeta.TPEX_PRICE_MEASUREMENT).get_data(stock_code_list[0])
        self.assertIsNotNone(result)
        '''
        '''
        fetch_dividend_policy_raw_datas(2884)
        result = MongoDBRepository(MongoDBMeta.DIVIDEND_POLICY).get_data(2884)
        self.assertIsNotNone(result)
        '''
        # '''
        # Read back the same stock that was fetched; the lookup previously
        # used a different stock id, so the assertion did not cover the fetch.
        stock_id = 2884
        fetch_shareholder_equity_raw_data(stock_id, 2020, 3)
        result = MongoDBRepository(MongoDBMeta.SHARE_HOLDER).get_data(stock_id, {'year': 2020, 'season': 3})
        self.assertIsNotNone(result)
        # '''
        '''
        fetch_simple_balance_sheet_raw_data(2884, 2020, 3)
        result = MongoDBRepository(MongoDBMeta.SIMPLE_BALANCE_SHEET).get_data(2884, {'year': 2020, 'season': 3})
        self.assertIsNotNone(result)
        '''
        '''
        fetch_balance_sheet_raw_data(2884, 2020, 3)
        result = MongoDBRepository(MongoDBMeta.FULL_BALANCE_SHEET).get_data(2884, {'year': 2020, 'season': 3})
        self.assertIsNotNone(result)
        '''
        '''
Ejemplo n.º 5
0
class TWSEPriceMeasurementTransformer:
    """Turn a stored TWSE price-measurement payload into a yearly DataFrame."""

    def __init__(self):
        self.__in_repository = MongoDBRepository(MongoDBMeta.TWSE_PRICE_MEASUREMENT)
        self.__out_repository = _data_frame_repository

    def transform_to_dataframe(self, stock_id):
        """Build, store and return the price DataFrame for ``stock_id``.

        Returns ``None`` when the input repository has no payload for the
        stock or the payload's 'stat' entry is not 'OK'. Otherwise every row
        of ``payload['data']`` becomes one DataFrame row indexed by a
        ``pd.Period`` built from the row's first cell plus 1911; the frame is
        written to the output repository before being returned.
        """
        payload = self.__in_repository.get_data(stock_id)
        print('content = ', payload)
        if payload is None:
            return None
        if payload['stat'] != 'OK':
            return None

        _logger.info("TWSEPriceMeasurementTransformer transform " + str(stock_id))

        integer_positions = (1, 2, 3)
        float_positions = (4, 6, 8)
        periods = []
        records = []
        for raw_row in payload['data']:
            # Stringify every cell and drop thousands separators.
            cells = [str(cell).replace(',', '') for cell in raw_row]
            for position in integer_positions:
                cells[position] = int(cells[position])
            for position in float_positions:
                cells[position] = float(cells[position])
            periods.append(pd.Period(value=str(int(cells[0]) + 1911)))
            # The first cell became the index; the rest are the columns.
            records.append(cells[1:])

        column_labels = ['成交股數', '成交金額', '成交筆數', '最高價', '日期', '最低價', '日期', '收盤平均價']
        data_frame = pd.DataFrame(records, index=periods, columns=column_labels)
        print(data_frame)
        self.__out_repository.put_data(stock_id, data_frame)
        return data_frame
Ejemplo n.º 6
0
 def test_sync_statements(self):
     """Re-sync the three statement DataFrames of stock 2841 and print before/after.

     For each statement the currently stored frame is loaded and handed to
     the matching ``_sync_*`` helper; both frames are printed for manual
     comparison.
     """
     stock_id = 2841
     first_year = 2013

     # Cash-flow statement.
     cash_flow_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_CASH_FLOW)
     cash_flow_before = cash_flow_repository.get_data(stock_id)
     cash_flow_after = _sync_cash_flow_statement(stock_id, first_year, to_year=2021,
                                                 df_cash_flow_statement=cash_flow_before)
     print('before = ', cash_flow_before)
     print('after = ', cash_flow_after)

     # Profit statement.
     profit_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_PROFIT_STATEMENT)
     profit_before = profit_repository.get_data(stock_id)
     profit_after = _sync_profit_statement(stock_id, first_year,
                                           df_profit_statement=profit_before)
     print('before = ', profit_before)
     print('after = ', profit_after)

     # Balance sheet.
     balance_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_BALANCE_SHEET)
     balance_before = balance_repository.get_data(stock_id)
     balance_after = _sync_balance_sheet(stock_id, first_year, 2019,
                                         df_balance_sheet=balance_before)
     print('before = ', balance_before)
     print('after = ', balance_after)
Ejemplo n.º 7
0
class TPEXPriceMeasurementTransformer:
    """Turn a stored TPEX price-measurement HTML page into a yearly DataFrame."""

    def __init__(self):
        self.__in_repository = MongoDBRepository(MongoDBMeta.TPEX_PRICE_MEASUREMENT)
        self.__out_repository = _data_frame_repository

    def transform_to_dataframe(self, stock_id):
        """Build, store and return the price DataFrame for ``stock_id``.

        Only table rows containing a ``td`` with class
        ``page-table-body-center`` are used. The first cell plus 1911 becomes
        the row's ``pd.Period`` index; cells 1-3 are multiplied by 1000.

        Returns:
            The DataFrame (also written to the output repository), or
            ``None`` when no record is stored, the expected table is missing,
            or parsing fails (the failure is logged).
        """
        record = self.__in_repository.get_data(stock_id)
        # The message previously named the TWSE transformer.
        _logger.info("TPEXPriceMeasurementTransformer transform " + str(stock_id))
        if record is None:
            return None

        try:
            soup = BeautifulSoup(record, 'html.parser')
            table = soup.find('table', attrs={"class": "page-table-board"})
            if table is None:
                _logger.error("no page-table-board table found for " + str(stock_id))
                return None

            rows = []
            indexes = []
            for tr in table.find_all('tr'):
                if tr.find('td', attrs={"class": "page-table-body-center"}) is None:
                    continue
                # get_text() also copes with cells that wrap their text in
                # child tags, where ``.string`` would be None.
                row = [td.get_text().replace(',', '') for td in tr.find_all('td')]
                row[0] = int(row[0]) + 1911
                row[1] = int(row[1]) * 1000
                row[2] = int(row[2]) * 1000
                row[3] = int(row[3]) * 1000
                row[4] = float(row[4])
                row[6] = float(row[6])
                row[8] = float(row[8])
                indexes.append(pd.Period(str(row[0])))
                rows.append(row[1:])

            data_frame = pd.DataFrame(rows, index=indexes,
                                      columns=['成交股數', '成交金額', '成交筆數', '最高價', '日期', '最低價', '日期', '收盤平均價'])
            print(data_frame)
            self.__out_repository.put_data(stock_id, data_frame)
            return data_frame

        except Exception as inst:
            # Best effort: a malformed page is logged and reported as "no data".
            _logger.error("get exception in " + str(stock_id) + ":" + str(inst))
            traceback.print_tb(inst.__traceback__)
            return None
Ejemplo n.º 8
0
    def test_store_data_frames(self):
        """Sync each statement for stock 4564, store it, read it back and print it.

        The dividend-policy frame is exercised twice; the second pass also
        asserts that the index is a ``pd.PeriodIndex`` both before storing
        and after reloading.
        """
        stock_id = 4564

        def store_and_reload(meta, frame):
            # Write ``frame`` under ``stock_id`` and return what the repository reads back.
            repository = MongoDBRepository(meta)
            repository.put_data(stock_id, frame)
            return repository.get_data(stock_id)

        # Cash-flow statement.
        cash_flow = _sync_cash_flow_statement(stock_id, 2013, to_year=2019)
        reloaded = store_and_reload(MongoDBMeta.DATAFRAME_CASH_FLOW, cash_flow)
        print(reloaded)
        print(reloaded.index)

        # Profit statement.
        profit = _sync_profit_statement(stock_id, 2013, to_year=2019)
        reloaded = store_and_reload(MongoDBMeta.DATAFRAME_PROFIT_STATEMENT, profit)
        print(reloaded)
        print(reloaded.index)

        # Balance sheet.
        balance = _sync_balance_sheet(stock_id, 2013, to_year=2020)
        print(balance)
        reloaded = store_and_reload(MongoDBMeta.DATAFRAME_BALANCE_SHEET, balance)
        print(reloaded)
        print(reloaded.columns)

        # Dividend policy, first pass: print only.
        dividend = _sync_dividend_policy(stock_id, 2013)
        print(dividend)
        print(dividend.index)
        reloaded = store_and_reload(MongoDBMeta.DATAFRAME_DIVIDEND_POLICY, dividend)
        print(reloaded)
        print(reloaded.index)

        # Dividend policy, second pass: also check the index type survives the round trip.
        dividend = _sync_dividend_policy(stock_id, 2013)
        print(dividend)
        print(type(dividend.index))
        self.assertIsInstance(dividend.index, pd.PeriodIndex)
        reloaded = store_and_reload(MongoDBMeta.DATAFRAME_DIVIDEND_POLICY, dividend)
        print(reloaded)
        print(reloaded.index)
        self.assertIsInstance(reloaded.index, pd.PeriodIndex)
Ejemplo n.º 9
0
class DividendPolicyProcessor2(StatementProcessor):
    """Build a yearly dividend table for a stock from its dividend-policy page.

    Raw pages are read from the DIVIDEND_POLICY repository; when the cached
    page has no usable figures for the requested year a fresh page is fetched
    through ``dividend_policy_fetcher`` and stored.
    """

    def __init__(self):
        super().__init__(None)
        self.dividend_policy_fetcher = _DividendPolicyFetcher2()
        self.__repository = MongoDBRepository(MongoDBMeta.DIVIDEND_POLICY)

    def get_data_frame(self, year, season):
        """Do nothing and return ``None``."""
        pass

    def get_data_frames(self, stock_id, start_year=None, to_year=None):
        """Return the yearly dividend DataFrame for ``stock_id``.

        Args:
            stock_id: stock identifier.
            start_year: first year to request when a fetch is needed.
                Defaults to the current year, resolved at call time.
            to_year: year that must have data in the cached frame for the
                cache to be used. Defaults to the current year, resolved at
                call time.

        Returns:
            The parsed DataFrame, or ``None`` when neither the cached nor the
            freshly fetched page can be parsed.
        """
        # Defaults are resolved here rather than in the signature, where
        # ``datetime.now()`` would be evaluated only once at import time.
        current_year = datetime.now().year
        if start_year is None:
            start_year = current_year
        if to_year is None:
            to_year = current_year

        cache_df = self._parse_raw_data(stock_id, self.__repository.get_data(stock_id))
        if cache_df is not None and self._has_rows_for_year(cache_df, to_year):
            return cache_df

        latest_raw_data = self._get_raw_data(stock_id, start_year=start_year, to_year=to_year)
        print('latest_raw_data = ', latest_raw_data)
        return self._parse_raw_data(stock_id=stock_id, raw_data=latest_raw_data)

    @staticmethod
    def _has_rows_for_year(data_frame, year):
        """Return True when ``data_frame`` has a non-empty, not-all-null row for ``year``."""
        # Match on the period's year instead of string-indexing the frame so
        # the check does not depend on partial-string row lookup.
        mask = [str(getattr(period, 'year', '')) == str(year) for period in data_frame.index]
        rows = data_frame.loc[mask]
        return not (rows.empty or rows.isnull().values.all())

    def _parse_raw_data(self, stock_id, raw_data):
        """Parse a raw dividend-policy page into a yearly DataFrame.

        Returns ``None`` when ``raw_data`` is ``None`` or the table cannot be
        read. Otherwise the result has one row per year from 2013 to the
        current year and the columns '現金股利' (cash dividend) and '配股'
        (stock dividend); a year with no figures at all is NaN.
        """
        # Local import: the markup is wrapped in a file-like object so that
        # no literal HTML string is handed to ``pd.read_html``.
        from io import StringIO

        if raw_data is None:
            return None
        try:
            soup = BeautifulSoup(raw_data, 'html.parser')
            table = soup.find('table', attrs={"class": "hasBorder", "width": "99%"})
            data_frame = pd.read_html(StringIO(str(table)))[0]
            print('dividend data_frame = ', data_frame)
        except Exception as e:
            print('get', e, ' when get dividend policy')
            return None

        # The first three rows are skipped; data columns are addressed by position.
        data_frame = data_frame.iloc[3:, :]
        period_list = [pd.Period(self.__parse_period(x)) for x in data_frame.iloc[:, 1].tolist()]
        dividend_cash_list = [float(x) for x in data_frame.iloc[:, 10].tolist()]
        dividend_cash_stock_list = [float(x) for x in data_frame.iloc[:, 13].tolist()]
        dividend_record_version = [int(x) for x in data_frame.iloc[:, 3].tolist()]
        meeting_progress = [str(x) for x in data_frame.iloc[:, 0].tolist()]

        # Keep one record per period. A later duplicate replaces the stored
        # one only when its progress text contains '股東會確認' and its
        # version number is higher.
        parse_dict = {}
        records = zip(period_list, dividend_cash_list, dividend_cash_stock_list,
                      dividend_record_version, meeting_progress)
        for period, cash, stock, version, progress in records:
            existing = parse_dict.get(period)
            if existing is None:
                parse_dict[period] = [cash, stock, version]
            else:
                print('duplicate ', period)
                if '股東會確認' in progress and existing[2] < version:
                    parse_dict[period] = [cash, stock, version]

        period_list = list(parse_dict)
        dict_dividend = {'現金股利': [value[0] for value in parse_dict.values()],
                         '配股': [value[1] for value in parse_dict.values()]}
        print(dict_dividend)

        now = datetime.now()
        years = range(2013, now.year + 1)
        quarter_periods = [pd.Period(str(year) + 'Q' + str(quarter))
                           for year in years for quarter in range(1, 5)]

        # Values were built with float(), so quarters without a record simply
        # become NaN after reindexing.
        df_dividend = pd.DataFrame(dict_dividend, index=period_list).reindex(quarter_periods)
        print('df_dividend = ', df_dividend)

        dic_dividend = {}
        for year in years:
            df_extract = df_dividend.loc[pd.Period(str(year) + 'Q1'):pd.Period(str(year) + 'Q4'), :]
            if df_extract.isnull().values.all():
                # No figure in any quarter: keep the year as NaN instead of
                # the 0 that sum() would produce. A Series keeps the column
                # labels even when every year is empty.
                yearly = pd.Series(float('nan'), index=df_extract.columns)
            else:
                yearly = df_extract.sum()
            # Built from the year string so the yearly frequency is inferred.
            dic_dividend[pd.Period(str(year))] = yearly

        print('df_dividend 2 = ', dic_dividend)
        print("\n")
        df_dividend = pd.DataFrame(dic_dividend)
        df_dividend = df_dividend.T
        print('df_dividend 3 = ', df_dividend)
        return df_dividend

    def _get_raw_data(self, stock_id, start_year=None, to_year=None):
        """Fetch the dividend-policy page, store it and return its content.

        ``start_year`` and ``to_year`` default to the current year (resolved
        at call time); 1911 is subtracted from both before they are sent to
        the fetcher. Returns ``None`` when the fetch result reports failure.
        """
        current_year = datetime.now().year
        if start_year is None:
            start_year = current_year
        if to_year is None:
            to_year = current_year

        result = self.dividend_policy_fetcher.fetch(
            {'stock_id': stock_id, 'start_year': start_year - 1911, 'to_year': to_year - 1911})
        if result.ok is False:
            print('get content fail')
            return None
        print('result content = ', result.content)

        self.__repository.put_data(stock_id, result.content)
        return result.content

    def __parse_period(self, period_string):
        """Convert a period label from the page into a 'YYYYQn' string.

        Full-year and second-half labels map to Q4, first-half labels to Q2,
        and quarter labels to their own quarter; 1911 is added to the year
        number (presumably ROC calendar year to Gregorian).
        """
        suffix_to_quarter = (("年年度", "Q4"), ("年上半年", "Q2"), ("年下半年", "Q4"))
        for suffix, quarter in suffix_to_quarter:
            if suffix in period_string:
                return str(int(period_string.replace(suffix, "")) + 1911) + quarter
        period_strings = period_string.replace("季", "").split("年第")
        return str(int(period_strings[0]) + 1911) + "Q" + period_strings[1]
Ejemplo n.º 10
0
class ShareholderEquityProcessor(StatementProcessor):
    """Extract opening/closing total equity per quarter for one stock.

    Raw statements are read from the SHARE_HOLDER repository and fetched on
    demand when missing.
    """

    def __init__(self, stock_id):
        super().__init__(stock_id)
        self.__tag = "ShareholderEquityProcessor"
        self.__repository = MongoDBRepository(MongoDBMeta.SHARE_HOLDER)
        # Row labels to extract: opening balance and closing balance.
        self.items_to_get = ('期初餘額', '期末餘額')
        # Column headers to extract: total equity.
        self.fields_to_get = ('權益總額', )

    def get_data_frames(self, since, to=None, source_policy=Source.CACHE_ONLY):
        """Return one row per quarter between ``since`` and ``to``.

        Each quarter's opening balance is replaced by the closing balance of
        the previously processed quarter. When the range starts after Q1 the
        quarter *before* the first one is loaded to seed that value.

        Args:
            since: start of the range, passed to ``get_time_lines``.
            to: end of the range, passed to ``get_time_lines``.
            source_policy: accepted for interface compatibility; not used here.

        Returns:
            A DataFrame with (field, item) MultiIndex columns and a quarterly
            PeriodIndex, or ``None`` when no quarter could be parsed.
        """
        time_lines = get_time_lines(since=since, to=to)
        dfs = []
        column_index = pd.MultiIndex.from_product(
            [self.fields_to_get, self.items_to_get], names=['first', 'second'])
        print(column_index)

        last_result = None
        if len(time_lines) > 0 and time_lines[0].get('season') > 1:
            # Seed with the preceding quarter. Seeding with the first quarter
            # itself would make its opening balance equal its own closing
            # balance.
            first = time_lines[0]
            last_result = self._get_data_dict(first.get('year'),
                                              first.get('season') - 1)

        for time_line in time_lines:
            year = time_line.get('year')
            season = time_line.get('season')
            result = self._get_data_dict(year, season)
            if result is None:
                continue
            if last_result is not None:
                for key in result:
                    result[key]['期初餘額'] = last_result[key]['期末餘額']
            last_result = result
            print(result)

            str_period = "{}Q{}".format(year, season)
            # ``pd.PeriodIndex(start=..., end=...)`` is no longer accepted by
            # pandas; ``period_range`` yields the same one-element index.
            period_index = pd.period_range(start=str_period, end=str_period,
                                           freq='Q')
            data_list = []
            for inner in result.values():
                data_list.extend(inner.values())
            print(data_list)
            dfs.append(
                pd.DataFrame([data_list],
                             columns=column_index,
                             index=period_index))

        print(self.__tag, "dfs = ", dfs)
        return pd.concat(dfs) if len(dfs) > 0 else None

    def get_data_frame(self, year, season):
        """Return the single-quarter frame for ``year``/``season``."""
        quarter = {'year': year, 'season': season}
        return self.get_data_frames(since=quarter, to=dict(quarter))

    def _get_data_dict(self, year, season):
        """Load (fetching once if absent) and parse one quarter.

        Returns the parsed ``{field: {item: value}}`` dict, or ``None`` when
        no raw data is available or parsing fails.
        """
        raw_data = self.__repository.get_data(self._stock_id, {
            'year': year,
            'season': season
        })
        if raw_data is None:
            fetch_shareholder_equity_raw_data(self._stock_id, year, season)
            raw_data = self.__repository.get_data(self._stock_id, {
                'year': year,
                'season': season
            })
        if raw_data is None:
            return None
        return self._parse_data(raw_data)

    def _parse_data(self, content):
        """Parse raw statement HTML into ``{field: {item: int}}``.

        The first centred ``hasBorder`` table is used. Its first ``th`` row
        supplies the headers; rows whose first cell is one of
        ``items_to_get`` supply the values. Items missing from the table
        stay 0. Returns ``None`` when the table or a required header is
        missing, or on any parsing error.
        """
        try:
            bs = BeautifulSoup(content, 'html.parser')
            tables = bs.find_all('table',
                                 attrs={
                                     "class": "hasBorder",
                                     "align": "center"
                                 })

            if len(tables) < 1:
                print('ShareholderEquityProcessor - error 1')
                return None

            headers = []
            rows_data = []
            for row in tables[0].find_all('tr'):
                columns_raw = [
                    column for column in row.contents if column != '\n'
                ]
                columns = [column.get_text().strip() for column in columns_raw]
                if len(columns) <= 1:
                    continue
                if columns_raw[0].name == 'th' and len(headers) == 0:
                    headers = columns
                    if not all(field in headers
                               for field in self.fields_to_get):
                        print('ShareholderEquityProcessor - error 2')
                        return None
                else:
                    rows_data.append(columns)

            # Start from zeros so every (field, item) pair is present.
            result2 = {
                field: {item: 0
                        for item in self.items_to_get}
                for field in self.fields_to_get
            }
            for row_data in rows_data:
                item = row_data[0]
                if item not in self.items_to_get:
                    continue
                for field in self.fields_to_get:
                    cell = row_data[headers.index(field)]
                    result2[field][item] = int(cell.replace(',', ''))

            print("result = ", result2)
            return result2

        except Exception as inst:
            # Best effort: an unparsable page is reported and treated as "no data".
            print("get exception", inst)
            traceback.print_tb(inst.__traceback__)
            return None
Ejemplo n.º 11
0
 def __init__(self, stock_id):
     """Set up the processor for ``stock_id``.

     Forwards ``stock_id`` to the parent initialiser, then records the
     CASH_FLOW repository handle and the statement fields to extract.
     """
     super().__init__(stock_id)
     collection_meta = MongoDBMeta.CASH_FLOW
     self.__repository = MongoDBRepository(collection_meta)
     self._fetch_fields = (
         '營業活動之淨現金流入',  # net cash inflow from operating activities
         '取得不動產、廠房及設備',  # acquisition of property, plant and equipment
         '其他投資活動',  # other investing activities
         '投資活動之淨現金流入',  # net cash inflow from investing activities
     )
Ejemplo n.º 12
0
class CashFlowStatementProcessor(StatementProcessor):
    """Derive per-quarter cash-flow figures for one stock.

    Owner's-earnings cash flow = net cash inflow from operating activities
        + acquisition of property, plant and equipment
        + other investing activities
    Free cash flow = net cash inflow from operating activities
        + net cash inflow from investing activities
    """
    def __init__(self, stock_id):
        super().__init__(stock_id)
        self.__repository = MongoDBRepository(MongoDBMeta.CASH_FLOW)
        self._fetch_fields = ('營業活動之淨現金流入', '取得不動產、廠房及設備', '其他投資活動',
                              '投資活動之淨現金流入')

    def get_data_frames(self, since, to=None, source_policy=Source.CACHE_ONLY):
        """Return one row per quarter between ``since`` and ``to``.

        The timeline is walked newest-first. For every season after the first
        the previous season's figures are subtracted from the current ones,
        and that previous season's dict is reused as the input of the next
        iteration instead of being loaded again.

        Args:
            since: start of the range, passed to ``get_time_lines``.
            to: end of the range, passed to ``get_time_lines``.
            source_policy: accepted for interface compatibility; not used here.

        Returns:
            The concatenated DataFrame (rows in newest-first order), or
            ``None`` when no quarter produced data.
        """
        time_lines = get_time_lines(since=since, to=to)
        time_lines.reverse()

        dfs = []
        cache_data_dict = None
        for time_line in time_lines:
            print('In ', time_line)
            year = time_line.get('year')
            season = time_line.get('season')
            if cache_data_dict is None:
                data_dict = self._get_data_dict(self._fetch_fields, year,
                                                season)
            else:
                # Loaded by the previous iteration as its "season - 1" baseline.
                data_dict = cache_data_dict

            if data_dict is None:
                continue

            if season > 1:
                cache_data_dict = self._get_data_dict(self._fetch_fields, year,
                                                      season - 1)
                if cache_data_dict is None:
                    # No baseline: the figures are left as loaded.
                    print('get None value in year ', year, ' season ', season,
                          " data_dict = ", data_dict, " cache_data_dic = ",
                          cache_data_dict)
                else:
                    for key in self._fetch_fields:
                        data_dict[key] = data_dict.get(
                            key, 0) - cache_data_dict.get(key, 0)
            else:
                cache_data_dict = None

            data_dict['業主盈餘現金流'] = (data_dict.get('營業活動之淨現金流入', 0)
                                    + data_dict.get('取得不動產、廠房及設備', 0)
                                    + data_dict.get('其他投資活動', 0))
            data_dict['自由現金流'] = (data_dict.get('營業活動之淨現金流入', 0)
                                  + data_dict.get('投資活動之淨現金流入', 0))
            print(data_dict)

            str_period = "{}Q{}".format(year, season)
            # ``pd.PeriodIndex(start=..., end=...)`` is no longer accepted by
            # pandas; ``period_range`` yields the same one-element index.
            period_index = pd.period_range(start=str_period, end=str_period,
                                           freq='Q')
            # Materialise the dict views so the row is a plain list.
            dfs.append(
                pd.DataFrame([list(data_dict.values())],
                             columns=list(data_dict.keys()),
                             index=period_index))
        return None if len(dfs) == 0 else pd.concat(dfs, sort=False)

    def get_data_frame(self, year, season, source_policy=Source.CACHE_ONLY):
        """Return the single-quarter frame for ``year``/``season``."""
        return self.get_data_frames(since={
            'year': year,
            'season': season
        },
                                    to={
                                        'year': year,
                                        'season': season
                                    },
                                    source_policy=source_policy)

    def _get_data_dict(self, fields, year, season):
        """Load (fetching once if absent) and parse one quarter.

        For each table row, the first entry of ``fields`` contained in the
        row's first cell is mapped to the integer in the second cell.

        Returns:
            ``{field: int}`` for the fields found, or ``None`` when no raw
            data is available or parsing fails.
        """
        data_dict = {}
        try:
            raw_data = self.__repository.get_data(str(self._stock_id), {
                'year': year,
                'season': season
            })
            if raw_data is None:
                fetch_cash_flow_raw_data(self._stock_id, year, season)
                raw_data = self.__repository.get_data(str(self._stock_id), {
                    'year': year,
                    'season': season
                })
            if raw_data is None:
                print('no cash flow raw data in year ', year, ' season ', season)
                return None

            bs = BeautifulSoup(raw_data, 'html.parser')
            table = bs.find_all('table',
                                attrs={
                                    "class": "hasBorder",
                                    "align": "center"
                                })

            for row in table[0].find_all('tr'):
                r = [x.get_text() for x in row.find_all('td')]
                # Label and value cells are both required; skip other rows
                # instead of failing the whole quarter.
                if len(r) < 2:
                    continue
                for field in fields:
                    if field in r[0]:
                        data_dict[field] = int(r[1].replace(',', ''))
                        break

        except Exception as inst:
            # Best effort: an unparsable page is reported and treated as "no data".
            print("get exception", inst)
            traceback.print_tb(inst.__traceback__)
            return None
        return data_dict
Ejemplo n.º 13
0
import pandas as pd

from evaluation_utils import get_stock_list
from rdss.balance_sheet import SimpleBalanceSheetProcessor
from rdss.cashflow_statment import CashFlowStatementProcessor
from rdss.dividend_policy2 import DividendPolicyProcessor2
from rdss.shareholder_equity import ShareholderEquityProcessor
from rdss.statement_fetchers import SimpleIncomeStatementProcessor
from rdss.stock_count import StockCountProcessor
from repository.mongodb_repository import MongoDBRepository, MongoDBMeta
from stock_data import store_df, read_dfs
from twse_crawler import gen_output_path
from utils import get_time_lines
from value_measurement import PriceMeasurementProcessor2

# Module-level repository handles, one per MongoDBMeta.DATAFRAME_* entry.
# They are created once when this module is imported.
_cash_flow_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_CASH_FLOW)
_profit_statement_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_PROFIT_STATEMENT)
_balance_sheet_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_BALANCE_SHEET)
_dividend_policy_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_DIVIDEND_POLICY)
_performance_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_PERFORMANCE)


class Option(enum.IntEnum):
    """Bit-valued selectors for the statement kinds to process.

    Each member occupies its own bit so members can be OR-ed together;
    ``ALL`` is the union of the four individual members. The bit with
    value 2 is not assigned to any member.
    """
    BALANCE_SHEET = 0b00001
    PROFIT_STATEMENT = 0b00100
    CASH_FLOW_STATEMENT = 0b01000
    DIVIDEND_POLICY = 0b10000
    ALL = 0b11101


def sync_statements(stock_codes, times_to_retry=10, break_after_retry=True, option=Option.ALL, isSync=True):
Ejemplo n.º 14
0
# Relative output locations for raw data, one per data set.
# NOTE(review): the DIVIDEND_POLICY and TPEX_PRICE_MEASUREMENT entries have no
# trailing "/" while the others do -- confirm that no caller builds file paths
# by plain string concatenation with these constants.
PATH_DIR_RAW_DATA_SHAREHOLDER_EQUITY = "out/raw_datas/shareholder_equity/"
PATH_DIR_RAW_DATA_DIVIDEND_POLICY = "out/raw_datas/dividend_policy"
PATH_DIR_RAW_DATA_STOCK_COUNT = "out/raw_datas/stock_count/"
PATH_DIR_RAW_DATA_CASH_FLOW = "out/raw_datas/cash_flow/"
PATH_DIR_RAW_DATA_PRICE_MEASUREMENT = "out/raw_datas/price_measurement/"
PATH_DIR_RAW_DATA_TPEX_PRICE_MEASUREMENT = "out/raw_datas/tpex_price_measurement"

# One DataFetcher per mops.twse.com.tw ajax endpoint, created at import time.
# Note: names with two leading underscores are name-mangled when referenced
# from inside a class body, so these are only directly usable from
# module-level functions.
__balance_sheet_data_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t164sb03')
__simple_balance_sheet_data_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t163sb01')
__shareholder_equity_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t164sb06')
__dividend_policy_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t05st09_2')
__stock_count_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t16sn02')
__cash_flow_fetcher = DataFetcher('https://mops.twse.com.tw/mops/web/ajax_t164sb05')


# One repository handle per raw-data MongoDBMeta entry, created at import time.
__stock_count_repository = MongoDBRepository(MongoDBMeta.STOCK_COUNT)
__twse_price_measurement_repository = MongoDBRepository(MongoDBMeta.TWSE_PRICE_MEASUREMENT)
__tpex_price_measurement_repository = MongoDBRepository(MongoDBMeta.TPEX_PRICE_MEASUREMENT)
__dividend_policy_repository = MongoDBRepository(MongoDBMeta.DIVIDEND_POLICY)
__shareholder_repository = MongoDBRepository(MongoDBMeta.SHARE_HOLDER)
__simple_balance_sheet_repository = MongoDBRepository(MongoDBMeta.SIMPLE_BALANCE_SHEET)
__full_balance_sheet_repository = MongoDBRepository(MongoDBMeta.FULL_BALANCE_SHEET)
__cash_flow_repository = MongoDBRepository(MongoDBMeta.CASH_FLOW)

# Module logger, registered under the name "twse.DataFetcher".
__logger = logging.getLogger("twse.DataFetcher")

# Alternative local connection, kept for switching by hand:
# mongo_client = MongoClient('localhost', 27017)
# NOTE(review): the client is created at import time with a hard-coded host
# address -- consider reading host/port from configuration.
mongo_client = MongoClient('192.168.1.109', 27017)
# Database name and collection names for the two price-measurement data sets.
DB_TWSE = "TWSE"
TABLE_TWSE_PRICE_MEASUREMENT = "twse_price_measurement"
TABLE_TPEX_PRICE_MEASUREMENT = "tpex_price_measurement"
Ejemplo n.º 15
0
 def __init__(self):
     """Bind the input (TPEX_PRICE_MEASUREMENT) and output repositories."""
     source_meta = MongoDBMeta.TPEX_PRICE_MEASUREMENT
     self.__in_repository = MongoDBRepository(source_meta)
     # The output side reuses the module-level ``_data_frame_repository``.
     self.__out_repository = _data_frame_repository
Ejemplo n.º 16
0
class SimpleIncomeStatementProcessor:
    """Derive per-quarter EPS and net income from stored statement pages.

    Raw pages are read from the SIMPLE_BALANCE_SHEET repository and fetched
    on demand when missing.
    """

    def __init__(self):
        self.__repository = MongoDBRepository(MongoDBMeta.SIMPLE_BALANCE_SHEET)

    def get_data_frames(self,
                        stock_id,
                        since,
                        to=None,
                        source_policy=Source.CACHE_ONLY):
        """Return one row per quarter between ``since`` and ``to``.

        Every quarter has the previously processed quarter of the same year
        subtracted from it; the first season of a year is used as loaded.
        When the range starts after the first season the preceding quarter
        is loaded to seed the subtraction.

        Args:
            stock_id: stock identifier.
            since: start of the range, passed to ``get_time_lines``.
            to: end of the range, passed to ``get_time_lines``.
            source_policy: accepted for interface compatibility; not used here.

        Returns:
            The concatenated DataFrame, or ``None`` when the timeline is
            empty or no quarter could be parsed.
        """
        time_lines = get_time_lines(since=since, to=to)
        if not time_lines:
            return None

        year = time_lines[0].get('year')
        season = time_lines[0].get('season')
        last_result = self._get_data_dict(stock_id, year, season -
                                          1) if season > 1 else None
        dfs = []

        for time_line in time_lines:
            year = time_line.get('year')
            season = time_line.get('season')
            if season == 1:
                # A new year never subtracts a baseline, even when the
                # previous year's fourth season was missing and so did not
                # reset it.
                last_result = None

            data_dict = self._get_data_dict(stock_id, year, season)
            if data_dict is None:
                continue

            if last_result is not None:
                result = {
                    k: (v - last_result[k])
                    for (k, v) in data_dict.items()
                }
            else:
                result = data_dict
            print('result = ', result, ' last_result', last_result)

            last_result = None if season == 4 else data_dict
            str_period = "{}Q{}".format(year, season)
            # ``pd.PeriodIndex(start=..., end=...)`` is no longer accepted by
            # pandas; ``period_range`` yields the same one-element index.
            period_index = pd.period_range(start=str_period, end=str_period,
                                           freq='Q')
            # Materialise the dict views so the row is a plain list.
            dfs.append(
                pd.DataFrame([list(result.values())],
                             columns=list(result.keys()),
                             index=period_index))

        return pd.concat(dfs) if len(dfs) > 0 else None

    def get_data_frame(self,
                       stock_id,
                       year,
                       season,
                       source_policy=Source.CACHE_ONLY):
        """Return the single-quarter frame for ``year``/``season``."""
        return self.get_data_frames(stock_id=stock_id,
                                    since={
                                        'year': year,
                                        'season': season
                                    },
                                    to={
                                        'year': year,
                                        'season': season
                                    },
                                    source_policy=source_policy)

    def _get_data_dict(self, stock_id, year, season):
        """Load (fetching once if absent) and parse one quarter.

        The third centred, 70%-wide ``hasBorder`` table is scanned: a row
        whose first cell contains '每股盈餘' yields 'EPS' (float) and a row
        containing '本期綜合損益總額' yields '稅後淨利' (int).

        Returns:
            A dict with the entries found, or ``None`` when no raw data is
            available or parsing fails.
        """
        try:
            dict_datas = {}
            raw_data = self.__repository.get_data(stock_id, {
                'year': year,
                'season': season
            })
            if raw_data is None:
                fetch_simple_balance_sheet_raw_data(stock_id, year, season)
                raw_data = self.__repository.get_data(stock_id, {
                    'year': year,
                    'season': season
                })
            if raw_data is None:
                print('no raw data in year ', year, ' and season ', season)
                return None

            bs = BeautifulSoup(raw_data, 'html.parser')
            print(' get ', bs.text)
            tables = bs.find_all('table',
                                 attrs={
                                     "class": "hasBorder",
                                     "align": "center",
                                     "width": "70%"
                                 })
            table = tables[2]
            for row in table.find_all('tr'):
                r = [x.get_text() for x in row.find_all('td')]
                # Label and value cells are both required; skip other rows
                # (e.g. header rows) instead of failing the whole quarter.
                if len(r) < 2:
                    continue
                if '每股盈餘' in r[0]:
                    dict_datas['EPS'] = float(r[1])
                if '本期綜合損益總額' in r[0]:
                    dict_datas['稅後淨利'] = int(r[1].replace(',', ''))
            return dict_datas

        except Exception as inst:
            # Best effort: an unparsable page is reported and treated as "no data".
            print("get exception", inst, " when get data in year ", year,
                  ' and season ', season)
            traceback.print_tb(inst.__traceback__)
            return None
Ejemplo n.º 17
0
 def __init__(self):
     """Bind the SIMPLE_BALANCE_SHEET repository used by this processor."""
     collection_meta = MongoDBMeta.SIMPLE_BALANCE_SHEET
     self.__repository = MongoDBRepository(collection_meta)
Ejemplo n.º 18
0
 def __init__(self):
     """Bind the STOCK_COUNT repository used by this processor."""
     collection_meta = MongoDBMeta.STOCK_COUNT
     self.__repository = MongoDBRepository(collection_meta)
Ejemplo n.º 19
0
 def __init__(self):
     """Create the fetcher and bind the DIVIDEND_POLICY repository.

     The parent initialiser is called with ``None``.
     """
     super().__init__(None)
     self.dividend_policy_fetcher = _DividendPolicyFetcher2()
     collection_meta = MongoDBMeta.DIVIDEND_POLICY
     self.__repository = MongoDBRepository(collection_meta)
Ejemplo n.º 20
0
        print(revamp_list)

        return revamp_list


def __data_frame_in_transform(content):
    """Rebuild a DataFrame from split-oriented JSON and relabel its index.

    ``content`` is read with ``pd.read_json(orient='split')``; every index
    label is then replaced by ``pd.Period(str(label))``. The raw index values
    are printed before relabelling.
    """
    frame = pd.read_json(content, orient='split', typ='frame')
    print(frame.index.values)
    period_by_label = {}
    for label in frame.index.values:
        period_by_label[label] = pd.Period(value=str(label))
    return frame.rename(period_by_label)


# Transformer pair handed to the price-measurement DataFrame repository:
# ``yearly_period_data_frame_in_transform`` is passed as ``in_transform`` and
# ``default_data_frame_out_transform`` as ``out_transform``.
__data_frame_repository_transformer = Transformer(in_transform=yearly_period_data_frame_in_transform,
                                                  out_transform=default_data_frame_out_transform)
# Shared repository for price-measurement DataFrames, created at import time.
_data_frame_repository = MongoDBRepository(MongoDBMeta.DATAFRAME_PRICE_MEASUREMENT,
                                           transformer=__data_frame_repository_transformer)

class PriceMeasurementProcessor2:
    def __init__(self):
        """Create both market transformers and load the two stock-code lists."""
        self.__twsePriceTransformer = TWSEPriceMeasurementTransformer()
        self.__tpexPriceTransformer = TPEXPriceMeasurementTransformer()
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from evaluation_utils import get_stock_codes
        twse_codes = get_stock_codes(stock_type='上市')
        self.list_twse = twse_codes
        tpex_codes = get_stock_codes(stock_type='上櫃')
        self.list_tpex = tpex_codes

    def get_data_frame(self, stock_id):
        data_frame = _data_frame_repository.get_data(stock_id)
        current_years = None
        if data_frame is not None:
            current_years = list(map(lambda year_index: int(year_index.year), data_frame.index.values))
            current_years.sort()