def save_balance_sheet_of_a_season_to_temp(data=None):
    """Persist one season's balance-sheet record into the per-season JSON temp file.

    The record is stored under the key '<stock_no>-<year>-<season>' inside
    '<FS_BALANCE_SHEET_PATH>/<year>-<season>.json'.

    Args:
        data: dict holding at least 'stock_no', 'year' and 'season'
              (assumed schema — TODO confirm against callers).

    Returns:
        True on success, False on any error (the error is printed and
        appended to the fetch error log; nothing is raised to the caller).
    """
    filename = ''
    try:
        if not data:
            raise Exception('save_balance_sheet_of_a_season_to_temp fail, input data is None!')
        date = '{}-{}'.format(data['year'], data['season'])
        key = '{}-{}'.format(data['stock_no'], date)
        filename = key  # remembered only for the error message below
        target_file = data_fetch.config.FS_BALANCE_SHEET_PATH + '/{}.json'.format(date)
        lib.tool.init_json_file_if_not_exist(target_file)
        json_obj = lib.tool.get_json_obj_from_temp_file(target_file)
        # check data exist
        json_obj[key] = data
        # FIX: serialize BEFORE opening the file with 'w'. The original
        # truncated the file first, so a json.dumps() failure (e.g. a
        # non-serializable value in `data`) wiped the existing temp file.
        save_data = json.dumps(json_obj, indent=4, sort_keys=True)
        with open(target_file, 'w') as temp_file:
            temp_file.write(save_data)
        return True
    except Exception as e:
        msg = 'save_balance_sheet_of_a_season_to_temp error, filename = {}, msg = {}'.format(filename, e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
        return False
def save_income_statement_of_a_season_to_temp(data=None):
    """Persist one season's income-statement record into the per-season JSON temp file.

    The record is stored under the key '<stock_no>-<year>-<season>' inside
    '<FS_STATEMENT_OF_COMPREHENSIVE_INCOME_PATH>/<year>-<season>.json'.
    All None values in `data` are replaced by '' before saving.

    Args:
        data: dict holding at least 'stock_no', 'year' and 'season'
              (assumed schema — TODO confirm against callers).

    Returns:
        True on success, False on any error (the error is printed and
        appended to the fetch error log; nothing is raised to the caller).
    """
    try:
        if not data:
            raise Exception(
                'save_income_statement_of_a_season_to_temp fail, input data is None!'
            )
        date = '{}-{}'.format(data['year'], data['season'])
        key = '{}-{}'.format(data['stock_no'], date)
        target_file = data_fetch.config.FS_STATEMENT_OF_COMPREHENSIVE_INCOME_PATH + '/{}.json'.format(
            date)
        lib.tool.init_json_file_if_not_exist(target_file)
        json_obj = lib.tool.get_json_obj_from_temp_file(target_file)
        # convert None to empty string
        # BUG FIX: the original loop was `for key in data.keys()`, which
        # clobbered the record key computed above, so the record ended up
        # stored under whatever column name the loop visited last instead
        # of '<stock_no>-<year>-<season>'. Use a distinct loop variable.
        for col in data.keys():
            if data[col] is None:
                data[col] = ''
        # check data exist
        json_obj[key] = data
        # Serialize before truncating the file so a dumps() failure cannot
        # destroy the existing temp file (same pattern as the balance-sheet
        # saver).
        save_data = json.dumps(json_obj, indent=4, sort_keys=True)
        with open(target_file, 'w') as temp_file:
            temp_file.write(save_data)
        return True
    except Exception as e:
        msg = 'save_income_statement_of_a_season_to_temp error, msg = {}'.format(
            e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
        return False
def download_twse_month_revenue(date='201909'):
    """Download and unpack the TWSE monthly-revenue report for `date`.

    Fetches '<date>_C04003.zip' from TWSE, extracts the single xls inside
    FS_MONTH_REVENUE_PATH and renames it to '<date>_twse.xls'. The zip is
    always removed afterwards, even on failure.

    Args:
        date: report month as 'YYYYMM' (e.g. '201909').

    Returns:
        None. Errors are printed and written to the fetch error log.
    """
    zip_file = data_fetch.config.FS_MONTH_REVENUE_PATH + '/{}_twse.zip'.format(date)
    try:
        url = data_fetch.config.TWSE_MONTH_REVENUE_URL.format(date + '_C04003.zip')
        response = requests.get(url)
        # FIX: fail fast on HTTP errors. Previously a 4xx/5xx error page was
        # written to disk as the zip and only failed later inside ZipFile
        # with a confusing BadZipFile error.
        response.raise_for_status()
        # create zip
        with open(zip_file, "wb") as temp_zip_file:
            temp_zip_file.write(response.content)
        # extract zip and rename xls file
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(data_fetch.config.FS_MONTH_REVENUE_PATH)
            # the archive is expected to contain exactly one file
            temp_file = data_fetch.config.FS_MONTH_REVENUE_PATH + '/{}'.format(zip_ref.namelist()[0])
        new_file = data_fetch.config.FS_MONTH_REVENUE_PATH + '/{}_twse.xls'.format(date)
        if os.path.exists(temp_file):
            os.rename(temp_file, new_file)
        print('{} was download.'.format(new_file))
    except Exception as e:
        msg = 'download_twse_month_revenue error, date = {}, msg = {}'.format(date, e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
    finally:
        # remove zip
        if os.path.exists(zip_file):
            os.remove(zip_file)
def download_latest_twse_stock_info():
    # Scrape the TWSE listed-securities table and rewrite `twse_info_file`
    # as a CSV of (security id, name, listing date, industry).
    # NOTE(review): this relies on the Python 2 `unicode` builtin and on
    # byte-string comparisons -- it is NOT Python 3 compatible as written.
    try:
        header = data_fetch.config.HEADER
        html = requests.get(data_fetch.config.TWSE_STOCK_INFO_URL,
                            headers=header,
                            timeout=data_fetch.config.TIMEOUT_SECONDS)
        soup = BeautifulSoup(html.text, "html.parser")
        # the listing table is identified by its CSS class "h4"
        table = soup.find("table", attrs={"class": "h4"})
        rows = []
        # collect every cell as a utf-8 byte string (Python 2 `str`)
        for row in table.find_all('tr'):
            rows.append([
                val.text.strip().encode("utf8") for val in row.find_all('td')
            ])
        with open(twse_info_file, 'w+') as f:
            writer = csv.writer(f)
            # header row (Chinese: security id / name / listing date / industry)
            writer.writerows([["有價證券代號", "名稱", "上市日", "產業別"]])
            for row in rows:
                # stop once the warrants section starts -- only ordinary
                # listed securities are wanted
                if row[0] == "上市認購(售)權證":
                    break
                # skip the repeated section-header rows; real data rows have
                # more than 5 cells
                if len(row) > 5 and row[0] != "有價證券代號及名稱":
                    # first cell is "<id> <name>"; split it into the two parts
                    stock_info = unicode(row[0], "utf-8").split()
                    row_list = [
                        stock_info[0].encode("utf8"),
                        stock_info[1].encode("utf8"), row[2], row[4]
                    ]
                    writer.writerows([row_list])
        print('{} was download.'.format(twse_info_file))
    except Exception as e:
        msg = 'download_latest_twse_stock_info error, msg = {}'.format(e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
def get_stock_info_from_temp(file_type='all', return_dict=False):
    """Read cached stock-listing CSVs and return their rows.

    Args:
        file_type: 'twse', 'otc', or 'all' (concatenation of both).
        return_dict: when True each row becomes a dict, otherwise a list
            of [stock_no, company_name, date, business_type, market_type].

    Returns:
        A list of rows, or None for an unknown `file_type`. On a read
        error the rows collected so far are returned and the error is
        logged.
    """
    # 'all' is just the concatenation of the two markets
    if file_type == 'all':
        return (get_stock_info_from_temp('twse', return_dict) +
                get_stock_info_from_temp('otc', return_dict))

    if file_type == 'twse':
        info_file = twse_info_file
    elif file_type == 'otc':
        info_file = otc_info_file
    else:
        return None

    rows = []
    try:
        # for the two concrete markets the market tag equals the file type
        stock_type = file_type
        if not os.path.exists(info_file):
            # cache miss: fetch the listing for the requested market first
            if file_type == 'twse':
                download_latest_twse_stock_info()
            else:
                download_latest_otc_stock_info()
        # read csv
        with open(info_file) as f:
            reader = csv.reader(f)
            next(reader, None)  # drop the header row
            for record in reader:
                normalized_date = record[2].replace('/', '-')
                if return_dict:
                    rows.append({
                        "stock_no": record[0],
                        "company_name": record[1],
                        "date": normalized_date,
                        "business_type": record[3],
                        "market_type": stock_type
                    })
                else:
                    rows.append([
                        record[0], record[1], normalized_date, record[3],
                        stock_type
                    ])
    except Exception as e:
        msg = 'get_stock_info_from_temp error, msg = {}'.format(e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
    return rows
def get_balance_sheet_of_a_season_from_temp(stock_no='2330', year=2019, season=1):
    """Look up one cached balance-sheet record.

    Reads '<FS_BALANCE_SHEET_PATH>/<year>-<season>.json' (creating it
    empty when missing) and returns the entry stored under
    '<stock_no>-<year>-<season>'.

    Returns:
        The cached record dict, or None when absent or on any error
        (errors are printed and written to the fetch error log).
    """
    season_tag = '{}-{}'.format(year, season)
    record_key = '{}-{}'.format(stock_no, season_tag)
    try:
        cache_path = data_fetch.config.FS_BALANCE_SHEET_PATH + '/{}.json'.format(season_tag)
        lib.tool.init_json_file_if_not_exist(cache_path)
        cache = lib.tool.get_json_obj_from_temp_file(cache_path)
        return cache.get(record_key, None)
    except Exception as e:
        msg = 'get_balance_sheet_of_a_season_from_temp error, file = {}.json, msg = {}'.format(record_key, e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
        return None
def download_otc_month_revenue(date='201909'):
    """Download the OTC monthly-revenue xls for `date` (YYYYMM).

    Saves the response body as '<FS_MONTH_REVENUE_PATH>/<date>_otc.xls'.
    An HTML response is treated as a failed download (the site serves
    error pages as text/html). Errors are printed and logged; nothing is
    raised to the caller.
    """
    xls_file = data_fetch.config.FS_MONTH_REVENUE_PATH + '/{}_otc.xls'.format(date)
    try:
        url = data_fetch.config.OTC_MONTH_REVENUE_URL.format('O_' + date + '.xls')
        response = requests.get(url)
        # an html content type means we got an error page, not the xls
        if 'text/html' == response.headers['Content-Type']:
            raise Exception('File is not xls type')
        with open(xls_file, "wb") as out_file:
            out_file.write(response.content)
        print('{} was download.'.format(xls_file))
    except Exception as e:
        msg = 'download_otc_month_revenue error, date = {}, msg = {}'.format(date, e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
# NOTE(review): incomplete fragment -- these statements sit inside a loop
# that is not visible here (they use `continue`) and end with a dangling
# `try:` whose body is elsewhere. Indentation below is reconstructed and
# should be verified against the enclosing loop.
if market_price is None:
    print(
        "stock_no:{} ,date:{} ,market_price is None, try to loading again... "
        .format(stock_no, target_date))
    # refetch the day's data, then retry the price lookup
    fetch_state = insert_stock_data(stock_no, target_date)
    market_price = get_price(stock_no, target_date)
    print(fetch_state)
    if fetch_state and market_price is None:
        # fetched OK but no trade that day: fall back to the last closing price
        msg = "stock_no:{} ,date:{} , market_price is no deal , try to get last date closed price".format(
            stock_no, target_date)
        market_price = get_last_deal_date_price(stock_no)
        if market_price is None:
            # NOTE(review): this format string has two {} placeholders but
            # only one argument -- it will raise IndexError if ever reached.
            msg = "stock_no:{} ,date:{} , get last price error...\n".format(
                stock_no)
            print(msg)
            log.write_fetch_err_log(msg)
            # give up on this stock and move to the next loop item
            continue
    if fetch_state is False:
        # fetch failed outright: use the last known closing price
        market_price = get_last_deal_date_price(stock_no)
# a zero price is treated the same as "no deal": use the last closing price
if float(market_price) == 0:
    market_price = get_last_deal_date_price(stock_no)
if eps is None:
    # hard-coded year/season in the message -- presumably matches the
    # surrounding query; verify against the enclosing loop
    msg = "stock_no:{} ,year:2017 ,season:5 ,eps is None\n".format(
        stock_no)
    print(msg)
    log.write_fetch_err_log(msg)
    continue
try:
def get_balance_sheet_of_a_season_from_url(stock_no='2330', year=2019, season=1):
    # Fetch one season's balance sheet from the MOPS website and map the
    # table rows into a dict keyed by `dict_format` (module-level mapping).
    # Returns {} when nothing was parsed, None on a request/parse error.
    # NOTE(review): the encode()/replace() byte-string handling below is
    # Python 2 only; it raises TypeError on Python 3.
    stockno_year_season = "{}-{}-{}".format(stock_no, year, season)
    try:
        # MOPS expects the ROC (Minguo) year, i.e. western year minus 1911
        query_year = year
        if 1911 < query_year:
            query_year -= 1911
        header = data_fetch.config.HEADER
        url = data_fetch.config.MOPS_BALANCE_SHEET_URL
        # query form payload
        form_data = {
            'encodeURIComponent': '1',
            'step': '1',
            'firstin': '1',
            'off': '1',
            'keyword4': '',
            'code1': '',
            'TYPEK2': '',
            'checkbtn': '',
            'queryName': 'co_id',
            'inpuType': 'co_id',
            'TYPEK': 'all',
            'isnew': 'false',
            'co_id': stock_no,
            'year': query_year,
            'season': season
        }
        # Reference payloads (general-industry vs. financial/insurance
        # companies use different MOPS form parameters):
        """ # 一般產業股 (general industry) encodeURIComponent: 1, step: 1, firstin: 1, off: 1, keyword4: '', code1: '', TYPEK2: '', checkbtn: '', queryName: 'co_id', inpuType: 'co_id', TYPEK: 'all', isnew: 'false', co_id: 2330, year: 108, season': 01 # 金融保險業 (financial/insurance) encodeURIComponent:1 id: key: TYPEK:sii step:2 year:106 season:2 co_id:2884 firstin:1 """
        html = requests.post(url,
                             data=form_data,
                             headers=header,
                             timeout=data_fetch.config.TIMEOUT_SECONDS)
        soup = BeautifulSoup(html.content.decode("utf8"), "html.parser")
        # an <h3> in the response signals the MOPS "overload" page
        if soup.find("h3"):
            msg = "website is busy!"
            print(msg)
            raise Exception(msg)
        # the <h4><font> text carries the "no data" message when the season
        # has no filing (Chinese literal below: "no data found")
        h4 = soup.find("h4").find("font").text
        if u"查無所需資料!" == h4:
            msg = "website find no data for the season!"
            print(msg)
            # TODO: save no fs date to temp
            raise Exception(msg)
        table = soup.find("table", "hasBorder")
        all_row = table.find_all("tr")
    except Exception as e:
        msg = 'get_balance_sheet_of_a_season_from_url error, stockno_year_season = {}, msg = {}'.format(stockno_year_season, e.args)
        print(msg)
        log = data_fetch.log.Log()
        log.write_fetch_err_log(msg)
        return None
    data = []
    # rows 1-4 are table headers; keep only the first 3 cells of each data
    # row: account name, amount, percentage
    for i, tr in enumerate(all_row, 1):
        if i > 4:
            row = []
            for j, td in enumerate(tr.find_all("td"), 1):
                if j > 3:
                    break
                # strip thousands separators; full-width chars -> half-width
                # (Python 2 byte-string ops -- see NOTE above)
                col = td.text.strip().encode("utf8").replace(",", "")
                row.append(lib.tool.full_to_half(col))
            data.append(row)
    dict_data = {}
    for row in data:
        # NOTE(review): rows with fewer than 3 cells would raise IndexError
        # here, outside the try/except above -- presumably MOPS rows always
        # have 3 cells; verify.
        row[0] = row[0].encode('utf8', 'ignore')
        row[1] = row[1].encode('utf8', 'ignore')
        row[2] = row[2].encode('utf8', 'ignore')
        if row[0] in dict_format and '' != row[1]:
            # map the Chinese account name to the internal column key;
            # "<key>_p" holds the percentage column
            key = dict_format.get(row[0], None)
            key_p = key + "_p"
            # amounts are reported in thousands of NTD
            if '' != row[1] and key:
                dict_data[key] = int(row[1]) * 1000
            # percentages are reported as e.g. "12.34" meaning 12.34%
            if '' != row[2] and key_p:
                dict_data[key_p] = float(row[2]) / 100.0
    info = {'stock_no': stock_no, 'year': year, 'season': season}
    if dict_data:
        dict_data.update(info)
        # backfill any missing non-percentage columns with defaults
        dict_data = lib.tool.fill_default_value_if_column_not_exist(dict_format, dict_data, except_percent_columns)
    return dict_data