def download_financialzip_fromtdx():
    """Fetch missing or outdated financial archives through the TDX crawler.

    Creates the download folder (``download_path``) if it does not exist yet.
    For every ``(filename, md5)`` pair returned by ``get_filename()``, the
    file is skipped when a local copy with a matching MD5 is already present;
    otherwise it is fetched with ``HistoryFinancialCrawler.fetch_and_parse``
    and stored in the download folder.

    Returns:
        list: names of the files that were downloaded or refreshed by this
        call (files that were already up to date are not included).
    """
    # The docstring has always promised this folder; make sure it is there
    # before anything tries to read from or write into it.
    os.makedirs(download_path, exist_ok=True)

    res = []
    for item, md5 in get_filename():
        # Build the local path once so the MD5 check and the download
        # target can never disagree about where the file lives.
        local_file = os.path.join(download_path, item)

        if os.path.isfile(local_file) and md5 == QA_util_file_md5(local_file):
            print('FILE {} is already in {}'.format(item, download_path))
            continue

        print('CURRENTLY GET/UPDATE {}'.format(item[0:12]))
        datacrawler = HistoryFinancialCrawler()
        datacrawler.fetch_and_parse(
            reporthook=None,
            filename=item,
            path_to_download=local_file
        )
        res.append(item)
    return res
def download_financialzip():
    """Fetch missing or outdated financial archives over HTTP.

    Creates the download folder (``download_path``) if it does not exist yet.
    For every ``(filename, md5)`` pair returned by ``get_filename()``, the
    file is skipped when a local copy with a matching MD5 is already present;
    otherwise it is requested from ``http://data.yutiansut.com/`` and written
    into the download folder.

    Returns:
        list: names of the files that were downloaded or refreshed by this
        call (files that were already up to date are not included).

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never stored as if it were the archive.
    """
    # The docstring has always promised this folder; make sure it is there
    # before anything tries to read from or write into it.
    os.makedirs(download_path, exist_ok=True)

    res = []
    for item, md5 in get_filename():
        # One path for both the MD5 check and the write target.
        local_file = os.path.join(download_path, item)

        if os.path.isfile(local_file) and md5 == QA_util_file_md5(local_file):
            print('FILE {} is already in {}'.format(item, download_path))
            continue

        print('CURRENTLY GET/UPDATE {}'.format(item[0:12]))
        r = requests.get('http://data.yutiansut.com/{}'.format(item))
        # Without this check a 404/500 body would be saved under the
        # archive's name and reported as a successful download.
        r.raise_for_status()

        with open(local_file, "wb") as code:
            code.write(r.content)
        res.append(item)
    return res
def QA_fecth_local_financial_report_cn(code, report_type):
    """Load one locally stored financial report of a stock as a DataFrame.

    The report is read from ``report_path[report_type] + code + '.csv'``,
    a tab-separated, GB2312-encoded file without a header row.

    Args:
        code: stock code; used in the file name and stored in the ``code``
            column of the result.
        report_type: key into ``report_path`` selecting the report folder.

    Returns:
        ``None`` (after printing a message) when the file yields no rows.
        Otherwise a DataFrame, transposed so that the values of the file's
        first column become the column names, with:

        * rows whose ``报表日期`` contains ``19700101`` or is missing removed,
        * the remaining rows in reversed order,
        * extra columns ``code``, ``更新日期`` (date of the file's
          modification time) and ``hash_md5`` (MD5 of the file),
        * ``报表日期`` parsed to datetime as the index, while the column of
          the same name keeps the ``YYYY-MM-DD`` text form.
    """
    date_col = '报表日期'

    def _dash_date(compact):
        # 'YYYYMMDD' -> 'YYYY-MM-DD'
        return '{}-{}-{}'.format(compact[:4], compact[4:6], compact[6:])

    report_file = report_path[report_type] + code + '.csv'
    raw = pd.read_csv(report_file, sep='\t', encoding='GB2312', header=None)
    if len(raw) == 0:
        print("{} report of stock {} cannot be loaded".format(report_type, code))
        return None

    # Column 0 carries the item names; transposing turns them into columns
    # and every remaining file column into one row.
    report = raw.set_index(0).T

    # Comparing against False (rather than negating) also discards rows
    # where the date is NaN, i.e. the trailing all-NA row.
    keep = report[date_col].str.contains('19700101').eq(False)
    report = report[keep].iloc[::-1]

    report[date_col] = report[date_col].apply(_dash_date)
    report['code'] = code

    modified = datetime.datetime.fromtimestamp(os.path.getmtime(report_file))
    report['更新日期'] = modified.date().isoformat()
    report['hash_md5'] = QA_util_file_md5(report_file)

    # Index by the parsed date for later lookups and date comparisons, but
    # keep the column itself as plain 'YYYY-MM-DD' text.
    report[date_col] = pd.to_datetime(report[date_col])
    report = report.set_index(date_col, drop=False)
    report[date_col] = report[date_col].apply(lambda stamp: str(stamp)[:10])
    return report