def main(input_index):
    """Download the day history of the stock at position ``input_index``.

    The stock list comes from ``stk_index_list_gen()``; the selected entry is
    converted with ``generate_stock_index()`` and handed to ``run_download()``
    together with ``start_date`` / ``end_date`` and the configured
    ``day_history_insert`` directory.

    NOTE(review): ``start_date``, ``end_date``, ``stk_index_list_gen``,
    ``generate_stock_index`` and ``run_download`` are not defined in this
    function and are expected to exist at module level -- confirm.

    Args:
        input_index: Position in the generated stock list. Any value whose
            ``str()`` form parses as an integer is accepted (for example a
            command-line argument).

    Raises:
        ValueError: If ``input_index`` does not parse as an integer.
        IndexError: If the position is outside the generated list
            (assuming ``stk_index_list_gen()`` returns a sequence).
    """
    from dir_control.data_dir_v1 import data_dict

    dir_day_history_insert = data_dict.get("day_history_insert")

    # Keep the str() round trip so the accepted inputs stay exactly the same
    # as before (e.g. a float such as 3.7 is still rejected).
    position = int(str(input_index))
    stock_code = stk_index_list_gen()[position]
    stock_index = generate_stock_index(stock_code)

    try:
        run_download(stock_index, start_date, end_date, dir_day_history_insert)
    except Exception:
        # Best effort: report the failed symbol and carry on.  Catching
        # Exception instead of using a bare ``except`` lets Ctrl-C and
        # SystemExit stop the run.
        print("the stock index cannot be download " + str(stock_index))
def copy_data_to_current_folder():
    """Concatenate the downloaded CSV files into ``all.csv``.

    Builds a shell ``cat`` command over every ``*.csv`` file in the
    configured ``day_history_insert`` directory, redirects the output to
    ``all.csv`` in the current working directory, prints the command and
    runs it through ``os.system``.
    """
    from dir_control.data_dir_v1 import data_dict

    source_dir = data_dict.get("day_history_insert")

    # The glob and the redirection are expanded by the shell, not by Python.
    command = "cat {}/*.csv > all.csv".format(source_dir)
    print(command)
    os.system(command)
def main(stock_index):
    """Run ``process`` for one stock, writing into the ``liutong_owner`` dir.

    Failures are reported on stdout and do not propagate, so a batch driver
    can continue with the next stock.

    Args:
        stock_index: Identifier of the stock, passed to ``process()``
            unchanged.
    """
    from dir_control.data_dir_v1 import data_dict

    dir_liutong_owner = data_dict.get("liutong_owner")
    try:
        process(stock_index, dir_liutong_owner)
    except Exception:
        # Catching Exception instead of using a bare ``except`` lets Ctrl-C
        # and SystemExit stop the run.  str() keeps the report itself from
        # raising TypeError when the index is not a string.
        print(str(stock_index) + ' not download')
def main(stock_index):
    """Run ``process`` for one stock, writing into the ``liutong_owner`` dir.

    When processing fails, the stock index is printed and the error is not
    propagated, so a batch driver can continue with the next stock.

    Args:
        stock_index: Identifier of the stock, passed to ``process()``
            unchanged.
    """
    from dir_control.data_dir_v1 import data_dict

    dir_liutong_owner = data_dict.get("liutong_owner")
    try:
        process(stock_index, dir_liutong_owner)
    except Exception:
        # Catching Exception instead of using a bare ``except`` lets Ctrl-C
        # and SystemExit stop the run.
        print(stock_index)
def get_text(key):
    """Fetch the page registered under ``key`` and prepare its output file.

    The URL is looked up in ``stock_news``, downloaded with ``cx.getHtml``,
    stripped with ``cx.filter_tags`` and reduced to text with ``cx.getText``.
    The output file ``<key>_<YYYY-MM-DD>.txt`` is then created (or truncated)
    inside the configured ``all_news`` directory.

    NOTE(review): in the code visible here the extracted text is never
    written to the file -- the function may be incomplete.  Confirm the
    intended write before relying on the file contents.

    Args:
        key: Name of an entry in ``stock_news``; it is also used as the
            file-name prefix.
    """
    url = stock_news.get(key)
    html = cx.getHtml(url)
    content = cx.filter_tags(html)
    s = cx.getText(content)  # extracted text; currently unused (see note)

    # Output location: <all_news dir>/<key>_<today>.txt
    dir_all_news = data_dict.get("all_news")
    today = time.strftime("%Y-%m-%d", time.localtime())
    files = os.path.join(dir_all_news, key + "_" + today + ".txt")

    # 'w+' creates/truncates the file exactly as before; the context manager
    # only guarantees that the handle is closed.
    with open(files, 'w+'):
        pass
def main():
    """Download day history for every listed stock and merge the CSV files.

    Codes whose zero-padded six-digit form starts with ``'3'`` are dropped.
    Codes shorter than six characters are zero-padded and given the ``.sz``
    suffix; six-character codes starting with ``'6'`` get ``.ss``; any other
    code is skipped.  Each symbol is downloaded with ``run_download`` for the
    range 2009-01-01 .. today into the ``day_history_insert`` directory, and
    finally all CSV files there are concatenated into ``./all.csv`` with a
    shell ``cat``.

    A failed download is reported by printing the symbol; the loop then
    continues with the next code.
    """
    import os
    import time
    import datetime

    # The project package is resolved relative to the working directory, so
    # the path must be extended before the project import below.
    sys.path.append("../..")
    from dir_control.data_dir_v1 import data_dict, stk_index_list

    dir_day_history_insert = data_dict.get("day_history_insert")
    start_date = datetime.datetime(2009, 1, 1)
    end_date = datetime.date.today()

    # Drop every code whose six-digit form starts with '3'.
    codes = [x for x in stk_index_list if str(x).zfill(6)[0] != '3']

    for code in codes:
        text = str(code)
        if len(text) < 6:
            stock_index = text.zfill(6) + '.sz'
        elif text[0] == '6':
            stock_index = text + '.ss'
        else:
            stock_index = None  # neither rule matches: nothing to download

        if stock_index is not None:
            try:
                run_download(stock_index, start_date, end_date,
                             dir_day_history_insert)
            except Exception:
                # Report the symbol that actually failed (previously a '.sz'
                # suffix was printed even for '.ss' symbols).  Catching
                # Exception rather than everything lets Ctrl-C stop the loop.
                print(stock_index)

        # NOTE(review): the original layout was lost; this pause is assumed
        # to run once per loop iteration -- confirm.
        time.sleep(2)

    # Combine all per-stock CSV files into one; the glob and redirection are
    # expanded by the shell.
    os_str = "cat %s/*.csv > all.csv" % (dir_day_history_insert)
    print(os_str)
    os.system(os_str)
# Pages to fetch, keyed by a short source name (also used as file prefix).
stock_news = {
    'sina': "https://finance.sina.com.cn/stock/",
    # NOTE(review): "dfcf" uses the same URL as "sina" -- possibly a
    # copy-paste leftover; confirm the intended address.
    "dfcf": "https://finance.sina.com.cn/stock/",
    "ifeng": "http://finance.ifeng.com/",
    "ftchinese": "http://www.ftchinese.com/channel/economy.html",
    "nature_researchAnalysis": "https://www.nature.com/research-analysis",
    "zdnet": "https://www.zdnet.com/",
}

cx = CxExtractor(threshold=86)

dir_all_news = data_dict.get("all_news")
print(dir_all_news)


def get_text(key):
    """Fetch the page registered under ``key`` and prepare its output file.

    The URL is looked up in ``stock_news``, downloaded with ``cx.getHtml``,
    stripped with ``cx.filter_tags`` and reduced to text with ``cx.getText``.
    The output file ``<key>_<YYYY-MM-DD>.txt`` is then created (or truncated)
    inside the configured ``all_news`` directory.

    NOTE(review): in the code visible here the extracted text is never
    written to the file -- the function may be incomplete.  Confirm the
    intended write before relying on the file contents.

    Args:
        key: Name of an entry in ``stock_news``; it is also used as the
            file-name prefix.
    """
    url = stock_news.get(key)
    html = cx.getHtml(url)
    content = cx.filter_tags(html)
    s = cx.getText(content)  # extracted text; currently unused (see note)

    # Output location: <all_news dir>/<key>_<today>.txt
    dir_all_news = data_dict.get("all_news")
    today = time.strftime("%Y-%m-%d", time.localtime())
    files = os.path.join(dir_all_news, key + "_" + today + ".txt")

    # 'w+' creates/truncates the file exactly as before; the context manager
    # only guarantees that the handle is closed.
    with open(files, 'w+'):
        pass