Example #1
0
def main(input_index):
    """Download the daily history of one stock, selected by list position.

    ``input_index`` is converted to an integer and used as a position in the
    sequence returned by ``stk_index_list_gen()``. The selected entry is
    turned into a download symbol by ``generate_stock_index`` and handed to
    ``run_download`` together with ``start_date``, ``end_date`` and the
    directory configured under ``"day_history_insert"``.

    Args:
        input_index: Position of the stock in the generated index list; any
            value for which ``int(str(input_index))`` succeeds.

    Raises:
        ValueError: If ``input_index`` cannot be converted to an integer.

    A failing download is reported on stdout and not re-raised. An
    out-of-range position is not caught and propagates to the caller.
    """
    from dir_control.data_dir_v1 import data_dict

    dir_day_history_insert = data_dict.get("day_history_insert")
    # Enable the next line to write into a local folder while testing.
    # dir_day_history_insert = "./csv/"

    stk_index_list = stk_index_list_gen()
    i = stk_index_list[int(str(input_index))]
    stock_index = generate_stock_index(i)

    try:
        # NOTE(review): start_date / end_date are not assigned inside this
        # function; presumably they are module-level names -- confirm.
        run_download(stock_index, start_date, end_date, dir_day_history_insert)
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit still stop the program.
        print("the stock index cannot be download " + str(stock_index))
        # Show the cause as well; otherwise programming errors such as a
        # NameError are indistinguishable from a failed download.
        print("    reason: " + repr(exc))
Example #2
0
def copy_data_to_current_folder():
    """Concatenate every ``*.csv`` of the day-history folder into ``all.csv``.

    The folder is read from ``data_dict["day_history_insert"]``. The files are
    joined by running ``cat <dir>/*.csv > all.csv`` through the shell, so
    ``all.csv`` is created in (or overwritten in) the current working
    directory. The command line is printed before it is executed.
    """
    import os
    import shlex

    from dir_control.data_dir_v1 import data_dict

    dir_day_history_insert = data_dict.get("day_history_insert")
    # dir_day_history_insert = "./csv"

    # Expand ``~`` and ``$VAR`` here because quoting below stops the shell
    # from doing it.
    directory = os.path.expanduser(os.path.expandvars(str(dir_day_history_insert)))
    # Quote only the directory part so that paths containing spaces or shell
    # metacharacters survive; ``/*.csv`` stays outside the quotes so the shell
    # still expands the wildcard.
    os_str = "cat %s/*.csv > all.csv" % shlex.quote(directory)
    print(os_str)
    os.system(os_str)
Example #3
0
def main(stock_index):
    """Run ``process`` for one stock against the ``liutong_owner`` directory.

    Args:
        stock_index: Stock identifier, forwarded unchanged to ``process``.

    Any ``Exception`` raised by ``process`` is reported on stdout as
    ``"<stock_index>  not download"`` and not re-raised.
    """
    from dir_control.data_dir_v1 import data_dict

    dir_liutong_owner = data_dict.get("liutong_owner")
    try:
        process(stock_index, dir_liutong_owner)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit still stop the program.
        # str(): a non-string identifier would otherwise raise TypeError
        # inside the handler and replace this message with a traceback.
        print(str(stock_index) + '  not download')
Example #4
0
def main(stock_index):
    """Run ``process`` for one stock against the ``liutong_owner`` directory.

    Args:
        stock_index: Stock identifier, forwarded unchanged to ``process``.

    Any ``Exception`` raised by ``process`` is handled by printing the
    identifier that failed; nothing is re-raised.
    """
    from dir_control.data_dir_v1 import data_dict

    dir_liutong_owner = data_dict.get("liutong_owner")
    # dir_liutong_owner = data_dict.get("tmp")
    try:
        process(stock_index, dir_liutong_owner)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit still stop the program.
        print(stock_index)
def get_text(key):
    """Extract the text of the page registered under ``key`` in ``stock_news``.

    The value stored for ``key`` is passed through ``cx.getHtml``,
    ``cx.filter_tags`` and ``cx.getText`` in that order, and a file named
    ``<key>_<YYYY-MM-DD>.txt`` (local date) is opened for writing inside the
    directory configured under ``"all_news"``.

    NOTE(review): the visible body ends right after ``open``; the extracted
    text ``s`` is never written and the handle ``f`` is never closed here.
    The snippet looks truncated -- confirm against the full source.
    """
    # Value looked up in stock_news for this key, handed to cx.getHtml.
    raw_html = stock_news.get(key)
    html = cx.getHtml(raw_html)
    content = cx.filter_tags(html)
    s = cx.getText(content)
    #print(s)
    # save dir
    dir_all_news=data_dict.get("all_news")
    # One file per key and calendar day: "<key>_<YYYY-MM-DD>.txt".
    today=time.strftime("%Y-%m-%d", time.localtime())
    file_name = key+"_"+today+".txt"
    files=os.path.join(dir_all_news,file_name)
    #print(files)
    # 'w+' creates the file, or truncates it if it already exists.
    f=open(files,'w+')
Example #6
0
def main():
    """Download daily history for every listed stock, then merge the CSVs.

    For each entry of ``stk_index_list`` (entries whose zero-padded code
    starts with ``'3'`` are skipped) a download symbol is built:

    * codes shorter than six characters, or starting with ``'0'``, are
      zero-padded to six characters and get the suffix ``.sz``;
    * codes starting with ``'6'`` get the suffix ``.ss``;
    * any other code is not downloaded.

    Each symbol is passed to ``run_download`` with a fixed start date of
    2009-01-01, today's date as end date, and the directory configured under
    ``"day_history_insert"``. A failing download prints the symbol that
    failed and the loop continues. The loop pauses two seconds per entry.
    Finally all ``*.csv`` files of that directory are concatenated into
    ``all.csv`` in the current working directory via the shell.
    """
    import datetime
    import os
    import shlex
    import sys
    import time

    # Must run before the project import below so ``dir_control`` resolves.
    sys.path.append("../..")
    from dir_control.data_dir_v1 import data_dict, stk_index_list

    dir_day_history_insert = data_dict.get("day_history_insert")
    start_date = datetime.datetime(2009, 1, 1)
    end_date = datetime.date.today()

    ## remove '300'
    codes = [x for x in stk_index_list if str(x).zfill(6)[0] != '3']
    for i in codes:
        raw = str(i)
        if len(raw) < 6 or raw[0] == '0':
            # Short codes (e.g. integers that lost their leading zeros) and
            # already zero-padded strings both end up as "0xxxxx.sz".
            stock_index = raw.zfill(6) + '.sz'
        elif raw[0] == '6':
            stock_index = raw + '.ss'
        else:
            stock_index = None

        if stock_index is not None:
            try:
                run_download(stock_index, start_date, end_date, dir_day_history_insert)
            except Exception:
                # Report the symbol that actually failed (".ss" failures
                # used to be printed with a ".sz" suffix).
                print(stock_index)
        # Throttle between entries.
        time.sleep(2)

    ## combine all data
    # Expand ``~`` and ``$VAR`` here because quoting below stops the shell
    # from doing it; quote only the directory so the wildcard still expands.
    directory = os.path.expanduser(os.path.expandvars(str(dir_day_history_insert)))
    os_str = "cat %s/*.csv > all.csv" % shlex.quote(directory)
    print(os_str)
    os.system(os_str)
# Source name -> page address; get_text() looks entries up by key.
stock_news = {
    "sina": "https://finance.sina.com.cn/stock/",
    # NOTE(review): same address as "sina" -- looks like a copy-paste slip;
    # confirm the intended URL for this source.
    "dfcf": "https://finance.sina.com.cn/stock/",
    "ifeng": "http://finance.ifeng.com/",
    "ftchinese": "http://www.ftchinese.com/channel/economy.html",
    "nature_researchAnalysis": "https://www.nature.com/research-analysis",
    "zdnet": "https://www.zdnet.com/",
}



# Module-level extractor instance; the meaning of threshold=86 is defined by
# CxExtractor and is not visible in this file section.
cx = CxExtractor(threshold=86)
# html = cx.getHtml("http://www.bbc.com/news/world-europe-40885324")
# Directory configured under the "all_news" key of the project's data_dict.
dir_all_news=data_dict.get("all_news")
# Runs whenever this module is executed or imported: echoes the directory.
print(dir_all_news)

def get_text(key):
    raw_html = stock_news.get(key)
    html = cx.getHtml(raw_html)
    content = cx.filter_tags(html)
    s = cx.getText(content)
    #print(s)
    # save dir
    dir_all_news=data_dict.get("all_news")
    today=time.strftime("%Y-%m-%d", time.localtime())
    file_name = key+"_"+today+".txt"
    files=os.path.join(dir_all_news,file_name)
    #print(files)
    f=open(files,'w+')