def get_all_news(stock_index, save_dir, end_year):
    """Fetch the news articles listed for one stock and save each as text.

    Listing pages 1..99 are requested in order; the first page that yields
    no article URLs ends the crawl.  Every article is saved through
    ``saveFile(...).news_report_save_dir()`` and ``content_to_txt``.

    Args:
        stock_index: Stock code handed to ``html_content`` and ``saveFile``
            (e.g. '000917').
        save_dir: Output directory; created up front via ``make_dir``.
        end_year: As soon as an article's date starts with this year
            (compared as a 4-character string) the whole process is
            terminated with ``sys.exit(0)``.
    """
    make_dir(save_dir)
    same_day_count = 1  # running index of articles sharing one date
    previous_date = ''
    for page_no in range(1, 100):
        page_urls = html_content(stock_index, page_no).find_url_content()
        if len(page_urls) == 0:
            # An empty listing page means there is nothing left to fetch.
            break
        for url in page_urls:
            # Each article gets at most 5.5 s.  With ``False`` as the second
            # argument eventlet suppresses the timeout, so a slow article is
            # skipped without raising (see the eventlet Timeout docs).
            with eventlet.Timeout(5.5, False):
                news_date, text_out = news_in_html_url(url)
                if news_date[0:4] == str(end_year):
                    sys.exit(0)
                # Consecutive articles with the same date get k = 1, 2, 3, ...
                same_day_count = (
                    same_day_count + 1 if news_date == previous_date else 1
                )
                target_file = saveFile(
                    stock_index=stock_index,
                    save_dir=save_dir,
                    k=same_day_count,
                    date=news_date,
                ).news_report_save_dir()
                content_to_txt(target_file, text_out)
                previous_date = news_date
        # Pause between listing pages.
        time.sleep(30)
def download_VG_many(cart_id, map_name, storing_name, number):
    """Download up to ``number`` daily cartoon images, starting with yesterday.

    For each of the last ``number`` days (yesterday, the day before, ...)
    the picture URL ``http://www.heltnormalt.no/img/<cart_id><YYYY>/<MM>/<DD>.jpg``
    is built and passed to ``functions.download_file``.  The loop stops at
    the first download for which ``functions.download_file`` returns 0.

    Args:
        cart_id: Cartoon identifier inserted verbatim into the URL right
            after ``img/``.
        map_name: Sub-directory name appended verbatim to
            ``../Downloaded_cartoons/``.
        storing_name: Prefix of every stored file name; it is appended
            verbatim to the directory string.
        number: Maximum number of days (pictures) to try.

    NOTE(review): directory and file name are concatenated without a path
    separator, so ``map_name`` presumably has to end with "/" for the files
    to land inside the created directory -- confirm against the callers.
    """
    # Specify where to store the files
    directory = "../Downloaded_cartoons/" + map_name
    # Checks whether the directory exists and creates it if it does not
    functions.make_dir(directory)

    today = date.today()
    # Count successes explicitly.  The loop variable cannot serve as the
    # count: it is unbound when ``number`` is 0 and one short when every
    # download succeeds.
    downloaded = 0
    for days_back in range(1, number + 1):
        # Make a new url
        date_ = str(today - timedelta(days_back))
        year_part, month_part, day_part = date_.split('-')
        picture_url = ''.join([
            "http://www.heltnormalt.no/img/", cart_id,
            year_part, "/", month_part, "/", day_part, ".jpg",
        ])

        # Make a new name for the file with timestamp
        tag = ''.join([directory, storing_name, "_", date_, "_VG", ".jpg"])

        # Download the picture; a return value of 0 ends the run
        if functions.download_file(picture_url, tag) == 0:
            break
        downloaded += 1

        if downloaded % 10 == 0:
            print("finished with picture # ", downloaded)

    print("*** Finished " + storing_name + ". Downloaded " + str(downloaded) + " pictures")
def do_download(stock_index, year1, season1):
    """Download one stock/year/season of history and store it as CSV.

    The data comes from ``download_data``; it is written only when the
    returned frame has more than one row.  The target directory is
    ``dir_day_history_wy/<stock_index>`` (created via ``make_dir``) and the
    file name comes from ``save_file_name``.

    Args:
        stock_index: Stock code; also used as the sub-directory name.
        year1: Year to download (converted with ``str`` for the file name).
        season1: Season to download (converted with ``str`` for the file
            name).

    Any ordinary error during download or save is reported on stdout and
    swallowed so a batch run keeps going.  ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not caught, so the run can still be
    stopped.
    """
    dir_name1 = os.path.join(dir_day_history_wy, stock_index)
    make_dir(dir_name1)
    try:
        df1 = download_data(stock_index, year1, season1)
        if df1.shape[0] > 1:
            file_name = save_file_name(dir_name1, stock_index, str(year1), str(season1))
            df1.to_csv(file_name, index=False)
    except Exception:
        # Best effort: report the failed stock and carry on.
        print(stock_index + " not download")
def get_dazong_stock_list(dazong_date="2020-02-20"):
    """Aggregate one day's block-trade file per stock and save the totals.

    Reads ``dazongjiaoyi_<dazong_date>.csv`` from the directory registered
    under ``data_dict["dazongjiaoyi"]``, sums ``TVAL`` per
    (``SECUCODE``, ``SNAME``) pair, left-pads ``SECUCODE`` with zeros to six
    characters, tags every row with ``trade_date`` and writes the columns
    ``SECUCODE``, ``trade_date`` and ``TVAL`` to
    ``<data_dict["tmp"]>/dazong_data/dazongjiaoyi_stock_list_<dazong_date>.csv``.

    Args:
        dazong_date: Date string used in both the input and output file
            names and stored in the ``trade_date`` column.

    NOTE(review): an earlier revision computed ``df2[df2['TVAL'] > 1000]``
    but never used it; all rows were (and still are) written.  If the
    output was meant to be limited to TVAL > 1000, apply that filter before
    ``to_csv``.
    """
    data_dir = data_dict.get("dazongjiaoyi")
    file_name = "dazongjiaoyi_%s.csv" % (dazong_date)
    df1 = pd.read_csv(os.path.join(data_dir, file_name))

    # Total traded value per stock.
    df2 = df1.groupby(["SECUCODE", "SNAME"])['TVAL'].sum().reset_index()
    # Normalise codes to six characters, padding with leading zeros.
    df2['SECUCODE'] = [str(code).zfill(6) for code in df2['SECUCODE'].tolist()]
    df2['trade_date'] = dazong_date

    save_dir = os.path.join(data_dict.get("tmp"), "dazong_data")
    make_dir(save_dir)
    save_file = os.path.join(
        save_dir, "dazongjiaoyi_stock_list_%s.csv" % (dazong_date)
    )
    df2[['SECUCODE', 'trade_date', 'TVAL']].to_csv(save_file, index=False)
def do_loop_download(stock_index):
    """Download every year/season of history for all stocks in the list.

    Iterates the module-level ``stk_index_list``, ``year`` and ``season``
    sequences.  Each frame returned by ``download_data`` that has more than
    one row is written to ``dir_day_history_wy/<stock>/`` under the name
    produced by ``save_file_name``.  There is a 5 second pause after every
    request.

    Args:
        stock_index: Ignored.  NOTE(review): the original loop variable
            shadowed this parameter, so the function has always processed
            the whole ``stk_index_list``; that behaviour is kept.  Confirm
            whether a single-stock download was intended.

    Ordinary errors for a single request are reported on stdout and
    skipped.  ``KeyboardInterrupt`` and ``SystemExit`` are not caught, so a
    long run can be stopped.
    """
    for current_index in stk_index_list:
        print(current_index)
        dir_name1 = os.path.join(dir_day_history_wy, current_index)
        make_dir(dir_name1)
        for year1 in year:
            for season1 in season:
                try:
                    df1 = download_data(current_index, year1, season1)
                    if df1.shape[0] > 1:
                        file_name = save_file_name(
                            dir_name1, current_index, str(year1), str(season1)
                        )
                        df1.to_csv(file_name, index=False)
                except Exception:
                    # Best effort: report which request failed and carry on.
                    print(" ".join(
                        [current_index, str(year1), str(season1), "not download"]
                    ))
                # Throttle between requests.
                time.sleep(5)
df3 = df2.groupby(["SNAME", "SECUCODE", "BUYERNAME", "Zyl", "PRICE"])['TVAL'].sum().reset_index() df_sname = df2.groupby(["SNAME", "SECUCODE"])['TVAL'].sum().reset_index() df4 = sort_data(df3, ["SNAME", "SECUCODE", "BUYERNAME", "Zyl", "PRICE", "TVAL"]) df5 = sort_data(df_sname, ["SNAME", "SECUCODE", "TVAL"]) df4['stock_date'] = now_date df5['stock_date'] = now_date #df4 = df3.sort_values("TVAL",ascending=False)[["SNAME","SECUCODE","BUYERNAME","Zyl","PRICE","TVAL"]] #df4['SECUCODE'] = [str(x).zfill(6) for x in df4['SECUCODE'].tolist()] print(df4.head(30)) print(df5.head(30)) return df4, df5 if __name__ == "__main__": now_date, now_date_time = get_the_datetime() now_date = now_date.replace("_", "-") now_date = "2020-08-03" data_dir = data_dict.get("dazongjiaoyi") file_in = "dazongjiaoyi_%s.csv" % (now_date) df1 = pd.read_csv(os.path.join(data_dir, file_in)) report_dir = data_dict.get("daily_report") save_dir = os.path.join(report_dir, now_date) make_dir(save_dir) df4, df5 = zyl_postive(df1, now_date) out_file = "dazongjiaoyi_report_%s.csv" % (now_date) file_name = os.path.join(save_dir, out_file) df4.round(2).to_csv(file_name, index=0)
def save_to_daily_report(self):
    """Create and return this instance's daily report directory.

    The path is ``<data_dict["daily_report"]>/<self.now_date>``; it is
    passed to ``make_dir`` before being returned.
    """
    daily_dir = os.path.join(data_dict.get("daily_report"), self.now_date)
    make_dir(daily_dir)
    return daily_dir
df1['stock_index'] = stock_index return df1 def save_file_name(dir_name,stock_index,year,season): file_name1 = '_'.join([stock_index,year,season]) file_name = os.path.join(dir_name,file_name1)+'.csv' return file_name dir_day_history_wy = data_dict.get("day_history_wangyi") stk_index_list = stk_index_list_gen() year = [str(x) for x in range(1998,2020)] season = [str(x) for x in [1,2,3,4]] for stock_index in stk_index_list: print(stock_index) dir_name1 = os.path.join(dir_day_history_wy,stock_index) make_dir(dir_name1) for year1 in year: for season1 in season: try: df1 = download_data(stock_index,year1,season1) if df1.shape[0]>1: file_name = save_file_name(dir_name,stock_index,year,season) #print(file_name) df1.to_csv(file_name,index=0) except: pass time.sleep(3) ''' stock_index = '601398' year = 1880 season = 2