def test_invalid_args(self):
    with pytest.raises(ValueError):
        mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5, 123456), 'foo', 'ms')
    with pytest.raises(ValueError):
        mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5, 123456), 'ext', 'foo')
    with pytest.raises(ValueError):
        mod.dt_to_iso8601('foo', 'ext', 'ms')

def create_stock_csv_file(df, category, num=10):
    print("Creating initial stock data - " + category)
    start_date = dtdt(1980, 1, 1)
    for i in range(num):
        # Fetch each listed stock item in order.
        try:
            code = str(df.iloc[i]['종목코드']).zfill(6)
        except (KeyError, IndexError):
            print("ERROR - Unregistered stock!")
            continue
        if check_csv_file(category, code):
            print("Already exists: " + category + '_' + code + '.csv')
            continue
        stock_item = str(code).zfill(6)
        print(category, stock_item)
        start_idx_dates = pd.date_range(start_date.strftime('%Y/%m/%d'),
                                        periods=1)
        init_data = np.empty((1, len(columns)))
        init_data[:] = np.nan  # Initialize with NaN values.
        init_df = DataFrame(init_data, index=start_idx_dates, columns=columns)
        get_df = get_info_with_web_scrap(stock_item, init_df, start_idx_dates)
        get_df = get_df.dropna().sort_index()
        get_df.to_csv('./data/csv/' + category + '_' + stock_item + '.csv')
    print("Finished creating initial stock data!")

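# A minimal sketch of the check_csv_file() helper used above (and in
# update_stock_data() below), assuming it only tests whether a per-stock CSV
# already exists under the ./data/csv/<category>_<code>.csv naming convention
# seen in this file; the real helper may differ.
import os

def check_csv_file(category, code):
    # True if the stock's CSV file has already been created.
    return os.path.isfile('./data/csv/' + category + '_' + code + '.csv')
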
def fomcdates():
    ############################################################################
    # Grabs the FOMC meeting dates
    # Description: The FOMC releases a statement after its meetings
    # (historically only when a rate decision was made) and posts its meeting
    # materials on its website. This function parses the calendar pages for
    # links to the minutes and extracts the meeting dates from them.
    # TODO: We should probably be using the meeting dates displayed on the
    # webpage rather than the minutes links, but this seems to work.
    # Also need to check that statements are always released at 2:30 pm.
    ############################################################################
    # Grab the non-historical meetings first
    print('Reading FOMC meeting dates')
    url = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
    raw = urllib.request.urlopen(url).read()
    datesRaw = re.findall('monetarypolicy/fomcminutes[0-9]{8}.htm', str(raw))
    datesStr = [re.findall('[0-9]{8}', dd)[0] for dd in datesRaw]
    dates = [dtdt.strptime(dd, '%Y%m%d') for dd in datesStr]

    # Code to get historical meetings
    # Get years first
    url = 'https://www.federalreserve.gov/monetarypolicy/fomc_historical_year.htm'
    start = 1965
    end = min(dates).year
    # The historical data has a separate page for each year. Loop through them.
    for year in range(start, end):
        print('Reading FOMC meeting dates for ' + str(year))
        url = 'https://www.federalreserve.gov/monetarypolicy/fomchistorical' + \
            str(year) + '.htm'
        raw = urllib.request.urlopen(url).read()
        datesRaw = re.findall('monetarypolicy/files/FOMC[0-9]{8}Agenda.pdf',
                              str(raw))
        datesStr.extend([re.findall('[0-9]{8}', dd)[0] for dd in datesRaw])

    dates = [dtdt.strptime(dd, '%Y%m%d') for dd in datesStr]
    dates.sort()
    releasedays = [x.day for x in dates]
    releasemonths = [x.month for x in dates]
    releaseyears = [x.year for x in dates]
    coveredday = [(x - dtdt(x.year, 1, 1)).days + 1 for x in dates]
    return pd.DataFrame({
        'release': 'FOMC meeting',
        'releaseyear': releaseyears,
        'releasemonth': releasemonths,
        'releaseday': releasedays,
        'releasehour': 14,
        'releaseminute': 30,
        'coveredyear': releaseyears,
        'coveredperiod': coveredday,
        'freq': 365
    })

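# Usage sketch for fomcdates(). The imports below are what the function needs
# and are an assumption about the enclosing module's header; running it also
# requires network access to federalreserve.gov.
#
#   import re
#   import urllib.request
#   import pandas as pd
#   from datetime import datetime as dtdt
#
#   fomc = fomcdates()
#   print(fomc.tail())
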
def movepath(path):
    # Extract the six date components (year, month, day, hour, minute,
    # second) from the screenshot path via the cvtpath pattern.
    flag = re.search(cvtpath, path)
    if flag is not None and len(flag.groups()) == 6:
        date = flag.groups()
        date = [int(x) for x in date]
        day = dtdt(*date).strftime("%a")
        # Minutes since midnight, used to locate the class period (jigen).
        dateemb = date[3] * 60 + date[4]
        jigen = None
        for key in jikanwariemb.keys():
            if between(dateemb, *jikanwariemb[key]):
                jigen = key
                break
        # Move the file into the directory for this period/weekday cell,
        # unless that cell is flagged in the target table.
        if jigen is not None and not target.loc[jigen, day]:
            dirpath = os.path.join(savedirname, jikanwari.loc[jigen, day])
            os.makedirs(dirpath, exist_ok=True)
            if not test:
                shutil.move(path, dirpath)

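# A minimal sketch of the between() helper assumed by movepath(), treating
# each jikanwariemb entry as a (start, end) pair of minutes since midnight.
# This is an assumption; the real helper may use exclusive bounds.
def between(value, start, end):
    # True if value falls inside the [start, end] range.
    return start <= value <= end
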
def main():
    df = blsjobsdays()
    df = df.append(getBLScalendars(), ignore_index=True)
    # Some data is pulled twice, so drop the duplicates (and reset the index
    # so the positional .loc lookups below still work).
    df = df.drop_duplicates().reset_index(drop=True)
    #df = df.append(fomcdates(), ignore_index=True)
    #df = df.append(minutes_dates(), ignore_index=True)
    df['releasedate'] = pd.Series([
        dtdt(df.loc[i, 'releaseyear'], df.loc[i, 'releasemonth'],
             df.loc[i, 'releaseday'], df.loc[i, 'releasehour'],
             df.loc[i, 'releaseminute']) for i in range(len(df))
    ])
    df = df[[
        'releasedate', 'release', 'releaseyear', 'releasemonth', 'releaseday',
        'releasehour', 'releaseminute', 'coveredyear', 'coveredperiod', 'freq'
    ]]
    df = df.set_index(['releasedate', 'release'])
    df.sort_index(inplace=True)
    df.to_csv('/app/output/bls_release_dates.csv')
    return df

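# Usage sketch: main() writes /app/output/bls_release_dates.csv and returns
# the release-calendar DataFrame. A conventional entry point would be:
#
#   if __name__ == '__main__':
#       main()
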
def reset_jikanwari():
    # Default class periods (jigen) and their time ranges.
    jigen = {
        1: "08:30~10:25",
        2: "10:25~12:10",
        3: "13:00~14:55",
        4: "14:55~16:50",
        5: "16:50~18:45",
        6: "18:45~20:30",
    }
    jigen = pd.DataFrame(jigen, index=["jigen"]).T
    # 2020-06-22 is a Monday, so this yields the weekday names Mon..Sun.
    dirname = []
    for i in range(22, 29):
        dirname.append(dtdt(2020, 6, i).strftime("%a"))
    # Default timetable: one directory name per (period, weekday) cell.
    jikanwari = [[d + str(i) for d in dirname] for i in range(1, 7)]
    jikanwari = pd.DataFrame(jikanwari, columns=dirname,
                             index=list(range(1, 7)))
    home = expanduser("~")
    base = os.path.join(home, ".myscreenshot")
    os.makedirs(base, exist_ok=True)
    jikanwari_path = os.path.join(base, "jikanwari.csv")
    pd.concat([jigen, jikanwari], axis=1).to_csv(jikanwari_path)

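# A hypothetical loader for the timetable written above, e.g. to build the
# jikanwari table that movepath() consults. The index_col=0 assumption
# matches the DataFrame layout written by reset_jikanwari().
def load_jikanwari(jikanwari_path):
    return pd.read_csv(jikanwari_path, index_col=0)
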
def test_ext_ms(self):
    assert mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5, 123456),
                             'ext', 'ms') == '2016-01-02T03:04:05.123456'

def test_ext_second(self):
    assert mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5),
                             'ext', 'second') == '2016-01-02T03:04:05'

def test_basic_ms(self):
    assert mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5, 123456),
                             'basic', 'ms') == '20160102T030405123456'

def test_basic_second(self):
    assert mod.dt_to_iso8601(dtdt(2016, 1, 2, 3, 4, 5),
                             'basic', 'second') == '20160102T030405'

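# A minimal sketch of the dt_to_iso8601() function these tests pin down; the
# real mod.dt_to_iso8601 may differ. Note that, per the expected strings
# above, the 'ms' precision actually emits the full microsecond field.
from datetime import datetime as dtdt

def dt_to_iso8601(dt_value, fmt, precision):
    formats = {
        ('ext', 'second'): '%Y-%m-%dT%H:%M:%S',
        ('ext', 'ms'): '%Y-%m-%dT%H:%M:%S.%f',
        ('basic', 'second'): '%Y%m%dT%H%M%S',
        ('basic', 'ms'): '%Y%m%dT%H%M%S%f',
    }
    # Reject non-datetime values and unknown format/precision flags.
    if not isinstance(dt_value, dtdt) or (fmt, precision) not in formats:
        raise ValueError('unsupported value, format, or precision')
    return dt_value.strftime(formats[(fmt, precision)])
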
def update_stock_data(item, kospi, kosdaq):
    # Look the company name up in the KOSPI listing first, then KOSDAQ.
    try:
        code = kospi[kospi['기업명'] == item]['종목코드'].values[0]
        category = 'KRX'
    except IndexError:
        try:
            code = kosdaq[kosdaq['기업명'] == item]['종목코드'].values[0]
            category = 'KOSDAQ'
        except IndexError:
            sys.exit("ERROR - Unregistered stock!")
    stock_item = str(code).zfill(6)
    print(category, item, stock_item)

    # Check for an existing CSV file for this stock.
    file_path = './data/csv/'
    file_name = category + '_' + stock_item + '.csv'
    csv_file = True
    if check_csv_file(category, stock_item):
        print("Existing csv file: " + file_name)
        try:
            df = pd.read_csv(file_path + file_name, index_col='Unnamed: 0')
        except ValueError:
            df = pd.read_csv(file_path + file_name, index_col='Date')
        df = df.drop(df[-10:].index)  # TEST1
        #df = df.drop(['2016.12.06', '2016.12.07', '2016.12.08'])  # TEST2
    else:
        df = DataFrame(default_data, columns=columns, index=['1980.1.1'])
        csv_file = False
    df.index.name = 'Date'

    # Compare the CSV's last date with the current date to find the gap.
    d = [int(x) for x in df.index[-1].split('.')]
    end_date = dtdt(d[0], d[1], d[2])
    print(end_date)
    now = dtdt.now()
    print(str(now.hour) + ':' + str(now.minute))
    # The market is considered closed after 15:30; before that, only use
    # data up to the previous day.
    market_closed = now.hour > 15 or (now.hour == 15 and now.minute > 30)
    if market_closed:
        today = dt.date.today()
    else:
        today = dt.date.today() - dt.timedelta(days=1)
    delta = dtdt(today.year, today.month, today.day) - end_date

    # Fetch the missing rows for this stock from Google Finance.
    print("delta days: " + str(delta.days))
    if delta.days <= 0:
        delta_dates = pd.date_range(end_date.strftime('%Y.%m.%d'), periods=1)
    else:
        delta_dates = pd.date_range(end_date.strftime('%Y.%m.%d'),
                                    periods=delta.days)
    need_web_scrap = True
    add_df = DataFrame(default_data, columns=columns)
    if csv_file:
        if delta.days > 30:
            try:
                add_df = data.DataReader(
                    category + ":" + stock_item, "google", end_date,
                    dtdt(today.year, today.month, today.day))
                add_df.index = pd.to_datetime(
                    add_df.index).strftime('%Y.%m.%d')
                need_web_scrap = False
            except Exception:
                print("Exception while getting info via GOOGLE")
    # Fall back to web scraping (NAVER) if Google Finance was not used.
    if need_web_scrap:
        add_df = get_info_with_web_scrap(stock_item, add_df, delta_dates)
        add_df = add_df.dropna().sort_index()
        if market_closed:
            print("Market closed")
        else:
            # Drop today's partial row while the market is still open.
            add_df = add_df.drop(add_df[-1:].index)
        print("Finished getting stock info via NAVER")
    if delta.days > 0:
        df = df.append(add_df).dropna()
    #if not os.path.isfile(file_path + file_name):
    df.to_csv(file_path + file_name)
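
# Usage sketch, assuming kospi/kosdaq are DataFrames of exchange listings
# with '기업명' (company name) and '종목코드' (ticker code) columns. The file
# paths and the company name are illustrative placeholders only.
#
#   kospi = pd.read_csv('./data/kospi_list.csv')    # hypothetical paths
#   kosdaq = pd.read_csv('./data/kosdaq_list.csv')
#   update_stock_data('삼성전자', kospi, kosdaq)     # e.g., Samsung Electronics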