def test_invalid_args(self):
     """``dt_to_iso8601`` must raise ``ValueError`` for each bad argument.

     Three calls are checked in turn: an unrecognised second argument, an
     unrecognised third argument, and a string in place of the datetime.
     """
     stamp = dtdt(2016, 1, 2, 3, 4, 5, 123456)
     bad_calls = (
         (stamp, 'foo', 'ms'),
         (stamp, 'ext', 'foo'),
         ('foo', 'ext', 'ms'),
     )
     for args in bad_calls:
         with pytest.raises(ValueError):
             mod.dt_to_iso8601(*args)
Example #2
0
def create_stock_csv_file(df, category, num=10):
    """Create the initial CSV file for each of the first ``num`` stocks in ``df``.

    For every row position ``0 .. num - 1`` the stock code is read from the
    '종목코드' column and zero-padded to six characters. Stocks for which
    ``check_csv_file`` reports an existing file are skipped. For the others,
    data is fetched with ``get_info_with_web_scrap`` and written to
    ``./data/csv/<category>_<code>.csv``.

    Args:
        df: Listing table that has a '종목코드' (stock code) column.
        category: Market label used as the file-name prefix.
        num: Number of leading rows of ``df`` to process.
    """
    print("Start to create init stock data - " + category)

    start_date = dtdt(1980, 1, 1)
    # Single-day index used to seed the frame handed to the scraper.
    start_idx_dates = pd.date_range(start_date.strftime('%Y/%m/%d'),
                                    periods=1)

    for i in range(num):
        # Take the stock items in row order.
        try:
            # ``DataFrame.ix`` was removed in pandas 1.0; with the old bare
            # ``except`` the resulting AttributeError silently skipped every
            # row. ``iloc`` performs the intended positional lookup.
            code = str(df.iloc[i]['종목코드']).zfill(6)
        except (IndexError, KeyError):
            # Fewer than ``num`` rows, or no stock-code column.
            print("ERROR - Unregisterd Stock!")
            continue

        if check_csv_file(category, code):
            print("Already exist: " + category + '_' + code + '.csv')
            continue

        # ``code`` is already a zero-padded string.
        stock_item = code
        print(category, stock_item)

        # One all-NaN row as the initial frame (``np.NAN`` was removed in
        # NumPy 2.0, so use ``np.nan``).
        init_data = np.full((1, len(columns)), np.nan)
        init_df = DataFrame(init_data, index=start_idx_dates, columns=columns)

        get_df = get_info_with_web_scrap(stock_item, init_df, start_idx_dates)
        get_df = get_df.dropna().sort_index()
        get_df.to_csv('./data/csv/' + category + '_' + stock_item + '.csv')

    print("Complete to create init stock data!")
Example #3
0
def fomcdates():
    """Scrape FOMC meeting dates from federalreserve.gov into a DataFrame.

    The FOMC releases a statement after its meetings (historically only when
    a rate decision was made) and posts its meeting materials on its website.
    Recent meetings are taken from the links to the minutes on the calendar
    page. Earlier years, from 1965 up to (but excluding) the earliest year
    found on the calendar page, are taken from the agenda PDF links on the
    per-year historical pages.

    TODO: we should probably use the meeting dates displayed on the webpage
    rather than the minutes links, but this seems to work.
    TODO: check that statements are always released at 2:30.

    Returns:
        pandas.DataFrame with one row per date, sorted ascending, with columns
        'release', 'releaseyear', 'releasemonth', 'releaseday', 'releasehour',
        'releaseminute', 'coveredyear', 'coveredperiod' (day of the year) and
        'freq'.

    Raises:
        ValueError: If no meeting dates are found on the calendar page.
    """
    def _read_page(url):
        # Close the HTTP response deterministically and search decoded text
        # rather than the repr of a bytes object.
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8', errors='replace')

    # Grab the non-historical meetings first.
    print('Reading FOMC meeting dates')
    calendar_page = _read_page(
        'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm')
    datesStr = re.findall(r'monetarypolicy/fomcminutes([0-9]{8})\.htm',
                          calendar_page)
    if not datesStr:
        raise ValueError('No FOMC meeting dates found on the calendar page')

    start = 1965
    end = min(dtdt.strptime(dd, '%Y%m%d') for dd in datesStr).year

    # The historical data has a separate page for each year. Loop through them.
    for year in range(start, end):
        print('Reading FOMC meeting dates for ' + str(year))
        year_page = _read_page(
            'https://www.federalreserve.gov/monetarypolicy/fomchistorical' +
            str(year) + '.htm')
        datesStr.extend(
            re.findall(r'monetarypolicy/files/FOMC([0-9]{8})Agenda\.pdf',
                       year_page))

    dates = sorted(dtdt.strptime(dd, '%Y%m%d') for dd in datesStr)
    releaseyears = [x.year for x in dates]

    return pd.DataFrame({
        'release': 'FOMC meeting',
        'releaseyear': releaseyears,
        'releasemonth': [x.month for x in dates],
        'releaseday': [x.day for x in dates],
        'releasehour': 14,
        'releaseminute': 30,
        'coveredyear': releaseyears,
        # Day of the year (1-based) on which the meeting date falls.
        'coveredperiod': [x.timetuple().tm_yday for x in dates],
        'freq': 365
    })
Example #4
0
    def movepath(path):
        """Move ``path`` into the directory of the timetable slot it belongs to.

        ``cvtpath`` must match ``path`` with exactly six groups, which are
        converted to integers and passed to ``dtdt``. Groups 4 and 5 are
        combined as ``group4 * 60 + group5`` and compared against the ranges in
        ``jikanwariemb`` to pick a slot. Nothing happens when the pattern does
        not match, when no slot contains the value, or when ``target`` is
        truthy for that slot and weekday. The destination directory is always
        created; the file is only moved when ``test`` is falsy.
        """
        matched = re.search(cvtpath, path)
        if matched is None:
            return
        fields = matched.groups()
        if len(fields) != 6:
            return

        stamp = [int(field) for field in fields]
        weekday = dtdt(*stamp).strftime("%a")
        minute_of_day = stamp[3] * 60 + stamp[4]

        # First slot whose range contains the value, or None.
        slot = next(
            (key for key in jikanwariemb.keys()
             if between(minute_of_day, *jikanwariemb[key])),
            None,
        )
        if slot is None or target.loc[slot, weekday]:
            return

        destination = os.path.join(savedirname, jikanwari.loc[slot, weekday])
        os.makedirs(destination, exist_ok=True)
        if not test:
            shutil.move(path, destination)
Example #5
0
def main():
    """Build the combined BLS release-date table and write it to CSV.

    The frames returned by ``blsjobsdays`` and ``getBLScalendars`` are
    concatenated and de-duplicated. A 'releasedate' column is derived from the
    year/month/day/hour/minute columns. The result is indexed by
    ('releasedate', 'release'), sorted, and saved to
    '/app/output/bls_release_dates.csv'.

    Returns:
        The indexed and sorted pandas.DataFrame that was written.
    """
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # equivalent.
    df = pd.concat([blsjobsdays(), getBLScalendars()], ignore_index=True)

    # Some data is pulled twice. ``drop_duplicates`` returns a new frame, so
    # the result has to be assigned. The index is reset so it stays contiguous.
    df = df.drop_duplicates().reset_index(drop=True)

    # FOMC meeting dates (fomcdates) and minutes dates (minutes_dates) are
    # currently not merged in.

    # Build the timestamps row by row, by position, so this does not depend on
    # the index labels.
    date_parts = df[[
        'releaseyear', 'releasemonth', 'releaseday', 'releasehour',
        'releaseminute'
    ]]
    df['releasedate'] = [
        dtdt(*(int(part) for part in row))
        for row in date_parts.itertuples(index=False, name=None)
    ]

    df = df[[
        'releasedate', 'release', 'releaseyear', 'releasemonth', 'releaseday',
        'releasehour', 'releaseminute', 'coveredyear', 'coveredperiod', 'freq'
    ]]
    df = df.set_index(['releasedate', 'release'])
    df.sort_index(inplace=True)
    df.to_csv('/app/output/bls_release_dates.csv')
    return df
Example #6
0
def reset_jikanwari():
    """Write a default timetable to ``~/.myscreenshot/jikanwari.csv``.

    The CSV has one row per period (1-6). The 'jigen' column holds the time
    range of the period, followed by one column per weekday whose cells are
    the weekday abbreviation with the period number appended.
    """
    period_hours = {
        1: "08:30~10:25",
        2: "10:25~12:10",
        3: "13:00~14:55",
        4: "14:55~16:50",
        5: "16:50~18:45",
        6: "18:45~20:30",
    }
    period_frame = pd.DataFrame(period_hours, index=["jigen"]).T

    # Weekday abbreviations are taken from the seven days 22-28 June 2020.
    weekdays = [dtdt(2020, 6, day).strftime("%a") for day in range(22, 29)]
    periods = list(range(1, 7))
    cells = [[weekday + str(period) for weekday in weekdays]
             for period in periods]
    timetable = pd.DataFrame(cells, columns=weekdays, index=periods)

    base = os.path.join(expanduser("~"), ".myscreenshot")
    os.makedirs(base, exist_ok=True)
    combined = pd.concat([period_frame, timetable], axis=1)
    combined.to_csv(os.path.join(base, "jikanwari.csv"))
 def test_ext_ms(self):
     """'ext' with 'ms' gives dashed/colon form including the fraction."""
     stamp = dtdt(2016, 1, 2, 3, 4, 5, 123456)
     expected = '2016-01-02T03:04:05.123456'
     assert mod.dt_to_iso8601(stamp, 'ext', 'ms') == expected
 def test_ext_second(self):
     """'ext' with 'second' gives dashed/colon form without a fraction."""
     stamp = dtdt(2016, 1, 2, 3, 4, 5)
     expected = '2016-01-02T03:04:05'
     assert mod.dt_to_iso8601(stamp, 'ext', 'second') == expected
 def test_basic_ms(self):
     """'basic' with 'ms' gives the separator-free form with the fraction."""
     stamp = dtdt(2016, 1, 2, 3, 4, 5, 123456)
     expected = '20160102T030405123456'
     assert mod.dt_to_iso8601(stamp, 'basic', 'ms') == expected
 def test_basic_second(self):
     """'basic' with 'second' gives the separator-free form, no fraction."""
     stamp = dtdt(2016, 1, 2, 3, 4, 5)
     expected = '20160102T030405'
     assert mod.dt_to_iso8601(stamp, 'basic', 'second') == expected
Example #11
0
def update_stock_data(item, kospi, kosdaq):
    """Bring the CSV price history of one stock up to date.

    The company name ``item`` is looked up first in ``kospi`` (category
    'KRX'), then in ``kosdaq`` (category 'KOSDAQ'). The existing CSV file, if
    any, is loaded; the rows missing between its last date and the most
    recent reference day are fetched and appended, and the file is rewritten
    under ``./data/csv/``. The reference day is today after 15:30 local time,
    otherwise yesterday.

    Args:
        item: Company name, matched against the '기업명' column.
        kospi: Listing table with '기업명' and '종목코드' columns.
        kosdaq: Listing table with the same columns as ``kospi``.

    Raises:
        SystemExit: If ``item`` is found in neither listing.
    """
    # Resolve the stock code: KOSPI first, then KOSDAQ.
    for listing, market in ((kospi, 'KRX'), (kosdaq, 'KOSDAQ')):
        try:
            code = listing[listing['기업명'] == item]['종목코드'].values[0]
        except (IndexError, KeyError):
            # Not listed here (or the expected columns are missing).
            continue
        category = market
        break
    else:
        sys.exit("ERROR - Unregisterd Stock!")

    stock_item = str(code).zfill(6)
    print(category, item, stock_item)

    # Check for the CSV file of the requested stock.
    file_path = './data/csv/'
    file_name = category + '_' + stock_item + '.csv'

    csv_file = True
    if check_csv_file(category, stock_item):
        print("Exist csv file:" + file_name)
        try:
            df = pd.read_csv(file_path + file_name, index_col='Unnamed: 0')
        except (ValueError, KeyError):
            # No 'Unnamed: 0' column: the file uses a named 'Date' index.
            df = pd.read_csv(file_path + file_name, index_col='Date')
            # NOTE(review): tagged "TEST1" in the original. It drops the last
            # 10 rows, presumably so they are fetched again. Confirm this is
            # still wanted.
            df = df.drop(df[-10:].index)
    else:
        df = DataFrame(default_data, columns=columns, index=['1980.1.1'])
        csv_file = False

    df.index.name = 'Date'

    # Work out the gap between the last date in the CSV and the reference day.
    year, month, day = (int(part) for part in df.index[-1].split('.'))
    end_date = dtdt(year, month, day)
    print(end_date)

    # Read the clock once so hour, minute and date are mutually consistent.
    now = dtdt.now()
    print(str(now.hour) + ':' + str(now.minute))

    # The market counts as closed after 15:30. The original left ``today``
    # unassigned between 15:00 and 15:30, which raised UnboundLocalError.
    market_closed = (now.hour, now.minute) > (15, 30)
    if market_closed:
        today = now.date()
    else:
        today = now.date() - dt.timedelta(days=1)

    today_start = dtdt(today.year, today.month, today.day)
    delta = today_start - end_date

    print("delta days:" + str(delta.days))
    # Always request at least one day.
    delta_dates = pd.date_range(end_date.strftime('%Y.%m.%d'),
                                periods=max(delta.days, 1))

    need_web_scrap = True
    add_df = DataFrame(default_data, columns=columns)

    # For an existing file with a gap of more than 30 days, try Google
    # Finance first.
    if csv_file and delta.days > 30:
        try:
            add_df = data.DataReader(category + ":" + stock_item, "google",
                                     end_date, today_start)
            add_df.index = pd.to_datetime(add_df.index).strftime('%Y.%m.%d')
            need_web_scrap = False
        except Exception:
            # Best effort: fall back to web scraping below.
            print("Exception to get info via GOOGLE")

    if need_web_scrap:
        add_df = get_info_with_web_scrap(stock_item, add_df, delta_dates)
        add_df = add_df.dropna().sort_index()
        if market_closed:
            print("Market closed")
        else:
            # Drop the last row while the market is still open. ``drop``
            # returns a new frame; the original discarded the result, so the
            # row was never removed.
            add_df = add_df.drop(add_df[-1:].index)
        print("Complete to get stock info via NAVER")

    if delta.days > 0:
        # ``DataFrame.append`` was removed in pandas 2.0.
        df = pd.concat([df, add_df]).dropna()
        df.to_csv(file_path + file_name)