def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """
    Download the price history table for one code from Yahoo! Finance Japan.

    Result pages are requested one by one (p=1, 2, ...) until a page has no
    populated second table; the collected rows are combined into one frame.

    code: ticker code; '.T' is appended to it in the request URL
    start, end: date bounds, normalised by web._sanitize_dates
    interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed as
        year/month only

    Returns a DataFrame indexed by 'Date' (ascending) with the columns
    Open, High, Low, Close, Volume and Adj Close.

    Raises ValueError if interval is not supported or if no page returned
    any rows.
    """
    # Reject a bad interval before doing any date handling or network I/O.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

    p = 1
    results = []
    while True:
        url = base.format(code, start, end, interval, p)
        tables = pd.read_html(url, header=0)
        # The quotes are in the second table; a missing or empty one ends paging.
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    if not results:
        raise ValueError('No quote data found for code {0}'.format(code))

    result = pd.concat(results, ignore_index=True)
    result.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']

    date_format = '%Y年%m月' if interval == 'm' else '%Y年%m月%d日'
    result['Date'] = pd.to_datetime(result['Date'], format=date_format)

    return result.set_index('Date').sort_index()
Exemple #2
0
    def get(self, code, start=None, end=None, interval='d'):
        """
        Download a paged price history table and return it as a time series.

        The URL template comes from self.base_url() and is filled with the
        code, the start/end query fragments, the interval and the page number.
        Pages are requested until one has no populated second table.

        code: ticker code substituted into the URL template
        start, end: date bounds, normalised by web._sanitize_dates
        interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed
            as year/month only

        Returns a DataFrame indexed by 'Date' with the columns Open, High,
        Low and Close, converted to business-day frequency via asfreq('B').

        Raises ValueError if interval is not supported or if no page returned
        any rows.
        """
        # Reject a bad interval before doing any date handling or network I/O.
        if interval not in ('d', 'w', 'm', 'v'):
            raise ValueError(
                "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

        base = self.base_url()
        start, end = web._sanitize_dates(start, end)
        start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
        end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

        p = 1
        results = []
        while True:
            url = base.format(code, start, end, interval, p)
            tables = pd.read_html(url, header=0)
            # The quotes are in the second table; a missing or empty one ends paging.
            if len(tables) < 2 or len(tables[1]) == 0:
                break
            results.append(tables[1])
            p += 1

        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        if not results:
            raise ValueError('No quote data found for code {0}'.format(code))

        result = pd.concat(results, ignore_index=True)
        result.columns = [
            'Date', 'Open', 'High', 'Low', 'Close']

        # Monthly rows carry no day component, so they need their own format.
        if interval == 'm':
            date_format = '%Y年%m月'
        else:
            date_format = '%Y年%m月%d日'
        result['Date'] = pd.to_datetime(result['Date'], format=date_format)

        result = result.set_index('Date')
        result = result.sort_index()

        return result.asfreq('B')
Exemple #3
0
    def get(self, code, start=None, end=None, interval='d'):
        """
        Download a paged quote history table and return it as a time series.

        The URL template comes from self.base_url() and is filled with the
        code, the start/end query fragments, the interval and the page number.
        Pages are requested until one has no populated second table.

        code: ticker code substituted into the URL template
        start, end: date bounds, normalised by web._sanitize_dates
        interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed
            as year/month only

        Returns a DataFrame indexed by 'Date' with the columns Open, High,
        Low, Close, Volume and Adj Close, converted to business-day frequency
        via asfreq('B').

        Raises ValueError if interval is not supported or if no page returned
        any rows.
        """
        # Reject a bad interval before doing any date handling or network I/O.
        if interval not in ('d', 'w', 'm', 'v'):
            raise ValueError(
                "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

        base = self.base_url()
        start, end = web._sanitize_dates(start, end)
        start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
        end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)

        p = 1
        results = []
        while True:
            url = base.format(code, start, end, interval, p)
            tables = pd.read_html(url, header=0)
            # The quotes are in the second table; a missing or empty one ends paging.
            if len(tables) < 2 or len(tables[1]) == 0:
                break
            results.append(tables[1])
            p += 1

        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        if not results:
            raise ValueError('No quote data found for code {0}'.format(code))

        result = pd.concat(results, ignore_index=True)
        result.columns = [
            'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']

        # Monthly rows carry no day component, so they need their own format.
        if interval == 'm':
            date_format = '%Y年%m月'
        else:
            date_format = '%Y年%m月%d日'
        result['Date'] = pd.to_datetime(result['Date'], format=date_format)

        result = result.set_index('Date')
        result = result.sort_index()
        return result.asfreq('B')
Exemple #4
0
def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """
    Download the price history table for one code from Yahoo! Finance Japan.

    Result pages are requested one by one (p=1, 2, ...) until a page has no
    populated second table or cannot be parsed by pd.read_html.

    code: ticker code; '.T' is appended to it in the request URL, and the
        value is also stored in the 'code' column of the result
    start, end: date bounds, normalised by web._sanitize_dates
    interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed as
        year/month only

    Returns a DataFrame sorted by 'Date' with a fresh 0..n-1 index and the
    columns Date, Open, High, Low, Close, Volume, Adj Close and code.
    Returns an empty list when no rows could be collected.

    Raises ValueError if interval is not supported.
    """
    # Reject a bad interval before doing any date handling or network I/O.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)
    p = 1  # page number
    results = []

    while True:
        url = base.format(code, start, end, interval, p)
        try:
            tables = pd.read_html(url, header=0)
        except ValueError:
            # Stop paging but keep the pages already collected; returning
            # here would throw away every earlier page.
            break

        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    if not results:
        return []

    result = pd.concat(results, ignore_index=True)
    result.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'
    ]
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')

    # Tag every row with the requested code.
    result['code'] = code

    # Sort by date, then move 'Date' back to the first column and renumber.
    result = result.set_index('Date').sort_index().reset_index()
    return result
Exemple #5
0
    def get(self, code, start=None, end=None, interval='d'):
        """
        Download a paged quote history table and return it as a time series.

        The URL template comes from self.base_url(code) and is filled with the
        code, the start/end query fragments, the interval and the page number.
        Pages are requested until one has no populated second table.

        code: ticker code; 998407 (int or str) is treated as the Nikkei
            average, whose table has no Volume / Adj Close columns
        start, end: date bounds, normalised by web._sanitize_dates
        interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed
            as year/month only

        Returns a DataFrame indexed by 'Date', with rows containing missing
        values dropped, converted to business-day frequency via asfreq('B').

        Raises ValueError if interval is not supported or if no page returned
        any rows.
        """
        # Reject a bad interval before doing any date handling or network I/O.
        if interval not in ('d', 'w', 'm', 'v'):
            raise ValueError(
                "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

        base = self.base_url(code)
        start, end = web._sanitize_dates(start, end)
        start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
        end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)
        p = 1
        results = []

        # Scrape every page of the paginated table.
        while True:
            url = base.format(code, start, end, interval, p)
            tables = pd.read_html(url, header=0)
            if len(tables) < 2 or len(tables[1]) == 0:
                break
            results.append(tables[1])
            p += 1

        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        if not results:
            raise ValueError('No quote data found for code {0}'.format(code))

        # ignore_index=True renumbers the rows from the top.
        result = pd.concat(results, ignore_index=True)
        # Drop rows with missing data.
        result = result.dropna()

        # Nikkei average case; compare as text so '998407' matches as well.
        if str(code) == '998407':
            result.columns = ['Date', 'Open', 'High', 'Low', 'Close']
        else:
            result.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']

        # The u'' prefix keeps the format a unicode string on Python 2.
        if interval == "m":
            result['Date'] = pd.to_datetime(result['Date'], format=u'%Y年%m月')
        else:
            result['Date'] = pd.to_datetime(result['Date'], format=u'%Y年%m月%d日')

        # 'Date' becomes the index, so it is no longer one of the columns.
        result = result.set_index('Date')
        result = result.sort_index()
        return result.asfreq('B')
Exemple #6
0
def get_data_bbg_historical(symbol, flds, start=None, end=None):
    """
    Get historical data from bloomberg. Retrieve the flds for the specified start to end date for the given symbol.

    symbol: Bloomberg identifier
    flds: list of bloomberg fields to retrieve
    start, end: date bounds, normalised by pandas' _sanitize_dates

    Returns a DataFrame indexed by 'Date' with one column per entry of flds.
    """
    bbg = Dispatch('Bloomberg.Data.1')

    # NOTE(review): pandas.io.data was moved out of pandas into the separate
    # pandas-datareader package in later releases - confirm the pandas version
    # this runs against still ships it.
    from pandas.io.data import _sanitize_dates
    start, end = _sanitize_dates(start, end)

    data = bbg.BLPGetHistoricalData(symbol, flds, pywintypes.Time(start.timetuple()), pywintypes.Time(end.timetuple()))
    # Convert to datetime64 and ensure nan's for strings
    # Transpose the converted rows into one sequence per column.
    cdata = zip(*[map(_convert_value, r[0]) for r in data])
    columns = ['Date'] + list(flds)
    # Pair names with columns via zip(): on Python 3 zip() returns an iterator
    # that cannot be indexed, and empty data now gives an empty frame instead
    # of an IndexError.
    frame = pandas.DataFrame(dict(zip(columns, cdata)), columns=columns)
    return frame.set_index('Date')
Exemple #7
0
def get_data_bbg_historical(symbol, flds, start=None, end=None):
    """
    Get historical data from bloomberg. Retrieve the flds for the specified start to end date for the given symbol.

    symbol: Bloomberg identifier
    flds: list of bloomberg fields to retrieve
    start, end: date bounds, normalised by pandas' _sanitize_dates

    Returns a DataFrame indexed by 'Date' with one column per entry of flds.
    """
    bbg = Dispatch('Bloomberg.Data.1')

    # NOTE(review): pandas.io.data was moved out of pandas into the separate
    # pandas-datareader package in later releases - confirm the pandas version
    # this runs against still ships it.
    from pandas.io.data import _sanitize_dates
    start, end = _sanitize_dates(start, end)

    data = bbg.BLPGetHistoricalData(symbol, flds,
                                    pywintypes.Time(start.timetuple()),
                                    pywintypes.Time(end.timetuple()))
    # Convert to datetime64 and ensure nan's for strings
    # Transpose the converted rows into one sequence per column.
    cdata = zip(*[map(_convert_value, r[0]) for r in data])
    columns = ['Date'] + list(flds)
    # Pair names with columns via zip(): on Python 3 zip() returns an iterator
    # that cannot be indexed, and empty data now gives an empty frame instead
    # of an IndexError.
    frame = pandas.DataFrame(dict(zip(columns, cdata)),
                             columns=columns)
    return frame.set_index('Date')
def get_quote_yahoojp(code, start=None, end=None, interval='d'):
    """
    Download the price history table and page title for one code from
    Yahoo! Finance Japan.

    Result pages are requested one by one (p=1, 2, ...) until a page has no
    populated second table; the collected rows are combined into one frame.

    code: ticker code; '.T' is appended to it in the request URL
    start, end: date bounds, normalised by web._sanitize_dates
    interval: one of 'd', 'w', 'm' or 'v'; with 'm' the dates are parsed as
        year/month only

    Returns a (title, result) tuple: title is the text of the <title> element
    of the last fetched page that had one (None if no page had one), and
    result is a DataFrame indexed by 'Date' (ascending) with the columns
    Open, High, Low, Close, Volume and Adj Close.

    Raises ValueError if interval is not supported or if no page returned
    any rows.
    """
    # Reject a bad interval before doing any date handling or network I/O.
    if interval not in ('d', 'w', 'm', 'v'):
        raise ValueError(
            "Invalid interval: valid values are 'd', 'w', 'm' and 'v'")

    base = 'http://info.finance.yahoo.co.jp/history/?code={0}.T&{1}&{2}&tm={3}&p={4}'
    start, end = web._sanitize_dates(start, end)
    start = 'sy={0}&sm={1}&sd={2}'.format(start.year, start.month, start.day)
    end = 'ey={0}&em={1}&ed={2}'.format(end.year, end.month, end.day)
    p = 1
    results = []
    title = None

    while True:
        url = base.format(code, start, end, interval, p)

        # NOTE(review): every page is fetched twice, once here for the title
        # and once by pd.read_html below.
        title_node = lxml.html.parse(url).find(".//title")
        # find() returns None when the page has no <title>; keep the previous
        # title instead of failing on None.text.
        if title_node is not None:
            title = title_node.text
        tables = pd.read_html(url, header=0)

        # The quotes are in the second table; a missing or empty one ends paging.
        if len(tables) < 2 or len(tables[1]) == 0:
            break
        results.append(tables[1])
        p += 1

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    if not results:
        raise ValueError('No quote data found for code {0}'.format(code))

    result = pd.concat(results, ignore_index=True)

    result.columns = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'
    ]

    #  Use '年月日' to parse Japanese date.
    if interval == 'm':
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月')
    else:
        result['Date'] = pd.to_datetime(result['Date'], format='%Y年%m月%d日')

    result = result.set_index('Date')
    result = result.sort_index()
    return title, result