Ejemplo n.º 1
0
def test_merge():
    """Exercise ffn.merge with two offset daily series, then with a frame.

    Series ``a`` starts on 2010-01-01 and ``b`` on 2010-01-02, five days
    each. The test expects the merged result to hold six rows and one
    column per input, with NaN where an input has no observation.
    """
    # pd.TimeSeries was a deprecated alias of pd.Series and is no longer
    # available in current pandas; pd.Series is the direct replacement.
    a = pd.Series(index=pd.date_range('2010-01-01', periods=5),
                  data=100, name='a')
    b = pd.Series(index=pd.date_range('2010-01-02', periods=5),
                  data=200, name='b')
    actual = ffn.merge(a, b)

    assert 'a' in actual
    assert 'b' in actual
    assert len(actual) == 6
    assert len(actual.columns) == 2
    # .iloc keeps these lookups positional; plain integer keys on a
    # date-indexed Series rely on a deprecated positional fallback.
    assert np.isnan(actual['a'].iloc[-1])
    assert np.isnan(actual['b'].iloc[0])
    assert actual['a'].iloc[0] == 100
    assert actual['a'].iloc[1] == 100
    assert actual['b'].iloc[-1] == 200
    assert actual['b'].iloc[1] == 200

    # Reuse the first result under new column names so that a DataFrame
    # can be merged together with the two original series.
    old = actual
    old.columns = ['c', 'd']

    actual = ffn.merge(old, a, b)

    assert 'a' in actual
    assert 'b' in actual
    assert 'c' in actual
    assert 'd' in actual
    assert len(actual) == 6
    assert len(actual.columns) == 4
    assert np.isnan(actual['a'].iloc[-1])
    assert np.isnan(actual['b'].iloc[0])
    assert actual['a'].iloc[0] == 100
    assert actual['a'].iloc[1] == 100
    assert actual['b'].iloc[-1] == 200
    assert actual['b'].iloc[1] == 200
Ejemplo n.º 2
0
def test_merge():
    """Verify ffn.merge on two offset daily series and on a frame plus series.

    ``a`` covers five days from 2010-01-01 and ``b`` five days from
    2010-01-02. The merged result is expected to have six rows, one column
    per input, and NaN on the dates an input does not cover.
    """
    a = pd.Series(index=pd.date_range("2010-01-01", periods=5), data=100, name="a")
    b = pd.Series(index=pd.date_range("2010-01-02", periods=5), data=200, name="b")
    actual = ffn.merge(a, b)

    assert "a" in actual
    assert "b" in actual
    assert len(actual) == 6
    assert len(actual.columns) == 2
    # Positional access goes through .iloc: integer keys passed to [] on a
    # date-indexed Series depend on a deprecated positional fallback.
    assert np.isnan(actual["a"].iloc[-1])
    assert np.isnan(actual["b"].iloc[0])
    assert actual["a"].iloc[0] == 100
    assert actual["a"].iloc[1] == 100
    assert actual["b"].iloc[-1] == 200
    assert actual["b"].iloc[1] == 200

    # Rename the merged columns so the frame can be merged again with the
    # original series without name clashes.
    old = actual
    old.columns = ["c", "d"]

    actual = ffn.merge(old, a, b)

    assert "a" in actual
    assert "b" in actual
    assert "c" in actual
    assert "d" in actual
    assert len(actual) == 6
    assert len(actual.columns) == 4
    assert np.isnan(actual["a"].iloc[-1])
    assert np.isnan(actual["b"].iloc[0])
    assert actual["a"].iloc[0] == 100
    assert actual["a"].iloc[1] == 100
    assert actual["b"].iloc[-1] == 200
    assert actual["b"].iloc[1] == 200
Ejemplo n.º 3
0
def test_merge():
    a = pd.Series(index=pd.date_range('2010-01-01', periods=5),
                  data=100, name='a')
    b = pd.Series(index=pd.date_range('2010-01-02', periods=5),
                  data=200, name='b')
    actual = ffn.merge(a, b)

    assert 'a' in actual
    assert 'b' in actual
    assert len(actual) == 6
    assert len(actual.columns) == 2
    assert np.isnan(actual['a'][-1])
    assert np.isnan(actual['b'][0])
    assert actual['a'][0] == 100
    assert actual['a'][1] == 100
    assert actual['b'][-1] == 200
    assert actual['b'][1] == 200

    old = actual
    old.columns = ['c', 'd']

    actual = ffn.merge(old, a, b)

    assert 'a' in actual
    assert 'b' in actual
    assert 'c' in actual
    assert 'd' in actual
    assert len(actual) == 6
    assert len(actual.columns) == 4
    assert np.isnan(actual['a'][-1])
    assert np.isnan(actual['b'][0])
    assert actual['a'][0] == 100
    assert actual['a'][1] == 100
    assert actual['b'][-1] == 200
    assert actual['b'][1] == 200
Ejemplo n.º 4
0
def test_merge():
    a = pd.Series(index=pd.date_range("2010-01-01", periods=5),
                  data=100,
                  name="a")
    b = pd.Series(index=pd.date_range("2010-01-02", periods=5),
                  data=200,
                  name="b")
    actual = ffn.merge(a, b)

    assert "a" in actual
    assert "b" in actual
    assert len(actual) == 6
    assert len(actual.columns) == 2
    assert np.isnan(actual["a"][-1])
    assert np.isnan(actual["b"][0])
    assert actual["a"][0] == 100
    assert actual["a"][1] == 100
    assert actual["b"][-1] == 200
    assert actual["b"][1] == 200

    old = actual
    old.columns = ["c", "d"]

    actual = ffn.merge(old, a, b)

    assert "a" in actual
    assert "b" in actual
    assert "c" in actual
    assert "d" in actual
    assert len(actual) == 6
    assert len(actual.columns) == 4
    assert np.isnan(actual["a"][-1])
    assert np.isnan(actual["b"][0])
    assert actual["a"][0] == 100
    assert actual["a"][1] == 100
    assert actual["b"][-1] == 200
    assert actual["b"][1] == 200
Ejemplo n.º 5
0
def test_drop_duplicate_cols():
    """Merge two series both named "a" plus "b", then drop the duplicate.

    Before de-duplication the test expects two "a" columns (three columns
    in total); afterwards it expects only "a" and "b", with "a" holding
    five non-NaN rows.
    """
    five_from_first = pd.date_range("2010-01-01", periods=5)
    four_from_second = pd.date_range("2010-01-02", periods=4)
    five_from_second = pd.date_range("2010-01-02", periods=5)

    longer_a = pd.Series(index=five_from_first, data=100, name="a")
    # same name as above, but covering fewer dates
    shorter_a = pd.Series(index=four_from_second, data=900, name="a")
    other_b = pd.Series(index=five_from_second, data=200, name="b")

    merged = ffn.merge(longer_a, shorter_a, other_b)

    # selecting "a" yields a two-column frame while the duplicate exists
    assert merged["a"].shape[1] == 2
    assert len(merged.columns) == 3

    deduped = merged.drop_duplicate_cols()

    assert len(deduped.columns) == 2
    assert "a" in deduped
    assert "b" in deduped
    assert len(deduped["a"].dropna()) == 5
Ejemplo n.º 6
0
def test_drop_duplicate_cols():
    """Check drop_duplicate_cols on a merge containing two 'a' columns.

    Two series named 'a' (five and four days long) and one named 'b' are
    merged. The test expects three columns before de-duplication and two
    afterwards, with the remaining 'a' column holding five non-NaN rows.
    """
    # pd.TimeSeries was a deprecated alias of pd.Series and is no longer
    # available in current pandas; pd.Series is the direct replacement.
    a = pd.Series(index=pd.date_range('2010-01-01', periods=5),
                  data=100, name='a')
    # second version of a w/ less data
    a2 = pd.Series(index=pd.date_range('2010-01-02', periods=4),
                   data=900, name='a')
    b = pd.Series(index=pd.date_range('2010-01-02', periods=5),
                  data=200, name='b')
    actual = ffn.merge(a, a2, b)

    # with duplicate names, selecting 'a' returns a two-column frame
    assert actual['a'].shape[1] == 2
    assert len(actual.columns) == 3

    actual = actual.drop_duplicate_cols()

    assert len(actual.columns) == 2
    assert 'a' in actual
    assert 'b' in actual
    assert len(actual['a'].dropna()) == 5
Ejemplo n.º 7
0
def test_drop_duplicate_cols():
    """Ensure drop_duplicate_cols collapses two same-named columns into one.

    The merged input carries two 'a' series of different length and one
    'b' series; after de-duplication only 'a' and 'b' should remain and
    'a' should have five non-NaN values.
    """
    series_kwargs = [
        dict(index=pd.date_range('2010-01-01', periods=5), data=100, name='a'),
        # duplicate of 'a' that covers fewer dates
        dict(index=pd.date_range('2010-01-02', periods=4), data=900, name='a'),
        dict(index=pd.date_range('2010-01-02', periods=5), data=200, name='b'),
    ]
    inputs = [pd.Series(**kw) for kw in series_kwargs]

    with_dupes = ffn.merge(*inputs)

    # 'a' resolves to a two-column selection until duplicates are removed
    assert with_dupes['a'].shape[1] == 2
    assert len(with_dupes.columns) == 3

    without_dupes = with_dupes.drop_duplicate_cols()

    assert len(without_dupes.columns) == 2
    assert 'a' in without_dupes
    assert 'b' in without_dupes
    assert len(without_dupes['a'].dropna()) == 5
Ejemplo n.º 8
0
def test_drop_duplicate_cols():
    """Validate de-duplication of identically named columns after a merge.

    Builds a five-day "a", a four-day "a" and a five-day "b", merges them,
    and checks the column counts before and after drop_duplicate_cols.
    """
    start_first, start_second = "2010-01-01", "2010-01-02"

    primary = pd.Series(
        index=pd.date_range(start_first, periods=5), data=100, name="a"
    )
    # same column name, one day shorter
    secondary = pd.Series(
        index=pd.date_range(start_second, periods=4), data=900, name="a"
    )
    unrelated = pd.Series(
        index=pd.date_range(start_second, periods=5), data=200, name="b"
    )

    combined = ffn.merge(primary, secondary, unrelated)

    # both "a" columns are still present at this point
    assert combined["a"].shape[1] == 2
    assert len(combined.columns) == 3

    cleaned = combined.drop_duplicate_cols()

    assert len(cleaned.columns) == 2
    assert "a" in cleaned
    assert "b" in cleaned
    assert len(cleaned["a"].dropna()) == 5
Ejemplo n.º 9
0
Archivo: data.py Proyecto: ran404/ffn
def get(tickers, provider=None, common_dates=True, forward_fill=False,
        clean_tickers=True, column_names=None, ticker_field_sep=':',
        mrefresh=False, existing=None, **kwargs):
    """
    Helper function for retrieving data as a DataFrame.

    Args:
        * tickers (list, string, csv string): Tickers to download.
        * provider (function): Provider to use for downloading data.
            By default it will be ffn.DEFAULT_PROVIDER if not provided.
        * common_dates (bool): Keep common dates only? Drop na's.
        * forward_fill (bool): forward fill values if missing. Only works
            if common_dates is False, since common_dates will remove
            all nan's, so no filling forward necessary.
        * clean_tickers (bool): Should the tickers be 'cleaned' using
            ffn.utils.clean_ticker? Basically remove non-standard
            characters (^VIX -> vix) and standardize to lower case.
        * column_names (list): List of column names if clean_tickers
            is not satisfactory.
        * ticker_field_sep (char): separator used to determine the
            ticker and field. This is in case we want to specify
            particular, non-default fields. For example, we might
            want: AAPL:Low,AAPL:High,AAPL:Close. ':' is the separator.
        * mrefresh (bool): Ignore memoization.
        * existing (DataFrame): Existing DataFrame to append returns
            to - used when we download from multiple sources
        * kwargs: passed to provider

    Returns:
        DataFrame built from the provider results, with columns in the
        order the tickers were given (merged with ``existing`` when it
        is supplied).

    Raises:
        ValueError: if ``column_names`` is given and its length differs
            from the number of columns in the resulting frame.

    """

    if provider is None:
        provider = DEFAULT_PROVIDER

    tickers = utils.parse_arg(tickers)

    # Providers that expose an `mcache` attribute are treated as memoized
    # and are the only ones handed `mrefresh`. The check does not depend
    # on the ticker, so it is done once up front.
    supports_mrefresh = hasattr(provider, 'mcache')

    data = {}
    for ticker in tickers:
        t = ticker
        f = None

        # check for field - split on the first separator only so the
        # field part may itself contain the separator
        bits = ticker.split(ticker_field_sep, 1)
        if len(bits) == 2:
            t = bits[0]
            f = bits[1]

        if supports_mrefresh:
            data[ticker] = provider(ticker=t, field=f,
                                    mrefresh=mrefresh, **kwargs)
        else:
            data[ticker] = provider(ticker=t, field=f, **kwargs)

    df = pd.DataFrame(data)
    # ensure same order as provided
    df = df[tickers]

    if existing is not None:
        df = ffn.merge(existing, df)

    if common_dates:
        df = df.dropna()

    if forward_fill:
        # DataFrame.ffill() is the supported spelling; passing
        # method='ffill' to fillna is deprecated in recent pandas.
        df = df.ffill()

    if column_names:
        cnames = utils.parse_arg(column_names)
        if len(cnames) != len(df.columns):
            raise ValueError(
                'column_names must be of same length as tickers')
        df.columns = cnames
    elif clean_tickers:
        # Build a concrete list: on Python 3 `map` returns a one-shot
        # iterator, which is fragile to assign as column labels.
        df.columns = [utils.clean_ticker(c) for c in df.columns]

    return df
Ejemplo n.º 10
0
Archivo: data.py Proyecto: yetone/ffn
def get(tickers, provider=None, common_dates=True, forward_fill=False,
        clean_tickers=True, column_names=None, ticker_field_sep=':',
        mrefresh=False, existing=None, **kwargs):
    """
    Helper function for retrieving data as a DataFrame.

    Args:
        * tickers (list, string, csv string): Tickers to download.
        * provider (function): Provider to use for downloading data.
            By default it will be ffn.DEFAULT_PROVIDER if not provided.
        * common_dates (bool): Keep common dates only? Drop na's.
        * forward_fill (bool): forward fill values if missing. Only works
            if common_dates is False, since common_dates will remove
            all nan's, so no filling forward necessary.
        * clean_tickers (bool): Should the tickers be 'cleaned' using
            ffn.utils.clean_ticker? Basically remove non-standard
            characters (^VIX -> vix) and standardize to lower case.
        * column_names (list): List of column names if clean_tickers
            is not satisfactory.
        * ticker_field_sep (char): separator used to determine the
            ticker and field. This is in case we want to specify
            particular, non-default fields. For example, we might
            want: AAPL:Low,AAPL:High,AAPL:Close. ':' is the separator.
        * mrefresh (bool): Ignore memoization.
        * existing (DataFrame): Existing DataFrame to append returns
            to - used when we download from multiple sources
        * kwargs: passed to provider

    Returns:
        DataFrame with one column per provider call, ordered like
        ``tickers``; when ``existing`` is given the result is the merge
        of ``existing`` with the newly retrieved columns.

    Raises:
        ValueError: when ``column_names`` is supplied but does not have
            exactly one entry per column of the resulting frame.

    """

    if provider is None:
        provider = DEFAULT_PROVIDER

    tickers = utils.parse_arg(tickers)

    # `mrefresh` is only forwarded to providers carrying an `mcache`
    # attribute; this is constant across tickers, so evaluate it once.
    provider_is_memoized = hasattr(provider, 'mcache')

    data = {}
    for ticker in tickers:
        t = ticker
        f = None

        # check for field ("TICKER<sep>FIELD"); maxsplit=1 keeps any
        # further separators inside the field part
        bits = ticker.split(ticker_field_sep, 1)
        if len(bits) == 2:
            t = bits[0]
            f = bits[1]

        if provider_is_memoized:
            data[ticker] = provider(ticker=t, field=f,
                                    mrefresh=mrefresh, **kwargs)
        else:
            data[ticker] = provider(ticker=t, field=f, **kwargs)

    df = pd.DataFrame(data)
    # ensure same order as provided
    df = df[tickers]

    if existing is not None:
        df = ffn.merge(existing, df)

    if common_dates:
        df = df.dropna()

    if forward_fill:
        # fillna(method='ffill') is deprecated in recent pandas;
        # ffill() performs the same forward fill.
        df = df.ffill()

    if column_names:
        cnames = utils.parse_arg(column_names)
        if len(cnames) != len(df.columns):
            raise ValueError(
                'column_names must be of same length as tickers')
        df.columns = cnames
    elif clean_tickers:
        # Materialize the cleaned names; a bare `map` object is a lazy
        # iterator on Python 3 rather than a list of labels.
        df.columns = [utils.clean_ticker(name) for name in df.columns]

    return df