def test_merge(): a = pd.Series(index=pd.date_range("2010-01-01", periods=5), data=100, name="a") b = pd.Series(index=pd.date_range("2010-01-02", periods=5), data=200, name="b") actual = ffn.merge(a, b) assert "a" in actual assert "b" in actual assert len(actual) == 6 assert len(actual.columns) == 2 assert np.isnan(actual["a"][-1]) assert np.isnan(actual["b"][0]) assert actual["a"][0] == 100 assert actual["a"][1] == 100 assert actual["b"][-1] == 200 assert actual["b"][1] == 200 old = actual old.columns = ["c", "d"] actual = ffn.merge(old, a, b) assert "a" in actual assert "b" in actual assert "c" in actual assert "d" in actual assert len(actual) == 6 assert len(actual.columns) == 4 assert np.isnan(actual["a"][-1]) assert np.isnan(actual["b"][0]) assert actual["a"][0] == 100 assert actual["a"][1] == 100 assert actual["b"][-1] == 200 assert actual["b"][1] == 200
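# Illustrative addition (not from the ffn test suite): ffn.merge on overlapping
# series behaves like a column-wise outer join, so a plain pandas concat along
# axis=1 yields the same shape - the union of the date indexes with NaN padding
# at the non-overlapping ends.
def test_merge_outer_join_sketch():
    a = pd.Series(index=pd.date_range("2010-01-01", periods=5), data=100, name="a")
    b = pd.Series(index=pd.date_range("2010-01-02", periods=5), data=200, name="b")

    merged = pd.concat([a, b], axis=1)

    assert len(merged) == 6
    assert len(merged.columns) == 2
    assert np.isnan(merged["a"].iloc[-1])
    assert np.isnan(merged["b"].iloc[0])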
def test_drop_duplicate_cols():
    a = pd.Series(index=pd.date_range("2010-01-01", periods=5), data=100, name="a")
    # second version of a w/ less data
    a2 = pd.Series(index=pd.date_range("2010-01-02", periods=4), data=900, name="a")
    b = pd.Series(index=pd.date_range("2010-01-02", periods=5), data=200, name="b")

    actual = ffn.merge(a, a2, b)

    assert actual["a"].shape[1] == 2
    assert len(actual.columns) == 3

    actual = actual.drop_duplicate_cols()

    assert len(actual.columns) == 2
    assert "a" in actual
    assert "b" in actual
    assert len(actual["a"].dropna()) == 5
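# Illustrative sketch (assumption: not ffn's actual implementation): the test
# above relies on two things - ffn attaches its helpers to pandas objects, which
# is why a plain DataFrame gains a drop_duplicate_cols() method, and the helper
# keeps, for each duplicated column name, the copy with the most non-NaN values.
# The hypothetical helper below reproduces that behaviour with plain pandas and
# shows the attachment pattern.
from pandas.core.base import PandasObject


def drop_duplicate_cols_sketch(df):
    result = {}
    for name in df.columns.unique():
        sub = df.loc[:, df.columns == name]
        # keep the copy (by position) with the most non-null entries
        best = sub.count().values.argmax()
        result[name] = sub.iloc[:, best]
    return pd.DataFrame(result)


PandasObject.drop_duplicate_cols_sketch = drop_duplicate_cols_sketch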
def get(tickers, provider=None, common_dates=True, forward_fill=False,
        clean_tickers=True, column_names=None, ticker_field_sep=':',
        mrefresh=False, existing=None, **kwargs):
    """
    Helper function for retrieving data as a DataFrame.

    Args:
        * tickers (list, string, csv string): Tickers to download.
        * provider (function): Provider to use for downloading data. By
            default it will be ffn.DEFAULT_PROVIDER if not provided.
        * common_dates (bool): Keep common dates only? Drop na's.
        * forward_fill (bool): Forward fill values if missing. Only works
            if common_dates is False, since common_dates will remove all
            nan's, so no filling forward necessary.
        * clean_tickers (bool): Should the tickers be 'cleaned' using
            ffn.utils.clean_tickers? Basically remove non-standard
            characters (^VIX -> vix) and standardize to lower case.
        * column_names (list): List of column names if clean_tickers is
            not satisfactory.
        * ticker_field_sep (char): Separator used to determine the ticker
            and field. This is in case we want to specify particular,
            non-default fields. For example, we might want:
            AAPL:Low,AAPL:High,AAPL:Close. ':' is the separator.
        * mrefresh (bool): Ignore memoization.
        * existing (DataFrame): Existing DataFrame to append returns to -
            used when we download from multiple sources.
        * kwargs: Passed to provider.

    """
    if provider is None:
        provider = DEFAULT_PROVIDER

    tickers = utils.parse_arg(tickers)

    data = {}
    for ticker in tickers:
        t = ticker
        f = None

        # check for field
        bits = ticker.split(ticker_field_sep, 1)
        if len(bits) == 2:
            t = bits[0]
            f = bits[1]

        # call provider - check if supports memoization
        if hasattr(provider, 'mcache'):
            data[ticker] = provider(ticker=t, field=f, mrefresh=mrefresh, **kwargs)
        else:
            data[ticker] = provider(ticker=t, field=f, **kwargs)

    df = pd.DataFrame(data)
    # ensure same order as provided
    df = df[tickers]

    if existing is not None:
        df = ffn.merge(existing, df)

    if common_dates:
        df = df.dropna()

    if forward_fill:
        df = df.fillna(method='ffill')

    if column_names:
        cnames = utils.parse_arg(column_names)
        if len(cnames) != len(df.columns):
            raise ValueError('column_names must be of same length as tickers')
        df.columns = cnames
    elif clean_tickers:
        df.columns = map(utils.clean_ticker, df.columns)

    return df
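# Usage sketch (not from the ffn source): assuming the default provider accepts
# start/end keyword arguments passed through **kwargs (as the web-based
# providers typically do), get() can be exercised as below. The __main__ guard
# keeps the example from running - and hitting the network - on import.
if __name__ == '__main__':
    # plain tickers: cleaned to lower case ('spy', 'agg'), common dates only
    prices = get('SPY,AGG', start='2010-01-01')

    # ticker:field pairs via ticker_field_sep, keep all dates, forward fill gaps
    bands = get('AAPL:High,AAPL:Low', common_dates=False, forward_fill=True)

    # custom column names when the cleaned tickers are not descriptive enough
    named = get('SPY,AGG', start='2010-01-01', column_names=['equities', 'bonds'])

    print(prices.head())
    print(bands.head())
    print(named.head())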