def test_parse_args():
    """Check utils.parse_arg against a table of (input, expected) pairs."""
    cases = (
        # plain csv string is split into its items
        ('a,b,c', ['a', 'b', 'c']),
        # should ignore spaces
        (' a ,b ,c ', ['a', 'b', 'c']),
        # a single item still comes back as a list
        ('a', ['a']),
        # should stay same for list
        (['a', 'b'], ['a', 'b']),
        # should stay same for dict
        ({'a': 1}, {'a': 1}),
    )

    for raw, expected in cases:
        assert utils.parse_arg(raw) == expected
def get(tickers, provider=None, common_dates=True, forward_fill=False,
        clean_tickers=True, column_names=None, ticker_field_sep=':',
        mrefresh=False, existing=None, **kwargs):
    """
    Helper function for retrieving data as a DataFrame.

    Args:
        * tickers (list, string, csv string): Tickers to download.
        * provider (function): Provider to use for downloading data.
            By default it will be ffn.DEFAULT_PROVIDER if not provided.
        * common_dates (bool): Keep common dates only? Drop na's.
        * forward_fill (bool): forward fill values if missing. Only works
            if common_dates is False, since common_dates will remove
            all nan's, so no filling forward necessary.
        * clean_tickers (bool): Should the tickers be 'cleaned' using
            ffn.utils.clean_tickers? Basically remove non-standard
            characters (^VIX -> vix) and standardize to lower case.
        * column_names (list): List of column names if clean_tickers
            is not satisfactory.
        * ticker_field_sep (char): separator used to determine the
            ticker and field. This is in case we want to specify
            particular, non-default fields. For example, we might
            want: AAPL:Low,AAPL:High,AAPL:Close. ':' is the separator.
        * mrefresh (bool): Ignore memoization. Only forwarded to the
            provider when the provider has an ``mcache`` attribute.
        * existing (DataFrame): Existing DataFrame to append returns
            to - used when we download from multiple sources
        * kwargs: passed to provider

    Returns:
        * DataFrame with one column per requested ticker, in the order
            the tickers were given (merged with ``existing`` through
            ffn.merge when ``existing`` is provided).

    Raises:
        * ValueError: if column_names is given and its length does not
            match the number of columns in the resulting DataFrame.
    """
    if provider is None:
        provider = DEFAULT_PROVIDER

    tickers = utils.parse_arg(tickers)

    # Whether the provider supports memoization does not change between
    # tickers, so decide once which keyword arguments it receives.
    provider_kwargs = dict(kwargs)
    if hasattr(provider, 'mcache'):
        provider_kwargs['mrefresh'] = mrefresh

    data = {}
    for ticker in tickers:
        t = ticker
        f = None

        # check for field: "TICKER<sep>FIELD" -> ticker and field
        bits = ticker.split(ticker_field_sep, 1)
        if len(bits) == 2:
            t, f = bits

        # keyed by the full "ticker[:field]" string so that several fields
        # of the same ticker end up in separate columns
        data[ticker] = provider(ticker=t, field=f, **provider_kwargs)

    df = pd.DataFrame(data)
    # ensure same order as provided
    df = df[tickers]

    if existing is not None:
        df = ffn.merge(existing, df)

    if common_dates:
        df = df.dropna()

    if forward_fill:
        # fillna(method='ffill') is deprecated in recent pandas releases;
        # ffill() is the equivalent, supported spelling.
        df = df.ffill()

    if column_names:
        cnames = utils.parse_arg(column_names)
        if len(cnames) != len(df.columns):
            raise ValueError(
                'column_names must be of same length as tickers')
        df.columns = cnames
    elif clean_tickers:
        # build a concrete list: on Python 3 map() is a lazy, one-shot
        # iterator, which is a fragile thing to hand over as an index
        df.columns = [utils.clean_ticker(c) for c in df.columns]

    return df