def iter(self, datasets=None, variants=None, markets=None):
    """
    Iterate over all valid datasets, variants and markets,
    or only use the ones specified. For example:

    .. code-block:: python

        for dataset, variant, market, df in all_datasets.iter():
            # dataset, variant and market are strings with the names.
            # df is a Pandas DataFrame with the actual data.

    :param datasets:
        Default is `None` which uses all valid datasets.
        Otherwise a list of strings with the dataset-names to use.

    :param variants:
        Default is `None` which uses all valid variants for a dataset.
        Otherwise a list of strings with the variant-names to use.

    :param markets:
        Default is `None` which uses all valid markets for a dataset.
        Otherwise a list of strings with the market-names to use.

    :return:
        Generator which iterates over:
        dataset (string), variant (string), market (string),
        df (Pandas DataFrame)
    """

    # Load dict with info about all the datasets.
    info_datasets = load_info_datasets()

    # Use provided or all datasets?
    if datasets is None:
        # The keys of the info-dict are the dataset names. Previously the
        # function `datasets_all` was assigned here without being called,
        # so the loop below raised a TypeError for the default argument.
        datasets = list(info_datasets)

    # For all datasets.
    for dataset in datasets:
        # Use provided or all valid variants for this dataset?
        # The info-dict is only consulted when no explicit list is given.
        if variants is not None:
            _variants = variants
        else:
            _variants = info_datasets[dataset]['variants']

        # Use provided or all valid markets for this dataset?
        if markets is not None:
            _markets = markets
        else:
            _markets = info_datasets[dataset]['markets']

        # NOTE(review): a dataset whose list of variants or markets is
        # empty yields nothing here, whereas iter_all_datasets() yields
        # None for the missing name - confirm which behaviour is wanted.

        # For all the selected variants and markets.
        for variant in _variants:
            for market in _markets:
                # Get the Pandas DataFrame with the actual data.
                df = self.get(dataset=dataset, variant=variant,
                              market=market)

                # Yield all the strings and the Pandas DataFrame.
                yield dataset, variant, market, df
def datasets_all():
    """
    Get the names of all available datasets.

    :return:
        List of strings with the dataset names.
    """

    # Iterating over the info-dict gives the dataset names, so the list
    # can be built directly from the loaded dict.
    return list(load_info_datasets())
def iter_all_datasets(datasets=None):
    """
    Create a generator for iterating over all valid datasets, variants and
    markets. For example:

    .. code-block:: python

        for dataset, variant, market in iter_all_datasets():
            print(dataset, variant, market)

    This only yields the names of the datasets, variants and markets, not the
    actual Pandas DataFrames, use :obj:`~simfin.datasets.load_all_datasets`
    or the :obj:`~simfin.datasets.AllDatasets` class for that.

    :param datasets:
        If `None` then iterate over all datasets. Otherwise if this is a
        string or list of strings, then only iterate over these datasets.

    :return:
        Generator which iterates over:
        dataset (string), variant (string or `None`),
        market (string or `None`)
    """

    # Load dict with info about all the datasets.
    info_datasets = load_info_datasets()

    # Only use the given datasets?
    if datasets is not None:
        # A single dataset name must be wrapped in a list. Otherwise the
        # `in` test below would be a substring test on the string, so e.g.
        # 'income-banks' would also select the dataset named 'income'.
        if isinstance(datasets, str):
            datasets = [datasets]

        # Create a new dict which only contains the given datasets.
        info_datasets = {k: v for k, v in info_datasets.items()
                         if k in datasets}

    # Yield all valid combinations of datasets, variants and markets.
    for dataset, x in info_datasets.items():
        # If the list of variants or markets is empty, use a list with None,
        # otherwise the for-loops below would not yield anything.
        variants = x['variants'] or [None]
        markets = x['markets'] or [None]

        for variant in variants:
            for market in markets:
                yield dataset, variant, market
def datasets_startswith(names):
    """
    Return a list of strings with dataset names that begin with the given
    names.

    :param names:
        String with a single prefix, or a tuple, list or other iterable of
        strings with several prefixes.

    :return:
        List of strings with the names of the datasets that start with
        any of the given prefixes.
    """

    # str.startswith() only accepts a string or a tuple of strings, so any
    # other iterable (e.g. a list) is converted to a tuple first.
    if not isinstance(names, str):
        names = tuple(names)

    # Load dict with info about all the datasets. Iterating over it gives
    # the dataset names, which are filtered by the given prefixes.
    info_datasets = load_info_datasets()

    return [name for name in info_datasets if name.startswith(names)]
def info_datasets(dataset=None, show_columns=True):
    """
    Print an overview of all available datasets, or print the details
    of a single dataset.

    :param dataset:
        String with the exact name of a dataset. It is converted to
        lower-case before the lookup. If `None` then the names of all
        available datasets are printed instead.

    :param show_columns:
        Boolean whether to also print the columns of the given dataset.
        Not used when `dataset` is `None`.

    :return:
        `None`
    """

    def _wrapped(label, indent, text):
        # Break the text into lines of at most 80 characters, where the
        # first line starts with the label and the rest with the indent.
        wrapper = TextWrapper(width=80, initial_indent=label,
                              subsequent_indent=indent)
        return wrapper.fill(text)

    def _name_list(names):
        # Sorted, comma-separated names, or a dash if there are none.
        ordered = sorted(names)
        return ', '.join(ordered) if ordered else '-'

    # Load dict with info about all the datasets.
    all_info = load_info_datasets()

    # No dataset given: Print the names of all available datasets.
    if dataset is None:
        # Dataset names contain hyphens, so the wrapper must not break
        # lines on hyphens or inside long words.
        overview = TextWrapper(width=80,
                               break_on_hyphens=False,
                               break_long_words=False,
                               initial_indent='All datasets: ',
                               subsequent_indent=' ')
        print(overview.fill(', '.join(sorted(all_info))))
        return

    # The lookup uses the lower-case dataset name.
    dataset = dataset.lower()
    details = all_info.get(dataset)

    # Dataset does not exist: Print error-message.
    if details is None:
        print('Dataset \'{0}\' not found.'.format(dataset))
        return

    # Show dataset name.
    print('Dataset: ', dataset)

    # Show the variants and then the markets for this dataset.
    space_indent = ' '
    print(_wrapped('Variants: ', space_indent,
                   _name_list(details['variants'])))
    print(_wrapped('Markets: ', space_indent,
                   _name_list(details['markets'])))

    # Show columns for this dataset?
    if not show_columns:
        return

    print('Columns: (The * marks data that requires a paid subscription)')

    # Used to ensure each column looks nice on multiple lines.
    column_wrapper = TextWrapper(width=80, initial_indent='',
                                 subsequent_indent=' ')

    for column in details['columns']:
        # Marker that indicates if the column-data is premium or free.
        marker = '*' if column['is_premium'] else '-'

        # String with the sorted list of Python shortcuts.
        shortcut_names = ', '.join(sorted(column['shortcuts']))

        # The column's marker, full name and Python shortcuts.
        line = '{0} \"{1}\" {2}'.format(marker, column['name'],
                                        shortcut_names)

        # Print the line, wrapped if it is too long.
        print(column_wrapper.fill(line))