Esempio n. 1
0
    def iter(self, datasets=None, variants=None, markets=None):
        """
        Iterate over all valid datasets, variants and markets,
        or only use the ones specified. For example:

        .. code-block:: python

            for dataset, variant, market, df in all_datasets.iter():
                # dataset, variant and market are strings with the names.
                # df is a Pandas DataFrame with the actual data.

        :param datasets:
            Default is `None` which uses all valid datasets.
            Otherwise a list of strings with the dataset-names to use.

        :param variants:
            Default is `None` which uses all valid variants for a dataset.
            Otherwise a list of strings with the variant-names to use.

        :param markets:
            Default is `None` which uses all valid markets for a dataset.
            Otherwise a list of strings with the market-names to use.

        :return:
            Generator which iterates over:
            dataset (string), variant (string), market (string), df (Pandas DataFrame)
        """

        # Load dict with info about all the datasets.
        info_datasets = load_info_datasets()

        # Use provided or all datasets? The info-dict is keyed by dataset-name,
        # so its keys are the list of all datasets. This must be a list of
        # names and not a reference to a function, because it is iterated
        # over directly below.
        if datasets is None:
            datasets = list(info_datasets)

        # For all datasets.
        for dataset in datasets:
            # Info for this dataset; raises KeyError for an unknown name.
            info = info_datasets[dataset]

            # Use provided or all valid variants / markets for this dataset?
            # NOTE(review): if a dataset's list of variants or markets is
            # empty, the loops below yield nothing for that dataset --
            # confirm whether that is intended.
            _variants = variants if variants is not None else info['variants']
            _markets = markets if markets is not None else info['markets']

            # For all the selected variants and markets.
            for variant in _variants:
                for market in _markets:
                    # Get the Pandas DataFrame with the actual data.
                    df = self.get(dataset=dataset,
                                  variant=variant,
                                  market=market)

                    # Yield all the strings and the Pandas DataFrame.
                    yield dataset, variant, market, df
Esempio n. 2
0
def datasets_all():
    """
    Get the names of all the available datasets.

    :return:
        List of strings with the dataset names.
    """

    # Iterating the info-dict gives its keys, which are the dataset names.
    return list(load_info_datasets())
Esempio n. 3
0
def iter_all_datasets(datasets=None):
    """
    Create a generator for iterating over all valid datasets, variants and
    markets. For example:

    .. code-block:: python

        for dataset, variant, market in iter_all_datasets():
            print(dataset, variant, market)

    This only yields the names of the datasets, variants and markets, not the
    actual Pandas DataFrames, use :obj:`~simfin.datasets.load_all_datasets`
    or the :obj:`~simfin.datasets.AllDatasets` class for that.

    :param datasets:
        If `None` then iterate over all datasets. Otherwise if this is a string
        or list of strings, then only iterate over these datasets. Names are
        matched exactly.

    :return:
        Generator which iterates over:
        dataset (string), variant (string or `None`), market (string or `None`)
    """

    # Load dict with info about all the datasets.
    info_datasets = load_info_datasets()

    # Only use the given datasets?
    if datasets is not None:
        if isinstance(datasets, str):
            # A single name must be wrapped in a list, otherwise the `in`
            # test below would do substring matching on the string, so that
            # e.g. 'income-banks' would also select a dataset named 'income'.
            datasets = [datasets]
        else:
            # Materialize once so one-shot iterables can be tested repeatedly.
            datasets = list(datasets)

        # Create a new dict which only contains the given datasets.
        # The order of the info-dict is kept.
        info_datasets = {
            k: v
            for k, v in info_datasets.items() if k in datasets
        }

    # Yield all valid combinations of datasets, variants and markets.
    for dataset, x in info_datasets.items():
        # If the list of variants or markets is empty, use a list with None,
        # otherwise the for-loops below would not yield anything.
        variants = x['variants'] or [None]
        markets = x['markets'] or [None]

        for variant in variants:
            for market in markets:
                yield dataset, variant, market
Esempio n. 4
0
def datasets_startswith(names):
    """
    Get the names of all datasets whose name begins with one of the
    given prefixes.

    :param names:
        String or tuple of strings with the prefixes to match.

    :return:
        List of strings with the matching dataset names.
    """

    # Iterating the info-dict gives the dataset names. `str.startswith`
    # accepts either a single string or a tuple of alternatives.
    return [
        dataset
        for dataset in load_info_datasets()
        if dataset.startswith(names)
    ]
Esempio n. 5
0
def info_datasets(dataset=None, show_columns=True):
    """
    Print an overview of all the available datasets, or print the details
    (variants, markets and optionally columns) for a single dataset.

    :param dataset:
        String with the exact name of a dataset; it is converted to
        lower-case before the lookup.
        If None then print a list of all available datasets.

    :param show_columns:
        Boolean whether to also print the columns of the given dataset.

    :return:
        `None`
    """

    # Dict with info about all the datasets, keyed by dataset name.
    catalog = load_info_datasets()

    # No dataset requested: print the sorted names of all datasets.
    if dataset is None:
        # Wrap the names so they look nice on multiple lines, without
        # breaking a dataset name in the middle.
        wrapper = TextWrapper(width=80,
                              break_on_hyphens=False,
                              break_long_words=False,
                              initial_indent='All datasets: ',
                              subsequent_indent='              ')
        names = ', '.join(sorted(catalog))
        print('\n'.join(wrapper.wrap(names)))
        return

    # Dataset names are looked up in lower-case.
    dataset = dataset.lower()
    details = catalog.get(dataset)

    # Unknown dataset: print an error-message and stop.
    if details is None:
        print('Dataset \'{0}\' not found.'.format(dataset))
        return

    # Show dataset name.
    print('Dataset: ', dataset)

    # The variants and markets are printed the same way: a label followed
    # by the sorted names, or a dash if there are none. Continuation lines
    # are indented to line up with the text after the label.
    space_indent = '          '
    for key, label in (('variants', 'Variants: '),
                       ('markets', 'Markets:  ')):
        wrapper = TextWrapper(width=80,
                              initial_indent=label,
                              subsequent_indent=space_indent)
        entries = sorted(details[key])
        text = ', '.join(entries) if entries else '-'
        print('\n'.join(wrapper.wrap(text)))

    # Nothing more to print unless the columns were requested.
    if not show_columns:
        return

    print(
        'Columns:  (The * marks data that requires a paid subscription)'
    )

    # Used to ensure each column looks nice if it spans multiple lines.
    wrapper = TextWrapper(width=80,
                          initial_indent='',
                          subsequent_indent='   ')

    for column in details['columns']:
        # Marker for whether the column-data is premium (*) or free (-).
        marker = '*' if column['is_premium'] else '-'

        # Comma-separated, sorted list of the column's Python shortcuts.
        shortcuts = ', '.join(sorted(column['shortcuts']))

        # Marker, quoted full name of the column, and its shortcuts.
        line = '{0} \"{1}\" {2}'.format(marker, column['name'], shortcuts)

        # Break into lines of some max-length and print them.
        print('\n'.join(wrapper.wrap(line)))