Example #1
def get_store_contents():
    """
    Return an ordered tuple of attributes representing the store contents.
    Useful for ensuring key properties stay the same when switching between systems.
    """
    _get_one = [
        serialized_dataframe(global_state.get_data('1')),
        global_state.get_dtypes('1'),
        global_state.get_settings('1'),
        global_state.get_metadata('1'),
        global_state.get_context_variables('1'),
        global_state.get_history('1'),
    ]
    _get_all = [
        {
            k: serialized_dataframe(v)
            for k, v in global_state.get_data().items()
        },
        global_state.get_dtypes(),
        global_state.get_settings(),
        global_state.get_metadata(),
        global_state.get_context_variables(),
        global_state.get_history(),
    ]
    _lengths = [
        len(global_state.DATA),
        len(global_state.DTYPES),
        len(global_state.SETTINGS),
        len(global_state.METADATA),
        len(global_state.CONTEXT_VARIABLES),
        len(global_state.HISTORY),
    ]
    return (_get_one, _get_all, _lengths)
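A sketch of how such a snapshot might be used: capture the store contents, switch storage backends, and assert nothing drifted. The use_redis_store call is an assumption about the API exposed by global_state; substitute whatever backend switch you actually use.

before = get_store_contents()
global_state.use_redis_store('/tmp/dtale-store')  # hypothetical backend switch
after = get_store_contents()
assert before == after, 'store contents changed across backends'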
Example #2
def build_histogram(data_id, col, query, point_filter):
    """Build a bar chart of a 10-bin histogram for `col`, after applying the
    current query, context variables and point filter."""
    data = run_query(
        handle_predefined(data_id),
        query,
        global_state.get_context_variables(data_id),
    )
    query, _ = build_group_inputs_filter(data, [point_filter])
    data = run_query(data, query)
    s = data[~pd.isnull(data[col])][col]
    hist_data, hist_labels = np.histogram(s, bins=10)
    hist_labels = list(
        map(lambda x: json_float(x, precision=3), hist_labels[1:]))
    axes_builder = build_axes(
        dict(
            data=dict(all=dict(Frequency=hist_data, Bins=hist_labels)),
            min=dict(Frequency=0),
            max=dict(Frequency=max(hist_data)),
        ),
        "Bins",
        dict(type="single", data={}),
    )
    hist_data = dict(data={"all": dict(x=hist_labels, Frequency=hist_data)})
    bars = bar_builder(
        hist_data,
        "Bins",
        ["Frequency"],
        axes_builder,
        chart_builder_passthru,
        modal=True,
    )
    bars.figure["layout"]["xaxis"]["type"] = "category"
    bars.figure["layout"]["title"]["text"] = "{} {} ({} {})".format(
        text("Histogram of"), col, len(s), text("data points"))
    return bars
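The binning leans on numpy's contract: np.histogram returns bins counts and bins + 1 edges, which is why the labels above are built from hist_labels[1:], the upper edge of each bin. A standalone sketch, with round standing in for json_float:

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.5, 2.5, None, 4.0])
s = s[~pd.isnull(s)]  # drop missing values, as build_histogram does
hist_data, bin_edges = np.histogram(s, bins=10)
labels = [round(float(x), 3) for x in bin_edges[1:]]  # one label per bin
print(dict(zip(labels, hist_data.tolist())))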
Example #3
    def query_input(query, pathname, curr_query):
        """
        dash callback for storing valid pandas dataframe queries.  This acts as an intermediary between values typed
        by the user and values that are applied to pandas dataframes.  Most of the time what the user has typed is not
        complete and thus not a valid pandas dataframe query.

        :param query: query input
        :type query: str
        :param pathname: URL path
        :param curr_query: current valid pandas dataframe query
        :return: tuple of (query (if valid), styling for query input (if invalid), query input title (containing
        invalid query exception information))
        :rtype: tuple of (str, str, str)
        """
        try:
            data_id = get_data_id(pathname)
            data = global_state.get_data(data_id)
            ctxt_vars = global_state.get_context_variables(data_id)
            run_query(data, query, ctxt_vars)
            return query, {"line-height": "inherit"}, ""
        except BaseException as ex:
            return (
                curr_query,
                {
                    "line-height": "inherit",
                    "background-color": "pink"
                },
                str(ex),
            )
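Stripped of the Dash plumbing, this is a validate-then-store pattern: try the query against the dataframe and only replace the stored query when it runs. A minimal sketch with plain pandas:

import pandas as pd

def validate_query(df, query, curr_query):
    # returns (query to store, error message); keeps curr_query on failure
    try:
        if query:
            df.query(query)
        return query, ''
    except BaseException as ex:  # pandas query errors come in several flavors
        return curr_query, str(ex)

df = pd.DataFrame(dict(a=[1, 2, 3]))
print(validate_query(df, 'a > 1', ''))  # ('a > 1', '')
print(validate_query(df, 'a >', ''))    # ('', '...syntax error...')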
Example #4
def get_store_contents():
    """
    Return an ordered tuple of attributes representing the store contents.
    Useful for ensuring key properties stay the same when switching between systems.
    """
    _get_one = [
        serialized_dataframe(global_state.get_data("1")),
        global_state.get_dtypes("1"),
        global_state.get_settings("1"),
        global_state.get_metadata("1"),
        global_state.get_context_variables("1"),
        global_state.get_history("1"),
    ]
    _get_all = [
        {
            int(k): serialized_dataframe(v.data)
            for k, v in global_state.items()
        },
        {int(k): v.dtypes for k, v in global_state.items()},
        {int(k): v.settings for k, v in global_state.items()},
        {int(k): v.metadata for k, v in global_state.items()},
        {int(k): v.context_variables for k, v in global_state.items()},
        {int(k): v.history for k, v in global_state.items()},
    ]
    _lengths = [
        global_state.size(),
    ]
    return (_get_one, _get_all, _lengths)
Example #5
    def group_values(chart_type, group_cols, map_group_cols, pathname, inputs,
                     prev_group_vals):
        group_cols = make_list(group_cols)
        if show_input_handler(chart_type or 'line')('group') and not len(group_cols):
            return [], None
        elif chart_type == 'maps':  # all maps have a group input
            group_cols = make_list(map_group_cols)
            if not len(group_cols):
                return [], None
        data_id = get_data_id(pathname)
        group_vals = run_query(global_state.get_data(data_id),
                               inputs.get('query'),
                               global_state.get_context_variables(data_id))
        group_vals = build_group_val_options(group_vals, group_cols)
        selections = []
        available_vals = [gv['value'] for gv in group_vals]
        if prev_group_vals is not None:
            selections = [
                pgv for pgv in prev_group_vals if pgv in available_vals
            ]
        if not len(selections) and len(group_vals) <= MAX_GROUPS:
            selections = available_vals

        return group_vals, selections
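The guard show_input_handler(chart_type or 'line')('group') is a curried predicate: the outer call fixes the chart type, the inner call asks whether a given input applies to it. A hypothetical simplification of that shape (the real mapping lives in dtale's dash charts code):

CHART_INPUTS = dict(line=['x', 'y', 'group'], maps=['loc', 'map_group'])  # illustrative

def show_input_handler(chart_type):
    inputs = CHART_INPUTS.get(chart_type or 'line', [])
    return lambda name: name in inputs

print(show_input_handler('line')('group'))  # True
print(show_input_handler('maps')('group'))  # False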
Example #6
def group_values(
    chart_type,
    group_cols,
    map_group_cols,
    cs_group_cols,
    treemap_group_cols,
    pathname,
    inputs,
    prev_group_vals,
):
    data_id = get_data_id(pathname)
    group_cols = group_cols
    if chart_type == "maps":
        group_cols = map_group_cols
    elif chart_type == "candlestick":
        group_cols = cs_group_cols
    elif chart_type == "treemap":
        group_cols = treemap_group_cols
    group_cols = make_list(group_cols)
    group_types = get_group_types(inputs, data_id, group_cols)
    if "groups" not in group_types:
        return [], None
    group_vals = run_query(
        global_state.get_data(data_id),
        inputs.get("query"),
        global_state.get_context_variables(data_id),
    )
    group_vals = build_group_val_options(group_vals, group_cols)
    selections = []
    available_vals = [gv["value"] for gv in group_vals]
    if prev_group_vals is not None:
        selections = [pgv for pgv in prev_group_vals if pgv in available_vals]
    if not len(selections) and len(group_vals) <= MAX_GROUPS:
        selections = available_vals
    return group_vals, selections
Example #7
def run(self):
    data = run_query(
        global_state.get_data(self.data_id),
        (global_state.get_settings(self.data_id) or {}).get("query"),
        global_state.get_context_variables(self.data_id),
    )
    return self.report.run(data)
Example #8
def reshape(self):
    data = run_query(
        global_state.get_data(self.data_id),
        (global_state.get_settings(self.data_id) or {}).get("query"),
        global_state.get_context_variables(self.data_id),
    )
    return self.builder.reshape(data)
Example #9
def build_code_export(data_id, imports='import pandas as pd\n\n', query=None):
    """
    Helper function for building a string representing the code that was run to get the data you are viewing into
    its current state.

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param imports: string representing the imports at the top of the code string
    :type imports: string, optional
    :param query: pandas dataframe query string
    :type query: str, optional
    :return: list of python code strings
    """
    history = global_state.get_history(data_id) or []
    settings = global_state.get_settings(data_id) or {}
    ctxt_vars = global_state.get_context_variables(data_id)

    startup_code = settings.get('startup_code')
    startup_code = '# Data Re-shaping\n{}\n\n'.format(
        startup_code) if startup_code else ''
    startup_str = (
        "# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'\n\n"
        '{imports}'
        '{startup}'
        'if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):\n'
        '\tdf = df.to_frame(index=False)\n\n'
        '# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required\n'
        "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
        'df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers\n'
    ).format(imports=imports, startup=startup_code)
    final_history = [startup_str] + history
    final_query = query
    if final_query is None:
        final_query = settings.get('query')

    if final_query is not None:
        if len(ctxt_vars or {}):
            final_history.append((
                "\n# this is injecting any context variables you may have passed into 'dtale.show'\n"
                "import dtale.global_state as dtale_global_state\n"
                "\n# DISCLAIMER: running this line in a different process than the one it originated will produce\n"
                "#             differing results\n"
                "ctxt_vars = dtale_global_state.get_context_variables('{data_id}')\n\n"
                "df = df.query('{query}', local_dict=ctxt_vars)\n").format(
                    query=final_query, data_id=data_id))
        else:
            final_history.append("df = df.query('{}')\n".format(final_query))
    elif 'query' in settings:
        final_history.append("df = df.query('{}')\n".format(settings['query']))
    if 'sort' in settings:
        cols, dirs = [], []
        for col, dir in settings['sort']:
            cols.append(col)
            dirs.append('True' if dir == 'ASC' else 'False')
        final_history.append(
            "df = df.sort_values(['{cols}'], ascending=[{dirs}])\n".format(
                cols="', '".join(cols), dirs=', '.join(dirs)))
    return final_history
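build_code_export returns a list of code fragments rather than a single string, so callers are expected to join them. A usage sketch (the data_id and query values here are purely illustrative):

snippets = build_code_export('1', query='a > 1')
print('\n'.join(snippets))  # startup/disclaimer code first, then history, query, sort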
Example #10
def load_filterable_data(data_id, req, query=None):
    filtered = get_bool_arg(req, "filtered")
    curr_settings = global_state.get_settings(data_id) or {}
    if filtered:
        final_query = query or build_query(data_id, curr_settings.get("query"))
        return run_query(
            handle_predefined(data_id),
            final_query,
            global_state.get_context_variables(data_id),
            ignore_empty=True,
        )
    return global_state.get_data(data_id)
Example #11
def build_figure_data(data_id, chart_type=None, query=None, x=None, y=None, z=None, group=None, agg=None, window=None,
                      rolling_comp=None, **kwargs):
    """
    Builds chart figure data for loading into dash:`dash_core_components.Graph <dash-core-components/graph>` components

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param chart_type: type of chart (line, bar, pie, scatter...)
    :type chart_type: str
    :param query: pandas dataframe query string
    :type query: str, optional
    :param x: column to use for the X-Axis
    :type x: str
    :param y: columns to use for the Y-Axes
    :type y: list of str
    :param z: column to use for the Z-Axis
    :type z: str, optional
    :param group: column(s) to use for grouping
    :type group: list of str or str, optional
    :param agg: specific aggregation that can be applied to y or z axes.  Possible values are: count, first, last,
                mean, median, min, max, std, var, mad, prod, sum.  This is included in the label of the axis it is
                being applied to.
    :type agg: str, optional
    :param window: number of days to include in rolling aggregations
    :type window: int, optional
    :param rolling_comp: computation to use in rolling aggregations
    :type rolling_comp: str, optional
    :param kwargs: optional keyword arguments, here in case invalid arguments are passed to this function
    :type kwargs: dict
    :return: dictionary of series data, min/max ranges of columns used in chart
    :rtype: dict
    """
    code = None
    try:
        if not valid_chart(**dict(x=x, y=y, z=z, chart_type=chart_type, agg=agg, window=window,
                                  rolling_comp=rolling_comp)):
            return None, None

        data = run_query(
            global_state.get_data(data_id),
            query,
            global_state.get_context_variables(data_id)
        )
        code = build_code_export(data_id, query=query)
        chart_kwargs = dict(group_col=group, agg=agg, allow_duplicates=chart_type == 'scatter', rolling_win=window,
                            rolling_comp=rolling_comp)
        if chart_type in ZAXIS_CHARTS:
            chart_kwargs['z'] = z
            del chart_kwargs['group_col']
        data, chart_code = build_chart_data(data, x, y, **chart_kwargs)
        return data, code + chart_code
    except BaseException as e:
        return dict(error=str(e), traceback=str(traceback.format_exc())), code
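A hypothetical call based on the signature and return contract above: (None, None) when the chart inputs fail valid_chart, a dict with error/traceback keys when the build raises, otherwise (series data, code):

data, code = build_figure_data(
    '1', chart_type='line', x='date', y=['close'], agg='mean')  # illustrative args
if data is None:
    pass  # invalid chart configuration, nothing to draw
elif 'error' in data:
    print(data['error'])
else:
    print(code)  # python snippets reproducing the chart data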
Example #12
def group_values(
    chart_type,
    group_cols,
    map_group_cols,
    cs_group_cols,
    treemap_group_cols,
    funnel_group_cols,
    clustergram_group_cols,
    pareto_group_cols,
    inputs,
    prev_group_vals,
):
    data_id = inputs["data_id"]
    group_cols = group_cols
    if chart_type == "maps":
        group_cols = map_group_cols
    elif chart_type == "candlestick":
        group_cols = cs_group_cols
    elif chart_type == "treemap":
        group_cols = treemap_group_cols
    elif chart_type == "funnel":
        group_cols = funnel_group_cols
    elif chart_type == "clustergram":
        group_cols = clustergram_group_cols
    elif chart_type == "pareto":
        group_cols = pareto_group_cols
    group_cols = make_list(group_cols)
    group_types = get_group_types(inputs, group_cols)
    if "groups" not in group_types:
        return [], None
    group_vals = run_query(
        handle_predefined(data_id),
        inputs.get("query"),
        global_state.get_context_variables(data_id),
    )
    group_vals = build_group_val_options(group_vals, group_cols)
    selections = []
    available_vals = [gv["value"] for gv in group_vals]
    if prev_group_vals is not None:
        selections = [
            pgv for pgv in prev_group_vals if pgv in available_vals
        ]
    if not len(selections) and len(group_vals) <= MAX_GROUPS:
        selections = available_vals
    return group_vals, selections
Example #13
    def __init__(self, data_id, req):
        self.data_id = data_id
        self.analysis_type = get_str_arg(req, "type")
        curr_settings = global_state.get_settings(data_id) or {}
        self.query = build_query(data_id, curr_settings.get("query"))

        data = run_query(
            handle_predefined(data_id),
            self.query,
            global_state.get_context_variables(self.data_id),
        )
        self.selected_col = find_selected_column(
            data, get_str_arg(req, "col", "values"))
        self.data = data[~pd.isnull(data[self.selected_col])]
        self.dtype = find_dtype(self.data[self.selected_col])
        self.classifier = classify_type(self.dtype)
        self.code = build_code_export(
            data_id,
            imports="{}\n".format("\n".join([
                "import numpy as np",
                "import pandas as pd",
                "import plotly.graph_objs as go",
            ])),
        )

        if self.analysis_type is None:
            self.analysis_type = ("histogram" if self.classifier
                                  in ["F", "I", "D"] else "value_counts")

        if self.analysis_type == "geolocation":
            self.analysis = GeolocationAnalysis(req)
        elif self.analysis_type == "histogram":
            self.analysis = HistogramAnalysis(req)
        elif self.analysis_type == "categories":
            self.analysis = CategoryAnalysis(req)
        elif self.analysis_type == "value_counts":
            self.analysis = ValueCountAnalysis(req)
        elif self.analysis_type == "word_value_counts":
            self.analysis = WordValueCountAnalysis(req)
        elif self.analysis_type == "qq":
            self.analysis = QQAnalysis()
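The histogram default keys off one-letter type classes, where 'F', 'I' and 'D' plausibly stand for float, int and date columns. A simplified stand-in for the classifier, offered as an assumption rather than dtale's actual classify_type:

import numpy as np

def classify_dtype(dtype):
    # assumed mapping; the real classify_type covers more cases (bool, timedelta, ...)
    if np.issubdtype(dtype, np.floating):
        return 'F'
    if np.issubdtype(dtype, np.integer):
        return 'I'
    if np.issubdtype(dtype, np.datetime64):
        return 'D'
    return 'S'

print(classify_dtype(np.dtype('float64')))         # F -> histogram
print(classify_dtype(np.dtype('datetime64[ns]')))  # D -> histogram
print(classify_dtype(np.dtype('object')))          # S -> value_counts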
Example #14
    def group_values(group_cols, pathname, inputs, prev_group_vals):
        group_cols = make_list(group_cols)
        if not show_input_handler(inputs.get(
                'chart_type', 'line'))('group') or not len(group_cols):
            return [], None
        print('loading group vals...')
        data_id = get_data_id(pathname)
        group_vals = run_query(global_state.get_data(data_id),
                               inputs.get('query'),
                               global_state.get_context_variables(data_id))
        group_vals = build_group_val_options(group_vals, group_cols)
        selections = []
        available_vals = [gv['value'] for gv in group_vals]
        if prev_group_vals is not None:
            selections = [
                pgv for pgv in prev_group_vals if pgv in available_vals
            ]
        if not len(selections) and len(group_vals) <= MAX_GROUPS:
            selections = available_vals

        return group_vals, selections
Example #15
    def group_values(chart_type, group_cols, map_group_cols, pathname, inputs,
                     prev_group_vals):
        group_cols = make_list(
            map_group_cols if chart_type == "maps" else group_cols)
        if not show_group_input(inputs, group_cols):
            return [], None
        data_id = get_data_id(pathname)
        group_vals = run_query(
            global_state.get_data(data_id),
            inputs.get("query"),
            global_state.get_context_variables(data_id),
        )
        group_vals = build_group_val_options(group_vals, group_cols)
        selections = []
        available_vals = [gv["value"] for gv in group_vals]
        if prev_group_vals is not None:
            selections = [
                pgv for pgv in prev_group_vals if pgv in available_vals
            ]
        if not len(selections) and len(group_vals) <= MAX_GROUPS:
            selections = available_vals

        return group_vals, selections
Example #16
    def query_input(query, curr_query, curr_marks, data_id):
        """
        dash callback for storing valid pandas dataframe queries.  This acts as an intermediary between values typed
        by the user and values that are applied to pandas dataframes.  Most of the time what the user has typed is not
        complete and thus not a valid pandas dataframe query.

        :param query: query input
        :type query: str
        :param curr_query: current valid pandas dataframe query
        :type curr_query: str
        :param curr_marks: current slider marks (returned unchanged when the query is invalid)
        :param data_id: identifier for the data we are viewing
        :type data_id: str
        :return: tuple of (query (if valid), styling for query input (if invalid), query input title (containing
        invalid query exception information), slider marks)
        :rtype: tuple of (str, dict, str, dict)
        """
        try:
            data = handle_predefined(data_id)
            ctxt_vars = global_state.get_context_variables(data_id)
            df = run_query(data, query, ctxt_vars)
            return (
                query,
                {
                    "line-height": "inherit"
                },
                "",
                build_slider_counts(df, data_id, query),
            )
        except BaseException as ex:
            return (
                curr_query,
                {
                    "line-height": "inherit",
                    "background-color": "pink"
                },
                str(ex),
                curr_marks,
            )
Example #17
def build_code_export(data_id, imports="import pandas as pd\n\n", query=None):
    """
    Helper function for building a string representing the code that was run to get the data you are viewing into
    its current state.

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param imports: string representing the imports at the top of the code string
    :type imports: string, optional
    :param query: pandas dataframe query string
    :type query: str, optional
    :return: list of python code strings
    """
    history = global_state.get_history(data_id) or []
    settings = global_state.get_settings(data_id) or {}
    ctxt_vars = global_state.get_context_variables(data_id)

    startup_code = settings.get("startup_code") or ""
    if startup_code and not startup_code.endswith("\n"):
        startup_code += "\n"
    xarray_setup = ""
    if data_id in global_state.DATASETS:
        xarray_dims = global_state.get_dataset_dim(data_id)
        if len(xarray_dims):
            xarray_setup = (
                "df = ds.sel({selectors}).to_dataframe()\n"
                "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
                "df = df.set_index(list(ds.dims.keys()))\n"
            ).format(
                selectors=", ".join(
                    "{}='{}'".format(k, v) for k, v in xarray_dims.items()
                )
            )
        else:
            xarray_setup = (
                "df = ds.to_dataframe()\n"
                "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
                "df = df.set_index(list(ds.dims.keys()))\n"
            )
    startup_str = (
        "# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'\n\n"
        "{imports}"
        "{xarray_setup}"
        "{startup}"
        "if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):\n"
        "\tdf = df.to_frame(index=False)\n\n"
        "# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required\n"
        "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
        "df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers\n"
    ).format(imports=imports, xarray_setup=xarray_setup, startup=startup_code)
    final_history = [startup_str] + history
    final_query = query
    if final_query is None:
        final_query = settings.get("query")

    if final_query is not None and final_query != "":
        if len(ctxt_vars or {}):
            final_history.append(
                (
                    "\n# this is injecting any context variables you may have passed into 'dtale.show'\n"
                    "import dtale.global_state as dtale_global_state\n"
                    "\n# DISCLAIMER: running this line in a different process than the one it originated will produce\n"
                    "#             differing results\n"
                    "ctxt_vars = dtale_global_state.get_context_variables('{data_id}')\n\n"
                    "df = df.query({query}, local_dict=ctxt_vars)\n"
                ).format(query=triple_quote(final_query), data_id=data_id)
            )
        else:
            final_history.append(
                "df = df.query({})\n".format(triple_quote(final_query))
            )
    elif settings.get("query"):
        final_history.append(
            "df = df.query({})\n".format(triple_quote(settings["query"]))
        )
    if "sort" in settings:
        cols, dirs = [], []
        for col, dir in settings["sort"]:
            cols.append(col)
            dirs.append("True" if dir == "ASC" else "False")
        final_history.append(
            "df = df.sort_values(['{cols}'], ascending=[{dirs}])\n".format(
                cols=", ".join(cols), dirs="', '".join(dirs)
            )
        )
    return final_history
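Compared with Example #9, the query lines now pass the query through a triple_quote helper, presumably so queries containing quotes no longer break the generated code. A sketch of what such a helper would look like (an assumption, not dtale's verbatim source):

def triple_quote(val):
    # assumed behavior: wrap the value so embedded quotes survive verbatim
    return '"""{}"""'.format(val)

print("df = df.query({})".format(triple_quote("strCol == 'A'")))
# df = df.query("""strCol == 'A'""")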