Beispiel #1
0
def build_histogram(data_id, col, query, point_filter):
    """
    Builds a bar chart containing a 10-bin histogram of the non-null values of one column.

    The dataframe stored for ``data_id`` is filtered by ``query`` first and then by the filter
    built from ``point_filter``.

    :param data_id: identifier used to look up the dataframe & context variables in global state
    :param col: name of the column whose values will be binned
    :param query: query handed to ``run_query`` together with the context variables
    :param point_filter: single filter specification handed to ``build_group_inputs_filter``
    :return: output of ``bar_builder`` with its x-axis type set to "category" and its title set
    """
    raw = global_state.get_data(data_id)
    context_vars = global_state.get_context_variables(data_id)
    df = run_query(raw, query, context_vars)
    point_query = build_group_inputs_filter(df, [point_filter])
    df = run_query(df, point_query)

    # nulls are dropped before binning
    not_null = ~pd.isnull(df[col])
    values = df[not_null][col]
    counts, edges = np.histogram(values, bins=10)
    # np.histogram returns one more edge than bins, each bin is labeled using its right-hand edge
    labels = [json_float(edge, precision=3) for edge in edges[1:]]

    axis_mins = dict(Frequency=0)
    axis_maxs = dict(Frequency=max(counts))
    axes_builder = build_axes(
        None,
        "Bins",
        dict(type="single", data={}),
        axis_mins,
        axis_maxs,
        data=pd.DataFrame(dict(Frequency=counts, Bins=labels)),
    )
    chart_data = dict(data={"all": dict(x=labels, Frequency=counts)})
    bars = bar_builder(
        chart_data, "Bins", ["Frequency"], axes_builder, chart_builder_passthru, modal=True
    )

    layout = bars.figure["layout"]
    layout["xaxis"]["type"] = "category"
    layout["title"]["text"] = "Histogram of {} ({} data points)".format(col, len(values))
    return bars
Beispiel #2
0
    def query_input(query, pathname, curr_query):
        """
        dash callback for storing valid pandas dataframe queries.  This acts as an intermediary between values typed
        by the user and values that are applied to pandas dataframes.  Most of the time what the user has typed is not
        complete and thus not a valid pandas dataframe query.

        :param query: query input
        :type query: str
        :param pathname: URL path
        :param curr_query: current valid pandas dataframe query
        :return: tuple of (query (if valid, otherwise the current valid query), styling for query input (pink
                 background if invalid input), query input title (containing invalid query exception information))
        :rtype: tuple of (str, dict, str)
        """
        try:
            data_id = get_data_id(pathname)
            data = global_state.get_data(data_id)
            ctxt_vars = global_state.get_context_variables(data_id)
            # the result is discarded, the query is only executed to find out whether it raises
            run_query(data, query, ctxt_vars)
        except Exception as ex:
            # Exception (not BaseException) so KeyboardInterrupt & SystemExit are not reported as an invalid query
            invalid_style = {
                "line-height": "inherit",
                "background-color": "pink"
            }
            return curr_query, invalid_style, str(ex)
        return query, {"line-height": "inherit"}, ""
Beispiel #3
0
def test_run_query():
    """
    Checks ``utils.run_query`` against a query that is expected to raise, a query using a context
    variable and (python 3 only) a query on a column name containing a period.
    """
    df = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})

    # no row has a == 4, presumably the reason this call is expected to raise
    with pytest.raises(Exception):
        utils.run_query(df, "a == 4")

    # "@a" is resolved from the context variables passed as the third argument
    matched = utils.run_query(df, "a in @a", {"a": [1, 2, 3]})
    assert len(matched) == 3

    if not PY3:
        return

    dotted = pd.DataFrame({"a.b": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})
    assert len(utils.run_query(dotted, "a.b == 1")) == 1
Beispiel #4
0
def test_run_query():
    """
    Runs ``utils.run_query`` with a query expected to raise, a query referencing a context
    variable and, under python 3, a query against a column whose name contains a period.
    """
    rows = [{'a': n, 'b': n + 1, 'c': n + 2} for n in (1, 2, 3)]
    df = pd.DataFrame(rows)

    # none of the rows satisfy this query, which is presumably why an exception is expected
    with pytest.raises(Exception):
        utils.run_query(df, 'a == 4')

    context_vars = {'a': [1, 2, 3]}
    assert len(utils.run_query(df, 'a in @a', context_vars)) == 3

    if PY3:
        dotted_rows = [{'a.b': n, 'b': n + 1, 'c': n + 2} for n in (1, 2, 3)]
        result = utils.run_query(pd.DataFrame(dotted_rows), 'a.b == 1')
        assert len(result) == 1
Beispiel #5
0
    def group_values(chart_type, group_cols, map_group_cols, pathname, inputs,
                     prev_group_vals):
        """
        Builds the available group value options along with the values that should be selected.

        :param chart_type: type of chart, 'line' is assumed when this is empty
        :param group_cols: column(s) selected for grouping
        :param map_group_cols: column(s) selected for grouping when the chart type is 'maps'
        :param pathname: URL path the data identifier is read from
        :param inputs: dictionary of inputs, only its 'query' value is used
        :param prev_group_vals: previously selected group values (can be None)
        :return: tuple of (group value options, selected group values), ``([], None)`` when there are no group columns
        """
        columns = make_list(group_cols)
        group_input_shown = show_input_handler(chart_type or 'line')('group')
        if group_input_shown and len(columns) == 0:
            return [], None
        if chart_type == 'maps':  # all maps have a group input
            columns = make_list(map_group_cols)
            if len(columns) == 0:
                return [], None

        data_id = get_data_id(pathname)
        filtered = run_query(
            global_state.get_data(data_id),
            inputs.get('query'),
            global_state.get_context_variables(data_id),
        )
        options = build_group_val_options(filtered, columns)
        available_vals = [option['value'] for option in options]

        # carry over any previous selections that are still available
        selections = (
            [] if prev_group_vals is None
            else [prev for prev in prev_group_vals if prev in available_vals]
        )
        # with nothing carried over everything is selected, as long as there are at most MAX_GROUPS options
        if len(selections) == 0 and len(options) <= MAX_GROUPS:
            selections = available_vals

        return options, selections
Beispiel #6
0
def retrieve_chart_data(df, *args, **kwargs):
    """
    Retrieves data from a dataframe for the columns specified, each one passed through the handler
    returned by :meth:`dtale.charts.utils.date_freq_handler`, and optionally filters the result
    to specific group values.

    :param df: dataframe that contains data for chart
    :type df: :class:`pandas:pandas.DataFrame`
    :param args: columns to use (individual names or lists of names, None entries are skipped)
    :type args: iterable of str
    :param kwargs: only "group_val" is read, when non-empty it is used to build a filter applied to the data
    :return: tuple of (dataframe of data required for chart construction, list of code snippets)
    :rtype: tuple of (:class:`pandas:pandas.DataFrame`, list of str)
    """
    freq_handler = date_freq_handler(df)
    requested = [col for col in flatten_lists([make_list(a) for a in args]) if col is not None]

    # each handler call returns a (series, code) pair, code may be None
    handled = [freq_handler(col) for col in requested]
    chart_data = pd.concat([series for series, _ in handled], axis=1)
    snippets = [snippet for _, snippet in handled if snippet is not None]
    chart_code = ["chart_data = pd.concat(["] + snippets + ["], axis=1)"]

    if len(make_list(kwargs.get("group_val"))):
        group_filter = build_group_inputs_filter(chart_data, kwargs["group_val"])
        chart_data = run_query(chart_data, group_filter)
        query_code = "chart_data = chart_data.query({})".format(triple_quote(group_filter))
        chart_code.append(query_code)
    return chart_data, chart_code
Beispiel #7
0
 def reshape(self):
     """
     Loads the data for ``self.data_id``, applies the "query" found in its settings (if any) and
     passes the result to the builder's ``reshape``.

     :return: whatever ``self.builder.reshape`` returns for the queried data
     """
     raw = global_state.get_data(self.data_id)
     # settings can be empty, in which case the query passed along is None
     settings = global_state.get_settings(self.data_id) or {}
     context_vars = global_state.get_context_variables(self.data_id)
     queried = run_query(raw, settings.get("query"), context_vars)
     return self.builder.reshape(queried)
Beispiel #8
0
def build_figure_data(data_id, chart_type=None, query=None, x=None, y=None, z=None, group=None, agg=None, window=None,
                      rolling_comp=None, **kwargs):
    """
    Builds chart figure data for loading into dash:`dash_core_components.Graph <dash-core-components/graph>` components

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param chart_type: type of chart (line, bar, pie, scatter...)
    :type chart_type: str
    :param query: pandas dataframe query string
    :type query: str, optional
    :param x: column to use for the X-Axis
    :type x: str
    :param y: columns to use for the Y-Axes
    :type y: list of str
    :param z: column to use for the Z-Axis
    :type z: str, optional
    :param group: column(s) to use for grouping
    :type group: list of str or str, optional
    :param agg: specific aggregation that can be applied to y or z axes.  Possible values are: count, first, last mean,
                median, min, max, std, var, mad, prod, sum.  This is included in label of axis it is being applied to.
    :type agg: str, optional
    :param window: number of days to include in rolling aggregations
    :type window: int, optional
    :param rolling_comp: computation to use in rolling aggregations
    :type rolling_comp: str, optional
    :param kwargs: optional keyword arguments, here in case invalid arguments are passed to this function
    :type kwargs: dict
    :return: tuple of (chart data, code export).  ``(None, None)`` is returned when the inputs do not make up a valid
             chart.  When an exception occurs the first item is a dictionary containing "error" & "traceback" and the
             second is whatever code had been built up to that point (can be None).
    :rtype: tuple
    """
    code = None
    try:
        if not valid_chart(x=x, y=y, z=z, chart_type=chart_type, agg=agg, window=window, rolling_comp=rolling_comp):
            return None, None

        data = run_query(
            global_state.get_data(data_id),
            query,
            global_state.get_context_variables(data_id)
        )
        code = build_code_export(data_id, query=query)
        chart_kwargs = dict(group_col=group, agg=agg, allow_duplicates=chart_type == 'scatter', rolling_win=window,
                            rolling_comp=rolling_comp)
        if chart_type in ZAXIS_CHARTS:
            # z-axis charts are given the z column in place of a group column
            chart_kwargs['z'] = z
            del chart_kwargs['group_col']
        data, chart_code = build_chart_data(data, x, y, **chart_kwargs)
        return data, code + chart_code
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt & SystemExit propagate instead of becoming chart errors
        return dict(error=str(e), traceback=str(traceback.format_exc())), code
Beispiel #9
0
    def query_input(query, pathname, curr_query):
        """
        dash callback for storing valid pandas dataframe queries.  This acts as an intermediary between values typed
        by the user and values that are applied to pandas dataframes.  Most of the time what the user has typed is not
        complete and thus not a valid pandas dataframe query.

        :param query: query input
        :type query: str
        :param pathname: URL path
        :param curr_query: current valid pandas dataframe query
        :return: tuple of (query (if valid, otherwise the current valid query), styling for query input (pink
                 background if invalid input), query input title (containing invalid query exception information))
        :rtype: tuple of (str, dict, str)
        """
        try:
            # the result is discarded, the query is only executed to find out whether it raises
            run_query(DATA[get_data_id(pathname)], query)
        except Exception as ex:
            # Exception (not BaseException) so KeyboardInterrupt & SystemExit are not reported as an invalid query
            return curr_query, {
                'line-height': 'inherit',
                'background-color': 'pink'
            }, str(ex)
        return query, {'line-height': 'inherit'}, ''
Beispiel #10
0
 def remove(self, df):
     """
     Collects the rows of ``df`` belonging to groups (based on the configured "group" columns)
     containing more than one row, optionally narrowed down to the configured "filter" values.

     As a side effect ``self.startup_kwargs["name"]`` is set to the group columns joined by
     underscores followed by "_duplicates".

     :param df: dataframe to search for duplicates
     :return: tuple of (dataframe of duplicate rows, output of ``self._build_code``)
     :raises NoDuplicatesToShowException: if no group contains more than one row
     """
     group = self.cfg.get("group")
     repeated = [rows for _, rows in df.groupby(group) if len(rows) > 1]
     if len(repeated) == 0:
         raise NoDuplicatesToShowException("No duplicates to show!")
     duplicates = pd.concat(repeated)

     group_filter = None
     if self.cfg.get("filter"):
         # pair each group column with the filter value in the same position
         filter_spec = dict(zip(group, self.cfg["filter"]))
         group_filter = build_group_inputs_filter(df, [filter_spec])
         duplicates = run_query(duplicates, group_filter)

     code = self._build_code(group_filter)
     name = "{group}_duplicates".format(group="_".join(group))
     self.startup_kwargs["name"] = name
     return duplicates, code
Beispiel #11
0
    def group_values(group_cols, pathname, inputs, prev_group_vals):
        """
        Builds the available group value options along with the values that should be selected.

        :param group_cols: column(s) selected for grouping
        :param pathname: URL path the data identifier is read from
        :param inputs: dictionary of inputs, its 'chart_type' (defaults to 'line') & 'query' values are used
        :param prev_group_vals: previously selected group values (can be None)
        :return: tuple of (group value options, selected group values), ``([], None)`` when the group input is not
                 shown for the chart type or there are no group columns
        """
        group_cols = make_list(group_cols)
        if not show_input_handler(inputs.get(
                'chart_type', 'line'))('group') or not len(group_cols):
            return [], None
        data_id = get_data_id(pathname)
        group_vals = run_query(global_state.get_data(data_id),
                               inputs.get('query'),
                               global_state.get_context_variables(data_id))
        group_vals = build_group_val_options(group_vals, group_cols)
        selections = []
        available_vals = [gv['value'] for gv in group_vals]
        if prev_group_vals is not None:
            # carry over any previous selections that are still available
            selections = [
                pgv for pgv in prev_group_vals if pgv in available_vals
            ]
        # with nothing carried over everything is selected, as long as there are at most MAX_GROUPS options
        if not len(selections) and len(group_vals) <= MAX_GROUPS:
            selections = available_vals

        return group_vals, selections
Beispiel #12
0
    def group_values(chart_type, group_cols, map_group_cols, pathname, inputs,
                     prev_group_vals):
        """
        Builds the available group value options along with the values that should be selected.

        :param chart_type: type of chart, "maps" charts use ``map_group_cols`` in place of ``group_cols``
        :param group_cols: column(s) selected for grouping
        :param map_group_cols: column(s) selected for grouping when the chart type is "maps"
        :param pathname: URL path the data identifier is read from
        :param inputs: dictionary of inputs, passed to ``show_group_input`` & the source of the "query" value
        :param prev_group_vals: previously selected group values (can be None)
        :return: tuple of (group value options, selected group values), ``([], None)`` when ``show_group_input``
                 is false for the inputs & columns
        """
        if chart_type == "maps":
            columns = make_list(map_group_cols)
        else:
            columns = make_list(group_cols)
        if not show_group_input(inputs, columns):
            return [], None

        data_id = get_data_id(pathname)
        raw = global_state.get_data(data_id)
        context_vars = global_state.get_context_variables(data_id)
        filtered = run_query(raw, inputs.get("query"), context_vars)
        options = build_group_val_options(filtered, columns)
        available_vals = [option["value"] for option in options]

        # carry over any previous selections that are still available
        selections = (
            [] if prev_group_vals is None
            else [prev for prev in prev_group_vals if prev in available_vals]
        )
        # with nothing carried over everything is selected, as long as there are at most MAX_GROUPS options
        if len(selections) == 0 and len(options) <= MAX_GROUPS:
            selections = available_vals

        return options, selections