Beispiel #1
0
def get_chart_data(data_id):
    """
    :class:`flask:flask.Flask` route which builds data associated with a chart.js chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param x: string from flask.request.args['x'] column to be used as x-axis of chart
    :param y: string from flask.request.args['y'] column to be used as y-axis of chart
    :param group: string from flask.request.args['group'] comma-separated string of columns to group chart data by
    :param agg: string from flask.request.args['agg'] points to a specific function that can be applied to
                :func: pandas.core.groupby.DataFrameGroupBy.  Possible values are: count, first, last mean,
                median, min, max, std, var, mad, prod, sum
    :returns: JSON {
        data: {
            series1: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            series2: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            ...,
            seriesN: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
        },
        min: minY,
        max: maxY,
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    try:
        query = get_str_arg(request, 'query')
        data = DATA[data_id]
        if query:
            try:
                data = data.query(query)
            except BaseException as e:
                return jsonify(dict(error='Invalid query: {}'.format(str(e))))
            if not len(data):
                return jsonify(
                    dict(error='query "{}" found no data, please alter'.format(
                        query)))
        x = get_str_arg(request, 'x')
        y = get_json_arg(request, 'y')
        group_col = get_json_arg(request, 'group')
        agg = get_str_arg(request, 'agg')
        allow_duplicates = get_bool_arg(request, 'allowDupes')
        window = get_int_arg(request, 'rollingWin')
        comp = get_str_arg(request, 'rollingComp')
        data = build_chart(data,
                           x,
                           y,
                           group_col,
                           agg,
                           allow_duplicates,
                           rolling_win=window,
                           rolling_comp=comp)
        data['success'] = True
        return jsonify(data)
    except BaseException as e:
        return jsonify(
            dict(error=str(e), traceback=str(traceback.format_exc())))
Beispiel #2
0
def update_settings(data_id):
    """
    :class:`flask:flask.Flask` route which updates global SETTINGS for current port

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param settings: JSON string from flask.request.args['settings'] which gets decoded and stored in SETTINGS variable
    :return: JSON
    """
    try:
        global SETTINGS

        curr_settings = SETTINGS.get(data_id, {})
        updated_settings = dict_merge(curr_settings, get_json_arg(request, 'settings', {}))
        SETTINGS[data_id] = updated_settings
        return jsonify(dict(success=True))
    except BaseException as e:
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
Beispiel #3
0
def get_data(data_id):
    """
    :class:`flask:flask.Flask` route which returns current rows from DATA (based on scrollbar specs and saved settings)
    to front-end as JSON

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param ids: required dash separated string "START-END" stating a range of row indexes to be returned to the screen
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param sort: JSON string from flask.request.args['sort'] which is applied to DATA using the sort_values() or
                 sort_index() function.  Here is the JSON structure: [col1,dir1],[col2,dir2],....[coln,dirn]
    :return: JSON {
        results: [
            {dtale_index: 1, col1: val1_1, ...,colN: valN_1},
            ...,
            {dtale_index: N2, col1: val1_N2, ...,colN: valN_N2}
        ],
        columns: [{name: col1, dtype: 'int64'},...,{name: colN, dtype: 'datetime'}],
        total: N2,
        success: True/False
    }
    """
    try:
        global SETTINGS, DATA, DTYPES
        data = DATA[data_id]

        # this will check for when someone instantiates D-Tale programatically and directly alters the internal
        # state of the dataframe (EX: d.data['new_col'] = 'foo')
        curr_dtypes = [c['name'] for c in DTYPES[data_id]]
        if any(c not in curr_dtypes for c in data.columns):
            data, _ = format_data(data)
            DATA[data_id] = data
            DTYPES[data_id] = build_dtypes_state(data)

        params = retrieve_grid_params(request)
        ids = get_json_arg(request, 'ids')
        if ids is None:
            return jsonify({})

        col_types = DTYPES[data_id]
        f = grid_formatter(col_types)
        curr_settings = SETTINGS.get(data_id, {})
        if curr_settings.get('sort') != params.get('sort'):
            data = sort_df_for_grid(data, params)
            DATA[data_id] = data
        if params.get('sort') is not None:
            curr_settings = dict_merge(curr_settings, dict(sort=params['sort']))
        else:
            curr_settings = {k: v for k, v in curr_settings.items() if k != 'sort'}
        data = filter_df_for_grid(data, params)
        if params.get('query') is not None:
            curr_settings = dict_merge(curr_settings, dict(query=params['query']))
        else:
            curr_settings = {k: v for k, v in curr_settings.items() if k != 'query'}
        SETTINGS[data_id] = curr_settings

        total = len(data)
        results = {}
        for sub_range in ids:
            sub_range = list(map(int, sub_range.split('-')))
            if len(sub_range) == 1:
                sub_df = data.iloc[sub_range[0]:sub_range[0] + 1]
                sub_df = f.format_dicts(sub_df.itertuples())
                results[sub_range[0]] = dict_merge({IDX_COL: sub_range[0]}, sub_df[0])
            else:
                [start, end] = sub_range
                sub_df = data.iloc[start:] if end >= len(data) - 1 else data.iloc[start:end + 1]
                sub_df = f.format_dicts(sub_df.itertuples())
                for i, d in zip(range(start, end + 1), sub_df):
                    results[i] = dict_merge({IDX_COL: i}, d)
        return_data = dict(results=results, columns=[dict(name=IDX_COL, dtype='int64')] + DTYPES[data_id], total=total)
        return jsonify(return_data)
    except BaseException as e:
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
Beispiel #4
0
def get_scatter(data_id):
    """
    :class:`flask:flask.Flask` route which returns data used in correlation of two columns for scatter chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param cols: comma-separated string from flask.request.args['cols'] containing names of two columns in dataframe
    :param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dateframe for timeseries
    :param date: string from flask.request.args['date'] date value in dateCol to filter dataframe to
    :returns: JSON {
        data: [{col1: 0.123, col2: 0.123, index: 1},...,{col1: 0.123, col2: 0.123, index: N}],
        stats: {
        stats: {
            correlated: 50,
            only_in_s0: 1,
            only_in_s1: 2,
            pearson: 0.987,
            spearman: 0.879,
        }
        x: col1,
        y: col2
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    cols = get_json_arg(request, 'cols')
    query = get_str_arg(request, 'query')
    date = get_str_arg(request, 'date')
    date_col = get_str_arg(request, 'dateCol')
    rolling = get_bool_arg(request, 'rolling')
    try:
        data = DATA[data_id]
        if query:
            data = data.query(query)

        idx_col = str('index')
        y_cols = [cols[1], idx_col]
        if rolling:
            window = get_int_arg(request, 'window')
            idx = min(data[data[date_col] == date].index) + 1
            data = data.iloc[max(idx - window, 0):idx]
            data = data[list(set(cols)) + [date_col]].dropna(how='any')
            y_cols.append(date_col)
        else:
            data = data[data[date_col] == date] if date else data
            data = data[list(set(cols))].dropna(how='any')

        data[idx_col] = data.index
        s0 = data[cols[0]]
        s1 = data[cols[1]]
        pearson = s0.corr(s1, method='pearson')
        spearman = s0.corr(s1, method='spearman')
        stats = dict(
            pearson='N/A' if pd.isnull(pearson) else pearson,
            spearman='N/A' if pd.isnull(spearman) else spearman,
            correlated=len(data),
            only_in_s0=len(data[data[cols[0]].isnull()]),
            only_in_s1=len(data[data[cols[1]].isnull()])
        )

        if len(data) > 15000:
            return jsonify(
                stats=stats,
                error='Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...'
            )
        data = build_chart(data, cols[0], y_cols, allow_duplicates=True)
        data['x'] = cols[0]
        data['y'] = cols[1]
        data['stats'] = stats
        return jsonify(data)
    except BaseException as e:
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))