Ejemplo n.º 1
0
def test_getters(builtin_pkg):
    """Check the ``utils.get_*_arg`` helpers against a request built from a fixed dict.

    For the str, int and float getters this covers a present value, a missing
    key with a default, a lookup that yields ``None``, and the fallback to the
    default when the matching builtin converter is patched to raise.  The bool
    getter is only checked for a present ``'true'`` value.

    :param builtin_pkg: dotted prefix under which ``str``/``int``/``float`` are patched via ``mock.patch``
    """
    params = {
        'int': '1',
        'empty_int': '',
        'str': 'hello',
        'empty_str': '',
        'bool': 'true',
        'float': '1.1',
    }
    req = build_req_tuple(params)

    # Resolve the patch targets up front, before any builtin gets replaced.
    str_target = '{}.str'.format(builtin_pkg)
    int_target = '{}.int'.format(builtin_pkg)
    float_target = '{}.float'.format(builtin_pkg)

    # string getter
    str_val = utils.get_str_arg(req, 'str')
    assert isinstance(str_val, str)
    assert str_val == 'hello'
    str_default = utils.get_str_arg(req, 'str_def', default='def')
    assert str_default == 'def'
    str_empty = utils.get_str_arg(req, 'empty_str')
    assert str_empty is None
    with mock.patch(str_target, mock.Mock(side_effect=Exception)):
        str_fallback = utils.get_str_arg(req, 'str', default='def')
        assert str_fallback == 'def'

    # integer getter
    int_val = utils.get_int_arg(req, 'int')
    assert isinstance(int_val, int)
    assert int_val == 1
    int_default = utils.get_int_arg(req, 'int_def', default=2)
    assert int_default == 2
    int_empty = utils.get_int_arg(req, 'empty_int')
    assert int_empty is None
    with mock.patch(int_target, mock.Mock(side_effect=Exception)):
        int_fallback = utils.get_int_arg(req, 'int', default=2)
        assert int_fallback == 2

    # boolean getter
    bool_val = utils.get_bool_arg(req, 'bool')
    assert isinstance(bool_val, bool)
    assert bool_val

    # float getter
    float_val = utils.get_float_arg(req, 'float')
    assert isinstance(float_val, float)
    assert float_val == 1.1
    # 'int_def' is absent from the request, so the default is what comes back
    float_default = utils.get_float_arg(req, 'int_def', default=2.0)
    assert float_default == 2.0
    # 'empty_float' is not a key of the request either: this is the missing-key path
    float_missing = utils.get_float_arg(req, 'empty_float')
    assert float_missing is None
    with mock.patch(float_target, mock.Mock(side_effect=Exception)):
        float_fallback = utils.get_float_arg(req, 'float', default=2.0)
        assert float_fallback == 2
Ejemplo n.º 2
0
def get_histogram(data_id):
    """
    :class:`flask:flask.Flask` route which returns output from numpy.histogram to front-end as JSON

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param col: string from flask.request.args['col'] containing name of a column in your dataframe
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param bins: the number of bins to display in your histogram, options on the front-end are 5, 10, 20, 50
    :returns: JSON {data: [count per bin], labels: [bin edges formatted to one decimal place],
              desc: output of load_describe for the selected column}
              or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    col = get_str_arg(request, 'col', 'values')
    query = get_str_arg(request, 'query')
    bins = get_int_arg(request, 'bins', 20)
    try:
        data = DATA[data_id]
        if query:
            data = data.query(query)

        selected_col = find_selected_column(data, col)
        # keep only the non-null rows of the selected column before binning
        data = data[~pd.isnull(data[selected_col])][[selected_col]]
        counts, edges = np.histogram(data, bins=bins)

        desc = load_describe(data[selected_col])
        return jsonify(
            data=[json_float(count) for count in counts],
            labels=['{0:.1f}'.format(edge) for edge in edges],
            desc=desc
        )
    except Exception as e:
        # Exception rather than BaseException: KeyboardInterrupt/SystemExit must propagate
        # instead of being reported to the front-end as an ordinary error payload.
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
Ejemplo n.º 3
0
 def __init__(self, req):
     """Populate the category settings from the request arguments.

     Reads ``categoryCol``, ``categoryAgg`` (falling back to ``"mean"``) and
     ``top`` from ``req`` and derives the ``aggs`` list from the chosen
     category aggregation.
     """
     category_col = get_str_arg(req, "categoryCol")
     category_agg = get_str_arg(req, "categoryAgg", "mean")
     # "pctsum" is swapped for "sum" in the aggregation list; anything else is used as-is
     if category_agg == "pctsum":
         secondary_agg = "sum"
     else:
         secondary_agg = category_agg
     self.category_col = category_col
     self.category_agg = category_agg
     self.aggs = ["count", secondary_agg]
     self.top = get_int_arg(req, "top")
Ejemplo n.º 4
0
def get_chart_data(data_id):
    """
    :class:`flask:flask.Flask` route which builds data associated with a chart.js chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param x: string from flask.request.args['x'] column to be used as x-axis of chart
    :param y: JSON value from flask.request.args['y'] column(s) to be used as y-axis of chart
    :param group: JSON value from flask.request.args['group'] columns to group chart data by
    :param agg: string from flask.request.args['agg'] points to a specific function that can be applied to
                :func: pandas.core.groupby.DataFrameGroupBy.  Possible values are: count, first, last, mean,
                median, min, max, std, var, mad, prod, sum
    :param allowDupes: boolean from flask.request.args['allowDupes'], forwarded to build_chart
    :param rollingWin: integer from flask.request.args['rollingWin'], forwarded to build_chart as rolling_win
    :param rollingComp: string from flask.request.args['rollingComp'], forwarded to build_chart as rolling_comp
    :returns: JSON {
        data: {
            series1: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            series2: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            ...,
            seriesN: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
        },
        min: minY,
        max: maxY,
        success: True,
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
      (an invalid or empty-result query returns {error: ...} without a traceback)
    """
    try:
        query = get_str_arg(request, 'query')
        data = DATA[data_id]
        if query:
            try:
                data = data.query(query)
            except Exception as e:
                # a bad query is reported on its own so the user sees it is the filter that failed
                return jsonify(dict(error='Invalid query: {}'.format(str(e))))
            if not len(data):
                return jsonify(
                    dict(error='query "{}" found no data, please alter'.format(
                        query)))
        x = get_str_arg(request, 'x')
        y = get_json_arg(request, 'y')
        group_col = get_json_arg(request, 'group')
        agg = get_str_arg(request, 'agg')
        allow_duplicates = get_bool_arg(request, 'allowDupes')
        window = get_int_arg(request, 'rollingWin')
        comp = get_str_arg(request, 'rollingComp')
        data = build_chart(data,
                           x,
                           y,
                           group_col,
                           agg,
                           allow_duplicates,
                           rolling_win=window,
                           rolling_comp=comp)
        data['success'] = True
        return jsonify(data)
    except Exception as e:
        # Exception rather than BaseException: KeyboardInterrupt/SystemExit must propagate
        # instead of being reported to the front-end as an ordinary error payload.
        return jsonify(
            dict(error=str(e), traceback=str(traceback.format_exc())))
Ejemplo n.º 5
0
def test_getters(builtin_pkg):
    """Verify the ``utils.get_*_arg`` helpers using a request built from a fixed dict.

    The str, int and float getters are each checked for a present value, a
    missing key with a default, a lookup that yields ``None``, and the default
    being returned when the matching builtin converter is patched to raise.
    The bool getter is only checked for a present ``"true"`` value.

    :param builtin_pkg: dotted prefix under which ``str``/``int``/``float`` are patched via ``mock.patch``
    """
    raw_args = {
        "int": "1",
        "empty_int": "",
        "str": "hello",
        "empty_str": "",
        "bool": "true",
        "float": "1.1",
    }
    req = build_req_tuple(raw_args)

    # Build the patch targets once, while the real builtins are still in place.
    patched_str = "{}.str".format(builtin_pkg)
    patched_int = "{}.int".format(builtin_pkg)
    patched_float = "{}.float".format(builtin_pkg)

    # str
    text = utils.get_str_arg(req, "str")
    assert isinstance(text, str)
    assert text == "hello"
    text_default = utils.get_str_arg(req, "str_def", default="def")
    assert text_default == "def"
    text_empty = utils.get_str_arg(req, "empty_str")
    assert text_empty is None
    with mock.patch(patched_str, mock.Mock(side_effect=Exception)):
        text_fallback = utils.get_str_arg(req, "str", default="def")
        assert text_fallback == "def"

    # int
    number = utils.get_int_arg(req, "int")
    assert isinstance(number, int)
    assert number == 1
    number_default = utils.get_int_arg(req, "int_def", default=2)
    assert number_default == 2
    number_empty = utils.get_int_arg(req, "empty_int")
    assert number_empty is None
    with mock.patch(patched_int, mock.Mock(side_effect=Exception)):
        number_fallback = utils.get_int_arg(req, "int", default=2)
        assert number_fallback == 2

    # bool
    flag = utils.get_bool_arg(req, "bool")
    assert isinstance(flag, bool)
    assert flag

    # float
    real = utils.get_float_arg(req, "float")
    assert isinstance(real, float)
    assert real == 1.1
    # "int_def" is absent from the request, so the default is what comes back
    real_default = utils.get_float_arg(req, "int_def", default=2.0)
    assert real_default == 2.0
    # "empty_float" is not a key of the request either: this is the missing-key path
    real_missing = utils.get_float_arg(req, "empty_float")
    assert real_missing is None
    with mock.patch(patched_float, mock.Mock(side_effect=Exception)):
        real_fallback = utils.get_float_arg(req, "float", default=2.0)
        assert real_fallback == 2
Ejemplo n.º 6
0
def get_correlations_ts(data_id):
    """
    :class:`flask:flask.Flask` route which returns timeseries of Pearson correlations of two columns with numeric data
    using :meth:`pandas:pandas.DataFrame.corr`

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param cols: JSON-encoded list from flask.request.args['cols'] containing names of two columns in dataframe
    :param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
    :param rollingWindow: integer from flask.request.args['rollingWindow']; when set, a rolling correlation over
                          that window is computed instead of one correlation per date
    :returns: JSON {
        data: {:col1:col2: {data: [{corr: 0.99, date: 'YYYY-MM-DD'},...], max: 0.99, min: 0.99}
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    try:
        query = get_str_arg(request, 'query')
        data = DATA[data_id]
        data = data.query(query) if query is not None else data
        cols = get_str_arg(request, 'cols')
        cols = json.loads(cols)
        date_col = get_str_arg(request, 'dateCol')
        rolling_window = get_int_arg(request, 'rollingWindow')
        if rolling_window:
            # set() order is arbitrary; the pair is only used to pick one cell of a
            # two-column correlation matrix, so which name lands in col1 should not matter
            [col1, col2] = list(set(cols))
            data = data[[date_col, col1, col2]].set_index(date_col)
            data = data[[col1,
                         col2]].rolling(rolling_window).corr().reset_index()
            data = data.dropna()
            # keep the col1 row of each matrix and read its col2 entry
            data = data[data['level_1'] == col1][[date_col, col2]]
        else:
            data = data.groupby(date_col)[list(
                set(cols))].corr(method='pearson')
            data.index.names = ['date', 'column']
            data = data.reset_index()
            data = data[data.column == cols[0]][['date', cols[1]]]
        data.columns = ['date', 'corr']
        return_data = build_chart(data.fillna(0), 'date', 'corr')
        return_data['success'] = True
        return jsonify(return_data)
    except Exception as e:
        # Exception rather than BaseException: KeyboardInterrupt/SystemExit must propagate
        # instead of being reported to the front-end as an ordinary error payload.
        return jsonify(
            dict(error=str(e), traceback=str(traceback.format_exc())))
Ejemplo n.º 7
0
def get_scatter(data_id):
    """
    :class:`flask:flask.Flask` route which returns data used in correlation of two columns for scatter chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param cols: JSON-encoded list from flask.request.args['cols'] containing names of two columns in dataframe
    :param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
    :param date: string from flask.request.args['date'] date value in dateCol to filter dataframe to
    :param rolling: boolean from flask.request.args['rolling']; when true the rows leading up to (and including)
                    the first row matching date are used instead of only the rows on that date
    :param window: integer from flask.request.args['window'], number of rows to keep when rolling is true
    :returns: JSON {
        data: [{col1: 0.123, col2: 0.123, index: 1},...,{col1: 0.123, col2: 0.123, index: N}],
        stats: {
            correlated: 50,
            only_in_s0: 1,
            only_in_s1: 2,
            pearson: 0.987,
            spearman: 0.879,
        }
        x: col1,
        y: col2
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    cols = get_json_arg(request, 'cols')
    query = get_str_arg(request, 'query')
    date = get_str_arg(request, 'date')
    date_col = get_str_arg(request, 'dateCol')
    rolling = get_bool_arg(request, 'rolling')
    try:
        data = DATA[data_id]
        if query:
            data = data.query(query)

        idx_col = str('index')
        y_cols = [cols[1], idx_col]
        if rolling:
            window = get_int_arg(request, 'window')
            # index label of the first matching row is used as a position for iloc below
            # NOTE(review): this presumes a default 0..N-1 index - confirm
            idx = min(data[data[date_col] == date].index) + 1
            data = data.iloc[max(idx - window, 0):idx]
            data = data[list(set(cols)) + [date_col]].dropna(how='any')
            y_cols.append(date_col)
        else:
            data = data[data[date_col] == date] if date else data
            data = data[list(set(cols))].dropna(how='any')

        data[idx_col] = data.index
        s0 = data[cols[0]]
        s1 = data[cols[1]]
        pearson = s0.corr(s1, method='pearson')
        spearman = s0.corr(s1, method='spearman')
        # NOTE(review): rows with a null in either column were already removed by dropna(how='any')
        # above, so only_in_s0/only_in_s1 look like they will always be 0 - confirm intent
        stats = dict(
            pearson='N/A' if pd.isnull(pearson) else pearson,
            spearman='N/A' if pd.isnull(spearman) else spearman,
            correlated=len(data),
            only_in_s0=len(data[data[cols[0]].isnull()]),
            only_in_s1=len(data[data[cols[1]].isnull()])
        )

        if len(data) > 15000:
            # too many points to draw: return the stats alone together with an explanatory error
            return jsonify(
                stats=stats,
                error='Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...'
            )
        data = build_chart(data, cols[0], y_cols, allow_duplicates=True)
        data['x'] = cols[0]
        data['y'] = cols[1]
        data['stats'] = stats
        return jsonify(data)
    except Exception as e:
        # Exception rather than BaseException: KeyboardInterrupt/SystemExit must propagate
        # instead of being reported to the front-end as an ordinary error payload.
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
Ejemplo n.º 8
0
 def __init__(self, req):
     """Populate the ordinal settings from the request arguments.

     Reads ``top``, ``ordinalCol``, ``ordinalAgg`` (falling back to ``"sum"``)
     and ``cleaner`` from ``req``.
     """
     top = get_int_arg(req, "top")
     ordinal_col = get_str_arg(req, "ordinalCol")
     ordinal_agg = get_str_arg(req, "ordinalAgg", "sum")
     cleaners = get_str_arg(req, "cleaner")
     self.top, self.ordinal_col = top, ordinal_col
     self.ordinal_agg, self.cleaners = ordinal_agg, cleaners
Ejemplo n.º 9
0
 def __init__(self, req):
     """Read ``bins`` (falling back to 20) and ``target`` from the request arguments."""
     bin_count = get_int_arg(req, "bins", 20)
     target_name = get_str_arg(req, "target")
     self.bins, self.target = bin_count, target_name
Ejemplo n.º 10
0
 def __init__(self, req):
     """Read the ``bins`` request argument, falling back to 20, onto the instance."""
     bin_count = get_int_arg(req, "bins", 20)
     self.bins = bin_count
Ejemplo n.º 11
0
 def __init__(self, req):
     """Read ``bins`` (falling back to 20), ``target`` and ``density`` from the request arguments."""
     bin_count = get_int_arg(req, "bins", 20)
     target_name = get_str_arg(req, "target")
     use_density = get_bool_arg(req, "density")
     self.bins = bin_count
     self.target = target_name
     self.density = use_density