def test_getters(builtin_pkg):
    """
    Exercise the typed request-argument getters exposed by ``utils``.

    For str, int and float the test checks: a present value, a missing key
    with a default, an empty value (expected to come back as None) and a
    conversion that raises (the builtin is patched) which is expected to
    yield the supplied default. ``get_bool_arg`` is checked for a present
    value only.

    :param builtin_pkg: module path prefix used to build the mock.patch targets for str/int/float
    """
    raw_args = {
        'int': '1',
        'empty_int': '',
        'str': 'hello',
        'empty_str': '',
        'bool': 'true',
        'float': '1.1',
    }
    req = build_req_tuple(raw_args)

    # --- string getter ---
    result = utils.get_str_arg(req, 'str')
    assert isinstance(result, str)
    assert result == 'hello'
    result = utils.get_str_arg(req, 'str_def', default='def')
    assert result == 'def'
    result = utils.get_str_arg(req, 'empty_str')
    assert result is None
    str_target = '{}.str'.format(builtin_pkg)
    # keep the patched scope minimal: later isinstance checks need the real builtin
    with mock.patch(str_target, mock.Mock(side_effect=Exception)):
        result = utils.get_str_arg(req, 'str', default='def')
        assert result == 'def'

    # --- integer getter ---
    result = utils.get_int_arg(req, 'int')
    assert isinstance(result, int)
    assert result == 1
    result = utils.get_int_arg(req, 'int_def', default=2)
    assert result == 2
    result = utils.get_int_arg(req, 'empty_int')
    assert result is None
    int_target = '{}.int'.format(builtin_pkg)
    with mock.patch(int_target, mock.Mock(side_effect=Exception)):
        result = utils.get_int_arg(req, 'int', default=2)
        assert result == 2

    # --- boolean getter ---
    result = utils.get_bool_arg(req, 'bool')
    assert isinstance(result, bool)
    assert result

    # --- float getter ---
    result = utils.get_float_arg(req, 'float')
    assert isinstance(result, float)
    assert result == 1.1
    result = utils.get_float_arg(req, 'int_def', default=2.0)
    assert result == 2.0
    result = utils.get_float_arg(req, 'empty_float')
    assert result is None
    float_target = '{}.float'.format(builtin_pkg)
    with mock.patch(float_target, mock.Mock(side_effect=Exception)):
        result = utils.get_float_arg(req, 'float', default=2.0)
        assert result == 2
def get_histogram(data_id):
    """
    :class:`flask:flask.Flask` route which returns output from numpy.histogram to front-end as JSON

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param col: string from flask.request.args['col'] containing name of a column in your dataframe
                ('values' is passed to get_str_arg as the default)
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param bins: the number of bins to display in your histogram, options on the front-end are 5, 10, 20, 50
                 (20 is passed to get_int_arg as the default)
    :returns: JSON {
        data: [count1, count2, ..., countN],
        labels: [bin edges formatted to one decimal place],
        desc: output of load_describe for the selected column
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    col = get_str_arg(request, 'col', 'values')
    query = get_str_arg(request, 'query')
    bins = get_int_arg(request, 'bins', 20)
    try:
        data = DATA[data_id]
        if query:
            data = data.query(query)

        selected_col = find_selected_column(data, col)
        # drop rows where the selected column is null and keep only that column
        data = data[~pd.isnull(data[selected_col])][[selected_col]]
        counts, bin_edges = np.histogram(data, bins=bins)
        desc = load_describe(data[selected_col])
        return jsonify(
            data=[json_float(count) for count in counts],
            labels=['{0:.1f}'.format(edge) for edge in bin_edges],
            desc=desc
        )
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
def __init__(self, req):
    """
    Populate category-analysis settings from the request arguments.

    Reads "categoryCol", "categoryAgg" (with "mean" passed as the fallback)
    and "top". ``aggs`` always starts with "count"; its second entry is the
    category aggregation, except that "pctsum" is replaced by "sum".

    :param req: request-like object handed to get_str_arg / get_int_arg
    """
    self.category_col = get_str_arg(req, "categoryCol")
    self.category_agg = get_str_arg(req, "categoryAgg", "mean")
    if self.category_agg == "pctsum":
        second_agg = "sum"
    else:
        second_agg = self.category_agg
    self.aggs = ["count", second_agg]
    self.top = get_int_arg(req, "top")
def get_chart_data(data_id):
    """
    :class:`flask:flask.Flask` route which builds data associated with a chart.js chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param x: string from flask.request.args['x'] column to be used as x-axis of chart
    :param y: value from flask.request.args['y'] read with get_json_arg, column(s) to be used as y-axis of chart
    :param group: value from flask.request.args['group'] read with get_json_arg, columns to group chart data by
    :param agg: string from flask.request.args['agg'] points to a specific function that can be applied to
                :func: pandas.core.groupby.DataFrameGroupBy.  Possible values are: count, first, last, mean,
                median, min, max, std, var, mad, prod, sum
    :param allowDupes: boolean from flask.request.args['allowDupes'], forwarded to build_chart as allow_duplicates
    :param rollingWin: integer from flask.request.args['rollingWin'], forwarded to build_chart as rolling_win
    :param rollingComp: string from flask.request.args['rollingComp'], forwarded to build_chart as rolling_comp
    :returns: JSON {
        data: {
            series1: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            series2: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
            ...,
            seriesN: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
        },
        min: minY,
        max: maxY,
        success: True,
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    (an invalid or empty-result query returns {error: ...} without a traceback)
    """
    try:
        query = get_str_arg(request, 'query')
        data = DATA[data_id]
        if query:
            try:
                data = data.query(query)
            except Exception as e:
                # report a bad query separately so the front-end gets a targeted message
                return jsonify(dict(error='Invalid query: {}'.format(str(e))))
            if not len(data):
                return jsonify(dict(error='query "{}" found no data, please alter'.format(query)))
        x = get_str_arg(request, 'x')
        y = get_json_arg(request, 'y')
        group_col = get_json_arg(request, 'group')
        agg = get_str_arg(request, 'agg')
        allow_duplicates = get_bool_arg(request, 'allowDupes')
        window = get_int_arg(request, 'rollingWin')
        comp = get_str_arg(request, 'rollingComp')
        data = build_chart(data, x, y, group_col, agg, allow_duplicates, rolling_win=window, rolling_comp=comp)
        data['success'] = True
        return jsonify(data)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
def test_getters(builtin_pkg):
    """
    Check the request-argument getters in ``utils`` for each supported type.

    str/int/float are each tested with a present value, a missing key plus
    default, an empty value (expected None) and a patched builtin that raises
    (expected to return the default). bool is tested with a present value.

    :param builtin_pkg: module path prefix used to build the mock.patch targets for str/int/float
    """
    req = build_req_tuple(
        {
            "int": "1",
            "empty_int": "",
            "str": "hello",
            "empty_str": "",
            "bool": "true",
            "float": "1.1",
        }
    )

    # strings
    present_str = utils.get_str_arg(req, "str")
    assert isinstance(present_str, str) and present_str == "hello"
    defaulted_str = utils.get_str_arg(req, "str_def", default="def")
    assert defaulted_str == "def"
    blank_str = utils.get_str_arg(req, "empty_str")
    assert blank_str is None
    failing_str = mock.Mock(side_effect=Exception)
    # only the getter call and its assert run while the builtin is replaced
    with mock.patch("{}.str".format(builtin_pkg), failing_str):
        fallback_str = utils.get_str_arg(req, "str", default="def")
        assert fallback_str == "def"

    # integers
    present_int = utils.get_int_arg(req, "int")
    assert isinstance(present_int, int) and present_int == 1
    defaulted_int = utils.get_int_arg(req, "int_def", default=2)
    assert defaulted_int == 2
    blank_int = utils.get_int_arg(req, "empty_int")
    assert blank_int is None
    failing_int = mock.Mock(side_effect=Exception)
    with mock.patch("{}.int".format(builtin_pkg), failing_int):
        fallback_int = utils.get_int_arg(req, "int", default=2)
        assert fallback_int == 2

    # booleans
    present_bool = utils.get_bool_arg(req, "bool")
    assert isinstance(present_bool, bool) and present_bool

    # floats
    present_float = utils.get_float_arg(req, "float")
    assert isinstance(present_float, float) and present_float == 1.1
    defaulted_float = utils.get_float_arg(req, "int_def", default=2.0)
    assert defaulted_float == 2.0
    blank_float = utils.get_float_arg(req, "empty_float")
    assert blank_float is None
    failing_float = mock.Mock(side_effect=Exception)
    with mock.patch("{}.float".format(builtin_pkg), failing_float):
        fallback_float = utils.get_float_arg(req, "float", default=2.0)
        assert fallback_float == 2
def get_correlations_ts(data_id):
    """
    :class:`flask:flask.Flask` route which returns timeseries of Pearson correlations of two columns with numeric data
    using :meth:`pandas:pandas.DataFrame.corr`

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param cols: JSON-encoded string from flask.request.args['cols'] (decoded with json.loads) containing names of
                 two columns in dataframe
    :param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
    :param rollingWindow: integer from flask.request.args['rollingWindow']; when truthy a rolling correlation over
                          that window is computed instead of one correlation per date
    :returns: JSON output of build_chart for the ('date', 'corr') series with success: True added,
              or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    try:
        query = get_str_arg(request, 'query')
        data = DATA[data_id]
        data = data.query(query) if query is not None else data
        cols = get_str_arg(request, 'cols')
        cols = json.loads(cols)
        date_col = get_str_arg(request, 'dateCol')
        rolling_window = get_int_arg(request, 'rollingWindow')
        if rolling_window:
            [col1, col2] = list(set(cols))
            data = data[[date_col, col1, col2]].set_index(date_col)
            data = data[[col1, col2]].rolling(rolling_window).corr().reset_index()
            data = data.dropna()
            # keep only the rows for col1, whose col2 cell holds corr(col1, col2)
            data = data[data['level_1'] == col1][[date_col, col2]]
        else:
            data = data.groupby(date_col)[list(set(cols))].corr(method='pearson')
            data.index.names = ['date', 'column']
            data = data.reset_index()
            data = data[data.column == cols[0]][['date', cols[1]]]
        # both branches leave exactly two columns: the date and the correlation value
        data.columns = ['date', 'corr']
        return_data = build_chart(data.fillna(0), 'date', 'corr')
        return_data['success'] = True
        return jsonify(return_data)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
def get_scatter(data_id):
    """
    :class:`flask:flask.Flask` route which returns data used in correlation of two columns for scatter chart

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param cols: value from flask.request.args['cols'] read with get_json_arg, containing names of two columns
                 in dataframe
    :param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
    :param date: string from flask.request.args['date'] date value in dateCol to filter dataframe to
    :param rolling: boolean from flask.request.args['rolling']; when true the data is restricted to a window of rows
                    ending at the first row matching date
    :param window: integer from flask.request.args['window'], number of rows in the rolling window (only read when
                   rolling is true)
    :returns: JSON {
        data: [{col1: 0.123, col2: 0.123, index: 1},...,{col1: 0.123, col2: 0.123, index: N}],
        stats: {
            correlated: 50,
            only_in_s0: 1,
            only_in_s1: 2,
            pearson: 0.987,
            spearman: 0.879,
        },
        x: col1,
        y: col2
    } or {stats: {...}, error: 'Dataset exceeds 15,000 records...'} when there are too many rows to render
    or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    cols = get_json_arg(request, 'cols')
    query = get_str_arg(request, 'query')
    date = get_str_arg(request, 'date')
    date_col = get_str_arg(request, 'dateCol')
    rolling = get_bool_arg(request, 'rolling')
    try:
        data = DATA[data_id]
        if query:
            data = data.query(query)

        idx_col = str('index')
        y_cols = [cols[1], idx_col]
        if rolling:
            window = get_int_arg(request, 'window')
            # NOTE(review): an index label is used as an iloc position here, which assumes a
            # default 0..N-1 index - confirm this holds after query()
            idx = min(data[data[date_col] == date].index) + 1
            data = data.iloc[max(idx - window, 0):idx]
            data = data[list(set(cols)) + [date_col]].dropna(how='any')
            y_cols.append(date_col)
        else:
            data = data[data[date_col] == date] if date else data
            data = data[list(set(cols))].dropna(how='any')

        data[idx_col] = data.index
        s0 = data[cols[0]]
        s1 = data[cols[1]]
        pearson = s0.corr(s1, method='pearson')
        spearman = s0.corr(s1, method='spearman')
        stats = dict(
            pearson='N/A' if pd.isnull(pearson) else pearson,
            spearman='N/A' if pd.isnull(spearman) else spearman,
            correlated=len(data),
            only_in_s0=len(data[data[cols[0]].isnull()]),
            only_in_s1=len(data[data[cols[1]].isnull()])
        )

        if len(data) > 15000:
            # too many points to draw; still return the stats so they can be displayed
            return jsonify(
                stats=stats,
                error='Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...'
            )
        data = build_chart(data, cols[0], y_cols, allow_duplicates=True)
        data['x'] = cols[0]
        data['y'] = cols[1]
        data['stats'] = stats
        return jsonify(data)
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate
        return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
def __init__(self, req):
    """
    Populate ordinal-analysis settings from the request arguments.

    Reads "top", "ordinalCol", "ordinalAgg" (with "sum" passed as the
    fallback) and "cleaner".

    :param req: request-like object handed to get_int_arg / get_str_arg
    """
    top_n = get_int_arg(req, "top")
    self.top = top_n
    ordinal_column = get_str_arg(req, "ordinalCol")
    self.ordinal_col = ordinal_column
    ordinal_aggregation = get_str_arg(req, "ordinalAgg", "sum")
    self.ordinal_agg = ordinal_aggregation
    cleaner_arg = get_str_arg(req, "cleaner")
    self.cleaners = cleaner_arg
def __init__(self, req):
    """
    Populate the bin count and target column from the request arguments.

    Reads "bins" (with 20 passed as the fallback) and "target".

    :param req: request-like object handed to get_int_arg / get_str_arg
    """
    bin_count = get_int_arg(req, "bins", 20)
    self.bins = bin_count
    target_name = get_str_arg(req, "target")
    self.target = target_name
def __init__(self, req):
    """
    Populate the bin count from the request arguments.

    Reads "bins" (with 20 passed as the fallback).

    :param req: request-like object handed to get_int_arg
    """
    bin_count = get_int_arg(req, "bins", 20)
    self.bins = bin_count
def __init__(self, req):
    """
    Populate the bin count, target column and density flag from the request arguments.

    Reads "bins" (with 20 passed as the fallback), "target" and "density".

    :param req: request-like object handed to get_int_arg / get_str_arg / get_bool_arg
    """
    bin_count = get_int_arg(req, "bins", 20)
    self.bins = bin_count
    target_name = get_str_arg(req, "target")
    self.target = target_name
    density_flag = get_bool_arg(req, "density")
    self.density = density_flag