def view_schema(key):
    """Return the JSON-encoded column schema of the data stored under ``key``.

    The data is looked up with ``get_data_info(key)``. For a pandas or Spark
    DataFrame the result is ``data_json.to_json`` applied to a ``'table'``
    payload whose ``count`` and ``bytes`` are both ``-1`` and whose ``schema``
    lists one ``{'column-name', 'column-type'}`` entry per column.

    Raises:
        Exception: if no entry, or an entry without ``'data'``, is found.

    Returns:
        The value produced by ``data_json.to_json``, or ``None`` when the
        stored object is neither a pandas nor a Spark DataFrame.
    """
    data_info = get_data_info(key)
    if not data_info or 'data' not in data_info:
        raise Exception('no data for ' + key)
    data = data_info['data']

    if isinstance(data, pd.DataFrame):
        # Pair labels and dtypes positionally.  A label lookup
        # (data.dtypes[c]) returns a Series when column labels repeat, and
        # that Series' ``.name`` is not the dtype name.
        schema = [
            {'column-name': name, 'column-type': dtype.name}
            for name, dtype in zip(data.columns, data.dtypes)
        ]
    elif psdf and isinstance(data, psdf.DataFrame):
        # ``psdf`` may be falsy (presumably when Spark is unavailable --
        # TODO confirm); its dtypes are (name, type) pairs.
        schema = [
            {'column-name': c[0], 'column-type': c[1]}
            for c in data.dtypes
        ]
    else:
        # Unsupported payloads have no schema; callers receive None.
        return None

    return data_json.to_json({
        'type': 'table',
        'data': {
            'count': -1,
            'bytes': -1,
            'schema': schema,
        },
    })
def view_data(key):
    """Return the JSON-encoded content of the data stored under ``key``.

    * pandas DataFrame: a ``'table'`` payload with row count, schema and all
      rows as lists of cell values.
    * Spark DataFrame (when ``psdf`` is available): the same shape, built
      from the collected rows.
    * anything else: ``{'type': <typeName from the status>, 'data': data}``.

    Raises:
        Exception: if no entry, or an entry without ``'data'``, is found.

    Returns:
        The value produced by ``data_json.to_json``.
    """
    data_info = get_data_info(key)
    if not data_info or 'data' not in data_info:
        raise Exception('no data for ' + key)
    data = data_info['data']
    type_name = status_gateway.get_field(data_info['status'], 'typeName')

    if isinstance(data, pd.DataFrame):
        # Pair labels and dtypes positionally so duplicate column labels
        # still report the right dtype.
        schema = [
            {'column-name': name, 'column-type': dtype.name}
            for name, dtype in zip(data.columns, data.dtypes)
        ]
        # itertuples keeps each column's own value type; iterrows would
        # coerce every row to one dtype (e.g. ints become floats in a mixed
        # numeric frame) and a by-label cell lookup breaks on duplicate
        # labels.
        rows = [list(row)
                for row in data.itertuples(index=False, name=None)]
        return data_json.to_json({
            'type': 'table',
            'data': {
                'count': data.shape[0],
                'bytes': -1,
                'schema': schema,
                'data': rows,
            },
        })

    if psdf and isinstance(data, psdf.DataFrame):
        # Collect once and derive the count from the result, so the count
        # always matches the rows and the frame is evaluated a single time.
        collected = data.collect()
        return data_json.to_json({
            'type': 'table',
            'data': {
                'count': len(collected),
                'bytes': -1,
                'schema': [{'column-name': c[0], 'column-type': c[1]}
                           for c in data.dtypes],
                'data': [list(row) for row in collected],
            },
        })

    return data_json.to_json({
        'type': type_name,
        'data': data,
    })
def unload_model(model, path):
    """Serialize ``model`` and write it to ``path``, creating parent dirs.

    The model is converted with ``data_json.to_json(model, for_redis=True)``
    and the result is dumped with ``json.dump`` through a UTF-8 writer.

    Args:
        model: object to serialize.
        path: destination file path; may be a bare file name.
    """
    # Serialize before touching the file system so a serialization error
    # cannot leave behind a truncated or empty file.
    payload = data_json.to_json(model, for_redis=True)

    dir_ = os.path.dirname(path)
    if dir_:  # a bare file name has no directory; os.makedirs('') raises
        try:
            os.makedirs(dir_)
        except OSError:
            # Fine if the directory already exists (possibly created
            # concurrently); anything else is a real error.
            if not os.path.isdir(dir_):
                raise

    with open(path, 'wb') as fp:
        json.dump(payload, codecs.getwriter('utf-8')(fp), ensure_ascii=False)
def unload_model(path, **params):
    """Write the linked model input to ``path`` as JSON.

    ``params['linked']['outputs']['model']`` holds the name of the keyword
    argument that carries the model.  That argument is serialized with
    ``data_json.to_json(..., for_redis=True)`` and dumped to ``path``.

    Args:
        path: destination file path; may be a bare file name.
        **params: must contain ``'linked'``; the model itself is expected
            under the name given by ``linked['outputs']['model']``.

    Returns:
        ``{"model": <model name>}`` -- the name, not the model object.
    """
    outputs = params['linked']['outputs']
    model = outputs['model']

    # Look the input up by key equality.  The previous ``k is model``
    # identity test only matched when both strings happened to be the same
    # object, silently writing ``{}`` otherwise.
    try:
        model_data = params[model]
    except (KeyError, TypeError):  # missing name or unhashable name
        model_data = {}

    # Serialize first so a failure cannot leave a truncated file behind.
    payload = data_json.to_json(model_data, for_redis=True)

    dir_ = os.path.dirname(path)
    if dir_:  # a bare file name has no directory; os.makedirs('') raises
        try:
            os.makedirs(dir_)
        except OSError:
            # Tolerate an already existing (or concurrently created) dir.
            if not os.path.isdir(dir_):
                raise

    with open(path, 'wb') as fp:
        json.dump(payload, codecs.getwriter('utf-8')(fp), ensure_ascii=False)
    return {"model": model}
def view_data(key):
    """Return the JSON-encoded content of the data stored under ``key``.

    * pandas DataFrame: a ``'table'`` payload with row count, a schema whose
      column types are translated through a dtype-name map, and the cell
      values as a 2-D array with missing entries set to ``None``.
    * Spark DataFrame (when ``psdf`` is available): a ``'table'`` payload
      built from the collected rows.
    * anything else: ``{'type': <typeName from the status>, 'data': data}``.

    Raises:
        Exception: if no entry, or an entry without ``'data'``, is found.

    Returns:
        The value produced by ``data_json.to_json``.
    """
    data_info = get_data_info(key)
    if not data_info or 'data' not in data_info:
        raise Exception('no data for ' + key)
    data = data_info['data']
    type_name = status_gateway.get_field(data_info['status'], 'typeName')

    if isinstance(data, pd.DataFrame):

        def schema_map(df):
            """Yield one schema entry per column of ``df``."""
            _dtypemap = {
                'int': 'long',
                'int32': 'int',
                'int64': 'long',
                'float64': 'double',
                'U': 'string',
                'S': 'string',
                'object': 'string',
                'string': 'string',
                'bool': 'boolean',
                'boolean': 'boolean',
                # col_dtype() already returns these target names for
                # Python ints/floats found in object columns; without
                # identity entries the final lookup raised KeyError.
                'long': 'long',
                'double': 'double',
                'array(double)': 'array(double)',
                'array(int)': 'array(int)',
                'array(long)': 'array(long)',
                'array(string)': 'array(string)',
                # NOTE(review): boolean arrays are reported as string
                # arrays; kept as found -- confirm this is intended.
                'array(boolean)': 'array(string)',
            }

            def col_dtype(col):
                """Infer a type name from the first informative element."""
                for elem in col:
                    if isinstance(elem, np.ndarray):
                        if elem.dtype.kind in {'S', 'U'}:
                            return 'array(string)'
                        inner = elem.dtype.name
                        return 'array(' + _dtypemap.get(inner, inner) + ')'
                    if isinstance(elem, list):
                        return 'array(' + col_dtype(elem) + ')'
                    if not isna(elem):
                        if isinstance(elem, str):
                            return 'string'
                        # bool must be tested before int: bool is an int
                        # subclass.
                        if isinstance(elem, bool):
                            return 'boolean'
                        if isinstance(elem, int):
                            return 'long'
                        if isinstance(elem, float):
                            return 'double'
                # Empty or all-missing column (or only unrecognized values).
                return 'object'

            for colname, dtype in zip(df.columns, df.dtypes):
                if dtype.name == 'object':
                    dt = col_dtype(df[colname])
                else:
                    dt = dtype.name
                # Names without a mapping (e.g. other numeric widths) are
                # passed through unchanged instead of raising KeyError.
                yield {'column-name': colname,
                       'column-type': _dtypemap.get(dt, dt)}

        def ensure_none(df):
            """Return the cell values with missing entries set to None."""
            val = df.values
            missing = isna(val)
            if missing.any():
                # ``df.values`` can be a view of the frame's own storage
                # (or read-only); copy so the stored DataFrame is never
                # modified by a preview.
                val = val.copy()
                # NOTE(review): in a float array numpy stores None as NaN,
                # so purely numeric frames still carry NaN here -- confirm
                # the serializer handles that.
                val[missing] = None
            return val

        return data_json.to_json({
            'type': 'table',
            'data': {
                'count': data.shape[0],
                'bytes': -1,
                'schema': list(schema_map(data)),
                'data': ensure_none(data),
            },
        })

    if psdf and isinstance(data, psdf.DataFrame):
        # Collect once and derive the count from the result, so the count
        # always matches the rows and the frame is evaluated a single time.
        collected = data.collect()
        return data_json.to_json({
            'type': 'table',
            'data': {
                'count': len(collected),
                'bytes': -1,
                'schema': [{'column-name': c[0], 'column-type': c[1]}
                           for c in data.dtypes],
                'data': [list(row) for row in collected],
            },
        })

    return data_json.to_json({'type': type_name, 'data': data})
def unload_model(path, **params):
    """Build a model from the linked inputs and write it to ``path`` as JSON.

    ``params['linked']`` describes the wiring: its ``'outputs'`` (or, when
    absent, ``'outData'``) entry names the keyword arguments that carry the
    data, and its ``'param'`` entry holds the function parameters.

    * If ``'model'`` is among the outputs, the named input is used as is.
    * Otherwise every named input becomes ``table_1``, ``table_2``, ... and
      the model is produced by ``_unload_model``.  When ``param`` has a
      non-empty ``groupby`` / ``group_by``, the tables are split by the
      distinct group values found in ``table_1`` and ``_unload_model`` runs
      once per group; results are stored under ``'_grouped_data'``.

    Args:
        path: destination file path; may be a bare file name.
        **params: must contain ``'linked'`` plus the named inputs.

    Returns:
        ``{'model': model}`` with the model object that was written.
    """
    linked = params['linked']
    if 'outputs' in linked:
        outputs = linked['outputs']
    else:
        outputs = linked['outData']
    param = linked['param']

    def _input_for(name):
        # Key lookup by equality.  The previous ``k is name`` identity test
        # only matched when both strings were the same object and silently
        # produced ``{}`` otherwise.
        try:
            return params[name]
        except (KeyError, TypeError):  # missing name or unhashable name
            return {}

    if 'model' in outputs:
        model = _input_for(outputs['model'])
    else:
        if isinstance(outputs, list):
            names = outputs
        else:
            names = [name for _, name in outputs.items()]
        model_table = dict()
        for position, name in enumerate(names, 1):
            model_table['table_{}'.format(position)] = _input_for(name)

        # Use whichever spelling actually carries columns; an empty
        # 'groupby' no longer hides a populated 'group_by'.
        group_by = param.get('groupby') or param.get('group_by')

        if group_by:
            sample_table = model_table['table_1']
            groups = sample_table[group_by].drop_duplicates().values
            group_keys = np.array(
                [_group_key_from_list(row) for row in groups])
            group_key_dict = {
                k: v.tolist() for k, v in zip(group_keys, groups)
            }
            model = {
                '_grouped_data': _grouped_data(
                    group_by=group_by, group_key_dict=group_key_dict)
            }
            for group_key, group_key_row in group_key_dict.items():
                # Restrict every table to the rows of the current group.
                tmp_model_table = dict()
                for table_name, table in model_table.items():
                    for group_by_col, group in zip(group_by, group_key_row):
                        table = table[table[group_by_col] == group]
                    tmp_model_table[table_name] = \
                        table.reset_index(drop=True)
                model['_grouped_data']['data'][group_key] = _unload_model(
                    tmp_model_table, linked, param)
        else:
            model = _unload_model(model_table, linked, param)

    dir_ = os.path.dirname(path)
    if dir_:  # a bare file name has no directory; os.makedirs('') raises
        try:
            os.makedirs(dir_)
        except OSError:
            # Tolerate an already existing (or concurrently created) dir.
            if not os.path.isdir(dir_):
                raise

    with open(path, 'wb') as fp:
        json.dump(data_json.to_json(model, for_redis=True),
                  codecs.getwriter('utf-8')(fp), ensure_ascii=False)
    return {'model': model}
def set(key, data):
    """Store ``data`` under ``key`` through the gateway's key-value store.

    The value is first converted with ``data_json.to_json(data,
    for_redis=True)`` and the result is handed to ``gateway.put_kv_data``.
    """
    # NOTE: this name shadows the built-in ``set`` inside this module; it is
    # kept because it is the function's public name.
    encoded = data_json.to_json(data, for_redis=True)
    gateway.put_kv_data(key, encoded)