Ejemplo n.º 1
0
def view_schema(key):
    """Return the JSON-encoded column schema of the table stored under ``key``.

    The entry is fetched with ``get_data_info``. For a pandas DataFrame the
    column types are the dtype names; for a ``psdf.DataFrame`` (only checked
    when ``psdf`` is truthy) they are taken from the ``(name, type)`` pairs in
    ``dtypes``. ``count`` and ``bytes`` are always reported as ``-1``.

    Returns ``None`` when the stored data is neither kind of DataFrame.

    Raises:
        Exception: if there is no entry for ``key`` or it has no ``'data'``.
    """
    info = get_data_info(key)
    if not (info and 'data' in info):
        raise Exception('no data for ' + key)

    frame = info['data']
    if isinstance(frame, pd.DataFrame):
        columns = [
            {'column-name': name, 'column-type': frame.dtypes[name].name}
            for name in frame.columns.to_series()
        ]
    elif psdf and isinstance(frame, psdf.DataFrame):
        columns = [
            {'column-name': pair[0], 'column-type': pair[1]}
            for pair in frame.dtypes
        ]
    else:
        # Anything that is not a table has no schema to report.
        return None

    description = {'count': -1, 'bytes': -1, 'schema': columns}
    return data_json.to_json({'type': 'table', 'data': description})
Ejemplo n.º 2
0
def view_data(key):
    """Return the JSON-encoded contents of the data stored under ``key``.

    DataFrames (pandas, or ``psdf`` when that module is available) are
    rendered as a ``'table'`` payload holding the row count, a ``bytes`` field
    fixed at ``-1``, the column schema and the rows as nested lists. Any other
    value is passed through as-is, tagged with the ``typeName`` field read
    from the entry's status.

    Raises:
        Exception: if there is no entry for ``key`` or it has no ``'data'``.
    """
    info = get_data_info(key)
    if not (info and 'data' in info):
        raise Exception('no data for ' + key)

    payload = info['data']
    # Looked up unconditionally, even though only the fallback branch uses it.
    fallback_type = status_gateway.get_field(info['status'], 'typeName')

    if isinstance(payload, pd.DataFrame):
        row_count = payload.shape[0]
        schema = [
            {'column-name': name, 'column-type': payload.dtypes[name].name}
            for name in payload.columns.to_series()
        ]
        rows = [
            [record[name] for name in payload.columns]
            for _, record in payload.iterrows()
        ]
    elif psdf and isinstance(payload, psdf.DataFrame):
        row_count = payload.count()
        schema = [
            {'column-name': pair[0], 'column-type': pair[1]}
            for pair in payload.dtypes
        ]
        rows = [list(record) for record in payload.collect()]
    else:
        return data_json.to_json({'type': fallback_type, 'data': payload})

    table = {
        'count': row_count,
        'bytes': -1,
        'schema': schema,
        'data': rows,
    }
    return data_json.to_json({'type': 'table', 'data': table})
Ejemplo n.º 3
0
def unload_model(model, path):
    """Write ``model`` to ``path`` as UTF-8 encoded JSON.

    The model is converted with ``data_json.to_json(model, for_redis=True)``
    before being dumped. Missing parent directories of ``path`` are created.

    Args:
        model: object to serialize.
        path: destination file path; may be a bare file name.
    """
    dir_ = os.path.dirname(path)
    # ``dirname`` is '' for a bare file name and ``os.makedirs('')`` raises,
    # so only create the directory when the path actually names one.
    if dir_ and not os.path.exists(dir_):
        os.makedirs(dir_)
    with open(path, 'wb') as fp:
        json.dump(data_json.to_json(model, for_redis=True),
                  codecs.getwriter('utf-8')(fp),
                  ensure_ascii=False)
Ejemplo n.º 4
0
def unload_model(path, **params):
    """Write the model named by ``params['linked']['outputs']['model']`` to ``path``.

    The model object is looked up among the keyword arguments under that
    name; when no keyword argument has that name an empty dict is written
    instead. The object is converted with
    ``data_json.to_json(..., for_redis=True)`` and dumped as UTF-8 JSON.
    Missing parent directories of ``path`` are created.

    Args:
        path: destination file path; may be a bare file name.
        **params: must contain ``'linked'`` with an ``'outputs'`` mapping that
            has a ``'model'`` entry; the remaining entries are the candidate
            inputs, keyed by name.

    Returns:
        dict: ``{"model": <model name>}`` -- the name, not the model object.
    """
    outputs = params['linked']['outputs']
    model = outputs['model']

    # Look the name up by value. An identity (``is``) comparison between two
    # equal strings is not guaranteed to succeed, which made the lookup fall
    # back to ``{}`` even though the model had been passed in.
    model_data = params.get(model, {})

    dir_ = os.path.dirname(path)
    # ``dirname`` is '' for a bare file name and ``os.makedirs('')`` raises.
    if dir_ and not os.path.exists(dir_):
        os.makedirs(dir_)
    with open(path, 'wb') as fp:
        json.dump(data_json.to_json(model_data, for_redis=True),
                  codecs.getwriter('utf-8')(fp),
                  ensure_ascii=False)

    return {"model": model}
Ejemplo n.º 5
0
def view_data(key):
    """Return the JSON-encoded contents of the data stored under ``key``.

    A pandas DataFrame is rendered as a ``'table'`` payload whose schema uses
    the type names from ``_dtypemap`` below and whose cells have missing
    values replaced by ``None``. A ``psdf.DataFrame`` (only checked when
    ``psdf`` is truthy) is rendered from its ``dtypes`` and collected rows.
    Any other value is passed through, tagged with the ``typeName`` field read
    from the entry's status. ``bytes`` is always reported as ``-1``.

    Raises:
        Exception: if there is no entry for ``key`` or it has no ``'data'``.
    """
    data_info = get_data_info(key)
    if not data_info or 'data' not in data_info:
        raise Exception('no data for ' + key)

    data = data_info['data']
    type_name = status_gateway.get_field(data_info['status'], 'typeName')
    if isinstance(data, pd.DataFrame):

        def schema_map(df):
            """Yield one ``{'column-name', 'column-type'}`` dict per column."""
            # NOTE(review): 'array(boolean)' maps to 'array(string)'; this
            # looks unintentional but is kept as found -- confirm with the
            # consumer of this schema.
            _dtypemap = {
                'int': 'long',
                'int32': 'int',
                'int64': 'long',
                'float64': 'double',
                'U': 'string',
                'S': 'string',
                'object': 'string',
                'string': 'string',
                'bool': 'boolean',
                'boolean': 'boolean',
                'array(double)': 'array(double)',
                'array(int)': 'array(int)',
                'array(long)': 'array(long)',
                'array(string)': 'array(string)',
                'array(boolean)': 'array(string)'
            }

            def col_dtype(col):
                """Infer a type name from the first classifiable element."""
                for elem in col:
                    if isinstance(elem, np.ndarray):
                        if elem.dtype.kind in {'S', 'U'}:
                            return 'array(string)'
                        else:
                            elem_type = elem.dtype.name
                            # Unlisted numpy dtypes keep their own name
                            # instead of raising KeyError.
                            return ('array(' +
                                    _dtypemap.get(elem_type, elem_type) + ')')
                    elif isinstance(elem, list):
                        return 'array(' + col_dtype(elem) + ')'
                    else:
                        if not isna(elem):
                            if isinstance(elem, str):
                                return 'string'
                            # bool must be tested before int: bool is a
                            # subclass of int.
                            elif isinstance(elem, bool):
                                return 'boolean'
                            elif isinstance(elem, int):
                                return 'long'
                            elif isinstance(elem, float):
                                return 'double'
                return 'object'

            for colname, dtype in zip(df.columns, df.dtypes):
                dt = col_dtype(df[colname]) \
                    if dtype.name == 'object' else dtype.name
                # Fall back to the raw name for dtypes missing from the map
                # (e.g. float32, datetime64[ns]) rather than failing the view.
                yield {
                    'column-name': colname,
                    'column-type': _dtypemap.get(dt, dt)
                }

        def ensure_none(df):
            """Return the cell values as an object array with NA as None."""
            # Work on a fresh object-dtype copy: a numeric array cannot hold
            # None (floats turn it back into NaN, ints reject it), and
            # writing into ``df.values`` directly may touch the frame's own
            # buffer.
            val = np.array(df.astype(object).values)
            val[isna(df.values)] = None
            return val

        return data_json.to_json({
            'type': 'table',
            'data': {
                'count': data.shape[0],
                'bytes': -1,
                'schema': list(schema_map(data)),
                'data': ensure_none(data)
            }
        })
    elif psdf and isinstance(data, psdf.DataFrame):
        return data_json.to_json({
            'type': 'table',
            'data': {
                'count':
                data.count(),
                'bytes':
                -1,
                'schema': [{
                    'column-name': c[0],
                    'column-type': c[1]
                } for c in data.dtypes],
                'data': [list(row) for row in data.collect()]
            }
        })
    else:
        return data_json.to_json({'type': type_name, 'data': data})
Ejemplo n.º 6
0
def unload_model(path, **params):
    """Build a model from the linked outputs and write it to ``path`` as JSON.

    ``params['linked']`` supplies the output names (``'outputs'``, or
    ``'outData'`` when that key is absent) and the ``'param'`` settings; the
    remaining keyword arguments are the candidate inputs, keyed by name.

    * If the outputs contain ``'model'``, the input of that name is the model.
    * Otherwise every output is collected into ``table_1``, ``table_2``, ...
      and handed to ``_unload_model``. When ``param`` has a non-empty
      ``'groupby'`` or ``'group_by'`` entry, the tables are split per distinct
      group found in ``table_1`` and ``_unload_model`` runs once per group,
      with the results stored under ``model['_grouped_data']['data']``.

    An output name with no matching keyword argument resolves to ``{}``.
    The model is converted with ``data_json.to_json(..., for_redis=True)`` and
    dumped as UTF-8 JSON; missing parent directories of ``path`` are created.

    Args:
        path: destination file path; may be a bare file name.
        **params: ``'linked'`` plus the named inputs.

    Returns:
        dict: ``{'model': model}``.
    """
    linked = params['linked']
    if 'outputs' in linked:
        outputs = linked['outputs']
    else:
        outputs = linked['outData']
    param = linked['param']

    def getDataFromInputs(data):
        # Look the name up by value; an identity (``is``) comparison between
        # two equal strings is not guaranteed to succeed.
        return params.get(data, {})

    if 'model' in outputs:
        model = getDataFromInputs(outputs['model'])
    else:
        if isinstance(outputs, list):
            output_names = outputs
        else:
            output_names = outputs.values()
        model_table = dict()
        for position, name in enumerate(output_names, 1):
            model_table['table_{}'.format(position)] = getDataFromInputs(name)

        # Use the first grouping option that is actually set. An empty
        # 'groupby' must not shadow a populated 'group_by'.
        group_by = None
        for option in ('groupby', 'group_by'):
            if option in param and param[option]:
                group_by = param[option]
                break

        if group_by is not None:
            sample_table = model_table['table_1']
            groups = sample_table[group_by].drop_duplicates().values
            group_keys = np.array(
                [_group_key_from_list(row) for row in groups])
            group_key_dict = {
                k: v.tolist()
                for k, v in zip(group_keys, groups)
            }
            model = {
                '_grouped_data':
                _grouped_data(group_by=group_by, group_key_dict=group_key_dict)
            }
            for group_key in group_key_dict:
                group_key_row = group_key_dict[group_key]
                # Narrow every table down to the rows of the current group.
                tmp_model_table = dict()
                for k, v in model_table.items():
                    for group_by_col, group in zip(group_by, group_key_row):
                        v = v[v[group_by_col] == group]
                    tmp_model_table[k] = v.reset_index(drop=True)
                model['_grouped_data']['data'][group_key] = _unload_model(
                    tmp_model_table, linked, param)
        else:
            model = _unload_model(model_table, linked, param)

    dir_ = os.path.dirname(path)
    # ``dirname`` is '' for a bare file name and ``os.makedirs('')`` raises.
    if dir_ and not os.path.exists(dir_):
        os.makedirs(dir_)
    with open(path, 'wb') as fp:
        json.dump(data_json.to_json(model, for_redis=True),
                  codecs.getwriter('utf-8')(fp),
                  ensure_ascii=False)
    return {'model': model}
Ejemplo n.º 7
0
 def set(key, data):
     """Store ``data`` under ``key`` via ``gateway.put_kv_data``.

     The value is converted with ``data_json.to_json(data, for_redis=True)``
     before it is handed to the gateway.
     """
     encoded = data_json.to_json(data, for_redis=True)
     gateway.put_kv_data(key, encoded)