Exemple #1
0
def agg(file, groupby, applyname, func):
    """Operates on a groupby column in a csv file and applies a function

    Example Usage:
    ./csvcli.py cvsops --file ext/input.csv --groupby last_name --applyname count --func npmedian
    Processing csvfile: ext/input.csv and groupby name: last_name and applyname: count
    2017-06-22 14:07:52,532 - nlib.utils - INFO - Loading appliable functions/plugins: npmedian
    2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: npsum
    2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: numpy
    2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: tanimoto
    last_name
    eagle    17.0
    lee       3.0
    smith    13.5
    Name: count, dtype: float64

    Arguments:
    file -- csv input handed to csvops.group_by_operations as ``data``
    groupby -- name of the column to group by
    applyname -- name of the column the function is applied to
    func -- key of the appliable function in utils.plugins_map()

    Exits with status 1 when any argument is missing or when ``func`` is
    not a registered plugin; otherwise echoes the aggregation result.
    """
    # All four options are mandatory, so fail as soon as ANY one is missing.
    # (The previous check chained the negations with ``and`` and therefore
    # only triggered when every option was absent.)
    if not (file and groupby and applyname and func):
        click.echo("--file and --groupby and --applyname and --func are required")
        sys.exit(1)

    click.echo(
        "Processing csvfile: {file} and groupby name: {groupby} and applyname: {applyname}".format(
            file=file, groupby=groupby, applyname=applyname))

    # Load plugins and grab the requested one; report an unknown name as a
    # normal CLI error rather than letting a KeyError traceback escape.
    plugins = utils.plugins_map()
    try:
        appliable_func = plugins[func]
    except KeyError:
        click.echo("Unknown --func: {func}".format(func=func))
        sys.exit(1)

    res = csvops.group_by_operations(
        data=file,
        groupby_column_name=groupby,
        apply_column_name=applyname,
        func=appliable_func)
    click.echo(res)
Exemple #2
0
def csv_aggregate_columns(groupbyop):
    """Aggregate column in an uploaded csv

    ---
        consumes:  application/json
        parameters:
            -   in: path
                name:  Appliable Function (i.e.  npsum, npmedian)
                type:  string
                required: true
                description:  appliable function, which must be registered (check /api/funcs)
            -   in: query
                name: column
                type: string
                description:  The column to process in an aggregation
                required:  True
            -   in: query
                name: group_by
                type: string
                description:  The column to group_by in an aggregation
                required:  True
            -   in: header
                name:  Content-Type
                type:  string
                description:  Requires "Content-Type:application/json" to be set
                required:  True
            -   in: body
                name: payload
                type:  string
                description:  base64 encoded csv file
                required: True

        responses:
            200:
                description: Returns an aggregated CSV.

    """
    # NOTE(review): the docstring section below '---' looks like an API spec
    # that may be parsed at runtime by a documentation tool, so its content
    # is intentionally left as it was.

    #TO DO?:  Make this into a helper function
    #Return 415 if not valid content type
    content_type = request.headers.get('Content-Type')
    content_type_log_msg = "Content-Type is set to:  {content_type}".\
        format(content_type=content_type)
    log.info(content_type_log_msg)
    # Compare only the media type: the header may legitimately carry
    # parameters ("application/json; charset=utf-8") and is case-insensitive.
    # A missing header (None) is treated as an empty media type.
    media_type = (content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        wrong_method_log_msg =\
             "Wrong Content-Type in request: {content_type} sent, but requires application/json".\
            format(content_type=content_type)
        log.info(wrong_method_log_msg)
        return jsonify({
            "content_type": content_type,
            "error_msg": wrong_method_log_msg
        }), status.HTTP_415_UNSUPPORTED_MEDIA_TYPE

    #Parse Query Parameters and Retrieve Values
    query_string = request.query_string
    query_string_msg = "Request Query String: {query_string}".format(
        query_string=query_string)
    log.info(query_string_msg)
    column = request.args.get("column")
    group_by = request.args.get("group_by")

    #Query Parameter logging and handling
    query_parameters_log_msg = "column: [{column}] and group_by: [{group_by}] Query Parameter values".\
        format(column=column, group_by=group_by)
    log.info(query_parameters_log_msg)
    if not column or not group_by:
        error_msg = "Query Parameter column or group_by not set"
        log.info(error_msg)
        return jsonify({
            "column": column,
            "group_by": group_by,
            "error_msg": error_msg
        }), status.HTTP_400_BAD_REQUEST

    #Load Plugins and grab correct one
    plugins = utils.plugins_map()
    try:
        appliable_func = plugins[groupbyop]
    except KeyError:
        # An unregistered function name is a client mistake; answer with a
        # 400 instead of letting the KeyError become a server error.
        error_msg = "Appliable function [{groupbyop}] is not registered".\
            format(groupbyop=groupbyop)
        log.info(error_msg)
        return jsonify({
            "groupbyop": groupbyop,
            "error_msg": error_msg
        }), status.HTTP_400_BAD_REQUEST

    #TO DO?:  Add some additional error handling (invalid column name, etc)
    #Unpack data and operate on it
    data, _ = _b64decode_helper(request)
    #Returns Pandas Series
    res = csvops.group_by_operations(data,
                                     groupby_column_name=group_by,
                                     apply_column_name=column,
                                     func=appliable_func)
    log.info(res)
    return res.to_json(), status.HTTP_200_OK