Example #1
def download_prediction(model_id):
    token = DataServer.extract_token(request)

    # check if user sent token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)
    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    # get the model's information from the data server
    user_model = DataServer.get_user_model(user_id=user['user_id'], model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    user_id = user['user_id']
    model_name = user_model['name']

    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info = json.loads(model_info_file.read())

    # check if the file for the prediction exists
    user_predict_dir = os.path.join(PREDICT_DIR, f"{user_id}")
    if not os.path.exists(user_predict_dir):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    predict_file_dir = get_predict_dir(user_id=user_id, model_id=model_id,
                                       file_type=model_info['dataset_info']['type'])
    if not check_file(predict_file_dir):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    # stream the prediction file back as a download
    return Response(
        response=stream_with_context(read_file_chunks(predict_file_dir)),
        status=200,
        headers={
            'Content-Disposition': f'attachment; filename={model_name}.{model_info["dataset_info"]["type"]}'
        },
        mimetype='application/octet-stream'
    )
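
This handler and the ones that follow lean on helpers that are not shown here: the path builders get_model_dir, get_model_info_dir, and get_predict_dir, the existence check check_file, and the generator read_file_chunks that stream_with_context consumes. Below is a minimal sketch of plausible implementations; the per-user directory layout, file extensions, and chunk size are assumptions, not the project's actual code.

import os

# assumed storage roots; the real project defines MODEL_DIR and PREDICT_DIR elsewhere
MODEL_DIR = 'models'
PREDICT_DIR = 'predictions'

def get_model_dir(user_id, model_id):
    # hypothetical layout: one pickled model per (user, model) pair
    return os.path.join(MODEL_DIR, f"{user_id}", f"{model_id}.pickle")

def get_model_info_dir(user_id, model_id):
    # hypothetical layout: JSON metadata stored next to the model
    return os.path.join(MODEL_DIR, f"{user_id}", f"{model_id}.json")

def get_predict_dir(user_id, model_id, file_type):
    # hypothetical layout: prediction output keeps the dataset's extension
    return os.path.join(PREDICT_DIR, f"{user_id}", f"{model_id}.{file_type}")

def check_file(path):
    return os.path.isfile(path)

def read_file_chunks(path, chunk_size=8192):
    # generator consumed by stream_with_context so the response is sent
    # chunk by chunk instead of loading the whole file into memory
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                return
            yield chunk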
Example #2
def predict(
    user_id: int,
    model_id: int,
    to_drop: list,
    predict_data_dir: str,
    model_info: dict,
):

    # load the trained model bundle (estimator, encoders, scaler) from disk
    model_dir = get_model_dir(user_id=user_id, model_id=model_id)
    with open(model_dir, 'rb') as trained_model_file:
        model_data = pickle.load(trained_model_file, fix_imports=True)
    file_type = model_info['dataset_info']['type']
    if file_type in file_extensions['csv']:
        df = pd.read_csv(predict_data_dir)
    elif file_type in file_extensions['json']:
        df = pd.read_json(predict_data_dir, orient='split')
    elif file_type in file_extensions['excel']:
        excel = pd.ExcelFile(predict_data_dir)
        df = pd.read_excel(predict_data_dir, sheet_name=excel.sheet_names[0])
    else:
        raise ValueError(f'Unsupported file type: {file_type}')

    to_drop = [] if to_drop is None else to_drop

    df = df.drop(columns=[col for col in to_drop if col in df])

    # use the encoders fitted during training; re-fitting here would re-learn
    # the class mappings from the prediction data and corrupt the input
    label_encoders = model_data['label_encoders']
    for column in label_encoders:
        if column in df:
            df[column] = label_encoders[column].transform(df[column])

    y_col = model_info['y_col']
    if y_col in df:
        x = df.loc[:, df.columns != y_col]
    else:
        x = df

    normalize_scaler = model_data['normalize_scaler']

    model = model_data['model']

    if normalize_scaler is not None:
        y = DataFrame(data=model.predict(normalize_scaler.transform(x)),
                      columns=[y_col])
    else:
        y = DataFrame(data=model.predict(x), columns=[y_col])

    predict_dir = get_predict_dir(user_id=user_id,
                                  model_id=model_id,
                                  file_type=file_type)

    if file_type in file_extensions['csv']:
        y.to_csv(predict_dir)
    elif file_type in file_extensions['json']:
        y.to_json(predict_dir, orient='split')
    elif file_type in file_extensions['excel']:
        y.to_excel(predict_dir, sheet_name='Sheet1')
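
The pickled model_data bundle loaded above (keys 'model', 'label_encoders', and 'normalize_scaler') is produced by a training step that is not part of these examples. Below is a minimal sketch of what that training side might look like, assuming scikit-learn components; the estimator choice, the scaler, and the train_and_save name are illustrative assumptions:

import os
import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler

def train_and_save(df: pd.DataFrame, y_col: str, model_path: str):
    # hypothetical counterpart to predict(): encode text columns, scale the
    # features, fit a model, then pickle everything predict() expects
    label_encoders = {}
    for column in df.columns[df.dtypes == object]:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le

    x = df.loc[:, df.columns != y_col]
    scaler = StandardScaler().fit(x)
    model = LinearRegression().fit(scaler.transform(x), df[y_col])

    model_data = {
        'model': model,
        'label_encoders': label_encoders,
        # predict() also accepts None here when no scaling was used
        'normalize_scaler': scaler,
    }
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    with open(model_path, 'wb') as f:
        pickle.dump(model_data, f)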
Example #3
def model_info():
    # validate if the request is JSON or not
    if not request.is_json:
        return jsonify({'detail': 'Invalid format'}), 400

    # check if the data server is the one requesting by checking the token
    if not DataServer.check_data_token(request):
        return jsonify({'detail': 'Unauthorized'}), 401

    # validate main request arguments
    request_data = request.get_json()
    missing_args = [item[0] for item in GET_MODEL_INFO_ARGS if item[0] not in request_data]
    # model_id is the only argument that may be omitted
    if len(missing_args) == 1 and missing_args[0] == 'model_id':
        pass
    elif len(missing_args) != 0:
        return jsonify({'detail': 'Missing arguments', 'missing_args': missing_args}), 400

    # type-check whatever arguments were actually sent (model_id may be absent)
    invalid_args = [
        {
            'arg': arg_name,
            'sent_type': type(request_data[arg_name]).__name__,
            'expected_type': arg_type.__name__
        }
        for arg_name, arg_type in GET_MODEL_INFO_ARGS
        if arg_name in request_data and type(request_data[arg_name]) != arg_type
    ]
    if len(invalid_args) != 0:
        return jsonify({'detail': 'Invalid types for arguments.', 'invalid_args': invalid_args}), 400

    user_id = request_data['user_id']
    model_id = request_data.get('model_id')  # may be None; the lookup below then 404s

    # get the model's information from the data server
    user_model = DataServer.get_user_model(user_id=user_id, model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user_id:
        return jsonify({"detail": "The model specified does not belong to the user."}), 403

    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'The model has never been trained before.'}), 404

    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'The model has never been trained before.'}), 404

    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'The model has never been trained before.'}), 404

    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info_text = model_info_file.read()

    # the file already contains JSON text, so return it verbatim
    return model_info_text, 200, {'Content-Type': 'application/json'}
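
GET_MODEL_INFO_ARGS is not shown; from the way the handler indexes it, it is evidently a sequence of (argument_name, expected_type) pairs. A plausible definition, assumed rather than taken from the project:

# assumed shape: (argument name, expected Python type); the handler above
# treats model_id as the one argument that may be omitted
GET_MODEL_INFO_ARGS = [
    ('user_id', int),
    ('model_id', int),
]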
Example #4
def download_model(model_id):
    token = DataServer.extract_token(request)

    # check if user sent token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)
    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    # get the model's information from the data server
    user_model = DataServer.get_user_model(user_id=user['user_id'], model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    user_id = user['user_id']
    model_name = user_model['name']

    # check if the file for the model exists
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    return Response(
        response=stream_with_context(read_file_chunks(get_model_dir(user_id=user_id, model_id=model_id))),
        status=200,
        headers={
            'Content-Disposition': f'attachment; filename={model_name}.pickle'
        },
        mimetype='application/octet-stream'
    )
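
Because the handler streams the pickle with a Content-Disposition header, a client can write it to disk chunk by chunk instead of buffering the whole body in memory. A minimal client sketch using requests; the host, route (guessed from the /download/p/<model_id> endpoint predict() advertises), and token header scheme are assumptions, not the project's documented API:

import requests

BASE_URL = 'http://localhost:5000'  # placeholder host
TOKEN = 'your-token-here'           # token issued by the data server
MODEL_ID = 1

# hypothetical route for download_model; adjust to the app's real URL map
response = requests.get(
    f'{BASE_URL}/download/m/{MODEL_ID}',
    headers={'Authorization': f'Token {TOKEN}'},
    stream=True,
)
response.raise_for_status()
with open('model.pickle', 'wb') as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)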
Example #5
def predict(model_id):
    token = DataServer.extract_token(request)

    # check if user sent token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)
    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    user_id = user['user_id']
    # get the model's information from the data server
    user_model = DataServer.get_user_model(user_id=user_id, model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    # check if the file for the model exists
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    # create the user's prediction directory if it doesn't exist
    user_prediction_dir = os.path.join(PREDICT_DIR, f"{user_id}")
    os.makedirs(user_prediction_dir, exist_ok=True)

    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info = json.loads(model_info_file.read())

    dataset_type = model_info['dataset_info']['type']
    dataset_column_info = model_info['dataset_info']['columns']

    # the dataset is sent in the request body; write it to a temp file first
    temp_file_dir = os.path.join(user_prediction_dir, f'temp.{dataset_type}')
    with open(temp_file_dir, "wb") as file:
        file.write(request.data)

    # validate the uploaded file's columns before overwriting any existing prediction
    try:
        prediction_dataset_column_info = get_columns(temp_file_dir, dataset_type)
    except (UnicodeDecodeError, EmptyDataError):
        os.remove(temp_file_dir)
        return jsonify({'detail': f"Invalid {dataset_type} file."}), 415

    # check columns of the prediction data compared to the training data
    invalid_columns = []
    dataset_column_info_dict = {column['name']: column for column in dataset_column_info}
    for column in prediction_dataset_column_info:
        if column['name'] in dataset_column_info_dict:
            if column['type'] != dataset_column_info_dict[column['name']]['type']:
                invalid_columns.append({
                    'detail': 'Unexpected type',
                    'name': column['name'],
                    'expected_type': dataset_column_info_dict[column['name']]['type']
                })
        else:
            if column['name'] not in model_info['to_drop'] and column['name'] != model_info['y_col']:
                invalid_columns.append({
                    'detail': 'Unexpected column that was not present in the training data',
                    'name': column['name'],
                })
    if len(invalid_columns) != 0:
        return jsonify({'detail': 'The columns of the prediction file do not match the columns used for training',
                        'invalid_columns': invalid_columns}), 400

    to_drop = [col_name for col_name in model_info['to_drop'] if col_name in dataset_column_info_dict]

    # run the prediction and write the output file under PREDICT_DIR
    from predict import predict as predict_data
    predict_data(user_id=user_id, model_id=model_id, to_drop=to_drop,
                 predict_data_dir=temp_file_dir, model_info=model_info)

    # return status code CREATED
    return jsonify({
        'detail': 'Prediction done. You can access the data from the given endpoint.',
        'endpoint': f'/download/p/{model_id}'
    }), 201
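
Neither get_columns nor the exact model_info schema is shown. From the fields accessed above, model_info evidently carries y_col, to_drop, and a dataset_info dict with a type plus a columns list of {'name': ..., 'type': ...} entries. A minimal sketch of get_columns under those assumptions, mirroring the file-type dispatch used in predict(); the file_extensions table is likewise assumed:

import pandas as pd

# assumed mapping from logical type to accepted extensions, mirroring predict()
file_extensions = {
    'csv': {'csv'},
    'json': {'json'},
    'excel': {'xls', 'xlsx'},
}

def get_columns(file_dir, file_type):
    # parse the dataset the same way predict() does, then report each
    # column's name and pandas dtype for comparison against the training
    # metadata stored in model_info['dataset_info']['columns']
    if file_type in file_extensions['csv']:
        df = pd.read_csv(file_dir)
    elif file_type in file_extensions['json']:
        df = pd.read_json(file_dir, orient='split')
    elif file_type in file_extensions['excel']:
        df = pd.read_excel(file_dir, sheet_name=0)
    else:
        raise ValueError(f'Unsupported file type: {file_type}')
    return [{'name': name, 'type': str(dtype)} for name, dtype in df.dtypes.items()]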