def transform_input(data, cat_features, cont_features, norm_prefix):
    """Encode raw samples into a float tensor ready for inference.

    Selects the categorical + continuous feature columns from *data*,
    drops rows with missing values (empty strings count as missing),
    one-hot encodes the categorical columns with the persisted trained
    encoder, normalizes the continuous columns with the persisted
    continuous encoder, and returns the result as a torch.FloatTensor.

    Note: this will fail if any of the continuous variables are missing
    from *data*.
    """
    selected = data[cat_features + cont_features].copy()

    # Treat empty strings as nulls, then drop any incomplete rows and
    # renumber the surviving rows from zero.
    cleaned = selected.replace('', np.nan)
    cleaned = cleaned.dropna().reset_index(drop=True)

    # One-hot encode the categorical columns, dropping the originals.
    # Reuse the same categorical encoder that we trained on.
    cat_enc = afs.read_object(
        afs.get_file_location(afs.CATEGORICAL_ENCODER_FILE))
    one_hot = cat_enc.transform(cleaned.copy())

    # The continuous encoder was written by us, so its methods aren't
    # exactly consistent with the categorical data encoder.
    cont_enc = afs.read_object(
        afs.get_file_location(afs.CONTINUOUS_ENCODER_FILE))

    # Normalize the continuous columns, dropping the originals.
    normalized = afs.fit_normalized(one_hot, cont_enc, norm_prefix)

    # DataFrame -> python list -> tensor.
    return torch.FloatTensor(normalized.values.tolist())
def load_model_from_file():
    """Build the model skeleton and load its trained weights from disk.

    Returns the model with its saved state_dict applied.
    """
    model = afs.get_model()
    weights_path = afs.get_file_location(afs.MODEL_FILE)

    # Note the use of the CPU here.  Currently, the docker image does not
    # support CUDA. FIXME: It should be possible to do a GPU at runtime.
    model.load_state_dict(torch.load(weights_path, map_location='cpu'))
    return model
 def predict_rand():
     """Stub prediction: score one row of random values.

     NOTE(review): this fragment references module-level 'config' and
     'get_json_response', neither of which is defined in this chunk --
     confirm both exist at import time.  The one-space indentation also
     looks like an extraction artifact; verify it parses in context.
     """
     hyper = afs.read_object(afs.get_file_location(afs.HYPER_FILE))
     input_tensor = torch.rand(
         1, hyper.input_size)  # one row of columns with random values
     return get_json_response(input_tensor, config['collection'])
 def gethyper():
     """Return the persisted hyperparameter object as JSON.

     NOTE(review): the one-space indentation looks like an extraction
     artifact; verify it parses in context.
     """
     hyper = afs.read_object(afs.get_file_location(afs.HYPER_FILE))
     return jsonify({'hyper': hyper})
def create_app():
    """Build the Flask inference app and register its routes.

    Loads the datastore/model configuration once at creation time; the
    route closures capture it.  Returns the configured Flask app.
    """
    app = Flask(__name__)

    # Get the configuration for datastore and model.
    #
    config = afs.read_object(afs.get_file_location(afs.CONFIG_FILE))
    print("Inference service was started.")

    def _predict_stub(make_tensor):
        # Shared body of the three stub prediction routes: build one row
        # of hyper.input_size columns with make_tensor and score it.
        hyper = afs.read_object(afs.get_file_location(afs.HYPER_FILE))
        input_tensor = make_tensor(1, hyper.input_size)
        return get_json_response(input_tensor, config['collection'])

    @app.route("/")
    def status():
        # Liveness probe.
        return jsonify({"status": "ok"})

    @app.route('/gethyper', methods=['GET', 'POST'])
    def gethyper():
        hyper = afs.read_object(afs.get_file_location(afs.HYPER_FILE))
        return jsonify({'hyper': hyper})

    @app.route('/getmodel', methods=['GET', 'POST'])
    def getmodel():
        # repr() yields the same text that print() would show.
        # NOTE(review): 'model' is not defined in this function or
        # anywhere visible in this chunk -- presumably a module-level
        # global; confirm it is set before this route is hit.
        return jsonify({'model': repr(model)})

    # The prediction stubs feed fixed or random tensors through the model
    # so the service can be smoke-tested without real input.
    #
    @app.route('/predict_zeroes', methods=['GET', 'POST'])
    def predict_zeroes():
        return _predict_stub(torch.zeros)  # one row of zeroes

    @app.route('/predict_ones', methods=['GET', 'POST'])
    def predict_ones():
        return _predict_stub(torch.ones)  # one row of ones

    @app.route('/predict_rand', methods=['GET', 'POST'])
    def predict_rand():
        return _predict_stub(torch.rand)  # one row of random values

    # Allow the POST input to be either an array of samples, or a single row.
    # Currently, only works for one row.  FIXME.
    #
    @app.route('/predict', methods=['GET', 'POST'])
    def predict():
        content = request.get_json()
        if content is None:
            # No JSON body: return an empty prediction envelope.
            return jsonify({
                'predict_index': None,
                'predict_human': None,
                'probabilities': None
            })

        list_content = content if isinstance(content, list) else [content]

        # The Docker image may throw a deprecation warning, like this:
        #   ... pandas.io.json.json_normalize is deprecated,
        #       use pandas.json_normalize instead
        # However, don't make that change until the non-docker environment
        # uses the same pandas version as the docker image.
        #
        raw_data = pd.io.json.json_normalize(list_content)
        input_tensor = transform_input(
            raw_data, config['collection']['cat_features'],
            config['collection']['cont_features'], afs.norm_prefix)
        return get_json_response(input_tensor, config['collection'])

    return app
# Example #6
# Read the configuration based on the --config option
#
# NOTE(review): 'arglist' is not defined in this chunk -- presumably the
# parsed command-line arguments (argparse namespace) from elsewhere in
# the file; confirm.  'arglist.config' is the module path of a config
# module exposing a 'config' attribute.
#
import importlib
cfg = importlib.import_module(arglist.config)
config = cfg.config

# Save the configuration in a well-known pickle file, so at inference time,
# it will be easy to find.
#
# This will save the object 'obj' to disk at file 'target'
#
def save_to_disk(obj, target):
    """Pickle *obj* to the file at path *target*.

    Uses a context manager so the file handle is closed even if
    pickle.dump raises; the original manual open/close leaked the
    handle on error.
    """
    with open(target, 'wb') as filehandler:
        pickle.dump(obj, filehandler)
# Write the configuration to its well-known location on disk.
save_to_disk(config, afs.get_file_location(afs.CONFIG_FILE))

# +
# A utility for making a connection to mongo.
#
# Returns None if 'db' does not exist in the mongodb instance.
# Will fail if host/port don't specify a mongodb endpoint, or
# if username/password are needed and are incorrect.
#
def _connect_mongo(host, port, username, password, db):
    if username and password:
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)