def transform_input(data, cat_features, cont_features, norm_prefix):
    """Turn raw feature rows into a float tensor ready for the model.

    Args:
        data: DataFrame holding at least the columns named in
            ``cat_features`` and ``cont_features``.
        cat_features: Names of the categorical feature columns.
        cont_features: Names of the continuous feature columns.
        norm_prefix: Passed through unchanged to ``afs.fit_normalized``.

    Returns:
        A ``torch.FloatTensor`` built from the encoded rows. Rows that
        contain an empty string or a null in any selected column are
        dropped before encoding.

    Raises:
        KeyError: Raised by the column selection if any requested feature
            column is missing from ``data``.
    """
    # Selecting the columns up front is what fails when a feature
    # (categorical or continuous) is absent from the input.
    selected = data[cat_features + cont_features].copy()

    # Empty strings count as nulls; incomplete rows are discarded and the
    # survivors are renumbered from zero.
    cleaned = selected.replace('', np.nan, inplace=False).dropna()
    cleaned = cleaned.reset_index(drop=True)

    # Reuse the categorical encoder that was persisted at training time.
    # It is handed a copy so the cleaned frame itself is left untouched.
    cat_encoder_path = afs.get_file_location(afs.CATEGORICAL_ENCODER_FILE)
    cat_encoder = afs.read_object(cat_encoder_path)
    one_hot_frame = cat_encoder.transform(cleaned.copy())

    # The continuous encoder is a project-specific object, so it is applied
    # through afs.fit_normalized rather than through a transform() method.
    cont_encoder_path = afs.get_file_location(afs.CONTINUOUS_ENCODER_FILE)
    cont_encoder = afs.read_object(cont_encoder_path)
    normalized_frame = afs.fit_normalized(
        one_hot_frame, cont_encoder, norm_prefix)

    # DataFrame -> nested python list -> tensor.
    return torch.FloatTensor(normalized_frame.values.tolist())
def load_model_from_file():
    """Instantiate the model and populate it with the saved parameters.

    Returns:
        The object returned by ``afs.get_model()`` after its state dict has
        been loaded from the file at ``afs.MODEL_FILE``.
    """
    net = afs.get_model()
    weights_path = afs.get_file_location(afs.MODEL_FILE)

    # The parameters are mapped onto the CPU on purpose: the docker image
    # currently has no CUDA support.
    # FIXME: It should be possible to do a GPU at runtime.
    saved_state = torch.load(weights_path, map_location='cpu')
    net.load_state_dict(saved_state)
    return net
def predict_rand():
    """Return the JSON prediction response for one row of random inputs.

    The row width is taken from ``input_size`` of the object stored in the
    hyper-parameter file.
    """
    hyper_path = afs.get_file_location(afs.HYPER_FILE)
    hyper_params = afs.read_object(hyper_path)

    # A single row whose columns are filled with random values.
    random_row = torch.rand(1, hyper_params.input_size)
    return get_json_response(random_row, config['collection'])
def gethyper():
    """Return the stored hyper-parameter object wrapped in a JSON response."""
    hyper_path = afs.get_file_location(afs.HYPER_FILE)
    payload = {'hyper': afs.read_object(hyper_path)}
    return jsonify(payload)
def create_app():
    """Build the Flask application that serves the inference endpoints.

    The service configuration is read once, when the app is created. The
    hyper-parameter file is re-read on every request that needs it.

    Routes:
        /                 liveness check
        /gethyper         stored hyper-parameter object
        /getmodel         printable representation of the model
        /predict_zeroes   prediction for one all-zeroes row
        /predict_ones     prediction for one all-ones row
        /predict_rand     prediction for one random row
        /predict          prediction for the JSON sample in the request body

    Returns:
        The configured ``Flask`` application.
    """
    app = Flask(__name__)

    # Get the configuration for datastore and model.
    #
    config = afs.read_object(afs.get_file_location(afs.CONFIG_FILE))

    # Newer pandas exposes json_normalize at the top level and deprecates
    # the pandas.io.json location; older pandas only has the latter. Resolve
    # whichever exists so the docker and non-docker environments can run
    # different pandas versions without warnings or failures.
    #
    json_normalize = getattr(pd, 'json_normalize', None)
    if json_normalize is None:
        json_normalize = pd.io.json.json_normalize

    print("Inference service was started.")

    def read_hyper():
        # Load the hyper-parameter object saved alongside the model.
        return afs.read_object(afs.get_file_location(afs.HYPER_FILE))

    def respond(input_tensor):
        # Build the JSON prediction response for the given input tensor.
        return get_json_response(input_tensor, config['collection'])

    @app.route("/")
    def status():
        return jsonify({"status": "ok"})

    @app.route('/gethyper', methods=['GET', 'POST'])
    def gethyper():
        return jsonify({'hyper': read_hyper()})

    @app.route('/getmodel', methods=['GET', 'POST'])
    def getmodel():
        # repr() yields the same text that print() would show.
        # NOTE(review): `model` is not defined inside create_app; it is
        # expected to exist at module scope -- confirm.
        #
        modelrep = repr(model)
        return jsonify({'model': modelrep})

    # The prediction stubs below feed a synthetic one-row tensor to the
    # model so the service can be tested without real input data.
    #
    @app.route('/predict_zeroes', methods=['GET', 'POST'])
    def predict_zeroes():
        # one row of columns with zeroes
        return respond(torch.zeros(1, read_hyper().input_size))

    @app.route('/predict_ones', methods=['GET', 'POST'])
    def predict_ones():
        # one row of columns with ones
        return respond(torch.ones(1, read_hyper().input_size))

    @app.route('/predict_rand', methods=['GET', 'POST'])
    def predict_rand():
        # one row of columns with random values
        return respond(torch.rand(1, read_hyper().input_size))

    # Allow the POST input to be either an array of samples, or a single row.
    # Currently, only works for one row. FIXME.
    #
    @app.route('/predict', methods=['GET', 'POST'])
    def predict():
        content = request.get_json()
        if content is None:
            # No JSON body: answer with an explicit "no prediction" record.
            return jsonify({
                'predict_index': None,
                'predict_human': None,
                'probabilities': None
            })

        # A single sample is wrapped so both shapes become a list of rows.
        list_content = content if isinstance(content, list) else [content]
        raw_data = json_normalize(list_content)
        input_tensor = transform_input(
            raw_data,
            config['collection']['cat_features'],
            config['collection']['cont_features'],
            afs.norm_prefix)
        return respond(input_tensor)

    return app
# Read the configuration based on the --config option
#
import importlib

cfg = importlib.import_module(arglist.config)
config = cfg.config


# This will save the object 'obj' to disk at file 'target'
#
def save_to_disk(obj, target):
    """Pickle ``obj`` into the file at path ``target``.

    The file is opened in binary write mode, so existing content is
    replaced. The context manager closes the handle even when pickling
    raises.
    """
    with open(target, 'wb') as filehandler:
        pickle.dump(obj, filehandler)


# Save the configuration in a well-known pickle file, so at inference time,
# it will be easy to find.
#
save_to_disk(config, afs.get_file_location(afs.CONFIG_FILE))

# +
# A utility for making a connection to mongo.
#
# Returns null if 'db' does not exist in the mongodb instance.
# Will fail if host/port don't specify a mongodb endpoint, or
# if username/password are needed and are incorrect.
#
def _connect_mongo(host, port, username, password, db):
    if username and password:
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)