def run(request_body: dict):
    # Get agent/skill activation request body
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    connection_name = request_body["payload"]["connection_name"]
    query = request_body["payload"]["query"]
    try:
        # Create Cortex client
        client = Cortex.client(api_endpoint=api_endpoint, project=project, token=token)
        # Get connection and create mongo client
        connection = client.get_connection(connection_name)
        params = dict(map(lambda l: (l['name'], l['value']), connection['params']))
        mongo = MongoClient(params["uri"])
        # Use connection
        database = params.get("database")
        collection = params.get("collection")
        if database and collection:
            result = list(mongo[database][collection].find(query))
        else:
            result = {"error": "collection, database and query must be provided"}
    except Exception as e:
        result = {"error": str(e)}
        logger.exception(e)
    # Return result
    return {'payload': result}
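# Illustrative only: a minimal sketch of the activation request body the `run`
# function above expects. The endpoint, project, token, and connection name are
# hypothetical placeholder values; `query` is any valid pymongo find() filter.
example_request = {
    "apiEndpoint": "https://api.example-cortex.com",  # hypothetical endpoint
    "projectId": "my-project",                        # hypothetical project
    "token": "<JWT token>",
    "payload": {
        "connection_name": "my-mongo-connection",     # hypothetical connection
        "query": {"status": "active"}                 # hypothetical MongoDB filter
    }
}
# response = run(example_request)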
def run(req: dict):
    payload = req["payload"]
    instances = payload["instances"]
    exp_name = payload["exp_name"]
    run_id = None
    if "run_id" in payload:
        run_id = payload["run_id"]
    # initialize the model and cache it in the context
    client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
    model_ctx[exp_name] = init_model(exp_name, run_id, client, req["projectId"])
    # retrieve model from the context
    model_obj = model_ctx[exp_name]
    # using encoder from model object
    encoder = model_obj["encoder"]
    instances = np.array(instances, dtype=object)
    instances = instances if instances.ndim == 2 else np.reshape(instances, (1, -1))
    instances = encoder(instances)
    # predict
    predictions = model_obj["model"].predict(instances)
    scores = model_obj["model"].predict_proba(instances)
    labels = model_obj["model"].classes_
    return {
        "payload": {
            "predictions": predictions.tolist(),
            "scores": scores.tolist(),
            "labels": labels.tolist()
        }
    }
def domain_controller(params: dict):
    # initialise the Cortex client
    api_endpoint = params.get('apiEndpoint')
    token = params.get('token')
    project_id = params.get('projectId')
    client = Cortex.client(api_endpoint=api_endpoint, token=token, project=project_id)
    # # get secrets keys
    # os.environ["AWS_SECRET_ACCESS_KEY"] = str(client.get_secret("awssecretkey"))
    # os.environ["AWS_ACCESS_KEY_ID"] = str(client.get_secret("awspublickey"))
    # just in case there are old environment variables for hadron
    for key in list(os.environ.keys()):
        if key.startswith('HADRON'):
            del os.environ[key]
    # extract the payload
    payload = params.get('payload', {})
    # get the domain contract repo from the payload
    uri_pm_repo = payload.get('domain_contract_repo')
    if not isinstance(uri_pm_repo, str):
        raise KeyError("The message parameters passed do not have the mandatory 'domain_contract_repo' payload key")
    # extract any extra kwargs
    hadron_kwargs = payload.get('hadron_kwargs', {})
    # export and pop any environment variables from the kwargs
    for key in hadron_kwargs.copy().keys():
        if str(key).isupper():
            os.environ[key] = hadron_kwargs.pop(key)
    # pop the run_controller attributes from the kwargs
    run_book = hadron_kwargs.pop('runbook', None)
    mod_tasks = hadron_kwargs.pop('mod_tasks', None)
    repeat = hadron_kwargs.pop('repeat', None)
    sleep = hadron_kwargs.pop('sleep', None)
    run_time = hadron_kwargs.pop('run_time', None)
    run_cycle_report = hadron_kwargs.pop('run_cycle_report', None)
    source_check_uri = hadron_kwargs.pop('source_check_uri', None)
    # instantiate the Controller passing any remaining kwargs
    controller = Controller.from_env(uri_pm_repo=uri_pm_repo, default_save=False, has_contract=True, **hadron_kwargs)
    # run the controller nano services
    controller.run_controller(run_book=run_book, mod_tasks=mod_tasks, repeat=repeat, sleep=sleep,
                              run_time=run_time, source_check_uri=source_check_uri,
                              run_cycle_report=run_cycle_report)
def delete(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    result = SessionClient(client).delete_session(session_id, req["projectId"])
    return {"payload": result}
def start(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
    ttl = None
    description = "No description given"
    if "ttl" in payload:
        ttl = payload["ttl"]
    if "description" in payload:
        description = payload["description"]
    session_client = SessionClient(client)
    session = session_client.start_session(ttl, description, req["projectId"])
    return {'payload': {"session_id": session}}
def run(request_body: dict):
    # Get agent/skill activation request body
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    experiment_name = request_body["payload"]["experiment_name"]
    instance = request_body["payload"]["instance"]
    # Create Cortex client and get experiment
    client = Cortex.client(api_endpoint=api_endpoint, project=project, token=token)
    experiment = client.experiment(experiment_name)
    # Get model from last experiment run
    exp_run = experiment.last_run()
    model = exp_run.get_artifact('model')
    # Return model prediction
    return {'payload': model.predict(instance).tolist()}
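# Illustrative only: a sketch of the request body for the prediction skill above.
# The experiment name and feature values are hypothetical; `instance` must match
# the feature layout the logged model was trained on.
example_request = {
    "apiEndpoint": "https://api.example-cortex.com",  # hypothetical endpoint
    "projectId": "my-project",                        # hypothetical project
    "token": "<JWT token>",
    "payload": {
        "experiment_name": "my-experiment",           # hypothetical experiment
        "instance": [[0.1, 0.2, 0.3]]                 # hypothetical feature vector(s)
    }
}
# response = run(example_request)  # returns {'payload': [<predictions>]}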
def process(params):
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'], token=params['token'])
    # get the agent payload
    payload = params.get('payload', {})
    # You can print logs to the console; these are collected by docker/k8s
    print(f'Got payload: {payload}')
    # use the `client` instance to use Cortex client libraries
    content_client = ManagedContentClient(client)
    if 'activationId' in params:
        file_name = f'jobchain-data-{params["activationId"]}'
    else:
        file_name = f'jobchain-data-{int(time.time())}'
    # Read `recordCount` from the payload; providing a default value (or raising
    # an exception when it is missing) is recommended.
    record_count = payload.get('recordCount', 1000)
    # This streams the records to Cortex's managed content
    content_client.upload_streaming(file_name, datagen_stream(record_count), 'application/x-jsonlines')
    print(f'Wrote datafile to managed content key: {file_name}')
def put(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    data = {}
    if "data" in payload:
        data = payload["data"]
    else:
        return {'payload': "data is required"}
    result = SessionClient(client).put_session_data(session_id, data, req["projectId"])
    return {"payload": result}
def get(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    key = None
    if "key" in payload:
        key = payload["key"]
        if len(key) < 1:
            key = None
    session_client = SessionClient(client)
    session = session_client.get_session_data(session_id, key, req["projectId"])
    return {'payload': session}
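# Illustrative only: how the session skill functions above (start, put, get,
# delete) could be exercised together. The request skeleton values are
# hypothetical; each call reuses the same apiEndpoint, projectId, and token
# fields the functions read.
base = {"apiEndpoint": "https://api.example-cortex.com", "projectId": "my-project", "token": "<JWT token>"}

started = start({**base, "payload": {"ttl": 600, "description": "demo session"}})
session_id = started["payload"]["session_id"]

put({**base, "payload": {"session_id": session_id, "data": {"step": 1}}})
fetched = get({**base, "payload": {"session_id": session_id, "key": "step"}})
delete({**base, "payload": {"session_id": session_id}})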
def process(params):
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'], token=params['token'])
    # get the agent payload
    payload = params['payload']
    # You can print logs to the console; these are collected by docker/k8s
    print(f'Got payload: {payload}')
    if 'activationId' in params:
        content_key = f'jobchain-data-{params["activationId"]}'
    else:
        if 'datafileKey' not in payload:
            raise Exception("'datafileKey' is required in the payload")
        content_key = payload['datafileKey']
    print(f'Fetching datafile from managed content: {content_key}')
    # use the `client` instance to use Cortex client libraries
    content_client = ManagedContentClient(client)
    # Download the records previously written to Cortex's managed content
    # (the key is derived from the activation id if this was called as part of an agent)
    content = content_client.download(content_key)
    df = pd.read_json(content, lines=True)
    counts = df['color'].value_counts()
    print(f'{counts.to_json()}')
def load_model(api_endpoint: str, token: str, project_id: str, experiment_name: str, run_id: str, artifact_key: str):
    global model
    if not experiment_name:
        raise ValueError("experiment-name is required if a model is not initialized")
    # Initialize Cortex Client
    client = Cortex.client(api_endpoint=api_endpoint, token=token, project=project_id)
    # Load Model from the experiment run
    logging.info("Loading model artifacts from experiment run...")
    try:
        experiment = client.experiment(experiment_name)
        run = experiment.get_run(run_id) if run_id else experiment.last_run()
        model = run.get_artifact(artifact_key)
    except Exception as e:
        logging.error("Error: Failed to load model: {}".format(e))
        raise
    logging.info("Model Loaded!")
    # Save model
    pickle.dump(clf, open(local_pickle_file, "wb"))


# The starting point for the job
if __name__ == '__main__':
    # Get agent/skill activation request body
    request_body = json.loads(sys.argv[1])
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    experiment_name = request_body["payload"]["experiment_name"]
    train_and_save_model()
    # Create Cortex client and create experiment
    client = Cortex.client(api_endpoint=api_endpoint, project=project, token=token)
    experiment = client.experiment(experiment_name)
    # Upload model to experiment run in Cortex
    model = open(local_pickle_file, "rb")
    run = experiment.start_run()
    run.log_artifact_stream("model", model)
    run.set_meta("algo", "RandomForestClassifier Model")
    print(f'Created experiment "{experiment_name}". Started Run {run.id}. Logged RandomForestClassifier model.')
def train(params):
    project = params['projectId']
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'], project=project, token=params['token'])
    payload = params['payload']

    # Read connection
    connection_name = payload['connection_name']
    print(f'Reading connection {connection_name}')
    connection = client.get_connection(connection_name)

    # Download training data using connection
    download_training_data(connection)
    print(f'Downloaded training data for {connection_name}')

    random.seed(0)
    np.random.seed(0)

    # Load dataset
    data = pd.read_csv('german_credit_eval.csv')

    # Separate outcome
    y = data['outcome']
    x = data.drop('outcome', axis=1)

    # Bring in test and training data
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

    # Create an encoder
    cat_columns = [
        'checkingstatus', 'history', 'purpose', 'savings', 'employ', 'status', 'others',
        'property', 'age', 'otherplans', 'housing', 'job', 'telephone', 'foreign'
    ]
    encoder = CatEncoder(cat_columns, x, normalize=True)
    encoded_x_train = encoder(x_train.values)
    encoded_x_test = encoder(x_test.values)

    # Train a decision tree model
    dtree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    dtree.fit(encoded_x_train, y_train.values)
    dtree_acc = dtree.score(encoded_x_test, y_test.values)

    # Train a multi-layer perceptron model
    mlp = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000)
    mlp.fit(encoded_x_train, y_train.values)
    mlp_acc = mlp.score(encoded_x_test, y_test.values)

    # Train a support vector machine model
    SVM = svm.SVC(gamma='scale', probability=True)
    SVM.fit(encoded_x_train, y_train.values)
    svm_acc = SVM.score(encoded_x_test, y_test.values)

    # Train a logistic regression model
    logit = LogisticRegression(random_state=0, solver='lbfgs')
    logit.fit(encoded_x_train, y_train.values)
    logit_acc = logit.score(encoded_x_test, y_test.values)

    # Save model meta-data
    model_name = payload["model_name"]
    save_model(client, project, model_name, payload.get("model_title", ""),
               payload.get("model_description", ""), payload.get("model_source", ""),
               payload.get("model_type", ""), payload.get("model_status", ""),
               payload.get("model_tags", []))

    # Save models as pickle files and save experiments
    pickle_model(dtree, encoder, 'Decision Tree', dtree_acc, 'Basic Decision Tree model', 'german_credit_dtree.pkl')
    pickle_model(logit, encoder, 'LOGIT', logit_acc, 'Basic LOGIT model', 'german_credit_logit.pkl')
    pickle_model(mlp, encoder, 'MLP', mlp_acc, 'Basic MLP model', 'german_credit_mlp.pkl')
    pickle_model(SVM, encoder, 'SVM', svm_acc, 'Basic SVM model', 'german_credit_svm.pkl')

    save_experiment(client, 'gc_dtree_exp', 'german_credit_dtree.pkl', 'DecisionTreeClassifier', model_name, project)
    save_experiment(client, 'gc_logit_exp', 'german_credit_logit.pkl', 'LogisticRegression', model_name, project)
    save_experiment(client, 'gc_mlp_exp', 'german_credit_mlp.pkl', 'MLPClassifier', model_name, project)
    save_experiment(client, 'gc_svm_exp', 'german_credit_svm.pkl', 'SVM', model_name, project)
def make_batch_predictions(input_params):
    conn_params = {}
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    skill_name = input_params["skillName"]
    outcome = input_params["properties"]["outcome"]

    # Initialize Cortex Client
    client = Cortex.client(api_endpoint=url, token=token, project=project)

    # Read cortex connection details
    connection = client.get_connection(input_params["properties"]["connection-name"])
    for p in connection['params']:
        conn_params.update({p['name']: p['value']})
    log_message(msg=f"Connection Params: {str(conn_params)}", log=get_logger(skill_name), level=logging.INFO)

    # Load Model from the experiment run
    model = load_model(client, input_params["properties"]["experiment-name"],
                       input_params["properties"]["run-id"], project)

    if connection.get("connectionType") == "s3":
        output_path = input_params["properties"]["output-path"]
        secret_key = input_params["properties"]["aws-secret"]
        conf = SparkConf().set("fs.s3a.access.key", conn_params.get('publicKey')) \
            .set("fs.s3a.secret.key", secret_key) \
            .set("fs.s3a.endpoint", conn_params.get("s3Endpoint")) \
            .set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
        # Initialize spark session
        spark = initialize_spark_session(conf=conf)
        sc = spark.sparkContext
        # Get S3 file path of the dataset
        file = conn_params["uri"]
        # Create spark data-frame for prediction
        df = spark.read.option("inferSchema", True).csv(file, header=True)
        df = df.drop(outcome)
        log_message(msg=f"DataFrame Schema: {str(df.printSchema())}", log=get_logger(skill_name), level=logging.INFO)
        # Make predictions
        df = score_predictions(df, model, outcome, sc, skill_name)
        # Converting struct to double
        for t in df.dtypes:
            if t[1] == "struct<_1:double>":
                df = df.withColumn(t[0], col(t[0]).getField("_1"))
        # Writing to output
        df.write.csv(output_path, mode='append', header=True)
    elif connection.get("connectionType") == "mongo":
        output_collection = input_params["properties"]["output-collection"]
        mongo_uri = input_params["properties"]["mongo-uri"]
        database = conn_params.get("database")
        collection = conn_params.get("collection")
        spark = initialize_spark_session(conf=None)
        sc = spark.sparkContext
        df = spark.read.format("com.mongodb.spark.sql.DefaultSource").option("uri", mongo_uri) \
            .option("database", database) \
            .option("collection", collection).load()
        df = df.drop(outcome, "_id")
        log_message(msg=f"DataFrame Schema: {str(df.printSchema())}", log=get_logger(skill_name), level=logging.INFO)
        # Score predictions
        df = score_predictions(df, model, outcome, sc, skill_name)
        # Converting struct to double
        for t in df.dtypes:
            if t[1] == "struct<_1:double>":
                df = df.withColumn(t[0], col(t[0]).getField("_1"))
        # Writing to output
        df.write.format("com.mongodb.spark.sql.DefaultSource") \
            .mode("append").option("uri", mongo_uri) \
            .option("database", database) \
            .option("collection", output_collection).save()
    else:
        # Implement based on requirement
        spark = initialize_spark_session(conf=None)
    spark.stop()
import json

from cortex import Cortex
from cortex.model import Model, ModelClient
from cortex.experiment import Experiment, ExperimentClient
from cortex.connection import ConnectionClient, Connection
from cortex.skill import SkillClient

params = {
    "projectId": PROJECT_ID,
    "apiEndpoint": API_ENDPOINT,
    "token": CORTEX_TOKEN
}

if __name__ == "__main__":
    client = Cortex.client(api_endpoint=params['apiEndpoint'], project=params['projectId'], token=params['token'])
    cc = ConnectionClient(client)

    conn_params = {}
    with open("conn.json") as f:
        conn_params = json.load(f)
    conn_params["params"] = []
    for name, value in CONN_PARAMS.items():
        conn_params["params"].append({"name": name, "value": value})

    # create a secret called awssecretadmin in your project which contains the aws secret key
    # create a connection
    cc.save_connection(project=PROJECT_ID, connection=conn_params)
    for y in val.keys():
        s_val = val[y]
        args.append(key)
        args.append("{}={}".format(y, s_val))
    return args


if __name__ == '__main__':
    input_params = json.loads(sys.argv[1])
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    skill_name = input_params["skillName"]
    experiment_name = input_params["properties"]["experiment-name"]
    run_id = input_params["properties"]["run-id"]

    client = Cortex.client(api_endpoint=url, token=token, project=project)
    experiment_client = ExperimentClient(client)
    result = experiment_client.get_experiment(experiment_name, project)
    experiment = Experiment(result, project, experiment_client)
    run = experiment.get_run(run_id)

    conn_params = {}
    connection = client.get_connection(input_params["properties"]["connection-name"])
    for p in connection['params']:
        conn_params.update({p['name']: p['value']})

    spark_config = run.get_artifact('spark-config')
    log_message(msg=f"Spark Config: {str(spark_config)}", log=get_logger(skill_name), level=logging.INFO)
    run_args = get_runtime_args(spark_config)
    run_args.append("local:///opt/spark/work-dir/src/main/python/main.py")
def make_batch_predictions(input_params):
    logging.info("Batch Prediction: Invoke Request:{}".format(input_params))
    conn_params = {}
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    outcome = input_params["properties"]["outcome"]
    batch_size = int(input_params["properties"]["batch-size"])
    try:
        # Initialize Cortex Client
        client = Cortex.client(api_endpoint=url, token=token, project=project)

        # Read cortex connection details
        connection = client.get_connection(input_params["properties"]["connection-name"])
        for p in connection['params']:
            conn_params.update({p['name']: p['value']})
        print(conn_params)
        logging.info("connection params: %s", conn_params)

        # Load Model from the experiment run
        model = load_model(client, input_params["properties"]["experiment-name"],
                           input_params["properties"]["run-id"],
                           input_params["properties"]["model-artifact"])
        logging.info("Model Loaded!")

        if connection.get("connectionType") == "s3":
            s3_output_path = input_params["properties"]["output-path"]
            # Get S3 file path of the dataset
            uri = conn_params["uri"]
            s3_client = init_s3_client(conn_params.get('publicKey'), conn_params.get('secretKey'))
            local_path = download_file(s3_client, uri)
            output_path = 'temp.csv'
            for chunked_df in pd.read_csv(local_path, header=0, sep=",", chunksize=batch_size):
                if outcome in chunked_df.columns.tolist():
                    chunked_df = chunked_df.drop(outcome, axis=1)
                logging.info("Processing records of size: {}".format(chunked_df.shape[0]))
                # Score Predictions for a Batch
                predicted_df = score_predictions(chunked_df, model=model)
                if not os.path.isfile(output_path):
                    predicted_df.to_csv(output_path, index=False)
                else:
                    predicted_df.to_csv(output_path, mode='a', header=False, index=False)
            # Uploading file to S3
            upload_file(s3_client, output_path, s3_output_path)
        elif connection.get("connectionType") == "mongo":
            output_collection = input_params["properties"]["output-collection"]
            mongo_uri = conn_params.get("uri")
            database = conn_params.get("database")
            collection = conn_params.get("collection")
            client = pymongo.MongoClient(mongo_uri)
            total_records = client[database][collection].count({})
            skip_records = 0
            while skip_records < total_records:
                cursor = client[database][collection].find({}).limit(batch_size).skip(skip_records)
                # Expand the cursor and construct the DataFrame
                chunked_df = pd.DataFrame(list(cursor))
                if not chunked_df.empty:
                    if outcome in chunked_df.columns.tolist():
                        chunked_df = chunked_df.drop([outcome, "_id"], axis=1)
                    logging.info("Processing records of size: {}".format(chunked_df.shape[0]))
                    # Score Predictions for a Batch
                    predicted_df = score_predictions(chunked_df, model=model)
                    predicted_df.reset_index(inplace=True, drop=True)
                    data_dict = predicted_df.to_dict("records")
                    # Insert into the output collection
                    client[database][output_collection].insert_many(data_dict)
                    skip_records += batch_size
                else:
                    break
            client.close()
        logging.info("Prediction Job Completed!")
    except Exception as e:
        logging.error("Error while processing batch predictions. Message: %s", e)
def get_interfaces(schema_version):
    cortex = Cortex.client()
    builder = ProfileBuilder(cortex)
    profile_schema = builder.profiles(schema_version)
    return profile_schema